# <center> Binary Classification Of Sonar Returns

This is a dataset that describes
sonar chirp returns bouncing o↵ di↵erent surfaces. The 60 input variables are the strength of
the returns at di↵erent angles. It is a binary classification problem that requires a model to
di↵erentiate rocks from metal cylinders.
It is a well understood dataset. All of the variables are continuous and generally in the
range of 0 to 1. The output variable is a string M for mine and R for rock, which will need to be
converted to integers 1 and 0. The dataset contains 208 observations.

## Import Classes and Functions

In [1]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout
from scikeras.wrappers import KerasClassifier
from keras.constraints import maxnorm
from keras.optimizers import SGD
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from IPython.display import Markdown as md

## Initialize Random Number Generator

In [2]:
seed = 7
np.random.seed(seed)

## Load The Dataset

In [3]:
df = pd.read_csv('sonar.csv', header=None)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,0.0200,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.0180,0.0084,0.0090,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.0140,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.2280,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.0180,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.0100,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.0150,0.0085,0.0073,0.0050,0.0044,0.0040,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.0590,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.0110,0.0015,0.0072,0.0048,0.0107,0.0094,R
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203,0.0187,0.0346,0.0168,0.0177,0.0393,0.1630,0.2028,0.1694,0.2328,0.2684,...,0.0116,0.0098,0.0199,0.0033,0.0101,0.0065,0.0115,0.0193,0.0157,M
204,0.0323,0.0101,0.0298,0.0564,0.0760,0.0958,0.0990,0.1018,0.1030,0.2154,...,0.0061,0.0093,0.0135,0.0063,0.0063,0.0034,0.0032,0.0062,0.0067,M
205,0.0522,0.0437,0.0180,0.0292,0.0351,0.1171,0.1257,0.1178,0.1258,0.2529,...,0.0160,0.0029,0.0051,0.0062,0.0089,0.0140,0.0138,0.0077,0.0031,M
206,0.0303,0.0353,0.0490,0.0608,0.0167,0.1354,0.1465,0.1123,0.1945,0.2354,...,0.0086,0.0046,0.0126,0.0036,0.0035,0.0034,0.0079,0.0036,0.0048,M


In [4]:
# split into input (X) and output (y) variables
datasets = df.values
X = datasets[:,:-1].astype(float)
y = datasets[:,-1]

## Encode The Output Variable

In [5]:
encoder = LabelEncoder()
encoder.fit(y)
encoded_y = encoder.transform(y)

## Define The Neural Network Model

In [6]:
# baseline model
def create_basemodel():
    # create model
    model = Sequential()
    model.add(Dense(60, input_dim=X.shape[1], kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
    
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [7]:
# evaluate model with standardized dataset
np.random.seed(seed)
estimator = KerasClassifier(build_fn=create_basemodel, epochs=100, batch_size=5, verbose=0)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
result = cross_val_score(estimator, X, encoded_y, cv=kfold, verbose=0)
print('%.2f%% (%.2f%%)' %(result.mean()*100, result.std()*100))
md('Running this code produces the following output showing the mean and standard deviation of the estimated accuracy of the model on unseen data.')

  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)


77.86% (9.25%)


Running this code produces the following output showing the mean and standard deviation of the estimated accuracy of the model on unseen data.

## Improve Performance With Data Preparation

In [8]:
np.random.seed(seed)
estimator = []
estimator.append(('standardize', StandardScaler()))
estimator.append(('mlp', KerasClassifier(build_fn=create_basemodel, epochs=100, batch_size=5, verbose=0)))
pipeline = Pipeline(estimator)
kfold = StratifiedKFold(n_splits=100, shuffle=True, random_state=seed)
result_2 = cross_val_score(pipeline, X, encoded_y, cv=kfold, verbose=0)
print('Baseline: %.2f%% (%.2f%%)' %(result_2.mean()*100, result_2.std()*100))
md('There was a small but very nice lift in the mean accuracy. {}%'.format(round(result_2.mean()*100,2)))

  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)




  X, y = self._initialize(X, y)




  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y =

Baseline: 85.67% (22.61%)


There was a small but very nice lift in the mean accuracy. 85.67%

## Evaluate a Smaller Network

In [9]:
# smaller model
def create_smaller():
    # create model
    model = Sequential()
    model.add(Dense(30, input_dim=X.shape[1], kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
    
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
    
# evaluate baseline model with standardized dataset
np.random.seed(seed)
estimator = []
estimator.append(('standardize', StandardScaler()))
estimator.append(('mlp', KerasClassifier(build_fn=create_smaller, epochs=100, batch_size=5, verbose=0)))
pipeline = Pipeline(estimator)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
result_3 = cross_val_score(pipeline, X,encoded_y, cv=kfold, verbose=0)
print('Baseline: %.2f%% (%.2f%%)' %(result_3.mean()*100, result_3.std()*100))

  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)


Baseline: 83.64% (6.35%)


## Evaluate a Larger Network

In [10]:
# larger model
def create_larger():
    # create model
    model = Sequential()
    model.add(Dense(60, input_dim=X.shape[1], kernel_initializer='normal', activation='relu'))
    model.add(Dense(30, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
    
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# evaluate baseline model with standardized dataset
np.random.seed(seed)
estimator = []
estimator.append(('standardize', StandardScaler()))
estimator.append(('mlp', KerasClassifier(build_fn=create_larger, epochs=100, batch_size=5, verbose=0)))
pipeline = Pipeline(estimator)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
result_4 = cross_val_score(pipeline, X, encoded_y, cv=kfold, verbose=0)
print('Baseline: %.2f%% (%.2f%%)' %(result_4.mean()*100, result_4.std()*100))

  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)


Baseline: 84.07% (5.58%)


In [11]:
# standardized dataset
print('Standardized dataset \nBaseline: %.2f%% (%.2f%%)' %(result.mean()*100, result.std()*100))

# Improve Performance With Data Preparation
print('\nImproving Performance With Data Preparation \nBaseline: %.2f%% (%.2f%%)' %(result_2.mean()*100, result_2.std()*100))

# smaller model
print('\nsmaller model \nBaseline: %.2f%% (%.2f%%)' %(result_3.mean()*100, result_3.std()*100))

# Larger Network
print('\nLarger Network \nBaseline: %.2f%% (%.2f%%)' %(result_4.mean()*100, result_4.std()*100))

Standardized dataset 
Baseline: 77.86% (9.25%)

Improving Performance With Data Preparation 
Baseline: 85.67% (22.61%)

smaller model 
Baseline: 83.64% (6.35%)

Larger Network 
Baseline: 84.07% (5.58%)


## Implementing Dropout

In [12]:
# baseline
def create_baseline():
    # Create Model
    model = Sequential()
    model.add(Dense(60, input_dim=X.shape[1], kernel_initializer='normal', activation='relu'))
    model.add(Dense(30, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1,kernel_initializer='normal', activation='sigmoid'))
    
    # compile model
    sgd = SGD(lr=0.01, momentum=0.8, decay=0.0, nesterov=False)
    model.compile(loss='binary_crossentropy', optimiser=sgd, metrics=['accuracy'])
    return model

np.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, epochs=300, batch_size=16, verbose=0)))
pipeline = Pipeline(estimator)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
result_5 = cross_val_score(pipeline, X,encoded_y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" %(result_5.mean()*100, result_5.std()*100))
md('Running the example for the baseline model without drop-out generates an estimated classification accuracy of {}%'.format(round(result_5.mean()*100,2)))

  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)


Baseline: 83.62% (6.35%)


Running the example for the baseline model without drop-out generates an estimated classification accuracy of 83.62%

### Using Dropout on the Visible Layer

In [13]:
# dropout in the input layer with weight constraint
def create_model_il():
    #create model
    model = Sequential()
    model.add(Dropout(0.2, input_shape=(60,)))
    model.add(Dense(60, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    model.add(Dense(30, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
    
    #compile model
    sgd = SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=False)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model

In [14]:
np.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_model_il, epochs=300, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
result_6 = cross_val_score(pipeline, X,encoded_y, cv=kfold)
print('Visible: %.2f%% (%.2f%%)'%(result_6.mean()*100, result_6.std()*100))
md('Running the example with dropout in the visible layer provides a small lift in classification accuracy to {}%'.format(round(result_6.mean()*100,2)))

  X, y = self._initialize(X, y)
  super().__init__(name, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(name, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(name, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(name, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(name, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(name, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(name, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(name, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(name, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(name, **kwargs)


Visible: 82.67% (6.90%)


Running the example with dropout in the visible layer provides a small lift in classification accuracy to 82.67%

### Using Dropout on Hidden Layers

In [15]:
# dropout in hidden layers with weight constraint
def create_model_hl():
    #create model
    model = Sequential()
    model.add(Dense(60, input_dim=X.shape[1], kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    model.add(Dropout(0.2))
    model.add(Dense(30, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    model.add(Dropout(0.2))
    model.add(Dense(1, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    
    # compile model
    sgd = SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=False)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model

In [16]:
np.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_model_hl, epochs=300, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
result_7 = cross_val_score(pipeline, X,encoded_y, cv=kfold)
print('Hidden: %.2f%% (%.2f%%)' %(result_7.mean()*100, result_7.std()*100))

  X, y = self._initialize(X, y)
  super().__init__(name, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(name, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(name, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(name, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(name, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(name, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(name, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(name, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(name, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(name, **kwargs)


Hidden: 47.10% (2.37%)


In [17]:
# baseline
print('Standardized dataset \nBaseline: %.2f%% (%.2f%%)' %(result_5.mean()*100, result_5.std()*100))

# dropout in the input layer with weight constraint
print('\nImproving Performance With Data Preparation \nBaseline: %.2f%% (%.2f%%)' %(result_6.mean()*100, result_6.std()*100))

# dropout in the input layer with weight constraint
print('\nsmaller model \nBaseline: %.2f%% (%.2f%%)' %(result_7.mean()*100, result_7.std()*100))

Standardized dataset 
Baseline: 83.62% (6.35%)

Improving Performance With Data Preparation 
Baseline: 82.67% (6.90%)

smaller model 
Baseline: 47.10% (2.37%)


for this problem and for the chosen network configuration that using dropout
in the hidden layers did not lift performance. In fact, performance was worse than the baseline.
It is possible that additional training epochs are required or that further tuning is required to
the learning rate.