# Dropout Regularization Sonar Project for the Navy: 
**Mines vs. Rocks**


# Step : Dropout Regularization in Keras

In [1]:
# Baseline Model on the Sonar Dataset
import tensorflow as tf
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.constraints import maxnorm
from keras.optimizers import SGD
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline


Using TensorFlow backend.


In [0]:
# Fix the random seed for reproducibility: seed NumPy's global generator here;
# the same value is reused later as random_state for the StratifiedKFold splits.
seed = 7
np.random.seed(seed)

**Loading Sonar Dataset**

In [5]:
# Load the sonar dataset
import io
import os

device_name = tf.test.gpu_device_name()

# Only prompt for an upload when the CSV is not already on disk, so the
# notebook survives "Restart & Run All" and can also run outside Google Colab.
if not os.path.exists('sonar.csv'):
    from google.colab import files
    uploaded = files.upload()
else:
    uploaded = {}

# Read without a header row, so columns are addressed by integer position.
dataframe = pd.read_csv('sonar.csv', header=None)
dataset = dataframe.values

Saving sonar.csv to sonar.csv


**Splitting Data**

In [0]:
# Separate the inputs (first 60 columns, cast to float) from the output
# (column 60).
X = dataset[:, :60].astype(float)
Y = dataset[:, 60]

**Encoding Class Values As Integers**

In [0]:
# Fit the encoder on the class values and convert them to integers in one step.
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(Y)


**Creating Baseline Model**

In [0]:
def create_baseline():
    """Build and compile the baseline network (no dropout).

    Two ReLU hidden layers (60 and 30 units) on a 60-value input feed a single
    sigmoid output unit. The model is compiled with SGD and binary
    cross-entropy loss, tracking accuracy.

    Returns:
        The compiled Sequential model.
    """
    layers = [
        Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'),
        Dense(30, kernel_initializer='normal', activation='relu'),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    ]
    model = Sequential(layers)

    optimizer = SGD(lr=0.01, momentum=0.8, decay=0.0, nesterov=False)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model


In [10]:
# Evaluate the baseline with 10-fold stratified cross-validation; inputs are
# standardized inside the pipeline before reaching the network.
np.random.seed(seed)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_baseline, epochs=300, batch_size=16, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Baseline: 82.61% (7.67%)


# Step : Using Dropout on the Visible Layer
**Dropout can be applied to the input neurons, known as the visible layer.**

In [0]:
# dropout in the input layer with weight constraint

def create_model():
    """Build and compile a network with 20% dropout on the input layer.

    Both hidden layers carry a max-norm weight constraint of 3. The constraint
    is passed as `kernel_constraint`, the Keras 2 name of the argument
    (`W_constraint` is the legacy spelling and triggers a warning on every
    model build).

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    model.add(Dropout(0.2, input_shape=(60,)))
    model.add(Dense(60, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    model.add(Dense(30, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    # Compile model
    sgd = SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=False)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model

In [14]:
# Cross-validate the current create_model (10 stratified folds, standardized
# inputs, 300 epochs).
np.random.seed(seed)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_model, epochs=300, batch_size=16, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Visible: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  


Visible: 86.00% (5.60%)


# Step : Trying to Improve Performance
**It is possible that additional training epochs are required, or that the learning rate needs further tuning.**

In [0]:
# dropout in the input layer with weight constraint

def create_model():
    """Build and compile a network with 2% input dropout and learning rate 0.2.

    Both hidden layers carry a max-norm weight constraint of 3, passed as
    `kernel_constraint` (the Keras 2 name; `W_constraint` is the legacy
    spelling and triggers a warning on every model build).

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    model.add(Dropout(0.02, input_shape=(60,)))
    model.add(Dense(60, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    model.add(Dense(30, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    # Compile model
    sgd = SGD(lr=0.2, momentum=0.9, decay=0.0, nesterov=False)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model

In [16]:
# Cross-validate the current create_model, this time training for 400 epochs.
np.random.seed(seed)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_model, epochs=400, batch_size=16, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Visible: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  


Visible: 58.62% (12.68%)


# Step : Using Dropout on Hidden Layers

**Dropout can be applied to hidden neurons in the body of your network model.
In the code below, Dropout will be applied between the two hidden layers and between the last hidden layer and the output layer. Again, a dropout rate of 20% is used, as is a weight constraint on those layers.**

In [0]:
# dropout in hidden layers with weight constraint

def create_model():
    """Build and compile a network with 20% dropout after each hidden layer.

    Changes from the original cell:
    - The first Dense layer now declares `input_dim=60`, matching the other
      models in this notebook, so the input shape is explicit rather than
      inferred at fit time.
    - The max-norm constraint of 3 is passed as `kernel_constraint` (the
      Keras 2 name) rather than the legacy `W_constraint`.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    model.add(Dropout(0.2))
    model.add(Dense(30, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    model.add(Dropout(0.2))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    # Compile model
    sgd = SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=False)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model

In [18]:
# Cross-validate the hidden-layer dropout model (10 stratified folds,
# standardized inputs, 300 epochs).
np.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_model, epochs=300, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
# Label the result "Hidden": this experiment applies dropout to the hidden
# layers, not the visible (input) layer.
print("Hidden: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  


Visible: 84.52% (8.52%)


# Step : Tips For Using Dropout

**Try different dropout values**

In [0]:
def create_model():
    """Build and compile a network with 50% dropout on the input layer.

    Both hidden layers carry a max-norm weight constraint of 3, passed as
    `kernel_constraint` (the Keras 2 name; `W_constraint` is the legacy
    spelling and triggers a warning on every model build).

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    model.add(Dropout(0.5, input_shape=(60,)))
    model.add(Dense(60, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    model.add(Dense(30, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    # Compile model
    sgd = SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=False)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model

In [20]:
# Cross-validate the current create_model (10 stratified folds, standardized
# inputs, 300 epochs).
np.random.seed(seed)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_model, epochs=300, batch_size=16, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Visible: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys


Visible: 51.43% (3.30%)


**Try using a Larger network**

In [0]:
def create_model():
    """Build and compile a larger network with 50% dropout on the input layer.

    Three hidden layers (60, 30 and 10 units) each carry a max-norm weight
    constraint of 3, passed as `kernel_constraint` (the Keras 2 name;
    `W_constraint` is the legacy spelling and triggers a warning on every
    model build).

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    model.add(Dropout(0.5, input_shape=(60,)))
    model.add(Dense(60, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    model.add(Dense(30, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    model.add(Dense(10, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    # Compile model
    sgd = SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=False)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model

In [23]:
# Cross-validate the current create_model (10 stratified folds, standardized
# inputs, 300 epochs).
np.random.seed(seed)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_model, epochs=300, batch_size=16, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Visible: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

  
  import sys
  
  
  import sys
  
  
  import sys
  
  
  import sys
  
  
  import sys
  
  
  import sys
  
  
  import sys
  
  
  import sys
  
  
  import sys
  
  
  import sys
  


Visible: 68.20% (14.32%)


**Try using Dropout on both visible and hidden units**

In [0]:
def create_model():
    """Build and compile a network with 50% dropout on input and hidden units.

    Dropout is applied to the input layer and again after the first hidden
    layer. Both hidden layers carry a max-norm weight constraint of 3, passed
    as `kernel_constraint` (the Keras 2 name; `W_constraint` is the legacy
    spelling and triggers a warning on every model build).

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    model.add(Dropout(0.5, input_shape=(60,)))
    model.add(Dense(60, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    model.add(Dropout(0.5))
    model.add(Dense(30, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    # Compile model
    sgd = SGD(lr=0.2, momentum=0.9, decay=0.0, nesterov=False)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model

In [31]:
# Cross-validate the model that uses dropout on both visible and hidden units
# (10 stratified folds, standardized inputs, 300 epochs).
np.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_model, epochs=300, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
# Label reflects that dropout is applied to both the visible and hidden units
# in this experiment, not the visible layer alone.
print("Visible + Hidden: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  


Visible: 53.38% (1.23%)


**Try using large learning rate with decay and larger momentum**

In [0]:
def create_model():
    """Build and compile a 50% input-dropout network with lr 0.4, momentum 0.99.

    Both hidden layers carry a max-norm weight constraint of 3, passed as
    `kernel_constraint` (the Keras 2 name; `W_constraint` is the legacy
    spelling and triggers a warning on every model build).

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    model.add(Dropout(0.5, input_shape=(60,)))
    model.add(Dense(60, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    model.add(Dense(30, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    # Compile model
    # NOTE(review): the section heading mentions a learning rate "with decay",
    # but decay is 0.0 here -- confirm whether a non-zero decay was intended.
    sgd = SGD(lr=0.4, momentum=0.99, decay=0.0, nesterov=False)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model

In [29]:
# Cross-validate the current create_model (10 stratified folds, standardized
# inputs, 300 epochs).
np.random.seed(seed)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_model, epochs=300, batch_size=16, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Visible: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys


Visible: 52.38% (2.70%)


**Try constraining the size of the network weights**

In [0]:
def create_model():
    """Build and compile a 50% input-dropout network with max-norm 5 weights.

    Both hidden layers carry a max-norm weight constraint of 5, passed as
    `kernel_constraint` (the Keras 2 name; `W_constraint` is the legacy
    spelling and triggers a warning on every model build). SGD uses a
    learning rate of 0.5 and momentum of 0.99.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    model.add(Dropout(0.5, input_shape=(60,)))
    model.add(Dense(60, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(5)))
    model.add(Dense(30, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(5)))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    # Compile model
    sgd = SGD(lr=0.5, momentum=0.99, decay=0.0, nesterov=False)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model

In [33]:
# Cross-validate the current create_model (10 stratified folds, standardized
# inputs, 300 epochs).
np.random.seed(seed)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_model, epochs=300, batch_size=16, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Visible: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys
  
  import sys


Visible: 49.95% (3.60%)
