# Dropout Regularization 
* A simple and powerful regularization for neural networks and deep learning models is **dropout**.
* Dropout is a technique where randomly selected neurons are ignored during training.
* This means that their contribution to the activation of downstream neurons is temporarily removed on the forward pass and any weight updates are not applied to the neuron on the backward pass.

## 1. Dropout Regularization in Keras
* Dropout is only used during the training of a model and is not used when evaluating the skill of the model.

## 1.1 Using Dropout on the Visible Layer
* The dropout rate is set to 20% meaning one in five inputs will be randomly excluded from each update cycle.
* A constraint is imposed on the weights for each hidden layer,ensuring that the maximum norm of the weights does not exceed a value of 3.

In [16]:
# Example of Dropout on the Sonar Dataset: Visible Layer
#import tensorflow 
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.constraints import maxnorm
#from keras.constraints import W_constraint
#from tensorflow.contrib.keras.python.keras.constraints import W_constraint
from keras.optimizers import SGD
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# fix random seed for reproducibility
seed = 7
np.random.seed(seed)

# loading dataset
dataframe = pd.read_csv("sonar.csv",header=None)
dataset = dataframe.values

# split into input(X) and output(Y) variables
X = dataset[:,0:60].astype(float)
Y = dataset[:,60]

# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)

# dropout in the input layer with weight constraint
def create_model():
    model = Sequential()
    model.add(Dropout(0.2,input_shape=(60,0)))
    model.add(Dense(60,activation='relu',W_constraint=maxnorm(3)))
    model.add(Dense(30,activation='relu',W_constraint=maxnorm(3)))
    model.add(Dense(1,activation='sigmoid'))
    
    # compile model
    sgd = SGD(lr=0.1,momentum=0.9,decay=0.0,nesterov=False)
    model.compile(loss='binary_crossentropy',optimizer=sgd,metrics=['accuracy'])
    return model

np.random.seed(seed)
estimators = []
estimators.append(('standardize',StandardScaler()))
estimators.append(('mlp',KerasClassifier(build_fn=create_model,epochs=300,batch_size=16,verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10,shuffle=True,random_state=seed)
results = cross_val_score(pipeline,X,encoded_Y,cv=kfold)
print("Visible: %.2f%% (%.2f%%)" %(results.mean()*100,results.std()*100))

Visible: nan% (nan%)


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py", line 346, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "/home/kuluruvineeth/.local/lib/python3.7/site-packages/tensorflow/python/keras/wrappers/scikit_learn.py", line 223, in fit
    return super(KerasClassifier, self).fit(x, y, **kwargs)
  File "/home/kuluruvineeth/.local/lib/python3.7/site-packages/tensorflow/python/keras/wrappers/scikit_learn.py", line 157, in fit
    self.model = self.build_fn(**self.filter_sk_params(self.build_fn))
  File "<ipython-input-16-c7b1dd241f6e>", line 40, in create_model
    model.add(Dense(60,activation='relu',W_constraint=maxnorm(3)))
  File "/home/kuluruvineeth/.local/lib/python3.7/site-packages/tensorflow/python/keras/layers/core.py", line 1158, in __init_

## 1.2 Using Dropout on Hidden Layers


In [14]:
# Example of Dropout on the Sonar Dataset: Hidden Layer
import numpy
import pandas
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.constraints import maxnorm
from keras.optimizers import SGD
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# load dataset
dataframe = pandas.read_csv("sonar.csv", header=None)
dataset = dataframe.values
# split into input (X) and output (Y) variables
X = dataset[:,0:60].astype(float)
Y = dataset[:,60]
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
# dropout in hidden layers with weight constraint
def create_model():
    # create model
    model = Sequential()
    model.add(Dense(60, input_dim=60, activation='relu' ,W_constraint=maxnorm(3)))
    model.add(Dropout(0.2))
    model.add(Dense(30, activation='relu' , W_constraint=maxnorm(3)))
    model.add(Dropout(0.2))
    model.add(Dense(1, activation='sigmoid' ))
    # Compile model
    sgd = SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=False)
    model.compile(loss='binary_crossentropy' , optimizer=sgd, metrics=[ 'accuracy' ])
    return model
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize' , StandardScaler()))
estimators.append(('mlp' , KerasClassifier(build_fn=create_model, nb_epoch=300,
batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Hidden: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Hidden: nan% (nan%)


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py", line 346, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "/home/kuluruvineeth/.local/lib/python3.7/site-packages/tensorflow/python/keras/wrappers/scikit_learn.py", line 223, in fit
    return super(KerasClassifier, self).fit(x, y, **kwargs)
  File "/home/kuluruvineeth/.local/lib/python3.7/site-packages/tensorflow/python/keras/wrappers/scikit_learn.py", line 157, in fit
    self.model = self.build_fn(**self.filter_sk_params(self.build_fn))
  File "<ipython-input-14-422cfd35446f>", line 32, in create_model
    model.add(Dense(60, input_dim=60, activation='relu' ,W_constraint=maxnorm(3)))
  File "/home/kuluruvineeth/.local/lib/python3.7/site-packages/tensorflow/python/keras/layers/core.py", line 

# Summary
* Use dropout on larger network.
* Use dropout on input as well as hidden layers.
* Use a large learning rate with decay and a large momentum.
* Constrain the size of the network weights.

* Finally discovered the dropout regularization technique for deep learning models.