# Dropout Regularization In Deep Neural Network

---

This is a dataset that describes sonar chirp returns bouncing off differnt angles. It is a binary classification problem that requires a model to differentiate rocks from metal cylinders.

Dataset information: https://archive.ics.uci.edu/ml/datasets/Connectionist+Bench+(Sonar,+Mines+vs.+Rocks)

Download it from here: https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data



In [45]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

# Load Dataset

In [46]:
df = pd.read_csv('/content/sonar_dataset.csv', header = None)
df.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R


In [47]:
df.shape

(208, 61)

In [48]:
# Check for nan values
df.isna().sum()

0     0
1     0
2     0
3     0
4     0
     ..
56    0
57    0
58    0
59    0
60    0
Length: 61, dtype: int64

In [49]:
df.columns

Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
       36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
       54, 55, 56, 57, 58, 59, 60],
      dtype='int64')

In [50]:
# label is not skewed
df[60].value_counts()

60
M    111
R     97
Name: count, dtype: int64

In [51]:
X = df.drop(60, axis = 1)
y = df[60]

y.head()

0    R
1    R
2    R
3    R
4    R
Name: 60, dtype: object

In [52]:
y.replace({'R': 1, 'M': 0}, inplace = True)

In [53]:
y.value_counts()

60
0    111
1     97
Name: count, dtype: int64

In [54]:
X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0232,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0125,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0033,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0241,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0156,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094


In [55]:
X.columns

Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
       36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
       54, 55, 56, 57, 58, 59],
      dtype='int64')

# Split Dataset Train and Test

In [56]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=142)

In [57]:
X_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
138,0.0731,0.1249,0.1665,0.1496,0.1443,0.277,0.2555,0.1712,0.0466,0.1114,...,0.0361,0.0444,0.023,0.029,0.0141,0.0161,0.0177,0.0194,0.0207,0.0057
158,0.0107,0.0453,0.0289,0.0713,0.1075,0.1019,0.1606,0.2119,0.3061,0.2936,...,0.0079,0.0164,0.012,0.0113,0.0021,0.0097,0.0072,0.006,0.0017,0.0036
72,0.0208,0.0186,0.0131,0.0211,0.061,0.0613,0.0612,0.0506,0.0989,0.1093,...,0.014,0.0074,0.0063,0.0081,0.0087,0.0044,0.0028,0.0019,0.0049,0.0023
90,0.0126,0.0519,0.0621,0.0518,0.1072,0.2587,0.2304,0.2067,0.3416,0.4284,...,0.0027,0.0208,0.0048,0.0199,0.0126,0.0022,0.0037,0.0034,0.0114,0.0077
71,0.0036,0.0078,0.0092,0.0387,0.053,0.1197,0.1243,0.1026,0.1239,0.0888,...,0.0079,0.0119,0.0055,0.0035,0.0036,0.0004,0.0018,0.0049,0.0024,0.0016


# Using Deep Learning Model



## Model no Dropout layers

In [58]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(60, input_dim= 60, activation= 'relu'),
    tf.keras.layers.Dense(30, activation= 'relu'),
    tf.keras.layers.Dense(15, activation= 'relu'),
    tf.keras.layers.Dense(1, activation= 'sigmoid')
])


model.compile(
  loss = 'binary_crossentropy',
  optimizer = 'adam',
  metrics = ['accuracy']
)

model.fit(X_train, y_train, epochs= 100, batch_size= 8)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7c8958c12410>

In [59]:
model.evaluate(X_test,y_test)



[0.22792427241802216, 0.8809523582458496]

In [60]:
y_preds = model.predict(X_test).reshape(-1)
print(y_preds[:10])

y_preds = np.round(y_preds)
print(y_preds[:10])

[4.2409450e-03 8.8994122e-01 3.4303320e-04 1.0000000e+00 9.9999994e-01
 1.0472365e-03 1.0000000e+00 3.7838137e-01 1.0550822e-04 7.9872566e-01]
[0. 1. 0. 1. 1. 0. 1. 0. 0. 1.]


In [61]:
y_test[:10]

136    0
1      1
192    0
62     1
10     1
124    0
42     1
27     1
144    0
109    0
Name: 60, dtype: int64

In [62]:
from sklearn.metrics import confusion_matrix, classification_report

print(classification_report(y_test, y_preds))

              precision    recall  f1-score   support

           0       0.91      0.88      0.89        24
           1       0.84      0.89      0.86        18

    accuracy                           0.88        42
   macro avg       0.88      0.88      0.88        42
weighted avg       0.88      0.88      0.88        42




## Model without Dropout Layers

In [75]:
model_dropout = tf.keras.Sequential([
    tf.keras.layers.Dense(60, input_dim= 60, activation= 'relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(30, activation= 'relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(15, activation= 'relu'),
    tf.keras.layers.Dropout(0.05),
    tf.keras.layers.Dense(1, activation= 'sigmoid')
])

model_dropout.compile(
  loss= 'binary_crossentropy',
  optimizer= 'adam',
  metrics= ['accuracy']
)

model_dropout.fit(X_train, y_train, epochs= 100, batch_size= 8)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7c894db8d330>

In [76]:
model_dropout.evaluate(X_test, y_test)





[0.2644554674625397, 0.9047619104385376]

In [78]:
y_pred = model_dropout.predict(X_test).reshape(-1)
print(y_pred[:10])

y_pred = np.round(y_pred)
print(y_pred[:10])

[0.0097655  0.7940934  0.0557607  0.99990654 0.99875486 0.11029582
 0.99998003 0.9730384  0.0141789  0.796411  ]
[0. 1. 0. 1. 1. 0. 1. 1. 0. 1.]


In [79]:
y_test[:10]

136    0
1      1
192    0
62     1
10     1
124    0
42     1
27     1
144    0
109    0
Name: 60, dtype: int64

In [80]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.95      0.88      0.91        24
           1       0.85      0.94      0.89        18

    accuracy                           0.90        42
   macro avg       0.90      0.91      0.90        42
weighted avg       0.91      0.90      0.91        42



**You can see that by using dopout layers test accuracy increased from  accuracy= 0.88 to 0.90**