In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf
from tensorflow import keras

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
df = pd.read_csv("sonar.csv", header=None)
df.sample(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
155,0.0211,0.0128,0.0015,0.045,0.0711,0.1563,0.1518,0.1206,0.1666,0.1345,...,0.0117,0.0023,0.0047,0.0049,0.0031,0.0024,0.0039,0.0051,0.0015,M
86,0.0188,0.037,0.0953,0.0824,0.0249,0.0488,0.1424,0.1972,0.1873,0.1806,...,0.0093,0.0033,0.0113,0.003,0.0057,0.009,0.0057,0.0068,0.0024,R
138,0.0731,0.1249,0.1665,0.1496,0.1443,0.277,0.2555,0.1712,0.0466,0.1114,...,0.0444,0.023,0.029,0.0141,0.0161,0.0177,0.0194,0.0207,0.0057,M
124,0.0388,0.0324,0.0688,0.0898,0.1267,0.1515,0.2134,0.2613,0.2832,0.2718,...,0.0255,0.0071,0.0263,0.0079,0.0111,0.0107,0.0068,0.0097,0.0067,M
205,0.0522,0.0437,0.018,0.0292,0.0351,0.1171,0.1257,0.1178,0.1258,0.2529,...,0.016,0.0029,0.0051,0.0062,0.0089,0.014,0.0138,0.0077,0.0031,M


In [4]:
df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
count,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,...,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0
mean,0.029164,0.038437,0.043832,0.053892,0.075202,0.10457,0.121747,0.134799,0.178003,0.208259,...,0.016069,0.01342,0.010709,0.010941,0.00929,0.008222,0.00782,0.007949,0.007941,0.006507
std,0.022991,0.03296,0.038428,0.046528,0.055552,0.059105,0.061788,0.085152,0.118387,0.134416,...,0.012008,0.009634,0.00706,0.007301,0.007088,0.005736,0.005785,0.00647,0.006181,0.005031
min,0.0015,0.0006,0.0015,0.0058,0.0067,0.0102,0.0033,0.0055,0.0075,0.0113,...,0.0,0.0008,0.0005,0.001,0.0006,0.0004,0.0003,0.0003,0.0001,0.0006
25%,0.01335,0.01645,0.01895,0.024375,0.03805,0.067025,0.0809,0.080425,0.097025,0.111275,...,0.008425,0.007275,0.005075,0.005375,0.00415,0.0044,0.0037,0.0036,0.003675,0.0031
50%,0.0228,0.0308,0.0343,0.04405,0.0625,0.09215,0.10695,0.1121,0.15225,0.1824,...,0.0139,0.0114,0.00955,0.0093,0.0075,0.00685,0.00595,0.0058,0.0064,0.0053
75%,0.03555,0.04795,0.05795,0.0645,0.100275,0.134125,0.154,0.1696,0.233425,0.2687,...,0.020825,0.016725,0.0149,0.0145,0.0121,0.010575,0.010425,0.01035,0.010325,0.008525
max,0.1371,0.2339,0.3059,0.4264,0.401,0.3823,0.3729,0.459,0.6828,0.7106,...,0.1004,0.0709,0.039,0.0352,0.0447,0.0394,0.0355,0.044,0.0364,0.0439


In [5]:
df[60].value_counts()  # to check if binary or multi-class

60
M    111
R     97
Name: count, dtype: int64

In [6]:
X = df.drop(60, axis='columns').values
Y = df[60].values
X.shape, Y.shape

((208, 60), (208,))

In [7]:
Y_dummies = pd.get_dummies(Y, drop_first=True, dtype='int16')
print(Y_dummies.shape)
Y_dummies = Y_dummies.values.reshape(-1)
Y_dummies.shape

(208, 1)


(208,)

In [8]:
Y_dummies[:5], Y[:5]  # 1 is R, 0 is M

(array([1, 1, 1, 1, 1], dtype=int16),
 array(['R', 'R', 'R', 'R', 'R'], dtype=object))

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, Y_dummies, train_size=0.8, random_state=5)

In [10]:
values, counts = np.unique(y_train, return_counts=True)  # to ensure that we train a balanced training set
values, counts

(array([0, 1], dtype=int16), array([84, 82], dtype=int64))

In [11]:
y_train.shape, y_test.shape

((166,), (42,))

# Without Dropout

In [12]:
model_1 = keras.Sequential([
    keras.layers.Input(shape=(60, )),             
    keras.layers.Dense(45, activation='relu'),
    keras.layers.Dense(30, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
])

model_1.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model_1.fit(X_train, y_train, epochs=100)

Epoch 1/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.5163 - loss: 0.6871
Epoch 2/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5909 - loss: 0.6696 
Epoch 3/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6415 - loss: 0.6572 
Epoch 4/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6714 - loss: 0.6413 
Epoch 5/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6682 - loss: 0.6375 
Epoch 6/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7853 - loss: 0.6133 
Epoch 7/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7550 - loss: 0.6042 
Epoch 8/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7412 - loss: 0.5970 
Epoch 9/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

<keras.src.callbacks.history.History at 0x28eb2c66c90>

In [13]:
model_1.evaluate(X_test, y_test)  # training set has accuracy: 0.9579 - loss: 0.1609  

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8264 - loss: 0.3110  


[0.3136732578277588, 0.8333333134651184]

### accuracy of testing set is too low ==> OVERFITTING

---

# Using Dropout Regulalization

In [17]:
model_2 = keras.Sequential([
    keras.layers.Input(shape=(60, )),             
    keras.layers.Dense(45, activation='relu'),  # hidden layer 1
    keras.layers.Dropout(0.5),  # Drop 50% of neurons in hidden layer 1
    keras.layers.Dense(30, activation='relu'),  # hidden layer 2
    keras.layers.Dropout(0.2),  # Drop 20% of neurons in hidden layer 2
    keras.layers.Dense(1, activation='sigmoid'),
])

model_2.compile(optimizer='adam',
             loss='binary_crossentropy',
             metrics=['accuracy'])

model_2.fit(X_train, y_train, epochs=100)

Epoch 1/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.4646 - loss: 0.7940
Epoch 2/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5353 - loss: 0.7082 
Epoch 3/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5216 - loss: 0.7142 
Epoch 4/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5669 - loss: 0.6819 
Epoch 5/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5215 - loss: 0.6943 
Epoch 6/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5056 - loss: 0.7157 
Epoch 7/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5894 - loss: 0.6875 
Epoch 8/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4631 - loss: 0.7063 
Epoch 9/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

<keras.src.callbacks.history.History at 0x28eb8bb76d0>

In [18]:
model_2.evaluate(X_test, y_test)   # accuracy: 0.8775 - loss: 0.3353 

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8472 - loss: 0.3269  


[0.3261261582374573, 0.8333333134651184]

### accuracy of testing set is better ==> LESS / NO OVERFITTING