In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import warnings 
warnings.filterwarnings('ignore')

# LOAD CSV FILE AND PRINT 5 SAMPLES

In [4]:
df = pd.read_csv('./sonar_dataset.csv', header=None)
df.sample(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
49,0.0119,0.0582,0.0623,0.06,0.1397,0.1883,0.1422,0.1447,0.0487,0.0864,...,0.0025,0.0103,0.0074,0.0123,0.0069,0.0076,0.0073,0.003,0.0138,R
195,0.0129,0.0141,0.0309,0.0375,0.0767,0.0787,0.0662,0.1108,0.1777,0.2245,...,0.0124,0.0093,0.0072,0.0019,0.0027,0.0054,0.0017,0.0024,0.0029,M
162,0.0217,0.0152,0.0346,0.0346,0.0484,0.0526,0.0773,0.0862,0.1451,0.211,...,0.0123,0.0067,0.0011,0.0026,0.0049,0.0029,0.0022,0.0022,0.0032,M
193,0.0203,0.0121,0.038,0.0128,0.0537,0.0874,0.1021,0.0852,0.1136,0.1747,...,0.0134,0.0094,0.0047,0.0045,0.0042,0.0028,0.0036,0.0013,0.0016,M
70,0.0065,0.0122,0.0068,0.0108,0.0217,0.0284,0.0527,0.0575,0.1054,0.1109,...,0.0069,0.0025,0.0027,0.0052,0.0036,0.0026,0.0036,0.0006,0.0035,R


In [5]:
df.shape

(208, 61)

# Check if any cell is containing null

In [7]:
df.isna().sum()

0     0
1     0
2     0
3     0
4     0
     ..
56    0
57    0
58    0
59    0
60    0
Length: 61, dtype: int64

# print column names

In [9]:
df.columns

Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
       36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
       54, 55, 56, 57, 58, 59, 60],
      dtype='int64')

# Get the last column 60 which is the result and check how many are M and R

In [36]:
df[60].value_counts()

dtype('O')

# check data types for all the columns

In [37]:
df.dtypes

0     float64
1     float64
2     float64
3     float64
4     float64
       ...   
56    float64
57    float64
58    float64
59    float64
60     object
Length: 61, dtype: object

# Split the data as X (data) and Y (Results)

In [20]:
x = df.drop(60, axis='columns')
y = df[60]

# y have M and R we need to do One hot encoding where with only one column and make R=1 and M=0

In [21]:
y = pd.get_dummies(y, drop_first=True) # drop_first drop first column

In [39]:
y.sample(5) # R values are True and False instead of 0 and 1

Unnamed: 0,R
172,False
46,True
105,False
88,True
96,True


# Change y data type from boolean to numeric which will change True =1 and False = 0 in y

In [41]:
y.dtypes

R    bool
dtype: object

In [45]:
y = y.astype(int)

In [46]:
y.sample(5)

Unnamed: 0,R
12,1
133,0
25,1
205,0
128,0


In [47]:
y.value_counts()

R
0    111
1     97
Name: count, dtype: int64

# Split the data into training and test sets

In [51]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)

# Create the model with hidden layes and train model with MINI BATCH

In [58]:
import tensorflow as tf 
from tensorflow import keras 

model = keras.Sequential([
    keras.layers.Dense(60, input_dim=60, activation='relu'),
    keras.layers.Dense(30,  activation='relu'),
    keras.layers.Dense(15,  activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.fit(X_train, y_train, epochs=100, batch_size=8)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x13f87e130>

In [60]:
model.evaluate(X_test, y_test)



[1.01786208152771, 0.738095223903656]

# Predict

In [63]:
y_pred = model.predict(X_test).reshape(-1)
print(y_pred)

[1.1316617e-09 8.8948107e-01 9.9750769e-01 2.5904335e-06 9.9997628e-01
 9.9978840e-01 5.4463267e-01 9.9998665e-01 1.1247299e-06 9.9999315e-01
 1.7978868e-01 2.7596866e-04 2.2031942e-04 3.6505195e-03 1.4605242e-02
 3.3971885e-01 9.9384755e-01 1.9537043e-05 1.0819623e-02 9.9880654e-01
 1.8447749e-07 9.9998683e-01 3.9415634e-03 9.9984276e-01 1.1305119e-04
 1.3015963e-08 3.3630792e-02 2.0902330e-02 9.9995744e-01 4.1276917e-06
 9.2757231e-01 4.8076516e-09 5.4611963e-01 8.6336732e-01 9.9943870e-01
 9.9931669e-01 9.9976867e-01 9.2524797e-02 9.9989849e-01 7.8677615e-05
 2.8180179e-01 8.5964735e-04]


# Roundoff the values

In [64]:
y_pred = np.round(y_pred)
print(y_pred)

[0. 1. 1. 0. 1. 1. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 1. 0. 1.
 0. 0. 0. 0. 1. 0. 1. 0. 1. 1. 1. 1. 1. 0. 1. 0. 0. 0.]


# Compare y_test and y_pred

In [65]:
print(y_pred[:10])

[0. 1. 1. 0. 1. 1. 1. 1. 0. 1.]


In [66]:
print(y_test[:10])

     R
186  0
155  0
165  0
200  0
58   1
34   1
151  0
18   1
202  0
62   1


# Print and check the classification report

In [68]:
from sklearn.metrics import confusion_matrix, classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.70      0.80      0.74        20
           1       0.79      0.68      0.73        22

    accuracy                           0.74        42
   macro avg       0.74      0.74      0.74        42
weighted avg       0.74      0.74      0.74        42



# Test with dropout layer

In [70]:
model = keras.Sequential([
    keras.layers.Dense(60, input_dim=60, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(30,  activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(15,  activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.fit(X_train, y_train, epochs=100, batch_size=8)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x13fc7d3a0>

In [71]:
model.evaluate(X_test, y_test)



[0.5691805481910706, 0.7857142686843872]

In [72]:
y_pred = model.predict(X_test).reshape(-1)
y_pred = np.round(y_pred)
from sklearn.metrics import confusion_matrix, classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.76      0.80      0.78        20
           1       0.81      0.77      0.79        22

    accuracy                           0.79        42
   macro avg       0.79      0.79      0.79        42
weighted avg       0.79      0.79      0.79        42

