## Importing dependencies

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

## Data Collection and processing

In [2]:
# The CSV has no header row, so header=None makes pandas label the columns 0..60.
sonar_data = pd.read_csv("sonar_data.csv", header = None)

In [3]:
# Preview the first five rows; the last column (60) holds the class label.
sonar_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R


## INFORMATION ABOUT DATASET

We used the data set used by Gorman and Sejnowski in their study of the classification of sonar 
signals using a neural network. The task is to train a network to discriminate between sonar 
signals bounced off a metal cylinder and those bounced off a roughly cylindrical rock.

The data set was contributed to the benchmark collection by Terry Sejnowski, now at the Salk 
Institute and the University of California at San Diego. The data set was developed in 
collaboration with R. Paul Gorman of Allied-Signal Aerospace Technology Center.

The file “sonar_data.csv” contains 111 patterns obtained by bouncing sonar signals off a metal 
cylinder at various angles and under various conditions and 97 patterns obtained from rocks 
under similar conditions. The transmitted sonar signal is a frequency-modulated chirp, rising 
in frequency. The data set contains signals obtained from a variety of different aspect angles, 
spanning 90 degrees for the cylinder and 180 degrees for the rock.

Each pattern is a set of 60 numbers in the range 0.0 to 1.0. Each number represents the energy 
within a particular frequency band, integrated over a certain period of time. The integration 
aperture for higher frequencies occurs later in time, since these frequencies are transmitted later 
during the chirp.

The label associated with each record contains the letter “R” if the object is a rock and “M” if 
it is a mine (metal cylinder). The numbers in the labels are in increasing order of aspect angle, 
but they do not encode the angle directly.

In [4]:
# (number of rows, number of columns) of the loaded frame.
sonar_data.shape

(208, 61)

In [5]:
# Summary statistics for the numeric columns (the label column is not included).
sonar_data.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
count,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,...,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0
mean,0.029164,0.038437,0.043832,0.053892,0.075202,0.10457,0.121747,0.134799,0.178003,0.208259,...,0.016069,0.01342,0.010709,0.010941,0.00929,0.008222,0.00782,0.007949,0.007941,0.006507
std,0.022991,0.03296,0.038428,0.046528,0.055552,0.059105,0.061788,0.085152,0.118387,0.134416,...,0.012008,0.009634,0.00706,0.007301,0.007088,0.005736,0.005785,0.00647,0.006181,0.005031
min,0.0015,0.0006,0.0015,0.0058,0.0067,0.0102,0.0033,0.0055,0.0075,0.0113,...,0.0,0.0008,0.0005,0.001,0.0006,0.0004,0.0003,0.0003,0.0001,0.0006
25%,0.01335,0.01645,0.01895,0.024375,0.03805,0.067025,0.0809,0.080425,0.097025,0.111275,...,0.008425,0.007275,0.005075,0.005375,0.00415,0.0044,0.0037,0.0036,0.003675,0.0031
50%,0.0228,0.0308,0.0343,0.04405,0.0625,0.09215,0.10695,0.1121,0.15225,0.1824,...,0.0139,0.0114,0.00955,0.0093,0.0075,0.00685,0.00595,0.0058,0.0064,0.0053
75%,0.03555,0.04795,0.05795,0.0645,0.100275,0.134125,0.154,0.1696,0.233425,0.2687,...,0.020825,0.016725,0.0149,0.0145,0.0121,0.010575,0.010425,0.01035,0.010325,0.008525
max,0.1371,0.2339,0.3059,0.4264,0.401,0.3823,0.3729,0.459,0.6828,0.7106,...,0.1004,0.0709,0.039,0.0352,0.0447,0.0394,0.0355,0.044,0.0364,0.0439


In [6]:
# Number of samples per class in the label column (60).
sonar_data[60].value_counts()

M    111
R     97
Name: 60, dtype: int64

M = "Mine"

R = "Rock"

In [7]:
# Mean of every feature column, computed separately for each class label.
sonar_data.groupby(60).mean()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
60,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
M,0.034989,0.045544,0.05072,0.064768,0.086715,0.111864,0.128359,0.149832,0.213492,0.251022,...,0.019352,0.016014,0.011643,0.012185,0.009923,0.008914,0.007825,0.00906,0.008695,0.00693
R,0.022498,0.030303,0.035951,0.041447,0.062028,0.096224,0.11418,0.117596,0.137392,0.159325,...,0.012311,0.010453,0.00964,0.009518,0.008567,0.00743,0.007814,0.006677,0.007078,0.006024


## Preparing the data (Separating Independent Variables and Target Variable)

In [8]:
# Features: every column except the label column (60).
# `columns=` already selects the column axis, so no `axis` argument is needed.
X = sonar_data.drop(columns=60)
# Target: the class label stored in column 60.
y = sonar_data[60]

In [9]:
# Preview the feature frame; column 60 should no longer be present.
X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0232,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0125,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0033,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0241,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0156,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094


In [10]:
# Preview the target labels.
y.head()

0    R
1    R
2    R
3    R
4    R
Name: 60, dtype: object

## Splitting training and test data

In [11]:
# Hold out 10% of the rows for testing. stratify=y keeps the class proportions
# of y in both splits, and the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    stratify=y,
    random_state=1,
)

In [12]:
# Preview the training features; the index keeps the original row labels.
X_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
115,0.0414,0.0436,0.0447,0.0844,0.0419,0.1215,0.2002,0.1516,0.0818,0.1975,...,0.0222,0.0045,0.0136,0.0113,0.0053,0.0165,0.0141,0.0077,0.0246,0.0198
38,0.0123,0.0022,0.0196,0.0206,0.018,0.0492,0.0033,0.0398,0.0791,0.0475,...,0.0149,0.0125,0.0134,0.0026,0.0038,0.0018,0.0113,0.0058,0.0047,0.0071
56,0.0152,0.0102,0.0113,0.0263,0.0097,0.0391,0.0857,0.0915,0.0949,0.1504,...,0.0048,0.0049,0.0041,0.0036,0.0013,0.0046,0.0037,0.0011,0.0034,0.0033
123,0.027,0.0163,0.0341,0.0247,0.0822,0.1256,0.1323,0.1584,0.2017,0.2122,...,0.0197,0.0189,0.0204,0.0085,0.0043,0.0092,0.0138,0.0094,0.0105,0.0093
18,0.027,0.0092,0.0145,0.0278,0.0412,0.0757,0.1026,0.1138,0.0794,0.152,...,0.0045,0.0084,0.001,0.0018,0.0068,0.0039,0.012,0.0132,0.007,0.0088


In [13]:
# Preview the held-out test features.
X_test.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
113,0.0283,0.0599,0.0656,0.0229,0.0839,0.1673,0.1154,0.1098,0.137,0.1767,...,0.0109,0.0147,0.017,0.0158,0.0046,0.0073,0.0054,0.0033,0.0045,0.0079
23,0.0115,0.015,0.0136,0.0076,0.0211,0.1058,0.1023,0.044,0.0931,0.0734,...,0.0107,0.0091,0.0016,0.0084,0.0064,0.0026,0.0029,0.0037,0.007,0.0041
45,0.0408,0.0653,0.0397,0.0604,0.0496,0.1817,0.1178,0.1024,0.0583,0.2176,...,0.0066,0.0062,0.0129,0.0184,0.0069,0.0198,0.0199,0.0102,0.007,0.0055
81,0.01,0.0194,0.0155,0.0489,0.0839,0.1009,0.1627,0.2071,0.2696,0.299,...,0.0122,0.013,0.0073,0.0077,0.0075,0.006,0.008,0.0019,0.0053,0.0019
82,0.0409,0.0421,0.0573,0.013,0.0183,0.1019,0.1054,0.107,0.2302,0.2259,...,0.0113,0.0028,0.0036,0.0105,0.012,0.0087,0.0061,0.0061,0.003,0.0078


In [14]:
# Preview the training labels.
y_train.head()

115    M
38     R
56     R
123    M
18     R
Name: 60, dtype: object

In [15]:
# Preview the held-out test labels.
y_test.head()

113    M
23     R
45     R
81     R
82     R
Name: 60, dtype: object

## Model Training -----> Logistic Regression

In [16]:
# Logistic regression classifier constructed with its default hyperparameters.
model = LogisticRegression()

In [17]:
# Fit on the training split only; the test split is kept for evaluation.
model.fit(X_train, y_train)

## Model Evaluation

In [18]:
# training accuracy
# Predicted labels for the same rows the model was fitted on.
X_train_predict = model.predict(X_train)
# accuracy_score is documented as (y_true, y_pred): ground truth goes first.
training_data_accuracy = accuracy_score(y_train, X_train_predict)
# Round numerically to two decimals instead of round-tripping through a string.
training_data_accuracy = round(float(training_data_accuracy), 2)

In [19]:
# Format with a fixed precision so float artifacts from the multiplication
# (e.g. 0.57 * 100 == 56.99999999999999) never reach the printed output.
print("Accuracy on training data: {:.1f}%".format(training_data_accuracy * 100))

Accuracy on training data: 83.0%


In [20]:
# test accuracy
# Predicted labels for the held-out rows the model has not seen during fitting.
X_test_predict = model.predict(X_test)
# accuracy_score is documented as (y_true, y_pred): ground truth goes first.
test_data_accuracy = accuracy_score(y_test, X_test_predict)
# Round numerically to two decimals instead of round-tripping through a string.
test_data_accuracy = round(float(test_data_accuracy), 2)

In [21]:
# Format with a fixed precision so float artifacts from the multiplication
# (e.g. 0.57 * 100 == 56.99999999999999) never reach the printed output.
print("Accuracy on test data: {:.1f}%".format(test_data_accuracy * 100))

Accuracy on test data: 76.0%


## Making a prediction

In [22]:
# One sonar reading to classify: 60 values, one per feature column.
input_data = (
    0.0286, 0.0453, 0.0277, 0.0174, 0.0384, 0.0990, 0.1201, 0.1833, 0.2105, 0.3039,
    0.2988, 0.4250, 0.6343, 0.8198, 1.0000, 0.9988, 0.9508, 0.9025, 0.7234, 0.5122,
    0.2074, 0.3985, 0.5890, 0.2872, 0.2043, 0.5782, 0.5389, 0.3750, 0.3411, 0.5067,
    0.5580, 0.4778, 0.3299, 0.2198, 0.1407, 0.2856, 0.3807, 0.4158, 0.4054, 0.3296,
    0.2707, 0.2650, 0.0723, 0.1238, 0.1192, 0.1089, 0.0623, 0.0494, 0.0264, 0.0081,
    0.0104, 0.0045, 0.0014, 0.0038, 0.0013, 0.0089, 0.0057, 0.0027, 0.0051, 0.0062,
)

# Convert the tuple into a NumPy array.
input_data_as_numpy_array = np.asarray(input_data)

# predict() expects a 2-D (n_samples, n_features) array, so add a leading
# sample axis for this single instance.
input_data_reshaped = input_data_as_numpy_array[np.newaxis, :]

prediction = model.predict(input_data_reshaped)

# Any label other than "R" is reported as a mine.
print("Object is a ROCK" if prediction[0] == "R" else "Object is a MINE")

Object is a ROCK


In [23]:
# A second sonar reading to classify: 60 values, one per feature column.
input_data = (
    0.0228, 0.0106, 0.0130, 0.0842, 0.1117, 0.1506, 0.1776, 0.0997, 0.1428, 0.2227,
    0.2621, 0.3109, 0.2859, 0.3316, 0.3755, 0.4499, 0.4765, 0.6254, 0.7304, 0.8702,
    0.9349, 0.9614, 0.9126, 0.9443, 1.0000, 0.9455, 0.8815, 0.7520, 0.7068, 0.5986,
    0.3857, 0.2510, 0.2162, 0.0968, 0.1323, 0.1344, 0.2250, 0.3244, 0.3939, 0.3806,
    0.3258, 0.3654, 0.2983, 0.1779, 0.1535, 0.1199, 0.0959, 0.0765, 0.0649, 0.0313,
    0.0185, 0.0098, 0.0178, 0.0077, 0.0074, 0.0095, 0.0055, 0.0045, 0.0063, 0.0039,
)

# Convert the tuple into a NumPy array.
input_data_as_numpy_array = np.asarray(input_data)

# predict() expects a 2-D (n_samples, n_features) array, so add a leading
# sample axis for this single instance.
input_data_reshaped = input_data_as_numpy_array[np.newaxis, :]

prediction = model.predict(input_data_reshaped)

# Any label other than "R" is reported as a mine.
print("Object is a ROCK" if prediction[0] == "R" else "Object is a MINE")

Object is a MINE


In [24]:
# Seeded generator so this cell prints the same result on every
# Restart & Run All; the seed value itself is arbitrary.
rng = np.random.default_rng(0)

# 60 synthetic feature values drawn uniformly from [0, 1).
random_input = rng.random(60)

# Reshape the np array as we are predicting for one instance
random_input = random_input.reshape(1, -1)

prediction = model.predict(random_input)

# Any label other than "R" is reported as a mine.
if prediction[0] == "R":
    print("Object is a ROCK")
else:
    print("Object is a MINE")

Object is a MINE


In [25]:
# Seeded generator so this cell prints the same result on every
# Restart & Run All; the seed value itself is arbitrary.
rng = np.random.default_rng(1)

# 60 synthetic feature values drawn uniformly from [0, 1), scaled down to [0, 0.3).
random_input = rng.random(60) * 0.3

# Reshape the np array as we are predicting for one instance
random_input = random_input.reshape(1, -1)

prediction = model.predict(random_input)

# Any label other than "R" is reported as a mine.
if prediction[0] == "R":
    print("Object is a ROCK")
else:
    print("Object is a MINE")

Object is a ROCK
