#### Workflow

![image.png](attachment:image.png)

#### Import the packages

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import precision_score,recall_score
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier

#### Data collection and Data Processing

In [2]:
## Dataset link https://www.kaggle.com/mattcarter865/mines-vs-rocks?select=sonar.all-data.csv

#loading the data set to pandas dataframe
sonar_data=pd.read_csv('sonar.all-data.csv',header=None)

In [3]:
sonar_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R


In [4]:
sonar_data.tail()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
203,0.0187,0.0346,0.0168,0.0177,0.0393,0.163,0.2028,0.1694,0.2328,0.2684,...,0.0116,0.0098,0.0199,0.0033,0.0101,0.0065,0.0115,0.0193,0.0157,M
204,0.0323,0.0101,0.0298,0.0564,0.076,0.0958,0.099,0.1018,0.103,0.2154,...,0.0061,0.0093,0.0135,0.0063,0.0063,0.0034,0.0032,0.0062,0.0067,M
205,0.0522,0.0437,0.018,0.0292,0.0351,0.1171,0.1257,0.1178,0.1258,0.2529,...,0.016,0.0029,0.0051,0.0062,0.0089,0.014,0.0138,0.0077,0.0031,M
206,0.0303,0.0353,0.049,0.0608,0.0167,0.1354,0.1465,0.1123,0.1945,0.2354,...,0.0086,0.0046,0.0126,0.0036,0.0035,0.0034,0.0079,0.0036,0.0048,M
207,0.026,0.0363,0.0136,0.0272,0.0214,0.0338,0.0655,0.14,0.1843,0.2354,...,0.0146,0.0129,0.0047,0.0039,0.0061,0.004,0.0036,0.0061,0.0115,M


In [5]:
#number of rows and columns 
sonar_data.shape

(208, 61)

In [6]:
#statsitical definition of the data > describe()
#it skips the metrics for last column which is categorical column
sonar_data.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
count,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,...,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0
mean,0.029164,0.038437,0.043832,0.053892,0.075202,0.10457,0.121747,0.134799,0.178003,0.208259,...,0.016069,0.01342,0.010709,0.010941,0.00929,0.008222,0.00782,0.007949,0.007941,0.006507
std,0.022991,0.03296,0.038428,0.046528,0.055552,0.059105,0.061788,0.085152,0.118387,0.134416,...,0.012008,0.009634,0.00706,0.007301,0.007088,0.005736,0.005785,0.00647,0.006181,0.005031
min,0.0015,0.0006,0.0015,0.0058,0.0067,0.0102,0.0033,0.0055,0.0075,0.0113,...,0.0,0.0008,0.0005,0.001,0.0006,0.0004,0.0003,0.0003,0.0001,0.0006
25%,0.01335,0.01645,0.01895,0.024375,0.03805,0.067025,0.0809,0.080425,0.097025,0.111275,...,0.008425,0.007275,0.005075,0.005375,0.00415,0.0044,0.0037,0.0036,0.003675,0.0031
50%,0.0228,0.0308,0.0343,0.04405,0.0625,0.09215,0.10695,0.1121,0.15225,0.1824,...,0.0139,0.0114,0.00955,0.0093,0.0075,0.00685,0.00595,0.0058,0.0064,0.0053
75%,0.03555,0.04795,0.05795,0.0645,0.100275,0.134125,0.154,0.1696,0.233425,0.2687,...,0.020825,0.016725,0.0149,0.0145,0.0121,0.010575,0.010425,0.01035,0.010325,0.008525
max,0.1371,0.2339,0.3059,0.4264,0.401,0.3823,0.3729,0.459,0.6828,0.7106,...,0.1004,0.0709,0.039,0.0352,0.0447,0.0394,0.0355,0.044,0.0364,0.0439


In [7]:
#checking the categorical dsitrbution 
#M-> MinE
#R-> Rock
sonar_data[60].value_counts()

M    111
R     97
Name: 60, dtype: int64

In [8]:
sonar_data.groupby(60).mean()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
60,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
M,0.034989,0.045544,0.05072,0.064768,0.086715,0.111864,0.128359,0.149832,0.213492,0.251022,...,0.019352,0.016014,0.011643,0.012185,0.009923,0.008914,0.007825,0.00906,0.008695,0.00693
R,0.022498,0.030303,0.035951,0.041447,0.062028,0.096224,0.11418,0.117596,0.137392,0.159325,...,0.012311,0.010453,0.00964,0.009518,0.008567,0.00743,0.007814,0.006677,0.007078,0.006024


In [9]:
#separating data and label 
X=sonar_data.drop(columns=60,axis=1)
Y=sonar_data[60]

In [10]:
X.shape

(208, 60)

In [11]:
Y.shape

(208,)

#### Splitting into train and test data

In [12]:
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.1,stratify=Y,random_state=1)

In [13]:
print(X.shape,X_train.shape,X_test.shape)

(208, 60) (187, 60) (21, 60)


#### Label Encoding for target variables

In [14]:
labelEncoder=LabelEncoder()

In [15]:
Y_test =LabelEncoder().fit_transform(Y_test)

In [16]:
Y_train =LabelEncoder().fit_transform(Y_train)

#### Model training > Logistic Regression

In [17]:
model = LogisticRegression()

In [18]:
#training the logistic regression model with training data 
model.fit(X_train,Y_train)

LogisticRegression()

##### Model Evaluation 

In [19]:
#accruacy on the training data 
X_train_prediction=model.predict(X_train)
training_Data_accuracy=accuracy_score(X_train_prediction,Y_train)


In [20]:
print('Accuracy on the training data:',training_Data_accuracy)

Accuracy on the training data: 0.8342245989304813


In [21]:
#accuracy on the test data 

X_test_prediction=model.predict(X_test)
test_Data_accuracy=accuracy_score(X_test_prediction,Y_test)

test_Data_precision=precision_score(X_test_prediction,Y_test)

test_Data_recall=recall_score(X_test_prediction,Y_test)

In [22]:
print('Accuracy on the test data:',test_Data_accuracy)
print('Precision on the test data:',test_Data_precision)
print('Recall on the test data:',test_Data_recall)

Accuracy on the test data: 0.7619047619047619
Precision on the test data: 0.7
Recall on the test data: 0.7777777777777778


In [23]:
confusion_matrix(X_test_prediction,Y_test)

array([[9, 3],
       [2, 7]], dtype=int64)

#### Making predictive system


In [24]:
input_data=(0.0079,0.0086,0.0055,0.0250,0.0344,0.0546,0.0528,0.0958,0.1009,0.1240,0.1097,0.1215,0.1874,0.3383,0.3227,0.2723,0.3943,0.6432,0.7271,0.8673,0.9674,0.9847,0.9480,0.8036,0.6833,0.5136,0.3090,0.0832,0.4019,0.2344,0.1905,0.1235,0.1717,0.2351,0.2489,0.3649,0.3382,0.1589,0.0989,0.1089,0.1043,0.0839,0.1391,0.0819,0.0678,0.0663,0.1202,0.0692,0.0152,0.0266,0.0174,0.0176,0.0127,0.0088,0.0098,0.0019,0.0059,0.0058,0.0059,0.0032)
##changing the input data to numpy array
input_data_as_numpy=np.asarray(input_data)

# rehsape np array as we are predicting for the one instance 
input_data_reshape=input_data_as_numpy.reshape(1,-1)
prediction=model.predict(input_data_reshape)
if(prediction[0]==0):
    print("The object is mine")
else :
    print("The object is rock")

The object is rock
