### Sonar (sound navigation ranging) is a technique that uses sound propagation (usually underwater, as in submarine navigation) to navigate, communicate with, or detect objects on or under the surface of the water, such as other vessels.
### The data set contains the response metrics for 60 separate sonar frequencies sent out against a known mine field (and known rocks). These frequencies are then labeled with the known object they were beaming the sound at (either a rock or a mine).
## Our main goal is to create a machine learning model capable of distinguishing a rock from a mine based on the responses at the 60 separate sonar frequencies.

In [None]:
#Import all necessary libraries
import numpy as np#import all necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Data overview: load the sonar readings from the Kaggle input directory.
df = pd.read_csv(
    "../input/sonar-dataset-suitable-for-classification/sonar.all-data.csv"
)

In [None]:
# Preview the first rows to check the column layout.
df.head()

In [None]:
# Dimensions of the data set as (rows, columns).
df.shape

In [None]:
# Summary of the columns: dtypes and non-null counts.
df.info()

# **EDA**

In [None]:
# Share of each class in the target column
# (normalize=True returns proportions rather than raw counts).
df["Label"].value_counts(normalize=True)

In [None]:
# Bar chart of how many samples carry each label.
fig, ax = plt.subplots(figsize=(8, 5))
sns.countplot(data=df, x="Label", ax=ax)
ax.set_title("distribution of label")

### As we can see, the label is roughly balanced: almost 53% of the data is R and 46% is M.

# Determine the features and the target label

In [None]:
# Target is the "Label" column; every other column is used as a feature.
y = df["Label"]
X = df.drop(columns="Label")

# Split the dataset into train and test sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=41,
)

# scaling the features

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted transformation to both splits.
scaler = StandardScaler().fit(X_train)
scaled_X_train = scaler.transform(X_train)
scaled_X_test = scaler.transform(X_test)

# train the model

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Baseline classifier: each sample takes the label of its single nearest
# neighbour in the scaled training data.
knn_model = KNeighborsClassifier(n_neighbors=1)
knn_model.fit(X=scaled_X_train, y=y_train)

# Predicting test data

In [None]:
# Predict the held-out rows, then show true and predicted labels side by side.
y_pred = knn_model.predict(scaled_X_test)
pd.DataFrame(data={"y_test": y_test, "y_pred": y_pred})

# Evaluating the model

In [None]:
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score

In [None]:
# Fraction of test samples whose predicted label matches the true label.
accuracy_score(y_test,y_pred)

In [None]:
# Counts of true vs. predicted classes on the test split.
confusion_matrix(y_test,y_pred)

In [None]:
# Per-class precision, recall and F1 for the test predictions.
print(classification_report(y_test,y_pred))

# Elbow Method For Choosing Optimum Value For K

In [None]:
# Fit one KNN classifier per candidate k (1..39) and record its error rate on
# the test split.
#
# A loop-local name (`candidate`) is used on purpose: rebinding `knn_model`
# here would silently replace the fitted k=1 baseline with the k=39 model and
# change the result of re-running the prediction cell.
#
# NOTE: the candidates are compared on the test split itself, so the error of
# the k picked from this curve is an optimistic estimate.
test_error_rate = []

for k in range(1, 40):
    candidate = KNeighborsClassifier(n_neighbors=k)
    candidate.fit(scaled_X_train, y_train)
    y_pred_test = candidate.predict(scaled_X_test)

    # Error rate is the complement of accuracy.
    test_error_rate.append(1 - accuracy_score(y_test, y_pred_test))
    

In [None]:
# Show the recorded test error for each k, in the order the loop tried them.
test_error_rate

# Choose K Value With Elbow Method

In [None]:
# Plot the test error against k to look for the elbow.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(range(1, 40), test_error_rate, label="Test Error")
ax.legend()
ax.set_ylabel("Error Rate")
ax.set_xlabel("K Value")

### As we can see, k=4 has the lowest test error of all the k values tried.

# Creating a pipeline to find optimum k values

In [None]:
# Pipeline steps: standardise the features, then classify with KNN.
# Both estimators are created unfitted; n_neighbors is left at its default.
scaler, knn = StandardScaler(), KNeighborsClassifier()
operations = [
    ("scaler", scaler),
    ("knn", knn),
]

In [None]:
from sklearn.pipeline import Pipeline

# Chain the (name, estimator) steps into a single estimator.
pipe = Pipeline(steps=operations)

In [None]:
from sklearn.model_selection import GridSearchCV

# Grid search over k = 1..29 for the pipeline's "knn" step, using 10-fold
# cross-validation and accuracy as the selection metric.
k_values = list(range(1, 30))
param_grid = {"knn__n_neighbors": k_values}
full_cv_classifier = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    cv=10,
    scoring="accuracy",
)

In [None]:
# Run the cross-validated search on the training split. The unscaled X_train
# is passed because scaling is a step inside the pipeline being searched.
full_cv_classifier.fit(X_train,y_train)

In [None]:
# Inspect the parameters of the best pipeline found by the search
# (look at knn__n_neighbors for the selected k).
full_cv_classifier.best_estimator_.get_params()

### According to the grid search, the best number of neighbors for KNN is 1.

# Final Model

In [None]:
# Build the final pipeline with the k selected by the grid search rather than
# a hard-coded copy of it, so this cell stays in sync if the search result
# changes (different split, seed or parameter grid).
best_k = full_cv_classifier.best_params_["knn__n_neighbors"]

scaler = StandardScaler()
knn1 = KNeighborsClassifier(n_neighbors=best_k)
operations = [("scaler", scaler), ("knn1", knn1)]
pipe = Pipeline(operations)

# Fit on the training split only; the pipeline's scaler is fitted on X_train
# and reused to transform X_test at prediction time.
pipe.fit(X_train, y_train)
pipe_pred = pipe.predict(X_test)

In [None]:
# Per-class precision, recall and F1 of the final pipeline on the test split.
print(classification_report(y_test, pipe_pred))