# Importing the libraries

In [42]:
import pandas as pd
from sklearn.impute import SimpleImputer as Imputer

# Loading the train and test datasets

In [43]:
# Read the training samples from 'train.csv' (relative to the working
# directory) and echo the resulting frame as the cell's output.
train_dataset = pd.read_csv(filepath_or_buffer='train.csv')
train_dataset

Unnamed: 0,x,y,z,class
0,8.599291,9.729418,6.432371,1
1,6.592955,0.082556,1.969544,1
2,5.596471,9.815682,0.027295,1
3,2.743639,8.783177,4.041946,0
4,4.458362,5.750222,0.099070,0
...,...,...,...,...
995,4.617314,7.700236,5.907128,0
996,5.453472,1.798360,1.992616,0
997,2.553853,8.122934,3.970146,0
998,3.210456,3.342092,7.831479,0


In [44]:
# Read the test samples and discard the 'ID' column in a single chained
# expression (no in-place mutation), then echo the resulting frame.
test_dataset = pd.read_csv(filepath_or_buffer='test.csv').drop(columns='ID')
test_dataset

Unnamed: 0,x,y,z,actual-class
0,8.074807,5.988044,3.844979,1
1,4.952249,5.823205,1.612045,0
2,4.773178,0.078757,4.209442,0
3,9.845919,2.055448,3.525702,1
4,1.612492,1.320515,8.200455,0
5,7.987555,9.188111,7.222228,1
6,0.311558,3.97468,7.897371,0
7,1.219113,0.266045,2.741136,0
8,0.63634,1.831257,6.767459,0
9,0.890168,8.613714,2.884227,0


# Making the train and test split from the datasets

In [45]:
# Positional split of the training frame: the first three columns become the
# feature matrix and the fourth column becomes the target vector.
X_train = train_dataset.iloc[:, :3].to_numpy()
y_train = train_dataset.iloc[:, 3].to_numpy()


In [46]:
# Same positional split for the test frame (after 'ID' has been dropped):
# first three columns are the features, the fourth column holds the labels.
X_test = test_dataset.iloc[:, :3].to_numpy()
y_test = test_dataset.iloc[:, 3].to_numpy()


# Implementing KNN using manhattan as the distance measure

In [47]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier with k = 3, weights='distance' and the
# Manhattan metric for measuring how far apart samples are.
classifier_manhattan_distance = KNeighborsClassifier(
    n_neighbors=3,
    weights='distance',
    metric='manhattan',
)
# Fit on the training split; this call is the cell's last expression, so its
# return value is what the notebook displays.
classifier_manhattan_distance.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='manhattan',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='distance')

# Predicting the class labels for the test dataset

In [48]:
# Predicted class label for every row of the test feature matrix, produced by
# the Manhattan-distance classifier; the array is echoed as the cell output.
y_pred_manhattan_distance = classifier_manhattan_distance.predict(X=X_test)
y_pred_manhattan_distance

array([1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0])

# Constructing the confusion matrix and calculating accuracy, precision, recall and F1 score

In [56]:
from sklearn import metrics

# Evaluate the Manhattan-distance predictions against the true test labels.
# Every score in this cell must be computed from y_pred_manhattan_distance:
# y_pred_euclidean belongs to the other classifier and is only defined further
# down, so referencing it here fails when the notebook is run top to bottom.
print("Metrics for Manhattan Distance\n")
print("Confusion Matrix :\n",metrics.confusion_matrix(y_test, y_pred_manhattan_distance),"\n")
# Scores are returned as fractions in [0, 1]; multiply by 100 to print percentages.
print("Accuracy : ", (metrics.accuracy_score(y_test, y_pred_manhattan_distance)*100),"%\n")
print("Precision : ",(metrics.precision_score(y_test, y_pred_manhattan_distance)*100),"%\n")
print("F Score : ",(metrics.f1_score(y_test, y_pred_manhattan_distance)*100),"%\n")
print("recall : ", (metrics.recall_score(y_test, y_pred_manhattan_distance)*100),"%\n")

Metrics for Manhattan Distance

Confusion Matrix :
 [[14  0]
 [ 0  6]] 

Accuracy :  100.0 %

Precision :  100.0 %

F Score :  100.0 %

recall :  100.0 %



# Probability estimates for the final decision

In [33]:
# Per-class probability estimates from the Manhattan-distance classifier, one
# row per sample in X_test; the returned array is the cell's displayed output.
classifier_manhattan_distance.predict_proba(X_test)

array([[0.        , 1.        ],
       [0.57249171, 0.42750829],
       [1.        , 0.        ],
       [0.        , 1.        ],
       [1.        , 0.        ],
       [0.        , 1.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [0.        , 1.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [0.59791742, 0.40208258],
       [0.        , 1.        ],
       [0.        , 1.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ]])

# Implementing KNN using euclidean as the distance measure

In [34]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier with k = 3, weights='distance' and the
# Euclidean metric. `p` is the Minkowski power parameter and has to be a
# number: a string such as '2' is rejected by scikit-learn's parameter
# validation in recent releases. p=2 is the value that corresponds to
# Euclidean distance.
classifier_euclidean_distance = KNeighborsClassifier(
    n_neighbors=3,
    weights='distance',
    metric='euclidean',
    p=2,
)
# Fit on the training split; this call is the cell's last expression, so its
# return value is what the notebook displays.
classifier_euclidean_distance.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='euclidean',
                     metric_params=None, n_jobs=None, n_neighbors=3, p='2',
                     weights='distance')

# Predicting the class labels for the test dataset

In [35]:
# Predicted class label for every row of the test feature matrix, produced by
# the Euclidean-distance classifier; the array is echoed as the cell output.
y_pred_euclidean = classifier_euclidean_distance.predict(X=X_test)
y_pred_euclidean

array([1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0])

# Constructing the confusion matrix and calculating accuracy, precision, recall and F1 score

In [55]:
from sklearn import metrics

# Evaluate the Euclidean-distance predictions against the true test labels.
# The banner names the Euclidean classifier because every score below is
# computed from y_pred_euclidean.
print("Metrics for Euclidean Distance\n")
print("Confusion Matrix :\n",metrics.confusion_matrix(y_test, y_pred_euclidean),"\n")
# Scores are returned as fractions in [0, 1]; multiply by 100 to print percentages.
print("Accuracy : ", (metrics.accuracy_score(y_test, y_pred_euclidean)*100),"%\n")
print("Precision : ", (metrics.precision_score(y_test, y_pred_euclidean)*100),"%\n")
print("F Score : ", (metrics.f1_score(y_test, y_pred_euclidean)*100),"%\n")
print("recall : ", (metrics.recall_score(y_test, y_pred_euclidean)*100),"%\n")

Metrics for Manhattan Distance

Confusion Matrix :
 [[13  1]
 [ 0  6]] 

Accuracy :  95.0 %

Precision :  85.71428571428571 %

F Score :  92.3076923076923 %

recall :  100.0 %



# Probability estimates for the final decision

In [37]:
# Per-class probability estimates from the Euclidean-distance classifier, one
# row per sample in X_test; the returned array is the cell's displayed output.
classifier_euclidean_distance.predict_proba(X_test)

array([[0.        , 1.        ],
       [0.67384254, 0.32615746],
       [1.        , 0.        ],
       [0.        , 1.        ],
       [1.        , 0.        ],
       [0.        , 1.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [0.        , 1.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [0.33961354, 0.66038646],
       [0.        , 1.        ],
       [0.        , 1.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ]])

# Comparing the metrics of the Manhattan and Euclidean classifiers, we can conclude that using Manhattan as the distance measure gives better performance on this test set (100% vs. 95% accuracy).