#### import libraries

In [1]:
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score,classification_report
     

#### Load dataset

In [4]:
# Both files are comma-separated with no header row, so the column labels are
# supplied here; index_col=0 makes the first column of each file the row index.
glass_columns = ['RI','Na','Mg','Al','Si','K','Ca','Ba','Fe','GlassType']
read_options = dict(delimiter=',', header=None, index_col=0, names=glass_columns)

df_train = pd.read_table('/content/sample_data/trainKNN.txt', **read_options)
df_test = pd.read_table('/content/sample_data/testKNN.txt', **read_options)

In [5]:
# Report (rows, columns) of the training frame followed by the test frame.
train_shape, test_shape = df_train.shape, df_test.shape
print(train_shape, test_shape)

(196, 10) (18, 10)


In [6]:
# Preview the first five training rows.
df_train.head(n=5)

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,GlassType
1,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
2,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
3,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
4,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
5,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1


#### check missing data

In [7]:
# Count missing values per column in the training frame.
df_train.isna().sum()

RI           0
Na           0
Mg           0
Al           0
Si           0
K            0
Ca           0
Ba           0
Fe           0
GlassType    0
dtype: int64

In [8]:
# Count missing values per column in the test frame.
df_test.isna().sum()

RI           0
Na           0
Mg           0
Al           0
Si           0
K            0
Ca           0
Ba           0
Fe           0
GlassType    0
dtype: int64

#### Separate features and target

In [9]:
# Columns at positions 0-8 are the inputs; the column at position 9 is the label.
feature_positions = [0, 1, 2, 3, 4, 5, 6, 7, 8]
label_position = 9

x_train = df_train.iloc[:, feature_positions]
x_test = df_test.iloc[:, feature_positions]
y_train = df_train.iloc[:, label_position]
y_test = df_test.iloc[:, label_position]

#### Feature Scaling

In [11]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training features only, then apply the
# same fitted transform to both the training and the test features.
scaler = StandardScaler().fit(x_train)
x_train_std, x_test_std = scaler.transform(x_train), scaler.transform(x_test)

#### Build Models

In [12]:
from scipy.spatial import distance  # not needed below; left in place so any cell that references `distance` still runs

# Sweep k = 1..10 for a KNN classifier using squared Euclidean distance and
# record the accuracy against y_test for each k.
#
# The metric is given by name with an explicit brute-force search rather than as
# the callable `distance.sqeuclidean`. Squared Euclidean distance does not
# satisfy the triangle inequality, which tree-based neighbor searches rely on to
# prune candidates, so only an exhaustive search is guaranteed to return the
# true nearest neighbors. A named metric also avoids calling a Python function
# for every pair of points.
# NOTE(review): accuracy is measured on the test data, so picking k from this
# sweep tunes on the test set; a validation split or cross-validation is cleaner.
num_neighbors = [1,2,3,4,5,6,7,8,9,10]
acc_results = []
for num in num_neighbors:
    model = KNeighborsClassifier(n_neighbors=num, metric='sqeuclidean', algorithm='brute').fit(x_train_std, y_train)
    acc_results.append(accuracy_score(y_test, model.predict(x_test_std)))

# Keep this sweep reachable under its own name: `acc_results` is rebound by the
# Manhattan sweep cell.
sqeuclidean_acc_results = acc_results

In [13]:
from scipy.spatial import distance  # not needed below; left in place so any cell that references `distance` still runs

# Sweep k = 1..10 for a KNN classifier using Manhattan (city-block) distance and
# record the accuracy against y_test for each k.
#
# The metric is given by its built-in name instead of the callable
# `distance.cityblock`: it is the same distance, but a callable is invoked from
# Python for every pair of points, which is much slower than the native metric.
# NOTE(review): accuracy is measured on the test data, so picking k from this
# sweep tunes on the test set; a validation split or cross-validation is cleaner.
num_neighbors = [1,2,3,4,5,6,7,8,9,10]
acc_results = []
for num in num_neighbors:
    model = KNeighborsClassifier(n_neighbors=num, metric='manhattan').fit(x_train_std, y_train)
    acc_results.append(accuracy_score(y_test, model.predict(x_test_std)))

# Also expose this sweep under a metric-specific name so it is not confused with
# results from another metric stored in `acc_results`.
manhattan_acc_results = acc_results
     

In [14]:
# Final models, one per distance measure.
# NOTE(review): k=8 and k=1 are hard-coded; presumably they were read off the
# accuracy sweeps — re-check them whenever the sweeps are re-run.
#
# Squared Euclidean distance violates the triangle inequality that tree-based
# neighbor searches depend on, so it is requested by name together with an
# exhaustive (brute-force) search. Manhattan distance uses the built-in metric
# name, which avoids the per-pair Python call overhead of a callable metric.
euclid_model = KNeighborsClassifier(n_neighbors=8, metric='sqeuclidean', algorithm='brute').fit(x_train_std, y_train)  # Squared Euclidean distance model
manhattan_model = KNeighborsClassifier(n_neighbors=1, metric='manhattan').fit(x_train_std, y_train)  # Manhattan distance model

In [15]:
# Predict the test set with each fitted model, then place the predictions next
# to the true labels for a side-by-side look at the first rows.
euclid_predictions = euclid_model.predict(x_test_std)
manhattan_predictions = manhattan_model.predict(x_test_std)

comparison_columns = {
    'actual': y_test,
    'manhattan': manhattan_predictions,
    'euclid': euclid_predictions,
}
df = pd.DataFrame(comparison_columns)
df.head()

Unnamed: 0,actual,manhattan,euclid
68,1,1,1
69,1,1,1
70,1,1,1
144,2,2,2
145,2,1,1


In [16]:
# Print the test-set accuracy of each model, Manhattan first.
for label, predictions in (
    ("Accuracy score for the model with manhattan distance: ", manhattan_predictions),
    ("Accuracy score for the model with squared euclidean distance: ", euclid_predictions),
):
    print(label, accuracy_score(y_test, predictions))

Accuracy score for the model with manhattan distance:  0.6666666666666666
Accuracy score for the model with squared euclidean distance:  0.6111111111111112
