In [13]:
# importing all the required libraries

import pandas as pd
import numpy as np
from sklearn import utils

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

from sklearn import metrics

import matplotlib.pyplot as plt

###  Reading the dataset

In [14]:
dataset = pd.read_csv('dataset_arranged.csv')
print(dataset.head())

   SES  MMSE  eTIV   nWBV  CDR
0    2    27  1987  0.696  0.0
1    2    30  2004  0.681  0.0
2    0    23  1678  0.736  0.5
3    0    28  1738  0.713  0.5
4    0    22  1698  0.701  0.5


### Replacing empty data with the mean of each feature

In [15]:
replace_zero = ['SES','MMSE','eTIV','nWBV']

for column in replace_zero:
    dataset[column] = dataset[column].replace(0,np.nan)
    mean = int(dataset[column].mean(skipna=True))
    dataset[column] = dataset[column].replace(np.nan,mean)

### Splitting Dataset

In [16]:
x = dataset.iloc[:,0:4]
y = dataset.iloc[:,4]
x_train, x_test, y_train, y_test = train_test_split(x,y,random_state=11,test_size=0.2)

### Feature Scaling

In [17]:
sc_x = StandardScaler()
x_train = sc_x.fit_transform(x_train)
x_test = sc_x.transform(x_test)

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  This is separate from the ipykernel package so we can avoid doing imports until


### Encoding data from continuous to multiclass

In [18]:
lab_enc = LabelEncoder()

y_train_encoded = lab_enc.fit_transform(y_train)
y_test_encoded = lab_enc.fit_transform(y_test)

print(utils.multiclass.type_of_target(y_train))
print(utils.multiclass.type_of_target(y_train_encoded))

print(utils.multiclass.type_of_target(y_test))
print(utils.multiclass.type_of_target(y_test_encoded))

continuous
multiclass
continuous
multiclass


## Nearest Neighbor Model

### Euclidean Distance 

In [19]:
#Model Definition

classifier = KNeighborsClassifier(n_neighbors=1,p=2,metric='euclidean',weights='uniform')


In [20]:
#Training the Model

classifier.fit(x_train,y_train_encoded)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='euclidean',
           metric_params=None, n_jobs=None, n_neighbors=1, p=2,
           weights='uniform')

In [21]:
#Predicting the test set results

y_pred = classifier.predict(x_test)
y_pred_df = pd.DataFrame(y_pred)

In [22]:
#Confusion Matrix

cm = confusion_matrix(y_test_encoded,y_pred_df)
print("Confusion Matrix\n\n",cm)
print("\n")

#Evaluating the Model

print(metrics.classification_report(y_test_encoded,y_pred_df))



Confusion Matrix

 [[38  2  0]
 [ 6 16  3]
 [ 0  2  8]]


              precision    recall  f1-score   support

           0       0.86      0.95      0.90        40
           1       0.80      0.64      0.71        25
           2       0.73      0.80      0.76        10

   micro avg       0.83      0.83      0.83        75
   macro avg       0.80      0.80      0.79        75
weighted avg       0.82      0.83      0.82        75



In [23]:
#Accuracy 

accuracy = accuracy_score(y_test_encoded,y_pred_df)*100
print("\nAccuracy= ",accuracy.round(2),'%')


Accuracy=  82.67 %


In [24]:
#recall
recall= recall_score(y_test_encoded,y_pred_df, average ='micro')*100
recall1= recall_score(y_test_encoded,y_pred_df, average ='macro')*100
print('\nRecall for micro avg is ',recall.round(2),'%')
print('\nRecall is macro avg is',recall1.round(2),'%')


Recall for micro avg is  82.67 %

Recall is macro avg is 79.67 %


In [25]:
#precision
precision = precision_score(y_test_encoded,y_pred_df, average='micro')
precision1 = precision_score(y_test_encoded,y_pred_df, average='macro')
print("\n Precision for micro is ", precision.round(2),"%")
print("\n Precision is macro", precision1.round(2),"%")


 Precision for micro is  0.83 %

 Precision is macro 0.8 %


In [26]:
#f1 score
f = f1_score(y_test_encoded,y_pred_df,average= 'micro')
fm = f1_score(y_test_encoded,y_pred_df,average= 'macro')*100
print("\n F1 measure for micro is ", f.round(2),"%")
print("\n F1 measure for micro is ", fm.round(2),"%")


 F1 measure for micro is  0.83 %

 F1 measure for micro is  79.26 %


### Manhattan distance

In [29]:
#Model Definition

classifier1 = KNeighborsClassifier(n_neighbors=1,p=2,metric='manhattan',weights='uniform')

In [30]:
#Training the Model

classifier1.fit(x_train,y_train_encoded)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='manhattan',
           metric_params=None, n_jobs=None, n_neighbors=1, p=2,
           weights='uniform')

In [31]:
#Predicting the test set results

y_pred1 = classifier1.predict(x_test)
y_pred_df1 = pd.DataFrame(y_pred1)

In [32]:
#Confusion Matrix

cm1 = confusion_matrix(y_test_encoded,y_pred_df1)
print("Confusion Matrix\n\n",cm1)
print("\n")

#Evaluating the Model

print(metrics.classification_report(y_test_encoded,y_pred_df1))

#Accuracy 

accuracy = accuracy_score(y_test_encoded,y_pred_df1)*100
print("\nAccuracy= ",accuracy.round(2),'%')

#recall
recall= recall_score(y_test_encoded,y_pred_df1, average ='micro')*100
recall1= recall_score(y_test_encoded,y_pred_df1, average ='macro')*100
print('\nRecall for micro is ',recall.round(2),'%')
print('\nRecall for macro is ',recall1.round(2),'%')

#precision
precision = precision_score(y_test_encoded,y_pred_df1, average ='micro')*100
precision1 = precision_score(y_test_encoded,y_pred_df1, average ='macro')*100
print('\nPrecision for micro is ',precision.round(2),'%')
print('\nPrecision for macro is ',precision1.round(2),'%')

#f1
f = f1_score(y_test_encoded,y_pred_df1,average= 'micro')
fm = f1_score(y_test_encoded,y_pred_df1,average= 'macro')*100
print("\n F1 measure for micro is ", f.round(2),"%")
print("\n F1 measure for micro is ", fm.round(2),"%")

Confusion Matrix

 [[36  4  0]
 [ 6 15  4]
 [ 0  3  7]]


              precision    recall  f1-score   support

           0       0.86      0.90      0.88        40
           1       0.68      0.60      0.64        25
           2       0.64      0.70      0.67        10

   micro avg       0.77      0.77      0.77        75
   macro avg       0.73      0.73      0.73        75
weighted avg       0.77      0.77      0.77        75


Accuracy=  77.33 %

Recall for micro is  77.33 %

Recall for macro is  73.33 %

Precision for micro is  77.33 %

Precision for macro is  72.51 %

 F1 measure for micro is  0.77 %

 F1 measure for micro is  72.77 %


### Minkowski Distance

In [33]:
#Model Definition

classifier2 = KNeighborsClassifier(n_neighbors=1,p=2,metric='minkowski',weights='uniform')

In [34]:
#Training the Model

classifier2.fit(x_train,y_train_encoded)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=1, p=2,
           weights='uniform')

In [35]:
#Predicting the test set results

y_pred2 = classifier2.predict(x_test)
y_pred_df2 = pd.DataFrame(y_pred2)

In [36]:
#Confusion Matrix

cm3 = confusion_matrix(y_test_encoded,y_pred_df2)
print("Confusion Matrix\n\n",cm3)
print("\n")

#Evaluating the Model

print(metrics.classification_report(y_test_encoded,y_pred_df2))

#Accuracy 

accuracy = accuracy_score(y_test_encoded,y_pred_df2)*100
print("\nAccuracy= ",accuracy.round(2),'%')

#recall
recall= recall_score(y_test_encoded,y_pred_df2, average ='micro')*100
recall1= recall_score(y_test_encoded,y_pred_df2, average ='macro')*100
print('\nRecall for micro is ',recall.round(2),'%')
print('\nRecall for macro is ',recall1.round(2),'%')

#precision
precision= precision_score(y_test_encoded,y_pred_df2, average ='micro')*100
precision1= precision_score(y_test_encoded,y_pred_df2, average ='macro')*100
print('\nPrecision for micro is ',precision.round(2),'%')
print('\Precision for macro  is ',precision1.round(2),'%')

#f1
f = f1_score(y_test_encoded,y_pred_df2,average= 'micro')
fm = f1_score(y_test_encoded,y_pred_df2,average= 'macro')*100
print("\n F1 measure for micro is ", f.round(2),"%")
print("\n F1 measure for micro is ", fm.round(2),"%")

Confusion Matrix

 [[38  2  0]
 [ 6 16  3]
 [ 0  2  8]]


              precision    recall  f1-score   support

           0       0.86      0.95      0.90        40
           1       0.80      0.64      0.71        25
           2       0.73      0.80      0.76        10

   micro avg       0.83      0.83      0.83        75
   macro avg       0.80      0.80      0.79        75
weighted avg       0.82      0.83      0.82        75


Accuracy=  82.67 %

Recall for micro is  82.67 %

Recall for macro is  79.67 %

Precision for micro is  82.67 %
\Precision for macro  is  79.7 %

 F1 measure for micro is  0.83 %

 F1 measure for micro is  79.26 %
