In [24]:
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import metrics
from sklearn.preprocessing import normalize,scale
import numpy as np
import pandas as pd 

from sklearn.decomposition import PCA as sklearnPCA
from pandas.plotting import parallel_coordinates

In [25]:
#importing dataset and converting to datasframe
header = ['pelvic_incidence', 'pelvic_tilt', 'lumbar_lordosis_angle', 'sacral_slope', 'pelvic_radius', 'grade_of_spondylolisthesis', 'label']
df = pd.read_csv('./vertebral_column_data/column_3C.dat', sep=' ', header=None, names=header)
print(df.dtypes)


pelvic_incidence              float64
pelvic_tilt                   float64
lumbar_lordosis_angle         float64
sacral_slope                  float64
pelvic_radius                 float64
grade_of_spondylolisthesis    float64
label                          object
dtype: object


In [26]:
# extracting features and lables
x = df.iloc[:,0:6]
y = df.iloc[:,6]

# split into training and test subsets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.4)

In [27]:
model = KNeighborsClassifier(n_neighbors=5, weights='distance')

In [28]:
#10-fold cross validation
scores = cross_val_score(model, x, y, scoring='accuracy', cv=10)
# print scores
print ("10-Fold Accuracy : ", scores.mean()*100)

10-Fold Accuracy :  83.5483870967742


In [29]:
#creation of the confusion matrix
model.fit(x_train,y_train)
print ("Testing Accuracy : ",model.score(x_test, y_test)*100)

color_dict = {'DH' : 'cyan', 'SL' : 'magenta', 'NO' : '#1B1B1B'}
predicted = model.predict(x)
predicted_proba = model.predict_proba(x)

Testing Accuracy :  89.51612903225806


In [30]:
# confusion matrix
cm = metrics.confusion_matrix(y, predicted, labels=['DH', 'NO', 'SL'])
print (cm)
print()
print (metrics.classification_report(y, predicted))

plt.show()


[[ 55   5   0]
 [  4  96   0]
 [  1   3 146]]

             precision    recall  f1-score   support

         DH       0.92      0.92      0.92        60
         NO       0.92      0.96      0.94       100
         SL       1.00      0.97      0.99       150

avg / total       0.96      0.96      0.96       310

