In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as msno
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix


In [None]:
# Load the two CSV variants shipped with the dataset:
#   data1 <- column_2C_weka.csv
#   data2 <- column_3C_weka.csv
data1 = pd.read_csv(
    filepath_or_buffer='../input/biomechanical-features-of-orthopedic-patients/column_2C_weka.csv'
)
data2 = pd.read_csv(
    filepath_or_buffer='../input/biomechanical-features-of-orthopedic-patients/column_3C_weka.csv'
)

In [None]:
# Preview the first ten rows of data1.
data1.head(n=10)

In [None]:
# Print pandas' structural summary of data1 (DataFrame.info).
data1.info()

In [None]:
# missingno bar chart for data1, used here to check the columns for missing values.
msno.bar(data1)

In [None]:
# Summary statistics of data1, transposed so each described column becomes a row.
data1.describe().transpose()

In [None]:
# Number of rows per value of the `class` column in data1.
data1['class'].value_counts()

In [None]:
# Number of rows per value of the `class` column in data2.
data2['class'].value_counts()

In [None]:
# Class balance of data1 (loaded from column_2C_weka.csv), shown as percentages.
colors = ('red', 'green')
data1['class'].value_counts().plot(
    kind='pie', shadow=True, colors=colors, autopct='%.2f', figsize=(6, 6)
)
# data1 is the whole 2C dataset (it is only split into train/test later),
# so the title names the dataset instead of calling it "Train data".
plt.title('Classes of the 2-class dataset')
plt.show()

In [None]:
# Class balance of data2 (loaded from column_3C_weka.csv), shown as percentages.
colors = ('red', 'green', 'blue')
data2['class'].value_counts().plot(
    kind='pie', shadow=True, colors=colors, autopct='%.2f', figsize=(6, 6)
)
# data2 is the 3C variant of the dataset and is never used as a test set in
# this notebook, so the title names the dataset instead of calling it "Test data".
plt.title('Classes of the 3-class dataset')
plt.show()

In [None]:
# One scatter plot per ordered pair of distinct feature columns, coloured by class.
col = [
    'pelvic_incidence',
    'pelvic_tilt numeric',
    'lumbar_lordosis_angle',
    'sacral_slope',
    'pelvic_radius',
    'degree_spondylolisthesis',
]

# Both (a, b) and (b, a) are kept, so every pair is drawn with the axes swapped too.
feature_pairs = [
    (x_name, y_name)
    for x_name in col
    for y_name in col
    if x_name != y_name
]

for x_name, y_name in feature_pairs:
    scatter_ax = sns.scatterplot(data=data1, x=x_name, y=y_name, hue="class")
    scatter_ax.set_xlabel(x_name)
    scatter_ax.set_ylabel(y_name)
    scatter_ax.legend()
    plt.show()

In [None]:
# Overwrite data1['class'] with the integer codes produced by a LabelEncoder.
# The fitted encoder stays available as `label`.
label = LabelEncoder()
data1['class'] = label.fit(data1['class']).transform(data1['class'])

In [None]:
# Target vector: the `class` column; feature matrix: every other column of data1.
y = data1['class']
X = data1.drop(columns=['class'])

In [None]:
# Annotated heatmap of the pairwise correlations between the feature columns in X.
plt.figure(figsize=(15, 10))
# `linewidths` is seaborn's documented parameter for the cell separator width;
# the previous `linewidth` spelling only worked through matplotlib's alias handling.
sns.heatmap(X.corr(), linecolor='white', linewidths=1, cmap="YlGnBu", annot=True)
plt.title('Feature correlation matrix')
# Render the figure explicitly so the cell does not echo the Axes repr.
plt.show()

In [None]:
# Split BEFORE scaling. Fitting the scaler on the full feature matrix lets the
# min/max of the held-out rows leak into the training features, which makes the
# test score optimistic. The split depends only on the row count and
# random_state, so the train/test rows are the same as before.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=44, shuffle=True
)

# Learn the [0, 1] scaling from the training rows only, then apply that same
# transform to the test rows (test values may fall slightly outside [0, 1]).
scaler = MinMaxScaler(copy=True, feature_range=(0, 1))
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:

# Grid search over KNN hyper-parameters using 2-fold cross-validation on the
# training split. Every searched parameter is set by the grid, so the base
# estimator is created with defaults.
SelectedModel = KNeighborsClassifier()
SelectedParameters = {
    'weights': ('distance', 'uniform'),
    # Plain Python ints keep best_params_ free of numpy scalar types.
    'n_neighbors': list(range(1, 30)),
    'algorithm': ('auto', 'ball_tree', 'kd_tree', 'brute'),
}
GridSearchModel = GridSearchCV(SelectedModel, SelectedParameters, cv=2, return_train_score=True)
GridSearchModel.fit(X_train, y_train)

# The former `sorted(GridSearchModel.cv_results_.keys())` line was removed: it
# was not the last expression of the cell, so its result was silently discarded.
print('Best Parameters are :', GridSearchModel.best_params_)
print('Best Estimator is :', GridSearchModel.best_estimator_)

In [None]:

# Build the final classifier from whatever the grid search selected, instead of
# hard-coding values copied from an earlier run's printed output. Parameters
# not covered by the grid keep scikit-learn's defaults.
KNNClassifierModel = KNeighborsClassifier(**GridSearchModel.best_params_)
KNNClassifierModel.fit(X_train, y_train)

# Mean accuracy on the training and held-out splits.
print('KNNClassifierModel Train Score is : ' , KNNClassifierModel.score(X_train, y_train))
print('KNNClassifierModel Test Score is : ' , KNNClassifierModel.score(X_test, y_test))
print('----------------------------------------------------')

# Hard predictions and per-class probabilities for the test split; only the
# first ten of each are printed to keep the output short.
y_pred = KNNClassifierModel.predict(X_test)
y_pred_prob = KNNClassifierModel.predict_proba(X_test)
print('Predicted Value for KNNClassifierModel is : ' , y_pred[:10])
print('Prediction Probabilities Value for KNNClassifierModel is : ' , y_pred_prob[:10])

In [None]:
# Confusion matrix of the test-set predictions (rows: true class, columns: predicted class).
CM = confusion_matrix(y_test, y_pred)
print('Confusion Matrix is : \n', CM)

f, ax = plt.subplots(figsize=(10, 8))
# `center` expects a numeric value to centre a diverging colormap on; the old
# `center=True` was interpreted as 1 and skewed the colours, so it is dropped
# and the default sequential colormap is used for these non-negative counts.
sns.heatmap(CM, annot=True, fmt='d', ax=ax)
ax.set_xlabel('Predicted class')
ax.set_ylabel('True class')
ax.set_title('KNN confusion matrix (test set)')
plt.show()