In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers,callbacks
import seaborn as sns
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
input_files = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Importing and exploring datasets

In [None]:
# Load the two-class (2C) and three-class (3C) versions of the dataset.
df2c, df3c = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/biomechanical-features-of-orthopedic-patients/column_2C_weka.csv',
        '../input/biomechanical-features-of-orthopedic-patients/column_3C_weka.csv',
    )
)

## Context
### The data have been organized in two different but related classification tasks.

#### column_3C_weka.csv (file with three class labels)

The first task consists in classifying patients as belonging to one out of three categories: 
* Normal (100 patients), 
* Disk Hernia (60 patients) or 
* Spondylolisthesis (150 patients).

For the second task (column_2C_weka.csv), the categories Disk Hernia and Spondylolisthesis were merged into a single category labelled as 'abnormal'.
Thus, the second task consists in classifying patients as belonging to one out of two categories: 
* Normal (100 patients) or 
* Abnormal (210 patients).

In [None]:
# Report (rows, columns) for both raw tables on one line.
shape_summary = ('2C shape: ', df2c.shape, '3C shape: ', df3c.shape)
print(*shape_summary)

In [None]:
# Preview the first rows of the two-class (2C) table.
df2c.head()

In [None]:
# Preview the first rows of the three-class (3C) table.
df3c.head()

In [None]:
# Give the 2C table's 'pelvic_tilt numeric' column the shorter name
# 'pelvic_tilt' that the plots and the concatenation below rely on.
df2c = df2c.rename({'pelvic_tilt numeric': 'pelvic_tilt'}, axis='columns')

In [None]:
# Stack the 2C and 3C tables row-wise so the exploratory plots can show every
# class label (Normal / Abnormal / Hernia / Spondylolisthesis) in one frame.
#
# NOTE(review): per the dataset description both files cover the same
# patients, so each patient is expected to appear twice in `df` (once per
# labelling). That is fine for the plots but matters for modelling.
#
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# every row label occurs twice, which can break index-aligned operations.
df = pd.concat([df2c, df3c], axis=0, ignore_index=True)

In [None]:
# (rows, columns) of the combined frame.
df.shape

In [None]:
# Preview the first rows of the combined frame.
df.head()

#### Checking unique values in **Class** column 

In [None]:
# Bar chart of how many rows carry each class label.
sns.set_style('darkgrid')
fig, ax = plt.subplots(figsize=(9, 4))

# Take the labels and the heights from the SAME value_counts() result.
# unique() returns labels in order of first appearance while value_counts()
# is sorted by frequency, so pairing the two attaches counts to the wrong
# class names.
class_counts = df['class'].value_counts()
ax.bar(class_counts.index, class_counts.values)
ax.set_xlabel('class')
ax.set_ylabel('count')
ax.set_title('Count of all Classes')

##### *Abnormal* class is a mixture of both **Hernia** and **Spondylolisthesis**

# Visualizing relationship between these parameters and Classes involved

#### **Pelvic incidence** is considered to have a *normal range* of values between **33 and 85 degrees**.
#### *Visualizing* this parameter

In [None]:
# Pelvic incidence against pelvic tilt, coloured by class, with marginal rugs
# drawn on the same axes.
fig, ax = plt.subplots(figsize=(7, 7))
incidence_tilt = dict(data=df, x='pelvic_incidence', y='pelvic_tilt', hue='class')
sns.scatterplot(ax=ax, **incidence_tilt)
sns.rugplot(ax=ax, **incidence_tilt)

In [None]:
# Distribution of pelvic incidence within each class.
fig, ax = plt.subplots(figsize=(11, 6))
sns.violinplot(data=df, x='class', y='pelvic_incidence', ax=ax)

#### **Pelvic tilt** is considered to have a *normal range* of **13 ± 6 degrees**.
#### *Visualizing* this parameter

In [None]:
# Stacked histogram of pelvic tilt, one colour per class (figure-level plot,
# twice as wide as it is tall).
sns.displot(
    data=df,
    x='pelvic_tilt',
    hue='class',
    multiple='stack',
    aspect=14 / 7,
)

#### **lumbar_lordosis_angle** is considered to have a *normal range* of values between **41±11 degrees(Male)** and **46±11 degrees(Female)**.
#### *Visualizing* this parameter

In [None]:
# Box plot of the lumbar lordosis angle for each class.
fig, ax = plt.subplots(figsize=(11, 7))
sns.boxplot(x='class', y='lumbar_lordosis_angle', data=df, ax=ax)

#### **sacral_slope** is considered to have a *normal range* of values between **36 degrees** and **50 degrees**.
#### *Visualizing* this parameter

In [None]:
# Distribution of sacral slope within each class.
fig, ax = plt.subplots(figsize=(11, 6))
sns.violinplot(data=df, x='class', y='sacral_slope', ax=ax)

# Converting categorical columns to numerical ones

In [None]:
# Look at the frame before the class column is encoded.
df.head()

#### Preparing the dataset

In [None]:
# NOTE(review): same preview as the previous cell; one of the two is redundant.
df.head()

In [None]:
# Distinct class labels present, in order of first appearance.
df['class'].unique()

In [None]:
# Binary target: 1 = Normal, 0 = any abnormal condition
# (Abnormal / Hernia / Spondylolisthesis).
CLASS_TO_LABEL = {'Normal': 1, 'Abnormal': 0, 'Hernia': 0, 'Spondylolisthesis': 0}

# Map each label explicitly and cast to int instead of relying on replace()
# to downcast the object column, which newer pandas versions deprecate.
# Values that are not keys of the mapping (e.g. the 0/1 left by an earlier run
# of this cell) pass through unchanged, so the cell is safe to re-run;
# an unexpected string label makes astype(int) raise a ValueError rather than
# slipping into the model as a mixed-type target.
df['class'] = df['class'].map(lambda label: CLASS_TO_LABEL.get(label, label)).astype(int)

In [None]:
# Build the feature matrix and target vector for modelling.
#
# `df` was built by stacking the 2C and 3C tables, which per the dataset
# description hold the same patients. Once the class column has been collapsed
# to 0/1, the two copies of a patient are expected to be exact duplicate rows.
# Left in place, a random train/test split puts copies of training rows into
# the test set, so a nearest-neighbour model can score them by memorisation.
# Keep one copy of each row.
# NOTE(review): this also merges genuinely distinct patients whose
# measurements are identical in every column — presumably none; confirm.
model_df = df.drop_duplicates()

xtr = model_df.drop(columns='class')
ytr = model_df['class']

In [None]:
from sklearn.preprocessing import StandardScaler as sc

# Standardise every feature column; `xtr` is rebound to the scaled values.
# NOTE(review): the scaler is fitted here on all rows, before the train/test
# split further down, so test-set statistics influence the scaling. Consider
# fitting on the training split only.
scc = sc()
xtr = scc.fit(xtr).transform(xtr)

# Building the ML model
### **HyperParameter** will be tuned by *GridSearchCV*

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the rows for testing, with a fixed seed for repeatability.
# `xtr` / `ytr` are rebound to the training portion, so re-running this cell
# on its own would split the already-split data again.
split_parts = train_test_split(xtr, ytr, test_size=0.3, random_state=108)
xtr, xte, ytr, yte = split_parts

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

In [None]:
# k-nearest-neighbours classifier whose hyper-parameters are chosen by an
# exhaustive 5-fold cross-validated grid search.
knn = KNeighborsClassifier()

# Candidate values tried for each hyper-parameter.
param_grid = dict(
    n_neighbors=[4, 5, 6],
    weights=['distance'],
    algorithm=['auto', 'ball_tree', 'kd_tree', 'brute'],
    leaf_size=[25, 23, 24],
    metric=['manhattan'],
)

grid = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    cv=5,
    n_jobs=1,
    verbose=1,
)

# **Fitting** the *model*
#### with **Training data**(*xtr* & *ytr*)

In [None]:
# Run the grid search on the training split.
grid.fit(X=xtr, y=ytr)

In [None]:
# Hyper-parameter combination selected by the grid search.
grid.best_params_

## **Time** for some cool *Predictions*

In [None]:
# Predict labels for the held-out test features with the fitted grid search.
ypred = grid.predict(X=xte)

### Evaluating the *model*

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [None]:
# Per-class precision / recall / F1 on the held-out test set.
report_text = classification_report(y_true=yte, y_pred=ypred)
print(report_text)

In [None]:
# Confusion matrix of the test-set predictions
# (rows = true label, columns = predicted label).
# fmt='d' prints the cell counts as plain integers; the default annotation
# format ('.2g') shows counts of 100 or more in scientific notation
# (e.g. 1.3e+02).
ax = sns.heatmap(confusion_matrix(yte, ypred), annot=True, fmt='d')
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')

In [None]:
# Overall fraction of test rows that were classified correctly.
test_accuracy = accuracy_score(y_true=yte, y_pred=ypred)
print('Accuracy of model: ', test_accuracy)

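# *97.84%* **Accuracy** achieved

*Note:* this figure was recorded from an earlier run and is not recomputed by the notebook; see the accuracy printed above for the current value. The 2C and 3C files describe the same patients, so whenever both copies of a patient reach the train/test split the score will be optimistic.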