In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
# Print the full path of every file found under the Kaggle input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = (os.path.join(dirname, filename) for filename in filenames)
    for full_path in full_paths:
        print(full_path)
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Classification of Patients Using KNN
# First Task : 
* Categorize data according to disease types
* There are three different categories:
    1. Normal
    2. Disk Hernia
    3. Spondylolisthesis

In [None]:
# Read the three-class orthopedic dataset and print a summary of its columns.
csv_path = "../input/biomechanical-features-of-orthopedic-patients/column_3C_weka.csv"
df = pd.read_csv(csv_path)
df.info()

In [None]:
# Show the last ten rows of the raw data.
df.tail(n=10)

In [None]:
# List the distinct class labels; they are text and must be converted to
# numbers before modelling.
pd.unique(df["class"])

In [None]:
# Encode the text class labels as integers:
#   "Normal" -> 0, "Hernia" -> 1, any other label -> 2.
class_codes = {"Normal": 0, "Hernia": 1}
other_class_code = 2

# Only encode while the column still holds text. Without this guard,
# re-running the cell would compare the already-encoded integers against the
# text labels, match none of them, and overwrite every row with 2.
if not pd.api.types.is_numeric_dtype(df["class"]):
    # map() yields NaN for labels missing from class_codes; those rows get the
    # fallback code, mirroring the final `else` branch of the original loop.
    df["class"] = df["class"].map(class_codes).fillna(other_class_code).astype(int)

df.tail()

In [None]:
# Separate the feature columns from the target column.
x_data = df.drop(columns=["class"])
y_data = df["class"]
x_data

In [None]:
# Min-max scale each feature column independently before classification.
# DataFrame.min()/max() reduce column-wise. np.min/np.max on a DataFrame
# forward axis=None, which pandas >= 2.0 treats as "reduce over both axes",
# so they would return one global scalar and scale every feature by the same
# range instead of its own.
feature_min = x_data.min()
feature_range = x_data.max() - feature_min
x = (x_data - feature_min) / feature_range
x.tail(10)

In [None]:
# Hold out 30% of the rows as a test set; the fixed random_state makes the
# split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y_data,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Fit a 3-nearest-neighbours classifier on the training split.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(x_train, y_train)
model = knn  # fit() returns the estimator itself, so both names refer to one object

In [None]:
# Evaluate the fitted classifier on the held-out test split.
test_score = knn.score(x_test, y_test)
print("Model Score is = ", test_score)

In [None]:
# Sweep n_neighbors over 1..49, refitting each time and recording the score
# obtained on the test split.
neighbor_counts = range(1, 50)
score_list = []
for k in neighbor_counts:
    knn = KNeighborsClassifier(n_neighbors=k)
    model = knn.fit(x_train, y_train)
    score_list += [model.score(x_test, y_test)]

In [None]:
# Plot the recorded score for each candidate n_neighbors value.
# The x positions are derived from score_list (one score per k, starting at
# k = 1) so the plot stays aligned with the search even if its upper bound
# changes.
k_values = range(1, len(score_list) + 1)
plt.figure(figsize=(12, 10))
plt.plot(k_values, score_list, "*-")
plt.xlabel("n_neighbor value")
plt.ylabel("Score")
plt.grid(True)  # grid() documents a bool; pass True rather than the string "on"
plt.title("Best Score Searching")
plt.show()
plt.close()

In [None]:
# Train with the best n_neighbors value.
# The value is derived from score_list rather than hard-coded, so it stays
# correct if the data or the split changes. score_list[i] holds the score for
# k = i + 1 because the search starts at k = 1; on ties the smallest k wins.
# NOTE(review): score_list was measured on x_test, so the score printed below
# is optimistic. Consider a separate validation split or cross-validation.
best_k = score_list.index(max(score_list)) + 1
knn = KNeighborsClassifier(n_neighbors = best_k)
model = knn.fit(x_train, y_train)
print("Best n_neighbors is = ", best_k)
print("Model Score is = ", knn.score(x_test, y_test))