# Importing Libraries

In [None]:
import pandas as pd
import numpy as np

from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score, accuracy_score, mean_absolute_error, mean_squared_error, roc_curve, auc, confusion_matrix, classification_report

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Reading the Dataset

In [None]:
# IPython shell escape: list the dataset CSV to confirm it exists at the expected path.
!ls ../input/indian-liver-patient-records/indian_liver_patient.csv

In [None]:
# Load the liver-patient records into a DataFrame and preview it.
csv_path = "../input/indian-liver-patient-records/indian_liver_patient.csv"
data = pd.read_csv(csv_path)
data

# Data Preprocessing

In [None]:
# Number of missing values in each column (isna is the same check as isnull).
data.isna().sum()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe()

In [None]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
data.info()

In [None]:
# Fill the missing Albumin_and_Globulin_Ratio values from neighbouring rows.
# bfill() copies the next valid value backwards, so on its own it cannot fill
# a NaN in the last row (there is no later value to copy). The trailing
# ffill() covers that case; rows bfill() already filled are left unchanged.
data["Albumin_and_Globulin_Ratio"] = (
    data["Albumin_and_Globulin_Ratio"].bfill().ffill()
)

In [None]:
# Re-check the per-column missing-value counts after the fill step.
data.isna().sum()

In [None]:
# Count how many rows fall in each value of the "Dataset" column.
data["Dataset"].value_counts()

In [None]:
# Features are every column except the last; the last column is the target.
x, y = data.iloc[:, :-1], data.iloc[:, -1]
y

In [None]:
# Balance the classes by randomly duplicating minority-class rows.
# A fixed random_state makes the resampled rows, and therefore every
# downstream metric, reproducible across runs.
# NOTE(review): resampling happens before the train/test split further down,
# so copies of the same row can land in both splits and inflate the test
# score; consider resampling only the training split.
oversample = RandomOverSampler(random_state=42)
x, y = oversample.fit_resample(x, y)
type(x)

In [None]:
# Combine the resampled features and the resampled target into one frame.
new_data = pd.DataFrame(data=x, columns=x.columns).assign(Dataset=y)
new_data

In [None]:
# Class counts after oversampling; the classes should now be equally represented.
new_data["Dataset"].value_counts()

In [None]:
# (rows, columns) of the resampled frame.
new_data.shape

In [None]:
# Replace the object-typed Gender column with integer codes so it can be
# used in numeric processing. Fitting and transforming are done as two steps.
la = LabelEncoder()
la.fit(new_data["Gender"])
new_data["Gender"] = la.transform(new_data["Gender"])

# Data Visualization

In [None]:
# Pairwise correlation between all columns, drawn as an annotated heatmap
# on an explicitly created 12x6 axes.
corr = new_data.corr()
corr_fig, corr_ax = plt.subplots(figsize=(12, 6))
sns.heatmap(corr, annot=True, ax=corr_ax)

In [None]:
# Grid of pairwise scatter plots (with per-column distributions on the diagonal).
sns.pairplot(new_data)

In [None]:
# Correlation of every column with the "Dataset" target, highest first.
corr["Dataset"].sort_values(ascending=False)

# Data Splitting

In [None]:
# Separate the features from the target and standardise the features.
# KNN is distance based, so columns with large numeric ranges would otherwise
# dominate the neighbour search.
x = new_data.iloc[:, :-1]
sc = StandardScaler()
# fit_transform() returns a new array instead of modifying x in place, so the
# result must be assigned back; wrapping it keeps the column names and index.
x = pd.DataFrame(sc.fit_transform(x), columns=x.columns, index=x.index)
y = new_data["Dataset"]
x

In [None]:
# Hold out 25% of the rows for testing; the fixed seed keeps the split repeatable.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=42,
)

# Applying Model

In [None]:
# Baseline KNN classifier with 5 neighbours; fit() returns the estimator
# itself, so construction and fitting can be chained.
cla = KNeighborsClassifier(n_neighbors=5).fit(xtrain, ytrain)
cla.get_params()

In [None]:
# Hyper-parameter grid: neighbour-search algorithm, odd neighbour counts
# from 5 to 13, and both vote-weighting schemes.
param = {
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'n_neighbors': list(range(5, 15, 2)),
    'weights': ['uniform', 'distance'],
}

In [None]:
# Exhaustive 5-fold cross-validated search over the parameter grid,
# fitted on the training split.
grid = GridSearchCV(cla, param, cv=5)
grid.fit(xtrain, ytrain)

In [None]:
# Show the parameter combination that scored best in the cross-validated search.
grid.best_params_

In [None]:
# Refit the final model with the hyper-parameters the grid search selected,
# rather than hard-coding values copied from one particular run (which go
# stale whenever the data or the search result changes).
cla = KNeighborsClassifier(**grid.best_params_)
cla.fit(xtrain, ytrain)

# Checking Accuracy

In [None]:
# Accuracy on the same rows the model was fitted on, as a percentage.
ytrain_pre = cla.predict(xtrain)
train_accuracy = accuracy_score(ytrain, ytrain_pre) * 100
print("Training accuracy is {} %".format(train_accuracy))

In [None]:
# Accuracy on the held-out test split, as a percentage.
ypredict = cla.predict(xtest)
test_accuracy = accuracy_score(ytest, ypredict) * 100
print("Testing accuracy is {} %".format(test_accuracy))

In [None]:
# Side-by-side table of the true test labels and the model's predictions.
pd.DataFrame({"Actual":ytest,"Predicted":ypredict})

In [None]:
# Confusion matrix of the test predictions: rows are the actual classes,
# columns the predicted ones.
a = confusion_matrix(ytest, ypredict)
# fmt="d" prints the integer counts in full; seaborn's default ".2g" format
# would show counts of 100 or more in scientific notation (e.g. 1.2e+02).
sns.heatmap(a, annot=True, fmt="d")

In [None]:
# Print the text classification report for the test-set predictions.
print("KNN Model Classification Report")
print(classification_report(ytest, ypredict))