## Importing the Dependencies

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix

## Data Collection and Data Processing

In [None]:
# Read the diabetes CSV from the notebook's input directory into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer="../input/diabetesdataset/diabetes.csv",
)

In [None]:
# Preview the first five rows (the default for head()).
df.head(n=5)

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Summary statistics, transposed so that each column of df becomes a row.
df.describe().transpose()

In [None]:
# Bar chart of how many rows fall into each Outcome class.
# The column is passed by keyword: recent seaborn releases take `data` as the
# first positional parameter, so a positional Series is no longer read as `x`.
sns.countplot(x="Outcome", data=df)

In [None]:
# Annotated heatmap of the pairwise correlations between the columns of df.
plt.figure(figsize=(12, 6))
sns.heatmap(
    df.corr(),
    annot=True,
    fmt='.1g',
    robust=True,
    linewidths=1.3,
    linecolor='gold',
)

In [None]:
# Separate the target column ("Outcome") from the predictor columns.
Y = df["Outcome"]
X = df.drop(labels="Outcome", axis="columns")

In [None]:
# Dimensions of the feature matrix and of the target vector.
(X.shape, Y.shape)

In [None]:
# Data Normalization
#
# Min-max scale every feature column: (value - column min) / (column max - column min).
# The DataFrame's own column-wise min()/max() are used instead of np.min/np.max:
# the NumPy wrappers forward axis=None, which pandas >= 2.0 interprets as a
# reduction over the whole frame (one scalar) rather than one value per column.
# NOTE(review): the minima/maxima are computed on the full dataset, i.e. before
# the train/test split, so test rows influence the scaling.

X = (X - X.min()) / (X.max() - X.min())

## Train Test Split

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Dimensions of the training and test feature matrices.
(x_train.shape, x_test.shape)

## Model Training


1.   Decision Tree Classifier
2.   KNN Classifier
3.   Random Forest Classifier



Decision Tree Classifier

In [None]:
# Decision tree using entropy as the split criterion, limited to depth 12.
# random_state is fixed (same seed as the train/test split) so that the fitted
# tree, and therefore the reported score, is reproducible between runs.
DS = DecisionTreeClassifier(criterion="entropy", max_depth=12, random_state=42)

In [None]:
# Train the decision tree on the training split.
DS.fit(X=x_train, y=y_train)

KNN Classifier

In [None]:
# Nearest-neighbour classifier that consults a single neighbour per prediction.
KN = KNeighborsClassifier(
    n_neighbors=1,
)

In [None]:
# Train the KNN classifier on the training split.
KN.fit(X=x_train, y=y_train)

Random Forest Classifier

In [None]:
# Random forest with default hyper-parameters.
# random_state is fixed (same seed as the train/test split) so that the
# bootstrap samples and feature choices, and therefore the reported score,
# are reproducible between runs.
RF = RandomForestClassifier(random_state=42)

In [None]:
# Train the random forest on the training split.
RF.fit(X=x_train, y=y_train)

## Model Evaluation

Decision Tree Score & Confusion Matrix

In [None]:
# Score the decision tree on the test split and report it as a percentage.
DS_result = DS.score(X=x_test, y=y_test)
print("Accuracy: %.3f%%" % (100.0 * DS_result))

In [None]:
# Predict the test split with the decision tree, then tabulate true vs.
# predicted labels.
DS_pred = DS.predict(X=x_test)

confusion_matrix(y_true=y_test, y_pred=DS_pred)

KNN Score & Confusion Matrix

In [None]:
# Score the KNN classifier on the test split and report it as a percentage.
KN_result = KN.score(X=x_test, y=y_test)
print("Accuracy: %.3f%%" % (100.0 * KN_result))

In [None]:
# Predict the test split with the KNN classifier, then tabulate true vs.
# predicted labels.
KN_pred = KN.predict(X=x_test)

confusion_matrix(y_true=y_test, y_pred=KN_pred)

Random Forest Score & Confusion Matrix

In [None]:
# Score the random forest on the test split and report it as a percentage.
RF_result = RF.score(X=x_test, y=y_test)
print("Accuracy: %.3f%%" % (100.0 * RF_result))

In [None]:
# Predict the test split with the random forest, then tabulate true vs.
# predicted labels.
RF_pred = RF.predict(X=x_test)

confusion_matrix(y_true=y_test, y_pred=RF_pred)

## Models Comparison

In [None]:
# Compare the test-set scores of the three fitted models side by side.
# A bar chart on a 0-1 axis is used instead of a pie chart: the scores are
# independent values, not parts of a whole, and a pie would rescale them into
# near-equal slices. The patient DataFrame is not passed to plotly because the
# plotted values come only from the two lists below.
index = ["RF", "KNN", "DS"]
scores = [RF_result, KN_result, DS_result]

fig = px.bar(
    x=index,
    y=scores,
    text=["%.3f" % score for score in scores],  # print each score on its bar
    labels={"x": "Model", "y": "Accuracy"},
    title='Models Comparison',
)
fig.update_yaxes(range=[0, 1])
fig.show()