### Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import itertools

import matplotlib.pyplot as plt
from plotly.offline import init_notebook_mode
from plotly.offline import iplot,plot
import cufflinks as cf
import seaborn as sns

init_notebook_mode(connected=True)
cf.go_offline()


import os
# Walk the Kaggle input directory and keep a file path in `path`.
# Every file visited overwrites `path`, so only the last one seen is kept;
# if the walk yields no files, `path` is never assigned.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        path = os.path.join(dirname, filename)

#### Creating Plotting functions

In [None]:
def plot_confusion_matrix(cm,
                          target_names=["False","True"],
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """Draw a confusion matrix as an annotated heat map and show it.

    Parameters
    ----------
    cm : array-like, 2-D
        Confusion matrix of counts. Rows are drawn along the "True label"
        axis and columns along the "Predicted label" axis.
    target_names : sequence of str or None
        Tick labels used on both axes, in matrix order. ``None`` keeps the
        default numeric ticks.
    title : str
        Figure title.
    cmap : matplotlib colormap or colormap name, optional
        Color map for the heat map; ``None`` selects ``'Blues'``.
    normalize : bool
        If True, every row is divided by its sum and each cell is annotated
        with its percentage of the row. If False, cells are annotated with
        the raw values.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    cm = np.asarray(cm)

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    if normalize:
        # Normalize before drawing so the cell shading, the color bar and the
        # text-color threshold all refer to the same row-fraction values.
        row_totals = cm.sum(axis=1, keepdims=True)
        # A class without any true samples has a zero row total; keep that
        # row at 0 instead of dividing by zero and annotating NaN.
        cm = np.divide(cm.astype(float), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    # Cells darker than this threshold get white text so it stays readable.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        if normalize:
            # Row fraction shown as a percentage.
            label = "{:0.4f}".format(cm[i, j] * 100)
        else:
            # Raw value, shown as-is with thousands separators.
            label = "{:,}".format(cm[i, j])
        plt.text(j, i, label,
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass after the labels exist so they are not clipped.
    plt.tight_layout()
    plt.show()

### Reading the Dataset

In [None]:
# Load the CSV found at `path` into a DataFrame and preview its first 10 rows.
df = pd.read_csv(path)
df.head(10)

In [None]:
# Print the DataFrame summary produced by `info()`.
df.info()

#### Data showing impact of Insulin and Glucose in Diabetic attacks 

In [None]:
# Plot Glucose against Insulin with seaborn's lmplot, with a separate color
# and marker for each Outcome value.
fig1 = sns.lmplot(x="Insulin", y = "Glucose",data=df,hue="Outcome",markers=['o','v'])

The data suggest that a glucose value above 75 together with an insulin value between 100 and 200 is associated with diabetes.

#### Graph showing the impact of pregnancy on diabetes 

In [None]:
# Distribution of the Pregnancies column. `sns.distplot` is deprecated, so
# use `histplot` with density scaling and a KDE overlay, which draws the same
# histogram-plus-density view.
fig2 = sns.histplot(df["Pregnancies"], kde=True, stat="density", kde_kws=dict(cut=3))

#### Graph showing impact of Blood Pressure and Age with diabetes 

In [None]:
# Joint plot of BloodPressure against Age; the Outcome column is not used here.
fig3 = sns.jointplot(data=df,x="Age",y="BloodPressure")

### Plotting all the columns

In [None]:
# Switch seaborn to the 'white' style, then draw pairwise plots of the
# DataFrame columns colored by Outcome.
sns.set_style('white')
sns.pairplot(data=df,hue="Outcome")

### Getting the Outcome values of the Dataset

In [None]:
# Count how many rows carry each Outcome value.
df['Outcome'].value_counts()

### Importing the Classification Libraries

In [None]:
from sklearn.model_selection import train_test_split

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Target vector is the Outcome column; the feature matrix is every other column.
y = df['Outcome']
X = df.drop(columns='Outcome')

In [None]:
# Hold out 30% of the rows as the test split; random_state pins the shuffle
# so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

### Decision Tree

In [None]:
# Seed the tree (same seed as the train/test split) so that a fresh
# Restart & Run All reproduces the same model and metrics.
dtree = DecisionTreeClassifier(random_state=42)

In [None]:
# Fit the decision tree on the training split.
dtree.fit(X_train,y_train)

In [None]:
# Predict Outcome labels for the held-out test rows.
predDtree = dtree.predict(X_test)

In [None]:
# Compare true and predicted test labels: a text classification report and a
# confusion matrix.
dtree_CR = classification_report(y_test,predDtree)
dtree_CM = confusion_matrix(y_test,predDtree)

In [None]:
# Score the decision tree on the test split; reported below as test accuracy.
accDtree = dtree.score(X_test,y_test)

In [None]:
# Report the decision tree results: accuracy as a percentage, the confusion
# matrix plot (normalize is left at its default of True, so cells show row
# percentages), and the text classification report.
print("\nTest Accuracy for Decision Tree: {0:f}%\n".format(accDtree*100))
print("Confusion Matrix for Decision Tree")
plot_confusion_matrix(dtree_CM,cmap="YlOrBr")
print()
print("Classification Report for Decision Tree")
print(dtree_CR)


### Random Forest

In [None]:
# 200-tree random forest. Seeded (same seed as the train/test split) so that
# the bootstrap samples and feature draws, and therefore the reported
# metrics, are reproducible.
rfc = RandomForestClassifier(n_estimators=200, random_state=42)

In [None]:
# Fit the random forest on the training split.
rfc.fit(X_train,y_train)

In [None]:
# Predict Outcome labels for the held-out test rows.
predRfc = rfc.predict(X_test)

In [None]:
# Random forest evaluation on the test split: text report, confusion matrix,
# and the model's score (reported below as test accuracy).
rfc_CR = classification_report(y_test,predRfc)
rfc_CM = confusion_matrix(y_test,predRfc)
accRfc = rfc.score(X_test,y_test)

In [None]:
# Report the random forest results: accuracy as a percentage, the confusion
# matrix plot (normalize is left at its default of True, so cells show row
# percentages), and the text classification report.
print("\nTest Accuracy for Random Forest: {0:f}%\n".format(accRfc*100))
print("Confusion Matrix for Random Forest")
plot_confusion_matrix(rfc_CM,cmap='PiYG')
print()
print("Classification Report for Random Forest")
print(rfc_CR)


### K-Nearest Neighbor

In [None]:
# k-nearest-neighbors classifier configured with 25 neighbors.
knn = KNeighborsClassifier(n_neighbors = 25)

In [None]:
# Fit the k-NN model on the training split.
knn.fit(X_train,y_train)

In [None]:
# Predict Outcome labels for the held-out test rows.
predKnn = knn.predict(X_test) 

In [None]:
# k-NN evaluation on the test split: text report, confusion matrix and score.
knn_CR = classification_report(y_test,predKnn)
knn_CM = confusion_matrix(y_test,predKnn)
# The accuracy must come from the k-NN model itself; scoring `rfc` here would
# report the random forest's accuracy under the k-NN label.
accKnn = knn.score(X_test,y_test)

In [None]:
# Report the k-NN results: accuracy as a percentage, the confusion matrix
# plot (normalize is left at its default of True, so cells show row
# percentages), and the text classification report.
print("\nTest Accuracy for K-Nearest Neighbor: {0:f}%\n".format(accKnn*100))
print("Confusion Matrix for K-Nearest Neighbor")
plot_confusion_matrix(knn_CM,cmap='OrRd')
print()
print("Classification Report for K-Nearest Neighbor")
print(knn_CR)


### Comparison

In [None]:
# Bar chart comparing the test accuracy (in percent) of the three classifiers.
objects = ('DT','RF','KNN')
y_pos = np.arange(len(objects))  # bar positions along the x axis
performance = [accDtree*100,accRfc*100,accKnn*100]

plt.bar(y_pos, performance,align='center', color = '#11c2d9')
plt.xticks(y_pos, objects)
# Label both axes so the figure can be read without the surrounding cells.
plt.xlabel('Classifier')
plt.ylabel('Test accuracy (%)')
plt.title('Comparision of Algorithms')

plt.show()