In [4]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn import preprocessing
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import KMeans
from sklearn import svm
from sklearn.linear_model import LogisticRegression

In [5]:
# Load the dataset; the relative path is resolved against the working directory.
df = pd.read_csv('diabetes.csv')

# Feature matrix: the seven predictor columns used by every model below.
X = df[[
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'Age',
]]

# Target vector.
Y = df['Outcome']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.3, random_state=0
)

In [6]:
def TrainGuasNaiveBayes():
    """Fit a Gaussian Naive Bayes classifier on the training split.

    Returns:
        The fitted GaussianNB estimator.
    """
    nb_classifier = GaussianNB()
    nb_classifier.fit(X_train, Y_train)
    return nb_classifier

In [7]:
def TrainKNClassifier():
    """Fit a k-nearest-neighbours classifier (k=2) on the training split.

    Returns:
        The fitted KNeighborsClassifier.
    """
    clf = KNeighborsClassifier(n_neighbors=2)
    # A single fit is enough; fitting again on the same data yields the same model.
    clf.fit(X_train, Y_train)
    return clf

In [8]:
def TrainMLPClassifier():
    """Fit a multi-layer perceptron on the training split.

    The network has three hidden layers of 150 units each and is trained for
    at most 500 iterations.

    Returns:
        The fitted MLPClassifier.
    """
    # The seed is pinned (0, the same value the notebook uses for the
    # train/test split) because this function is called again for every
    # prediction: without it the model that produced the accuracy shown in
    # the menu and the model that answers the user would be different.
    Model = MLPClassifier(
        hidden_layer_sizes=(150, 150, 150),
        max_iter=500,
        random_state=0,
    )
    Model.fit(X_train, Y_train)
    return Model

In [9]:
def TrainDecisionTree():
    """Fit a decision tree classifier on the training split.

    Returns:
        The fitted DecisionTreeClassifier.
    """
    # Pinned seed (0, as used for the train/test split): the tree is rebuilt
    # on every call, so an unseeded tree could differ between the accuracy
    # evaluation and the later prediction.
    clf = DecisionTreeClassifier(random_state=0)
    clf.fit(X_train, Y_train)
    return clf

In [10]:
def TrainRandomForest():
    """Fit a 100-tree random forest on the training split.

    Returns:
        The fitted RandomForestClassifier.
    """
    # Pinned seed (0, as used for the train/test split): the forest is rebuilt
    # on every call, so an unseeded forest could differ between the accuracy
    # evaluation and the later prediction.
    clf = RandomForestClassifier(n_estimators=100, random_state=0)
    clf.fit(X_train, Y_train)
    return clf

In [11]:
def TrainSVM():
    """Fit a linear-kernel support vector classifier on the training split.

    Returns:
        The fitted SVC estimator.
    """
    return svm.SVC(kernel='linear').fit(X_train, Y_train)

In [12]:
# Train Stochastic Gradient Descent Classifier
def TrainSGD():
    """Fit a linear classifier with SGD (hinge loss, L2 penalty) on the training split.

    Training runs for at most 100 iterations.

    Returns:
        The fitted SGDClassifier.
    """
    # Pinned seed (0, as used for the train/test split): the model is retrained
    # on every call, so an unseeded run could differ between the accuracy
    # evaluation and the later prediction.
    clf = SGDClassifier(loss="hinge", penalty="l2", max_iter=100, random_state=0)
    clf.fit(X_train, Y_train)
    return clf

In [13]:
# Train Hierarchical Clustering
def TrainHC():
    """Fit agglomerative (hierarchical) clustering on the training features.

    Only the features are used; the Outcome labels play no part in the fit.

    Returns:
        The fitted AgglomerativeClustering estimator.
    """
    return AgglomerativeClustering().fit(X_train)

In [14]:
# Train K-Means Clustering
def TrainKmean():
    """Fit a 2-cluster K-Means model on the training features.

    Only the features are used; the Outcome labels play no part in the fit.

    Returns:
        The fitted KMeans estimator.
    """
    kmeans = KMeans(n_clusters=2, random_state=0)
    # Fit on the training split only. A preliminary fit on the full dataset
    # would be discarded by this fit anyway, and it touches the test rows.
    kmeans.fit(X_train)
    return kmeans


In [15]:
# Logistic Regression
def TrainLogReg():
    """Fit a logistic regression classifier (seed 0) on the training split.

    Returns:
        The fitted LogisticRegression estimator.
    """
    model = LogisticRegression(random_state=0)
    model.fit(X_train, Y_train)
    return model


In [16]:
def FindAccuracy():
    """Train every model and score it on the held-out test split.

    Returns:
        A list of ten accuracy percentages, in this order: decision tree,
        naive Bayes, SVM, logistic regression, SGD, MLP, random forest,
        hierarchical clustering, k-NN, K-Means.
    """
    def _percent(predicted):
        # accuracy_score takes the ground truth first, then the predictions.
        return accuracy_score(Y_test, predicted) * 100

    # NOTE(review): the two clustering entries compare cluster ids with the
    # Outcome labels; cluster ids are presumably not tied to the 0/1 label
    # meaning, so those two scores may be inverted - confirm.
    return [
        _percent(TrainDecisionTree().predict(X_test)),
        _percent(TrainGuasNaiveBayes().predict(X_test)),
        _percent(TrainSVM().predict(X_test)),
        _percent(TrainLogReg().predict(X_test)),
        _percent(TrainSGD().predict(X_test)),
        _percent(TrainMLPClassifier().predict(X_test)),
        _percent(TrainRandomForest().predict(X_test)),
        # Agglomerative clustering is re-fitted on the test rows here.
        _percent(TrainHC().fit_predict(X_test)),
        _percent(TrainKNClassifier().predict(X_test)),
        _percent(TrainKmean().predict(X_test)),
    ]

# Computed once; the menu printed by SelectChoice reads these values.
Accuracy = FindAccuracy()

In [17]:
def FormatData(Preg,Glucose,BP,SkinThick,Insulin,BMI,Age):
    """Wrap one set of measurements in a single-row DataFrame.

    The columns carry the same names, in the same order, as the feature
    columns the models are trained on.

    Returns:
        A pandas DataFrame with one row and seven columns.
    """
    feature_names = [
        'Pregnancies',
        'Glucose',
        'BloodPressure',
        'SkinThickness',
        'Insulin',
        'BMI',
        'Age',
    ]
    row = [Preg, Glucose, BP, SkinThick, Insulin, BMI, Age]
    return pd.DataFrame([row], columns=feature_names)

In [18]:
def FormatDataHC(Preg,Glucose,BP,SkinThick,Insulin,BMI,Age):
    """Wrap one set of measurements in a DataFrame holding the row twice.

    NOTE(review): the duplication is presumably there because hierarchical
    clustering cannot be run on a single sample - confirm.

    Returns:
        A pandas DataFrame with two identical rows and seven columns.
    """
    feature_names = [
        'Pregnancies',
        'Glucose',
        'BloodPressure',
        'SkinThickness',
        'Insulin',
        'BMI',
        'Age',
    ]
    row = [Preg, Glucose, BP, SkinThick, Insulin, BMI, Age]
    return pd.DataFrame([row, list(row)], columns=feature_names)

In [19]:
def SelectChoice():
    """Print the algorithm menu with accuracies and read the user's choice.

    The accuracy shown on each row is taken from the module-level
    ``Accuracy`` list, indexed in menu order.

    Returns:
        The raw string typed by the user.
    """
    divider = "--------------------------------------------"
    # One template per menu row; row i is filled with Accuracy[i].
    menu_rows = (
        "[1]  Decision Tree                 [{:.2f}%]",
        "[2]  Naive Bayes                   [{:.2f}%]",
        "[3]  Support Vector Machine        [{:.2f}%]",
        "[4]  Logistic Regression           [{:.2f}%]",
        "[5]  Stochastic Gradient Descent   [{:.2f}%]",
        "[6]  Neural Network (MLP)          [{:.2f}%]",
        "[7]  Random Forest                 [{:.2f}%]",
        "[8]  Hierarchical Clustering       [{:.2f}%]",
        "[9]  K-NN (k Nearest Neighbors)    [{:.2f}%]",
        "[10] K-Means Clustering            [{:.2f}%]",
    )

    print('---Welcome to Diabetes Prediction System---')
    print(divider)
    print("Key        Algorithm Name          Accuracy")
    print(divider)
    for position, template in enumerate(menu_rows):
        print(template.format(Accuracy[position]))
    print("[-1] Exit Program..")
    print(divider)
    return input("Please Select Which Algorith you want to Use: ")

In [21]:
def PredictOutput(data,choice):
    """Train the selected model, predict for ``data`` and print the verdict.

    Args:
        data: DataFrame of feature rows (as built by FormatData or
            FormatDataHC); only the prediction for the first row is reported.
        choice: Menu key as a string, "1" through "10".

    Prints a "no diabetes" message for a prediction of 0, a "symptoms"
    message for 1, and "Invalid Choice!" for an unknown key.
    """
    # Menu key -> training function for every model that offers predict().
    trainers = {
        "1": TrainDecisionTree,
        "2": TrainGuasNaiveBayes,
        "3": TrainSVM,
        "4": TrainLogReg,
        "5": TrainSGD,
        "6": TrainMLPClassifier,
        "7": TrainRandomForest,
        "9": TrainKNClassifier,
        "10": TrainKmean,
    }

    if choice == "8":
        # AgglomerativeClustering has no predict(); it can only label the
        # samples it is fitted on. Cluster the training rows together with
        # the input rows and keep the labels of the appended input rows.
        # NOTE(review): cluster ids are presumably not tied to the Outcome
        # labels, so 0/1 here may not mean "healthy"/"diabetic" - confirm.
        combined = pd.concat([X_train, data], ignore_index=True)
        diab = TrainHC().fit_predict(combined)[len(X_train):]
    elif choice in trainers:
        diab = trainers[choice]().predict(data)
    else:
        # Sentinel that matches neither 0 nor 1 -> "Invalid Choice!" below.
        diab = [-1]

    divider = '--------------------------------------------------------'
    if diab[0] == 0:
        print('--------------Prediction---------------')
        print('\nYou have No Diabetes! Stay Healthy...')
        print('\n-------------------------------------')
    elif diab[0] == 1:
        print('-----------------------Prediction-----------------------')
        print('\nYou have Symptoms of Diabetes, Go visit your Doctor..!')
        print(divider)
    else:
        print(divider)
        print('Invalid Choice!')
        print(divider)


In [22]:
def UserInput(choice):
    """Prompt for the seven measurements and run the chosen model on them.

    Args:
        choice: Menu key as a string; "8" selects hierarchical clustering.

    Raises:
        ValueError: if any entered value cannot be parsed as a float.
    """
    prompts = (
        'Enter Pregnancies Value: ',
        'Enter Glucose Value: ',
        'Enter BloodPressure Value: ',
        'Enter SkinThickness Value: ',
        'Enter Insulin Value: ',
        'Enter BMI Value: ',
        'Enter Age Value: ',
    )
    # Values are read in prompt order; the first invalid entry raises ValueError.
    Preg, Glucose, BP, SkinThick, Insulin, BMI, Age = [
        float(input(prompt)) for prompt in prompts
    ]

    if choice == "8":
        # Hierarchical clustering gets the two-row frame.
        data = FormatDataHC(Preg,Glucose,BP,SkinThick,Insulin,BMI,Age)
    else:
        # Every other model predicts on a single-row frame.
        data = FormatData(Preg,Glucose,BP,SkinThick,Insulin,BMI,Age)
    PredictOutput(data,choice)

In [23]:
def run():
    """Run the interactive menu loop until the user enters -1.

    Each pass shows the menu, validates the key, collects the measurements
    and prints the prediction. Any exception raised along the way (for
    example a non-numeric measurement) is reported as "Error..." and the
    menu is shown again.
    """
    # Valid menu keys are exactly the algorithms listed by SelectChoice: "1".."10".
    options = [str(key) for key in range(1, 11)]
    while True:
        try:
            choice = SelectChoice()
            if choice == "-1":
                print('Thank you for using this System..')
                print('Good Bye!!!')
                return
            if choice in options:
                UserInput(choice)
            else:
                print('--------------------------------------------------------')
                print('Invalid Choice!!')
                print('--------------------------------------------------------')
        except Exception:
            # Best effort: report the failure and return to the menu.
            print('--------------------------------------------------------')
            print('Error...')
            print('--------------------------------------------------------')

In [26]:
# Start the interactive menu; the loop returns once the user enters -1.
run()

---Welcome to Diabetes Prediction System---
--------------------------------------------
Key        Algorithm Name          Accuracy
--------------------------------------------
[1]  Decision Tree                 [74.89%]
[2]  Naive Bayes                   [72.73%]
[3]  Support Vector Machine        [77.06%]
[4]  Logistic Regression           [77.49%]
[5]  Stochastic Gradient Descent   [69.70%]
[6]  Neural Network (MLP)          [62.34%]
[7]  Random Forest                 [75.32%]
[8]  Hierarchical Clustering       [31.17%]
[9]  K-NN (k Nearest Neighbors)    [71.43%]
[10] K-Means Clustering            [32.03%]
[-1] Exit Program..
--------------------------------------------
Please Select Which Algorith you want to Use: 1
Enter Pregnancies Value: 34
Enter Glucose Value: +++
--------------------------------------------------------
Error...
--------------------------------------------------------
---Welcome to Diabetes Prediction System---
--------------------------------------------
Key 

In [27]:
# Preview the first rows of the loaded dataset.
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [28]:
# Summary statistics (count, mean, std, min, quartiles, max) per column.
df.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0
