In [91]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, roc_curve, auc

from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
from keras.wrappers.scikit_learn import KerasClassifier

import matplotlib.pyplot as plt

In [47]:
diabetics = pd.read_csv("pima_indians_diabetes.csv")
diabetics.head(2)

Unnamed: 0,6,148,72,35,0,33.6,0.627,50,1
0,1,85,66,29,0,26.6,0.351,31,0
1,8,183,64,0,0,23.3,0.672,32,1


In [48]:
labels = ["Number of times preganant",
          "Plasma glucose concentration", 
          "Diastolic bloood pressure(mm Hg)",
          "Triceps skin fold thickness (mm)",
          "2-house serum insulin (mu U/ml)",
          "Body Mass Index",
          "Diabetes pedigree function",
          "Age (years)",
          "Target"]

In [49]:
diabetics = pd.read_csv("pima-indians-diabetes.csv",names=labels)

In [50]:
diabetics.head(2)

Unnamed: 0,Number of times preganant,Plasma glucose concentration,Diastolic bloood pressure(mm Hg),Triceps skin fold thickness (mm),2-house serum insulin (mu U/ml),Body Mass Index,Diabetes pedigree function,Age (years),Target
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0


In [52]:
# Understand the Data
diabetics.shape

(768, 9)

In [53]:
# Display Columns
diabetics.columns

Index(['Number of times preganant', 'Plasma glucose concentration',
       'Diastolic bloood pressure(mm Hg)', 'Triceps skin fold thickness (mm)',
       '2-house serum insulin (mu U/ml)', 'Body Mass Index',
       'Diabetes pedigree function', 'Age (years)', 'Target'],
      dtype='object')

In [54]:
# Display index
diabetics.index

RangeIndex(start=0, stop=768, step=1)

In [55]:
# Check Ton 'n' Rows
diabetics[:3]

Unnamed: 0,Number of times preganant,Plasma glucose concentration,Diastolic bloood pressure(mm Hg),Triceps skin fold thickness (mm),2-house serum insulin (mu U/ml),Body Mass Index,Diabetes pedigree function,Age (years),Target
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1


In [56]:
# #Summary statistics of each column
diabetics.describe()

Unnamed: 0,Number of times preganant,Plasma glucose concentration,Diastolic bloood pressure(mm Hg),Triceps skin fold thickness (mm),2-house serum insulin (mu U/ml),Body Mass Index,Diabetes pedigree function,Age (years),Target
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [58]:
# Check data type of each attribute
diabetics.dtypes

Number of times preganant             int64
Plasma glucose concentration          int64
Diastolic bloood pressure(mm Hg)      int64
Triceps skin fold thickness (mm)      int64
2-house serum insulin (mu U/ml)       int64
Body Mass Index                     float64
Diabetes pedigree function          float64
Age (years)                           int64
Target                                int64
dtype: object

In [None]:
# Observations
# All attributes are numerical integers

In [59]:
# Missing Data
diabetics.isnull().sum()

Number of times preganant           0
Plasma glucose concentration        0
Diastolic bloood pressure(mm Hg)    0
Triceps skin fold thickness (mm)    0
2-house serum insulin (mu U/ml)     0
Body Mass Index                     0
Diabetes pedigree function          0
Age (years)                         0
Target                              0
dtype: int64

In [60]:
# Target attribute distribution
pd.value_counts(diabetics['Target'])

0    500
1    268
Name: Target, dtype: int64

In [None]:
# Split the data in to train and test
#sklearn.model_selection.train_test_split

#Split arrays or matrices into random train and test subsets

In [62]:
# Performing Train and Test Split on the data
X,y = diabetics.loc[:,diabetics.columns!='Target'].values, diabetics.loc[:,'Target'].values

In [63]:
X_train,X_test,y_train,y_test = train_test_split(X, y, test_size = 0.3, random_state=123)

In [64]:
# Standardize the Data
from sklearn.preprocessing import StandardScaler

In [66]:
std = StandardScaler()
std.fit(X_train)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [67]:
X_train = std.transform(X_train)
X_test = std.transform(X_test)

In [71]:
X_train.shape
 

(537, 8)

In [73]:
diabetics.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
Number of times preganant           768 non-null int64
Plasma glucose concentration        768 non-null int64
Diastolic bloood pressure(mm Hg)    768 non-null int64
Triceps skin fold thickness (mm)    768 non-null int64
2-house serum insulin (mu U/ml)     768 non-null int64
Body Mass Index                     768 non-null float64
Diabetes pedigree function          768 non-null float64
Age (years)                         768 non-null int64
Target                              768 non-null int64
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


In [75]:
# Perceptron Model Building
perceptron_model = Sequential()
perceptron_model.add(Dense(1, input_dim=8,activation='sigmoid',kernel_initializer='normal'))

In [77]:
perceptron_model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

In [79]:
perceptron_model.fit(X_train, y_train, epochs = 30, batch_size = 64)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x1e69e734e80>

In [81]:
# Predictions
y_pred = perceptron_model.predict_classes(X_test)
y_pred_train = perceptron_model.predict_classes(X_train)

In [84]:
# Eaaluation of Algorithm
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
print(accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

0.7575757575757576
[[119  24]
 [ 32  56]]
             precision    recall  f1-score   support

          0       0.79      0.83      0.81       143
          1       0.70      0.64      0.67        88

avg / total       0.75      0.76      0.76       231



In [83]:
print("Train data target \n", pd.value_counts(y_train))
confusion_matrix_train = confusion_matrix(y_train, y_pred_train)
print("\n Confusion matrix \n", confusion_matrix_train)

print("\n Test data target \n", pd.value_counts(y_test))
confusion_matrix_test = confusion_matrix(y_test, y_pred)
print("\n Confusion matrix \n", confusion_matrix_test)

Train data target 
 0    357
1    180
dtype: int64

 Confusion matrix 
 [[283  74]
 [ 76 104]]

 Test data target 
 0    143
1     88
dtype: int64

 Confusion matrix 
 [[119  24]
 [ 32  56]]


In [85]:
Accuracy_Train=(confusion_matrix_train[0,0]+confusion_matrix_train[1,1])/(confusion_matrix_train[0,0]+
                                                                          confusion_matrix_train[0,1]+
                                                                          confusion_matrix_train[1,0]+
                                                                          confusion_matrix_train[1,1])
TNR_Train= confusion_matrix_train[0,0]/(confusion_matrix_train[0,0]+confusion_matrix_train[0,1])
TPR_Train= confusion_matrix_train[1,1]/(confusion_matrix_train[1,0]+confusion_matrix_train[1,1])

print("Train TNR: ",TNR_Train)
print("Train TPR: ",TPR_Train)
print("Train Accuracy: ",Accuracy_Train)

Train TNR:  0.7927170868347339
Train TPR:  0.5777777777777777
Train Accuracy:  0.7206703910614525


In [86]:
Accuracy_Test=(confusion_matrix_test[0,0]+confusion_matrix_test[1,1])/(confusion_matrix_test[0,0]+confusion_matrix_test[0,1]+confusion_matrix_test[1,0]+confusion_matrix_test[1,1])
TNR_Test= confusion_matrix_test[0,0]/(confusion_matrix_test[0,0] +confusion_matrix_test[0,1])
TPR_Test= confusion_matrix_test[1,1]/(confusion_matrix_test[1,0] +confusion_matrix_test[1,1])

print("Test TNR: ",TNR_Test)
print("Test TPR: ",TPR_Test)
print("Test Accuracy: ",Accuracy_Test)

Test TNR:  0.8321678321678322
Test TPR:  0.6363636363636364
Test Accuracy:  0.7575757575757576


In [87]:
## Define function to Create Model
def create_model():
    model=Sequential()
    model.add(Dense(12,input_dim=8,activation='relu'))
    model.add(Dense(1,activation='sigmoid'))
# Model Compile
    model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
    return model

In [93]:
# Create Model
model = KerasClassifier(build_fn=create_model,verbose=0)

In [94]:
# Parameter Tuning
from sklearn.model_selection import GridSearchCV
Perceptron_grid = Sequential()
param_grid = {
    'epochs': [10,20,30,40],
'batch_size': [10,20,40,60,80,100]
}
# OR param_grid = dict(batch_size,epochs)
grid = GridSearchCV(estimator=model,param_grid=param_grid,n_jobs=-1)

In [None]:
## Fit the grid search model
grid_result = grid.fit(X_train,y_train)

In [None]:
## Print best score and parameters
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

In [None]:
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))