In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from keras.models import Sequential
from keras.layers import Dense,Dropout
import os,warnings
warnings.filterwarnings('ignore')
from keras.utils import to_categorical
from sklearn.metrics import confusion_matrix,classification_report

In [None]:
# Relative directory that holds the dataset CSV, and the full path to the file.
path = '../input/pima-indians-diabetes-database'
filename = os.path.join(path, 'diabetes.csv')

# Read the whole CSV into the working DataFrame used by every later cell.
df = pd.read_csv(filepath_or_buffer=filename)

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Column names, dtypes and non-null counts.
df.info()

In [None]:
# Summary statistics, transposed so that each column of df becomes one row.
df.describe().T

<h4>Replacing 0 values (placeholders for missing measurements) with the column mean</h4>

In [None]:
# A value of 0 in these measurement columns is treated as "missing".
cols_to_fill = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
for i in cols_to_fill:
    # Mask the zeros first so the fill value is the mean of the observed
    # (non-zero) entries only; a mean that still includes the zero
    # placeholders is dragged towards zero and understates the typical value.
    observed = df[i].replace(0, np.nan)
    df[i] = observed.fillna(observed.mean())

In [None]:
# Summary statistics again, now that the zero values have been replaced.
df.describe().T

<h4>Correlation Matrix</h4>

In [None]:
# Pairwise correlations between all columns, drawn as an annotated heatmap.
corr_matrix = df.corr()
plt.figure(figsize=(10, 10))
sns.heatmap(
    corr_matrix,
    annot=True,
    fmt='.2f',
    square=True,
    cmap='RdYlGn',
)

<h4>Let's not remove any features, as all features seem to be significant</h4>

<h4>Distribution of outcome variable</h4>

In [None]:
# Name the column by keyword: seaborn 0.12+ takes only `data` positionally,
# so a bare Series is no longer interpreted as the x variable.
sns.countplot(x='Outcome', data=df)

In [None]:
# Number of rows per value of the 'Outcome' column.
df['Outcome'].value_counts()

In [None]:
class_0=df['Outcome'].value_counts()[0]
class_1=df['Outcome'].value_counts()[1]
sum=class_0+class_1
sum

In [None]:
print('0 class has distribution of {:.2f}%'.format(100*(class_0/sum)))
print('1 class has distribution of {:.2f}%'.format(100*(class_1/sum)))

<h4>Since class 1 accounts for about 35% of the samples, we cannot call this dataset imbalanced</h4>

<h4>Splitting into features and target</h4>

In [None]:
# The target is the 'Outcome' column; every remaining column is a feature.
y = df['Outcome']
X = df.drop(columns=['Outcome'])

<h3>Scaling the features</h3>

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise every feature, then rebuild a DataFrame so the column names
# survive the conversion to a NumPy array.
# NOTE(review): the scaler is fitted on all rows, i.e. before the
# train/validation split, so validation rows influence the scaling
# statistics - consider fitting on the training rows only.
X_cols = X.columns
sc = StandardScaler().fit(X)
X_transformed = sc.transform(X)
X_transformed_df = pd.DataFrame(data=X_transformed, columns=X_cols)

<h4>Splitting the data into training and validation datasets</h4>

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 25% of the rows for validation. Stratifying on y keeps the class
# ratio the same in both parts; the fixed random_state makes the split (and
# therefore every metric computed below) reproducible across re-runs.
X_train,X_val,y_train,y_val=train_test_split(
    X_transformed_df,y,test_size=0.25,stratify=y,random_state=42)

In [None]:
# Shapes of the four split parts, one per line:
# train features, train target, validation features, validation target.
print(X_train.shape, y_train.shape, X_val.shape, y_val.shape, sep='\n')

<h3>Modelling the data using an Artificial Neural Network</h3>

In [None]:
# Fully connected binary classifier: four ReLU layers of increasing width,
# dropout for regularisation, and a single sigmoid output unit.
model=Sequential()
# Take the input width from the training data instead of hard-coding 8, so
# the cell keeps working if the feature set changes.
model.add(Dense(128,input_dim=X_train.shape[1],activation='relu'))
model.add(Dense(256,activation='relu'))
model.add(Dense(512,activation='relu'))
model.add(Dense(1024,activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1,activation='sigmoid'))
model.compile(optimizer='SGD',loss='binary_crossentropy',metrics=['accuracy'])
# Report loss/accuracy on the validation split after every epoch so that
# overfitting is visible; Keras does not train on validation_data.
model.fit(X_train,y_train,batch_size=50,epochs=100,validation_data=(X_val,y_val))

In [None]:
# Sequential.predict_classes was removed from Keras (TensorFlow 2.6+).
# For a single sigmoid output, thresholding the predicted probability at 0.5
# yields the same 0/1 labels on old and new versions alike.
y_preds=(model.predict(X_val,verbose=0)>0.5).astype('int32')


In [None]:
from sklearn.metrics import accuracy_score

# Fraction of validation samples whose predicted label matches the true one.
accuracy_score(y_true=y_val, y_pred=y_preds)

In [None]:
def evaluate_predictions(y_test,y_preds):
    """Plot the confusion matrix and print the classification report.

    Parameters
    ----------
    y_test : array-like
        True class labels.
    y_preds : array-like
        Predicted class labels, aligned element-wise with ``y_test``.

    Returns
    -------
    None
        The heatmap is drawn on the current matplotlib axes and the report
        is written to stdout.
    """
    cnf_matrix=confusion_matrix(y_test,y_preds)
    ax=sns.heatmap(pd.DataFrame(cnf_matrix),annot=True,square=True,cmap='YlGnBu',fmt='g')
    # scikit-learn puts the true labels on the rows and the predicted labels
    # on the columns; label the axes so the figure can be read on its own.
    ax.set(xlabel='Predicted label',ylabel='True label',title='Confusion matrix')
    print(classification_report(y_test,y_preds))
evaluate_predictions(y_val,y_preds)