In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print the full path of
# every file found, so the available datasets are visible in the output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Load the heart-attack dataset into a DataFrame and preview the first rows.
df = pd.read_csv(
    '/kaggle/input/health-care-data-set-on-heart-attack-possibility/heart.csv'
)
df.head()

# Basic Data Analysis

In [None]:
# Summary statistics of the DataFrame's columns.
df.describe()

In [None]:
# Number of missing values in each column (all zeros means nothing to impute).
df.isnull().sum()

In [None]:
# From the above data analysis, the categorical and numerical columns are clear.
# Categorical columns: sex, fbs, cp, restecg, exang, slope, ca, thal, target
# Numerical columns: age, chol, trestbps, thalach, oldpeak

# **Univariate and Bivariate Analysis**

In [None]:
def countplot(variable):
    """Show a bar chart of how often each distinct value of a column occurs.

    Reads the notebook-level DataFrame ``df``.

    Parameters
    ----------
    variable : str
        Name of the column in ``df`` whose value frequencies are plotted.
    """
    counts = df[variable].value_counts()
    categories = counts.index

    fig, ax = plt.subplots(figsize=(5, 5))
    ax.bar(categories, counts)
    # Put one tick under each bar, labelled with the value it represents.
    plt.xticks(categories, categories.values)
    ax.set_ylabel('Frequency')
    ax.set_xlabel(variable)
    plt.show()

In [None]:
# Columns treated as categorical; draw one frequency bar chart per column.
categorical = [
    'sex', 'fbs', 'cp', 'restecg', 'exang',
    'slope', 'ca', 'thal', 'target',
]
for c in categorical:
    countplot(c)

In [None]:
def hist_plot(variable):
    """Show a 20-bin histogram of one column of the notebook-level ``df``.

    Parameters
    ----------
    variable : str
        Name of the column in ``df`` to plot.
    """
    n_bins = 20
    _, ax = plt.subplots(figsize=(5, 5))
    ax.hist(df[variable], bins=n_bins)
    # Label the x axis with the column name; this function is called in a
    # loop, and without it the resulting figures cannot be told apart.
    ax.set_xlabel(variable)
    ax.set_ylabel('Frequency')
    plt.show()

In [None]:
# Columns treated as numerical; draw one histogram per column.
numerical = [
    'age',
    'chol',
    'trestbps',
    'thalach',
    'oldpeak',
]
for c in numerical:
    hist_plot(c)

In [None]:
def box_plot(variable):
    """Show box plots of one column of ``df``, grouped by the ``target`` column.

    Reads the notebook-level DataFrame ``df``.

    Parameters
    ----------
    variable : str
        Name of the column in ``df`` drawn on the y axis.
    """
    _, ax = plt.subplots(figsize=(6, 6))
    # x/y are passed by keyword: recent seaborn releases reject positional
    # data arguments for boxplot.
    sns.boxplot(x='target', y=variable, data=df, ax=ax)
    # The x axis holds the target groups and the y axis the variable's
    # values, so label them accordingly.
    ax.set_xlabel('target')
    ax.set_ylabel(variable)
    plt.show()

In [None]:
# Columns to compare against the target; draw one box plot per column.
categorical1 = [
    'age',
    'chol',
    'trestbps',
    'thalach',
    'oldpeak',
]
for c in categorical1:
    box_plot(c)

In [None]:
# Heatmap of the pairwise correlations between the columns of df.
fig, ax = plt.subplots(figsize=(8, 8))
correlations = df.corr()
sns.heatmap(correlations, cmap="YlGnBu", ax=ax)

# **Data Preprocessing**

In [None]:
import tensorflow as tf
from tensorflow import keras
from sklearn import model_selection,preprocessing

In [None]:
#converting data to numpy arrays
# Feature matrix: every column except the label. `columns=` is used because
# pandas 2.x no longer accepts the axis as a second positional argument.
X = np.array(df.drop(columns='target'))
# Labels as a column vector of shape (n_samples, 1).
y = np.array(df['target']).reshape(-1, 1)
print(X.shape, y.shape)

In [None]:
# Hold out 25% of the rows for testing. The fixed random_state makes the
# split (and everything computed from it) reproducible across re-runs.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.25, random_state=42
)

In [None]:
from keras.utils import to_categorical
# Fit the scaler on the training split only and reuse its mean/std for the
# test split, so both are on the same scale and the test data does not
# influence the preprocessing.
scaler = preprocessing.StandardScaler().fit(X_train)
X1_train = scaler.transform(X_train)
X1_test = scaler.transform(X_test)
# One-hot encode the labels for the two-unit softmax output layer.
y1_train = to_categorical(y_train)
y1_test = to_categorical(y_test)

# Modelling with tensorflow backend

In [None]:
# Feed-forward classifier: two 40-unit ReLU hidden layers (with 40% dropout
# after the first) followed by a 2-way softmax output.
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(40, activation=tf.nn.relu),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(40, activation=tf.nn.relu),
    tf.keras.layers.Dense(2, activation=tf.nn.softmax),
])

In [None]:
# Compile with Adam and train for 15 epochs, holding out 20% of the training
# data for validation; the returned history feeds the curves plotted below.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model_training = model.fit(
    X1_train,
    y1_train,
    epochs=15,
    validation_split=0.2,
)

# Plotting Accuracy and Loss Curves

In [None]:
# Training (blue) and validation (violet) accuracy per epoch.
for metric, colour in (('accuracy', 'blue'), ('val_accuracy', 'violet')):
    plt.plot(model_training.history[metric], color=colour)
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.show()

In [None]:
# Training (blue) and validation (violet) loss per epoch.
plt.plot(model_training.history['loss'], color='blue')
plt.plot(model_training.history['val_loss'], color='violet')
plt.xlabel('epochs')
# This figure shows the loss curves, so the y axis is labelled 'loss'.
plt.ylabel('loss')
plt.show()

# Improving model with GridSearchCV

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.activations import relu, sigmoid

In [None]:
def create_model(layers):
    """Build and compile a dense network with the given hidden-layer sizes.

    Parameters
    ----------
    layers : int or iterable of int
        Number of units in each hidden layer, in order. A bare integer is
        treated as a single hidden layer of that size.

    Returns
    -------
    A compiled Sequential model: one ReLU Dense layer per entry of
    ``layers``, followed by a 2-unit softmax output layer.
    """
    # `(40)` in a parameter grid is just the int 40, not a one-element tuple;
    # normalise it so the loop below does not fail on a non-iterable.
    if isinstance(layers, (int, np.integer)):
        layers = (layers,)

    model1 = Sequential()
    for nodes in layers:
        model1.add(Dense(nodes, activation='relu'))
    model1.add(Dense(2, activation='softmax'))
    model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model1

In [None]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
# Wrap the model factory in KerasClassifier so it can be passed to
# GridSearchCV as the estimator.
model2 = KerasClassifier(
    build_fn=create_model,
    verbose=0,
)
model2

In [None]:
# Hidden-layer layouts to try. Every entry must be a tuple: `(40)` is just
# the integer 40, not a one-layer tuple, so the single-layer candidate is
# written `(40,)`.
layers = [(40,), (40, 70), (60, 40), (50, 70), (90, 70), (80, 100)]
param_grid = dict(layers=layers, epochs=[20, 40, 60])
# 5-fold cross-validated search over every layers/epochs combination.
grid = GridSearchCV(estimator=model2, param_grid=param_grid, cv=5)
grid_result = grid.fit(X1_train, y1_train)

In [None]:
# Best cross-validated score found by the search and the parameters that produced it.
[grid_result.best_score_,grid_result.best_params_]

# Comparison between the models

In [None]:
# Rebuild and retrain the network with the configuration GridSearchCV
# actually selected, rather than a hard-coded copy that goes stale whenever
# the search is re-run.
best_params = grid_result.best_params_
bestClassifier = create_model(best_params['layers'])
HbestClassifier = bestClassifier.fit(
    X1_train,
    y1_train,
    batch_size=16,
    validation_split=0.25,
    epochs=best_params['epochs'],
)

In [None]:
# Training accuracy per epoch: grid-searched model vs. the initial model.
plt.plot(HbestClassifier.history['accuracy'], label='GridSearchCV Best Parameters')
plt.plot(model_training.history['accuracy'], label='Initial parameters')
plt.ylabel('Accuracy')
plt.xlabel('No. epoch')
plt.legend(loc="lower right")
# Render explicitly, like the other plotting cells, so the cell output is
# the figure rather than the Legend object's repr.
plt.show()

# Model Evaluation

In [None]:
from sklearn.metrics import mean_squared_error
# Predictions of both networks on the scaled test split.
y_pred_best = bestClassifier.predict(X1_test)
y_pred_initial = model.predict(X1_test)
# scikit-learn's signature is (y_true, y_pred). MSE is symmetric, so the
# value is the same either way, but the call now matches the documented order.
rmse = np.sqrt(mean_squared_error(y1_test, y_pred_initial))
print("Root mean squared error:{}.".format(rmse), "\nSize of test data :{}".format(y1_test.shape))

In [None]:
threshold = 0.5
# One class label per sample: the index of the larger output column.
# Flattening the two-column matrices instead would list every sample twice
# (once per column) and distort any metric computed from the result.
y2_pred_initial = np.argmax(y_pred_initial, axis=1)
y2_test_initial = np.argmax(y1_test, axis=1)
# Keep the thresholded 0/1 matrix under the original name.
y_pred_initial = np.where(y_pred_initial > threshold, 1., 0.)

In [None]:
threshold = 0.5
# One class label per sample: the index of the larger output column.
# Flattening the two-column matrices instead would list every sample twice
# (once per column) and distort the confusion matrix and report built from
# these vectors.
y2_pred_best = np.argmax(y_pred_best, axis=1)
y2_test_best = np.argmax(y1_test, axis=1)
# Keep the thresholded 0/1 matrix under the original name.
y_pred_best = np.where(y_pred_best > threshold, 1., 0.)

In [None]:
# Loss and accuracy of the initial model on the scaled test split.
model.evaluate(X1_test,y1_test)

In [None]:
# Loss and accuracy of the retrained "best" model on the scaled test split.
bestClassifier.evaluate(X1_test,y1_test)

In [None]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix,classification_report
# Confusion matrix of y2_test_best against y2_pred_best, drawn as a heatmap.
cm = confusion_matrix(y2_test_best, y2_pred_best)
# fmt='d' prints the cell counts as plain integers; the default '.2g'
# format switches to scientific notation for counts of 100 or more.
sns.heatmap(cm, annot=True, fmt='d')

In [None]:
# Text report of precision, recall and F1 comparing y2_test_best with y2_pred_best.
print(classification_report(y2_test_best,y2_pred_best))