In [None]:
from __future__ import print_function

# for data analysis
import pandas as pd 
import numpy as np

# for data visuals
import seaborn as sns 
import matplotlib.pyplot as plt 
%matplotlib inline 

# for deep neural network
import keras


from sklearn.preprocessing import MinMaxScaler


### Step 1: Reading data
##### The data is split into two sets: a training set and a test set.
##### Any value in the last column (`num`) greater than 1 is set to 1, because this is a binary classification problem.
##### pandas is used to do this.


In [None]:
# Presumably the column names of the CSV files, kept for reference:
# names = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang','oldpeak','slope','ca','thal','num']
# Read the train file first, then the test file.
trainData, testData = (
    pd.read_csv(csv_path)
    for csv_path in ('processed_cleveland_data_train.csv',
                     'processed_cleveland_data_test.csv')
)


# The parameter of interest is `num`. The task is treated as a binary problem,
# so the individual levels of severity are not distinguished.
def replace_predict(df):
    """Collapse the `num` column of `df` to a binary label, in place.

    Every value greater than 1 is capped at 1. Values of 1 or less, and
    missing values, are left untouched. Capping (rather than listing the
    levels to replace) means a level outside 1-6 cannot slip through
    unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a numeric `num` column. It is modified in place.

    Returns
    -------
    None
    """
    df['num'] = df['num'].clip(upper=1)

        
# Binarise the target of both splits (modifies the frames in place).
replace_predict(trainData)
replace_predict(testData)

# Separate the feature columns from the `num` target.
Xtrain = trainData.drop(['num'], axis=1)
Ytrain = trainData['num']

Xtest = testData.drop(['num'], axis=1)
Ytest = testData['num']

# Learn each feature's min/max from the training split only, then apply that
# same mapping to the test split. Re-fitting the scaler on the test split
# would scale the two splits differently and let test-set statistics
# influence the evaluation. Test values may therefore fall slightly outside
# the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))
Xtrain = scaler.fit_transform(Xtrain)
Xtest = scaler.transform(Xtest)


In [None]:
# Point plot of the target `num` against `age` for the training data.
sns.pointplot(data=trainData, x='age', y='num')

### Step 2: Define Model
#### This is a binary classification model: its output is yes or no, which makes it a good fit for the problem.
#### The model is a Keras Sequential model running on TensorFlow. TensorFlow provides the underlying algorithms, but its own interface looks different; the Keras API uses those TensorFlow algorithms to solve the problem.



In [None]:

from keras.models import Sequential
from keras.layers import Dense,Dropout
# Build the network in one expression: three ReLU hidden layers (13, 6 and 5
# units), each followed by 20% dropout, then a single sigmoid output unit.
model = Sequential([
    Dense(13, input_dim=13, activation='relu'),
    Dropout(0.2),
    Dense(6, activation='relu'),
    Dropout(0.2),
    Dense(5, activation="relu"),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])



 

### Step 3: Compile Model


In [None]:
# Binary cross-entropy loss, Adam optimiser, accuracy reported as a metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

### Step 4: Fit model

In [None]:

# Train for 350 epochs using mini-batches of 8 samples.
model.fit(x=Xtrain, y=Ytrain, batch_size=8, epochs=350)


### Step 5: Evaluate Model


In [None]:
# Score the trained model on the test split and report the second entry of
# the returned metrics (the first entry is the loss) as a percentage.
scores = model.evaluate(Xtest, Ytest)
metric_name, metric_value = model.metrics_names[1], scores[1]
print("\n%s: %.2f%%" % (metric_name, metric_value * 100))

In [None]:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve
from matplotlib import pyplot
from sklearn.metrics import roc_auc_score



# Predicted probability of the positive class for each test sample.
# `Sequential.predict_proba` is no longer available in current Keras
# releases; with a sigmoid output layer `predict` already returns these
# probabilities. The output is flattened to 1-D, which is the shape the
# scikit-learn metrics below expect.
probs = model.predict(Xtest).ravel()


# calculate roc curve
fpr, tpr, thresholds = roc_curve(Ytest, probs)
# plot no skill
pyplot.plot([0, 1], [0, 1], linestyle='--', label='No skill')
# plot the roc curve for the model
pyplot.plot(fpr, tpr, label='Model')
# label the figure so it can be read on its own
pyplot.xlabel('False positive rate')
pyplot.ylabel('True positive rate')
pyplot.legend()
# show the plot
pyplot.show()
# calculate ROC AUC score (an area under the curve, not a loss)
roc_auc = roc_auc_score(Ytest, probs)
print(roc_auc)

### Step 6: Save Model

In [None]:
from sklearn.metrics import matthews_corrcoef
# The Matthews correlation coefficient compares the true labels with the
# predicted class labels. `scores` (from `model.evaluate`) holds the loss and
# accuracy, not per-sample predictions, so the labels are derived here by
# thresholding the model's predicted probabilities at 0.5.
predicted_labels = (model.predict(Xtest) > 0.5).astype(int).ravel()
matthewsCoeff = matthews_corrcoef(Ytest, predicted_labels)
print(matthewsCoeff)

In [None]:
from keras.models import load_model

# Save to a folder relative to the working directory instead of a path under
# one user's home directory, so the cell runs on any machine. The folder is
# created if it does not exist yet.
# NOTE(review): anything that loads the model from the old absolute path
# must be pointed at this location instead.
from pathlib import Path

model_path = Path('HeartModels') / 'HeartDiseaseKeras.h5'
model_path.parent.mkdir(parents=True, exist_ok=True)
model.save(str(model_path))

### Optional step- Prediction with the Model

In [None]:
# Predict on the training features and round the first value of every
# output row to the nearest integer.
predictions = model.predict(Xtrain)
rounded = list(map(lambda row: round(row[0]), predictions))
print(rounded)


### Step 7: Cross Validation

In [None]:
from sklearn.model_selection import StratifiedKFold

# NOTE(review): only NumPy is seeded here. The Keras backend keeps its own
# random state, so weight initialisation may still differ between runs;
# confirm whether it needs seeding as well.
seed = 7
np.random.seed(seed)

# Load the full data set and binarise its target in place.
alldata = pd.read_csv('processed_cleveland_data.csv')
replace_predict(alldata)
Xall = alldata.drop(['num'], axis=1)
Yall = alldata['num']
# Size the input layer from the data instead of hard-coding it.
n_features = Xall.shape[1]

# define 10-fold cross validation test harness
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
cvscores = []
for train_index, test_index in kfold.split(Xall, Yall):
    # Fold-local names keep this loop from overwriting the `model`, `Xtrain`,
    # `Xtest`, `Ytrain`, `Ytest` and `scores` produced by earlier cells.
    fold_Xtrain, fold_Xtest = Xall.values[train_index], Xall.values[test_index]
    fold_Ytrain, fold_Ytest = Yall.values[train_index], Yall.values[test_index]

    # Scale to [0, 1] like the train/test run earlier in the notebook. The
    # scaler is fitted on the training part of the fold only and then applied
    # to the held-out part, so no held-out statistics leak into training.
    fold_scaler = MinMaxScaler(feature_range=(0, 1))
    fold_Xtrain = fold_scaler.fit_transform(fold_Xtrain)
    fold_Xtest = fold_scaler.transform(fold_Xtest)

    # create model
    # NOTE(review): this network (12-8-1, no dropout) is not the same as the
    # one trained earlier in the notebook (13-6-5-1 with dropout); confirm
    # which architecture the cross-validation is meant to assess.
    cv_model = Sequential()
    cv_model.add(Dense(12, input_dim=n_features, activation='relu'))
    cv_model.add(Dense(8, activation='relu'))
    cv_model.add(Dense(1, activation='sigmoid'))
    # Compile model
    cv_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    # Fit the model
    cv_model.fit(fold_Xtrain, fold_Ytrain, epochs=350, batch_size=8, verbose=0)
    # evaluate the model on the held-out part of the fold
    fold_scores = cv_model.evaluate(fold_Xtest, fold_Ytest, verbose=0)
    print("%s: %.2f%%" % (cv_model.metrics_names[1], fold_scores[1]*100))
    cvscores.append(fold_scores[1] * 100)
# Mean and standard deviation of the per-fold scores.
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))