<a href="https://colab.research.google.com/github/skjdfhkskjds/machine-learning-projects/blob/main/heart_disease_predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q git+https://github.com/tensorflow/docs

  Building wheel for tensorflow-docs (setup.py) ... [?25l[?25hdone


In [70]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.cluster import KMeans

try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling

In [71]:
# Location of the heart-disease CSV, read directly from GitHub.
url = 'https://raw.githubusercontent.com/skjdfhkskjds/machine-learning-projects/main/heart.csv'
df = pd.read_csv(filepath_or_buffer=url)

# Show the last five rows as a quick check that the file loaded.
df.tail(n=5)

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
913,45,M,TA,110,264,0,Normal,132,N,1.2,Flat,1
914,68,M,ASY,144,193,1,Normal,141,N,3.4,Flat,1
915,57,M,ASY,130,131,0,Normal,115,Y,1.2,Flat,1
916,57,F,ATA,130,236,0,LVH,174,N,0.0,Flat,1
917,38,M,NAP,138,175,0,Normal,173,N,0.0,Up,0


In [72]:
# Fraction of missing entries per column, largest first.
# The counts are sorted before dividing by the number of rows.
missing_counts = df.isna().sum()
missing_counts.sort_values(ascending=False) / len(df)

HeartDisease      0.0
ST_Slope          0.0
Oldpeak           0.0
ExerciseAngina    0.0
MaxHR             0.0
RestingECG        0.0
FastingBS         0.0
Cholesterol       0.0
RestingBP         0.0
ChestPainType     0.0
Sex               0.0
Age               0.0
dtype: float64

In [73]:
# Column groups used by the preprocessing cells.
# `features` holds every input column, in the order they appear in the CSV.
features = [
    'Age',
    'Sex',
    'ChestPainType',
    'RestingBP',
    'Cholesterol',
    'FastingBS',
    'RestingECG',
    'MaxHR',
    'ExerciseAngina',
    'Oldpeak',
    'ST_Slope',
]

# Columns treated as numbers (imputed and standardised later on).
numerical_features = [
    'Age',
    'RestingBP',
    'Cholesterol',
    'FastingBS',
    'MaxHR',
    'Oldpeak',
]

# Columns holding labels (converted to integer codes later on).
categorical_features = [
    'Sex',
    'ChestPainType',
    'RestingECG',
    'ExerciseAngina',
    'ST_Slope',
]

In [74]:
# Median imputer for the numeric columns: NaN entries are replaced by the
# median of their column.
si = SimpleImputer(strategy="median", missing_values=np.nan)

# Learn the per-column medians and apply them in a single call, writing the
# result back over the same columns.
df[numerical_features] = si.fit_transform(df[numerical_features])

In [75]:
# Replace each categorical column with integer codes. pd.factorize numbers
# the labels in order of first appearance; only the codes are kept, so the
# code-to-label mapping is not stored anywhere.
for category in categorical_features:
  df[category] = pd.factorize(df[category])[0]

# Adds a new feature called typeOfPatient based on the k-means clustering of
# the dataset (two clusters over all input columns).
# random_state pins the centroid initialisation so the cluster labels are the
# same on every run; n_init is given explicitly because its default differs
# between scikit-learn releases.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=0)
df['typeOfPatient'] = kmeans.fit_predict(df[features])

In [76]:
# First five rows after encoding and adding the typeOfPatient column.
df.head(n=5)

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease,typeOfPatient
0,40.0,0,0,140.0,289.0,0.0,0,172.0,0,0.0,0,0,0
1,49.0,1,1,160.0,180.0,0.0,0,156.0,0,1.0,1,1,0
2,37.0,0,0,130.0,283.0,0.0,1,98.0,0,0.0,0,0,0
3,48.0,1,2,138.0,214.0,0.0,0,108.0,1,1.5,1,1,0
4,54.0,0,1,150.0,195.0,0.0,0,122.0,0,0.0,0,0,0


In [77]:
#splits the dataset into 80:20 training and testing data
y = df.pop('HeartDisease')
dftrain, dftest, y_train, y_test = train_test_split(df, y, test_size=0.2, random_state=0)

In [78]:
from sklearn.preprocessing import StandardScaler

# Work on explicit copies: the frames returned by train_test_split are row
# selections of another frame, and assigning columns onto them can trigger
# pandas' SettingWithCopyWarning.
dftrain = dftrain.copy()
dftest = dftest.copy()

# Scaling numeric features using sklearn StandardScaler.
# The scaler is fitted on the training rows only; the test rows are
# transformed with the statistics learned from the training rows.
sc = StandardScaler()
dftrain[numerical_features] = sc.fit_transform(dftrain[numerical_features])
dftest[numerical_features] = sc.transform(dftest[numerical_features])

In [79]:
# Seed TensorFlow's global random generator before any layer is created so
# that the initial weights are the same on every run.
# NOTE(review): bit-identical training may additionally depend on hardware
# and TensorFlow version - confirm if exact reproducibility matters.
tf.random.set_seed(0)

# Small fully connected binary classifier: two hidden ReLU layers of 16
# units and a single sigmoid output unit.
# NOTE(review): Flatten looks redundant for 2-D tabular input; kept so the
# layer stack stays the same as before.
model = keras.Sequential([
    keras.layers.Flatten(),
    keras.layers.Dense(16, activation=tf.nn.relu),
    keras.layers.Dense(16, activation=tf.nn.relu),
    keras.layers.Dense(1, activation=tf.nn.sigmoid),
])

# Binary cross-entropy pairs with the sigmoid output; accuracy is reported
# alongside the loss.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [80]:
# Train for 60 epochs with a batch size of 1, i.e. one sample per batch.
model.fit(
    x=dftrain,
    y=y_train,
    batch_size=1,
    epochs=60,
)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


<keras.callbacks.History at 0x7fc20b97a890>

In [83]:
# Score the trained model on the held-out test split. With the single
# 'accuracy' metric compiled in, evaluate() yields the loss followed by the
# accuracy.
evaluation = model.evaluate(x=dftest, y=y_test)
test_loss, test_acc = evaluation
print('Test accuracy:', test_acc)

Test accuracy: 0.85326087474823


In [84]:
# Write the trained model to a file in the current working directory.
model.save(filepath='heart_disease_prediction.h5')