**Machine Learning Prototype Model For Detecting Heart Disease**

## **Load Data**

In [None]:
import pandas as pd
import numpy as np
dataframe = pd.read_csv("https://raw.githubusercontent.com/dataprofessor/data/master/heart-disease-cleveland.csv")
dataframe = dataframe.replace(r'^\s*$', np.nan, regex=True)
dataframe.replace('?', np.nan, inplace=True)
dataframe.dropna(inplace=True)
dataframe

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,diagnosis
0,63,1,1,145,233,1,2,150,0,2.3,3,0.0,6.0,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3.0,3.0,2
2,67,1,4,120,229,0,2,129,1,2.6,2,2.0,7.0,1
3,37,1,3,130,250,0,0,187,0,3.5,3,0.0,3.0,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0.0,3.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
297,57,0,4,140,241,0,0,123,1,0.2,2,0.0,7.0,1
298,45,1,1,110,264,0,0,132,0,1.2,2,0.0,7.0,1
299,68,1,4,144,193,1,0,141,0,3.4,2,2.0,7.0,2
300,57,1,4,130,131,0,0,115,1,1.2,2,1.0,7.0,3


## **Prep Data**

### Seperation of Data as x & y

In [None]:
y = dataframe[' diagnosis']
y

0      0
1      2
2      1
3      0
4      0
      ..
297    1
298    1
299    2
300    3
301    1
Name:  diagnosis, Length: 297, dtype: int64

In [None]:
x = dataframe.drop(' diagnosis', axis=1)
x

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,63,1,1,145,233,1,2,150,0,2.3,3,0.0,6.0
1,67,1,4,160,286,0,2,108,1,1.5,2,3.0,3.0
2,67,1,4,120,229,0,2,129,1,2.6,2,2.0,7.0
3,37,1,3,130,250,0,0,187,0,3.5,3,0.0,3.0
4,41,0,2,130,204,0,2,172,0,1.4,1,0.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
297,57,0,4,140,241,0,0,123,1,0.2,2,0.0,7.0
298,45,1,1,110,264,0,0,132,0,1.2,2,0.0,7.0
299,68,1,4,144,193,1,0,141,0,3.4,2,2.0,7.0
300,57,1,4,130,131,0,0,115,1,1.2,2,1.0,7.0


## **Split Data**

### Data splits into the traning set and the testing set

In [None]:
from sklearn.model_selection import train_test_split

x_train , x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=100)

In [None]:
x_train

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
119,65,1,4,135,254,0,2,127,0,2.8,2,1.0,7.0
292,44,1,4,120,169,0,0,144,1,2.8,3,0.0,6.0
59,51,1,1,125,213,0,2,125,1,1.4,1,1.0,3.0
69,46,1,3,150,231,0,0,147,0,3.6,2,0.0,3.0
161,77,1,4,125,304,0,2,162,1,0.0,1,3.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
66,60,1,3,140,185,0,2,155,0,3.0,2,0.0,3.0
53,44,1,2,130,219,0,2,188,0,0.0,1,0.0,3.0
79,58,1,4,150,270,0,2,111,1,0.8,1,0.0,7.0
284,61,1,4,148,203,0,0,161,0,0.0,1,1.0,7.0


In [None]:
x_test

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
223,53,1,4,123,282,0,0,95,1,2.0,2,2.0,7.0
140,59,1,2,140,221,0,0,164,1,0.0,1,0.0,3.0
226,47,1,4,112,204,0,0,143,0,0.1,1,0.0,3.0
177,56,1,4,132,184,0,2,105,1,2.1,2,1.0,6.0
232,49,1,3,118,149,0,2,126,0,0.8,1,3.0,3.0
135,55,0,2,135,250,0,2,161,0,1.4,2,0.0,3.0
129,62,0,4,124,209,0,0,163,0,0.0,1,0.0,3.0
89,51,0,3,130,256,0,2,149,0,0.5,1,0.0,3.0
27,66,0,1,150,226,0,0,114,0,2.6,3,0.0,3.0
296,59,1,4,164,176,1,2,90,0,1.0,2,2.0,6.0


# **Model Build**

## **Linear Regression**

In [None]:
from sklearn.linear_model import LinearRegression

lr = LinearRegression()
lr.fit(x_train, y_train)

### **Prediction**

In [None]:
y_lr_train_prediction = lr.predict(x_train)
y_lr_test_prediction = lr.predict(x_test)

In [None]:
y_lr_train_prediction

array([ 2.07964128,  1.66061044,  0.81661023,  0.78918821,  1.84865503,
        0.04001404,  0.32348598,  2.03973295,  0.02437289,  2.78911852,
        0.6852664 , -0.05216778,  0.17475995,  1.65259888,  1.70728283,
        1.89361187,  2.03855784,  1.63716511,  0.80793806,  1.77765008,
        0.01741333,  0.25494099,  0.88953267, -0.4774165 ,  0.52766416,
        0.25712931,  1.96305029,  0.98553977,  0.43085723,  0.57705333,
        2.41395864,  0.31976887,  0.17436904,  0.44786338,  0.67956617,
        1.82982853,  2.20946275,  2.66821619,  0.02054695,  0.98202528,
        1.49336692,  0.05485977,  3.15132113,  2.10734093,  0.27054283,
        1.20810148,  2.26963922,  0.98264393,  2.933495  , -0.11086425,
        1.57341977,  1.98165411,  1.64830345,  0.23514462,  1.35451915,
       -0.07740637,  1.37420624,  0.47918349,  2.76222392,  2.76761688,
       -0.32256069, -0.18641826,  0.33837249,  2.49611149,  0.36904946,
        0.58670785,  2.39750419,  0.21563557,  0.53569193,  0.18

In [None]:
y_lr_test_prediction

array([ 2.62894286,  0.02893768,  0.30872144,  2.39393446,  1.87258158,
        0.1103663 , -0.06003947,  0.17173684,  0.26200521,  2.67033596,
        1.73504538,  0.16326789,  0.98695486, -0.44885599, -0.01360375,
       -0.04976004,  2.68885267,  0.36747946,  1.19551878,  2.17896717,
        0.58923508, -0.28495622, -0.08922241,  2.76507812,  0.65345094,
        0.94065287,  0.6613139 ,  1.31179743,  1.59430981,  2.07936722,
        1.54748836,  1.20591455,  0.21570396,  1.94525936, -0.47888135,
        1.16503063, -0.62494396,  1.45015242,  0.21758138, -0.11425529,
        0.2470878 ,  3.2004657 ,  1.2449256 , -0.0247847 ,  0.05572587,
       -0.05568949,  0.7303852 ,  1.01503969,  3.21111183, -0.01568559,
        1.78403412,  1.79578817,  2.87213974,  1.83253191,  0.43403638,
        2.29732099,  0.95074335, -0.27918323,  0.16370876,  0.43869171])

### **Model Performance Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

lr_train_mse = mean_squared_error(y_train, y_lr_train_prediction)
lr_train_r2 = r2_score(y_train, y_lr_train_prediction)

lr_test_mse = mean_squared_error(y_test, y_lr_test_prediction)
lr_test_r2 = r2_score(y_test, y_lr_test_prediction)

In [None]:
lr_results = pd.DataFrame(['Linear regression', lr_train_mse, lr_train_r2, lr_test_mse, lr_test_r2]).transpose()
lr_results.columns = ['Method', 'Training MSE', 'Training R2', 'Test MSE', 'Test R2']

In [None]:
lr_results

Unnamed: 0,Method,Training MSE,Training R2,Test MSE,Test R2
0,Linear regression,0.684583,0.548788,0.623015,0.591019


## **Random Forest**

In [None]:
from sklearn.ensemble import RandomForestRegressor

rf = RandomForestRegressor(max_depth=2, random_state=100)
rf.fit(x_train, y_train)

### **Prediction**

In [None]:
y_rf_train_prediction = rf.predict(x_train)
y_rf_test_prediction = rf.predict(x_test)

### **Model Performance Evaluation**

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

rf_train_mse = mean_squared_error(y_train, y_rf_train_prediction)
rf_train_r2 = r2_score(y_train, y_rf_train_prediction)

rf_test_mse = mean_squared_error(y_test, y_rf_test_prediction)
rf_test_r2 = r2_score(y_test, y_rf_test_prediction)

In [None]:
rf_results = pd.DataFrame(['Random forest', rf_train_mse, rf_train_r2, rf_test_mse, rf_test_r2]).transpose()
rf_results.columns = ['Method', 'Training MSE', 'Training R2', 'Test MSE', 'Test R2']
rf_results

Unnamed: 0,Method,Training MSE,Training R2,Test MSE,Test R2
0,Random forest,0.715096,0.528676,0.779958,0.487993


## **Logistic Regression**

Sigmoid Function

In [94]:
import tensorflow as tf

model = tf.keras.models.Sequential()

In [99]:
model = tf.keras.models.Sequential()

scaler = StandardScaler()

x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

model.add(tf.keras.layers.Dense(256, input_shape=(x_train_scaled.shape[1],), activation='sigmoid'))
model.add(tf.keras.layers.Dense(256, activation='sigmoid'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(x_train_scaled, y_train, epochs=1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.src.callbacks.History at 0x7cb9cc526110>

In [100]:
model.evaluate(x_test, y_test)



[-396.3887634277344, 0.5833333134651184]