In [1]:
import pandas as pd

file_path = 'https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data'

cols = ['age','sex','cp','trestbps','chol','fbs','restecg','thalach','exang','oldpeak','slope','ca','label']

data = pd.read_csv(file_path, header=None, names = cols)
data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,label
63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,3.0,3.0,2
67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,1
37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0.0,3.0,0
41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0


In [2]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Float64Index: 303 entries, 63.0 to 38.0
Data columns (total 13 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    float64
 1   sex       303 non-null    float64
 2   cp        303 non-null    float64
 3   trestbps  303 non-null    float64
 4   chol      303 non-null    float64
 5   fbs       303 non-null    float64
 6   restecg   303 non-null    float64
 7   thalach   303 non-null    float64
 8   exang     303 non-null    float64
 9   oldpeak   303 non-null    float64
 10  slope     303 non-null    object 
 11  ca        303 non-null    object 
 12  label     303 non-null    int64  
dtypes: float64(10), int64(1), object(2)
memory usage: 33.1+ KB


In [3]:
cont_cols = ['age', 'trestbps', 'chol','thalach','oldpeak']

In [4]:
#count label distribution
from collections import Counter
print('classes and number of values in dataset', Counter(data['label']))

classes and number of values in dataset Counter({0: 164, 1: 55, 2: 36, 3: 35, 4: 13})


In [5]:
labels = data['label']
features = data.drop(['label'], axis = 1)


In [6]:
#dummy the objects features
features = pd.get_dummies(features)
features

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope_0.0,slope_1.0,slope_2.0,slope_3.0,slope_?,ca_3.0,ca_6.0,ca_7.0,ca_?
63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,1,0,0,0,0,0,1,0,0
67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,0,0,0,1,0,1,0,0,0
67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,0,0,1,0,0,0,0,1,0
37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,1,0,0,0,0,1,0,0,0
41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,1,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45.0,1.0,1.0,110.0,264.0,0.0,0.0,132.0,0.0,1.2,2.0,1,0,0,0,0,0,0,1,0
68.0,1.0,4.0,144.0,193.0,1.0,0.0,141.0,0.0,3.4,2.0,0,0,1,0,0,0,0,1,0
57.0,1.0,4.0,130.0,131.0,0.0,0.0,115.0,1.0,1.2,2.0,0,1,0,0,0,0,0,1,0
57.0,0.0,2.0,130.0,236.0,0.0,2.0,174.0,0.0,0.0,2.0,0,1,0,0,0,1,0,0,0


In [7]:
from sklearn.model_selection import train_test_split
features_train, features_test, labels_train, labels_test = train_test_split(features, labels, test_size = 0.2, random_state=23)


In [8]:
#scale the numeric features
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer

ct = ColumnTransformer([('numeric', StandardScaler(), cont_cols)])

#transform the features
features_train=ct.fit_transform(features_train)
features_test=ct.transform(features_test)

In [9]:
#now the label
from sklearn.preprocessing import LabelEncoder
import tensorflow
from tensorflow.keras.utils import to_categorical

le = LabelEncoder()
#convert integer encoded labels into binary vector
labels_train = le.fit_transform(labels_train.astype(str))
labels_test = le.transform(labels_test.astype(str))

labels_train = to_categorical(labels_train, dtype='int64')
labels_test = to_categorical(labels_test, dtype='int64')


In [10]:
features_train.shape


(242, 5)

In [11]:
labels_train.shape

(242, 5)

In [15]:
#initiate the model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, Dense
model = Sequential()

model.add(InputLayer(input_shape=(features_train.shape[1],)))

model.add(Dense(12, activation='relu'))
#number of neurons corresponding to number of classes in labels
model.add(Dense(5, activation='softmax'))

In [16]:
#compile
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics='accuracy')

In [17]:
#train and evaluate
model.fit(features_train, labels_train, epochs=100, batch_size=16, verbose=1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7efc303212b0>

In [21]:
#evaluate model

loss, acc = model.evaluate(features_test,labels_test, verbose=1)
print('Loss: ',loss, "| Accuracy: ", acc)

Loss:  1.1254990100860596 | Accuracy:  0.5737704634666443


In [24]:
#generate classification report
from sklearn.metrics import classification_report
import numpy as np
labels_estimate=model.predict(features_test, verbose=1)

labels_estimate=np.argmax(labels_estimate, axis=1)
labels_true = np.argmax(labels_test, axis=1)
print(classification_report(labels_estimate,labels_true))

              precision    recall  f1-score   support

           0       0.97      0.62      0.75        52
           1       0.07      0.33      0.12         3
           2       0.50      0.33      0.40         6
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         0

    accuracy                           0.57        61
   macro avg       0.31      0.26      0.25        61
weighted avg       0.88      0.57      0.69        61

