In [1]:
from sklearn import tree
import pandas as pd
import os
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv(os.path.join("data/HeartData.csv"))
df.head()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


In [3]:
target = df["DEATH_EVENT"]
target_names = ["Fatal", "Survived"]

In [4]:
data = df.drop("DEATH_EVENT", axis=1)
# heartdata = data.drop("time", axis=1)
feature_names = data.columns
# heartdata.head()

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, target, random_state=630)

# Decision Tree:

In [6]:
clf = tree.DecisionTreeClassifier()
clf = clf.fit(X_train, y_train)
clf.score(X_test, y_test)

0.7733333333333333

In [7]:
clf

DecisionTreeClassifier()

# Random Forrest:

In [8]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=500)
rf = rf.fit(X_train, y_train)
rf.score(X_test, y_test)

0.8666666666666667

In [9]:
sorted(zip(rf.feature_importances_, feature_names), reverse=True)

[(0.36068502451439916, 'time'),
 (0.1557019509639322, 'serum_creatinine'),
 (0.11174293847736799, 'ejection_fraction'),
 (0.08245906529646502, 'age'),
 (0.07916901938229899, 'creatinine_phosphokinase'),
 (0.0787424474921524, 'platelets'),
 (0.06711837849336781, 'serum_sodium'),
 (0.014407566420916791, 'sex'),
 (0.013327949056570953, 'diabetes'),
 (0.012854495989672424, 'smoking'),
 (0.012524959985569663, 'anaemia'),
 (0.011266203927286356, 'high_blood_pressure')]

# Logistic Regression:

In [10]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression()
classifier

LogisticRegression()

In [11]:
classifier.fit(X_train, y_train)

LogisticRegression()

In [12]:
print(f"Training Data Score: {classifier.score(X_train, y_train)}")
print(f"Testing Data Score: {classifier.score(X_test, y_test)}")

Training Data Score: 0.8169642857142857
Testing Data Score: 0.8666666666666667


In [13]:
predictions = classifier.predict(X_test)
print(f"First 10 Predictions:   {predictions[:10]}")
print(f"First 10 Actual labels: {y_test[:10].tolist()}")

First 10 Predictions:   [0 1 0 0 0 0 0 0 0 0]
First 10 Actual labels: [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]


In [14]:
pd.DataFrame({"Prediction": predictions, "Actual": y_test}).reset_index(drop=True)

Unnamed: 0,Prediction,Actual
0,0,0
1,1,1
2,0,0
3,0,0
4,0,0
...,...,...
70,0,1
71,0,0
72,0,0
73,0,0


# SVC:

In [15]:
from sklearn.svm import SVC 
model = SVC(kernel='linear')
model.fit(X_train, y_train)

SVC(kernel='linear')

In [16]:
print('Test Acc: %.3f' % model.score(X_test, y_test))

Test Acc: 0.893


In [17]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
print(classification_report(y_test, predictions,
                            target_names=target_names))

              precision    recall  f1-score   support

       Fatal       0.93      0.93      0.93        59
    Survived       0.75      0.75      0.75        16

    accuracy                           0.89        75
   macro avg       0.84      0.84      0.84        75
weighted avg       0.89      0.89      0.89        75



# OneHot Encoding:

In [18]:
from tensorflow.keras.utils import to_categorical

y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)
y_train_categorical

array([[1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.

# Scaling the Data:

In [19]:
from sklearn.preprocessing import StandardScaler

X_scaler = StandardScaler().fit(X_train)

In [20]:
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [21]:
from tensorflow.keras.utils import to_categorical

In [22]:
y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)

In [23]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(units=24, activation='relu', input_dim=12))
model.add(Dense(units=2, activation='softmax'))

In [24]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 24)                312       
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 50        
Total params: 362
Trainable params: 362
Non-trainable params: 0
_________________________________________________________________


In [25]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [26]:
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2
)

Train on 224 samples
Epoch 1/100
224/224 - 1s - loss: 0.7943 - accuracy: 0.4866
Epoch 2/100
224/224 - 0s - loss: 0.7607 - accuracy: 0.5134
Epoch 3/100
224/224 - 0s - loss: 0.7288 - accuracy: 0.5536
Epoch 4/100
224/224 - 0s - loss: 0.7021 - accuracy: 0.5759
Epoch 5/100
224/224 - 0s - loss: 0.6778 - accuracy: 0.6116
Epoch 6/100
224/224 - 0s - loss: 0.6539 - accuracy: 0.6384
Epoch 7/100
224/224 - 0s - loss: 0.6336 - accuracy: 0.6473
Epoch 8/100
224/224 - 0s - loss: 0.6147 - accuracy: 0.6518
Epoch 9/100
224/224 - 0s - loss: 0.5964 - accuracy: 0.6741
Epoch 10/100
224/224 - 0s - loss: 0.5800 - accuracy: 0.6875
Epoch 11/100
224/224 - 0s - loss: 0.5644 - accuracy: 0.6875
Epoch 12/100
224/224 - 0s - loss: 0.5493 - accuracy: 0.7188
Epoch 13/100
224/224 - 0s - loss: 0.5354 - accuracy: 0.7321
Epoch 14/100
224/224 - 0s - loss: 0.5232 - accuracy: 0.7321
Epoch 15/100
224/224 - 0s - loss: 0.5101 - accuracy: 0.7589
Epoch 16/100
224/224 - 0s - loss: 0.4982 - accuracy: 0.7723
Epoch 17/100
224/224 - 0s - 

<tensorflow.python.keras.callbacks.History at 0x24653b546c8>

In [27]:
deep_model = Sequential()
deep_model.add(Dense(units=24, activation='relu', input_dim=12))
deep_model.add(Dense(units=6, activation='relu'))
deep_model.add(Dense(units=2, activation='softmax'))

In [28]:
deep_model.compile(optimizer='adam',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])

deep_model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2
)

Train on 224 samples
Epoch 1/100
224/224 - 1s - loss: 0.7871 - accuracy: 0.4286
Epoch 2/100
224/224 - 0s - loss: 0.7469 - accuracy: 0.4732
Epoch 3/100
224/224 - 0s - loss: 0.7195 - accuracy: 0.5134
Epoch 4/100
224/224 - 0s - loss: 0.6957 - accuracy: 0.5714
Epoch 5/100
224/224 - 0s - loss: 0.6774 - accuracy: 0.6071
Epoch 6/100
224/224 - 0s - loss: 0.6650 - accuracy: 0.6339
Epoch 7/100
224/224 - 0s - loss: 0.6548 - accuracy: 0.6607
Epoch 8/100
224/224 - 0s - loss: 0.6446 - accuracy: 0.6741
Epoch 9/100
224/224 - 0s - loss: 0.6361 - accuracy: 0.6830
Epoch 10/100
224/224 - 0s - loss: 0.6278 - accuracy: 0.6964
Epoch 11/100
224/224 - 0s - loss: 0.6201 - accuracy: 0.7009
Epoch 12/100
224/224 - 0s - loss: 0.6104 - accuracy: 0.7054
Epoch 13/100
224/224 - 0s - loss: 0.6021 - accuracy: 0.7009
Epoch 14/100
224/224 - 0s - loss: 0.5918 - accuracy: 0.7188
Epoch 15/100
224/224 - 0s - loss: 0.5835 - accuracy: 0.7232
Epoch 16/100
224/224 - 0s - loss: 0.5736 - accuracy: 0.7366
Epoch 17/100
224/224 - 0s - 

<tensorflow.python.keras.callbacks.History at 0x24654ee7c88>

In [29]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

75/75 - 0s - loss: 0.4477 - accuracy: 0.8533
Normal Neural Network - Loss: 0.44774781703948974, Accuracy: 0.8533333539962769


In [30]:
model_loss, model_accuracy = deep_model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(f"Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

75/75 - 0s - loss: 0.4280 - accuracy: 0.8400
Deep Neural Network - Loss: 0.4280357313156128, Accuracy: 0.8399999737739563
