In [4]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [5]:
df = pd.read_csv("../data/heart.csv")

In [6]:
X = df.drop("HeartDisease", axis=1)
y = df["HeartDisease"]

# Vorverarbeitung (z. B. OneHotEncoding für Kategorien)
X = pd.get_dummies(X)





In [7]:
print(X)

     Age  RestingBP  Cholesterol  FastingBS  MaxHR  Oldpeak  Sex_F  Sex_M  \
0     40        140          289          0    172      0.0  False   True   
1     49        160          180          0    156      1.0   True  False   
2     37        130          283          0     98      0.0  False   True   
3     48        138          214          0    108      1.5   True  False   
4     54        150          195          0    122      0.0  False   True   
..   ...        ...          ...        ...    ...      ...    ...    ...   
913   45        110          264          0    132      1.2  False   True   
914   68        144          193          1    141      3.4  False   True   
915   57        130          131          0    115      1.2  False   True   
916   57        130          236          0    174      0.0   True  False   
917   38        138          175          0    173      0.0  False   True   

     ChestPainType_ASY  ChestPainType_ATA  ChestPainType_NAP  \
0          

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


First sklearn

In [9]:
model = RandomForestClassifier()

precision: $p=tp/(tp+fp)$,
recall: $r=tp/(tp+fn)$,
$F_\beta$: weighted precision and recall mean= $(1+\beta)^2 \frac{p \cdot r}{\beta^2 p+recall}$

In [10]:
model.fit(X_train, y_train)

print(classification_report(y_test, model.predict(X_test)))
joblib.dump(model, "../ml/model_sklearn.pkl")

              precision    recall  f1-score   support

           0       0.85      0.88      0.87       112
           1       0.92      0.90      0.91       164

    accuracy                           0.89       276
   macro avg       0.89      0.89      0.89       276
weighted avg       0.89      0.89      0.89       276



['../ml/model_sklearn.pkl']

High scores on first try 

Now PyTorch


In [40]:
import torch
import numpy as np
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

In [35]:
X_train_tensor = torch.tensor(X_train.values.astype(np.float32))
y_train_tensor = torch.tensor(y_train.values.astype(np.int64))

X_test_tensor = torch.tensor(X_test.values.astype(np.float32))
y_test_tensor = torch.tensor(y_test.values.astype(np.int64))

In [38]:
class HeartModel(nn.Module):
    def __init__(self, input_dim):
        super(HeartModel, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_dim, 32),
            nn.ReLU(),
            nn.Linear(32, 2)
        )

    def forward(self, x):
        return self.layers(x)

In [41]:
model = HeartModel(input_dim=X_train.shape[1])
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

train_data = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)

# Training Loop
for epoch in range(20):
    for xb, yb in train_loader:
        pred = model(xb)
        loss = loss_fn(pred, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}: Loss = {loss.item():.4f}")

Epoch 1: Loss = 1.0825
Epoch 2: Loss = 0.8711
Epoch 3: Loss = 0.2558
Epoch 4: Loss = 0.6479
Epoch 5: Loss = 0.3641
Epoch 6: Loss = 0.2575
Epoch 7: Loss = 0.5156
Epoch 8: Loss = 0.1942
Epoch 9: Loss = 0.6388
Epoch 10: Loss = 0.4615
Epoch 11: Loss = 0.3900
Epoch 12: Loss = 0.3403
Epoch 13: Loss = 0.1589
Epoch 14: Loss = 0.1086
Epoch 15: Loss = 0.2051
Epoch 16: Loss = 0.2639
Epoch 17: Loss = 0.2509
Epoch 18: Loss = 0.3568
Epoch 19: Loss = 0.1087
Epoch 20: Loss = 0.1219


Overfitting in epoch 20?

In [42]:
from sklearn.metrics import accuracy_score

In [43]:
model.eval()
with torch.no_grad():
    outputs = model(X_test_tensor)
    _, predicted = torch.max(outputs, 1)

In [44]:
y_pred = predicted.numpy()
y_true = y_test_tensor.numpy()

In [45]:
print("Accuracy:", accuracy_score(y_true, y_pred))
print("Classification Report:")
print(classification_report(y_true, y_pred))

Accuracy: 0.8623188405797102
Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.77      0.82       112
           1       0.85      0.93      0.89       164

    accuracy                           0.86       276
   macro avg       0.87      0.85      0.85       276
weighted avg       0.86      0.86      0.86       276



In [46]:
torch.save(model.state_dict(), "../ml/model_pytorch.pt")

In [47]:
#Load model
#model = HeartModel(input_dim=X_test.shape[1])
#model.load_state_dict(torch.load("ml/heart_model.pt"))
#model.eval()