In [10]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [11]:
df = pd.read_csv("../data/heart.csv", encoding="latin1")

In [12]:
X = df.drop("HeartDisease", axis=1)
y = df["HeartDisease"]

# Vorverarbeitung (z. B. OneHotEncoding für Kategorien)
X = pd.get_dummies(X)
#speichern der reihenfolge für API
with open("../ml/training_columns.txt", "w") as f:
    for col in X.columns:
        f.write(f"{col}\n")



In [13]:
print(X)

     Age  RestingBP  Cholesterol  FastingBS  MaxHR  Oldpeak  Sex_F  Sex_M  \
0     40        140          289          0    172      0.0  False   True   
1     49        160          180          0    156      1.0   True  False   
2     37        130          283          0     98      0.0  False   True   
3     48        138          214          0    108      1.5   True  False   
4     54        150          195          0    122      0.0  False   True   
..   ...        ...          ...        ...    ...      ...    ...    ...   
913   45        110          264          0    132      1.2  False   True   
914   68        144          193          1    141      3.4  False   True   
915   57        130          131          0    115      1.2  False   True   
916   57        130          236          0    174      0.0   True  False   
917   38        138          175          0    173      0.0  False   True   

     ChestPainType_ASY  ChestPainType_ATA  ChestPainType_NAP  \
0          

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


First sklearn

In [15]:
model = RandomForestClassifier()

precision: $p=tp/(tp+fp)$,
recall: $r=tp/(tp+fn)$,
$F_\beta$: weighted precision and recall mean= $(1+\beta)^2 \frac{p \cdot r}{\beta^2 p+recall}$

In [16]:
model.fit(X_train, y_train)

print(classification_report(y_test, model.predict(X_test)))
joblib.dump(model, "../ml/model_sklearn.pkl")

              precision    recall  f1-score   support

           0       0.82      0.88      0.85       112
           1       0.92      0.87      0.89       164

    accuracy                           0.88       276
   macro avg       0.87      0.88      0.87       276
weighted avg       0.88      0.88      0.88       276



['../ml/model_sklearn.pkl']

In [17]:
df_sql=df.assign( prediction=model.predict(X))
print(df_sql)

     Age Sex ChestPainType  RestingBP  Cholesterol  FastingBS RestingECG  \
0     40   M           ATA        140          289          0     Normal   
1     49   F           NAP        160          180          0     Normal   
2     37   M           ATA        130          283          0         ST   
3     48   F           ASY        138          214          0     Normal   
4     54   M           NAP        150          195          0     Normal   
..   ...  ..           ...        ...          ...        ...        ...   
913   45   M            TA        110          264          0     Normal   
914   68   M           ASY        144          193          1     Normal   
915   57   M           ASY        130          131          0     Normal   
916   57   F           ATA        130          236          0        LVH   
917   38   M           NAP        138          175          0     Normal   

     MaxHR ExerciseAngina  Oldpeak ST_Slope  HeartDisease  prediction  
0      172     

In [18]:
from sqlalchemy import create_engine

In [19]:
engine = create_engine("postgresql://postgres:xamyadt123@localhost:5432/patients",echo=True)
df_sql.to_sql("patients_with_prediction", engine, if_exists="replace", index=False)

2025-07-30 15:52:09,447 INFO sqlalchemy.engine.Engine select pg_catalog.version()
2025-07-30 15:52:09,448 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-07-30 15:52:09,450 INFO sqlalchemy.engine.Engine select current_schema()
2025-07-30 15:52:09,451 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-07-30 15:52:09,454 INFO sqlalchemy.engine.Engine show standard_conforming_strings
2025-07-30 15:52:09,454 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-07-30 15:52:09,456 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-07-30 15:52:09,462 INFO sqlalchemy.engine.Engine SELECT pg_catalog.pg_class.relname 
FROM pg_catalog.pg_class JOIN pg_catalog.pg_namespace ON pg_catalog.pg_namespace.oid = pg_catalog.pg_class.relnamespace 
WHERE pg_catalog.pg_class.relname = %(table_name)s AND pg_catalog.pg_class.relkind = ANY (ARRAY[%(param_1)s, %(param_2)s, %(param_3)s, %(param_4)s, %(param_5)s]) AND pg_catalog.pg_table_is_visible(pg_catalog.pg_class.oid) AND pg_catalog.pg_namespace.nspname != %(nspname

918

High scores on first try 

Now PyTorch


In [20]:
import torch
import numpy as np
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import sys
import os
sys.path.append(os.path.abspath(".."))
from ml.model import HeartModel

In [21]:
X_train_tensor = torch.tensor(X_train.values.astype(np.float32))
y_train_tensor = torch.tensor(y_train.values.astype(np.int64))

X_test_tensor = torch.tensor(X_test.values.astype(np.float32))
y_test_tensor = torch.tensor(y_test.values.astype(np.int64))

In [22]:
model = HeartModel(input_dim=X_train.shape[1])
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

train_data = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)

# Training Loop
for epoch in range(30):
    for xb, yb in train_loader:
        pred = model(xb)
        loss = loss_fn(pred, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}: Loss = {loss.item():.4f}")

Epoch 1: Loss = 0.6238
Epoch 2: Loss = 0.0061
Epoch 3: Loss = 0.1428
Epoch 4: Loss = 0.0688
Epoch 5: Loss = 1.2099
Epoch 6: Loss = 0.0160
Epoch 7: Loss = 0.2019
Epoch 8: Loss = 0.1068
Epoch 9: Loss = 0.0642
Epoch 10: Loss = 0.1198
Epoch 11: Loss = 1.2605
Epoch 12: Loss = 0.0280
Epoch 13: Loss = 0.1046
Epoch 14: Loss = 0.3113
Epoch 15: Loss = 0.0828
Epoch 16: Loss = 0.1754
Epoch 17: Loss = 0.1401
Epoch 18: Loss = 0.1041
Epoch 19: Loss = 0.1906
Epoch 20: Loss = 0.6942
Epoch 21: Loss = 0.0246
Epoch 22: Loss = 0.1293
Epoch 23: Loss = 0.2569
Epoch 24: Loss = 2.0976
Epoch 25: Loss = 2.0964
Epoch 26: Loss = 1.2105
Epoch 27: Loss = 0.4577
Epoch 28: Loss = 0.1188
Epoch 29: Loss = 0.1515
Epoch 30: Loss = 0.0252


In [23]:
from sklearn.metrics import accuracy_score

In [24]:
model.eval()
with torch.no_grad():
    outputs = model(X_test_tensor)
    _, predicted = torch.max(outputs, 1)

In [25]:
y_pred = predicted.numpy()
y_true = y_test_tensor.numpy()

In [26]:
print("Accuracy:", accuracy_score(y_true, y_pred))
print("Classification Report:")
print(classification_report(y_true, y_pred))

Accuracy: 0.8913043478260869
Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.83      0.86       112
           1       0.89      0.93      0.91       164

    accuracy                           0.89       276
   macro avg       0.89      0.88      0.89       276
weighted avg       0.89      0.89      0.89       276



In [27]:
torch.save(model.state_dict(), "../ml/model_pytorch.pt")

In [28]:
#Load model
#model = HeartModel(input_dim=X_test.shape[1])
#model.load_state_dict(torch.load("ml/heart_model.pt"))
#model.eval()