# **4. Model Training & Evaluation**

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the reduced dataset from the data directory one level above the notebook.
df_reduced = pd.read_csv("../data/heart_reduced.csv")

# The 'num' column is the target (cast to int); every other column is a feature.
y = df_reduced['num'].astype(int)
X_reduced = df_reduced.drop(columns=['num'])

## **4.1 Supervised Learning – Classification Models**


In [7]:
# 1. Train/Test Split
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions comparable in both partitions; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_reduced,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Print the dimensions of each partition as a quick sanity check.
for caption, part in (
    ("X_train shape:", X_train),
    ("X_test shape:", X_test),
    ("y_train shape:", y_train),
    ("y_test shape:", y_test),
):
    print(caption, part.shape)

X_train shape: (237, 9)
X_test shape: (60, 9)
y_train shape: (237,)
y_test shape: (60,)


In [8]:
# 2. Train Models
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# Candidate classifiers, keyed by the display name used in the printed output.
# Every estimator is configured with a class_weight option, and the ones that
# accept a random_state share the same seed (42).
models = {
    "Logistic Regression": LogisticRegression(
        max_iter=1000,
        solver="liblinear",
        class_weight="balanced",
    ),
    "Decision Tree": DecisionTreeClassifier(
        random_state=42,
        class_weight="balanced",
    ),
    "Random Forest": RandomForestClassifier(
        n_estimators=300,
        random_state=42,
        class_weight="balanced_subsample",
    ),
    "SVM": SVC(
        kernel="rbf",
        probability=True,
        class_weight="balanced",
        decision_function_shape="ovr",
        random_state=42,
    ),
}

# Fit each estimator in place on the training partition and report progress.
for name, model in models.items():
    model.fit(X_train, y_train)
    print(f"{name} trained.")


Logistic Regression trained.
Decision Tree trained.
Random Forest trained.
SVM trained.


In [None]:
# 3. Evaluate Models
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# One row per fitted model: [name, accuracy, precision, recall, f1],
# all computed on the held-out test partition.
results = []

# Precision, recall and F1 use the same averaging options, so define them once.
# zero_division=0 scores a class with no predicted samples as 0 instead of
# emitting a warning.
# NOTE(review): support-weighted recall is mathematically equal to accuracy,
# so the "Recall" column always duplicates "Accuracy". Consider
# average='macro' if a distinct per-class view is wanted.
weighted_opts = {"average": "weighted", "zero_division": 0}

for name, model in models.items():
    y_pred = model.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, **weighted_opts)
    rec = recall_score(y_test, y_pred, **weighted_opts)
    f1 = f1_score(y_test, y_pred, **weighted_opts)

    results.append([name, acc, prec, rec, f1])

# Tabulate the metrics with the model name as the index. set_index is chained
# so that it returns a new frame rather than mutating one in place.
results_df = pd.DataFrame(
    results,
    columns=["Model", "Accuracy", "Precision", "Recall", "F1 Score"],
).set_index("Model")
print(results_df)

                     Accuracy  Precision    Recall  F1 Score
Model                                                       
Logistic Regression  0.766667   0.822796  0.766667  0.790929
Decision Tree        0.816667   0.794937  0.816667  0.803225
Random Forest        0.816667   0.692090  0.816667  0.749235
SVM                  0.716667   0.872917  0.716667  0.773232




---

