# Part 03: Supervised Learning - Heart Disease

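This notebook trains several baseline classifiers to predict heart disease, compares their accuracy on a held-out test split, tunes a Random Forest with a cross-validated grid search, and saves the results, the tuned model, and the test split for the final notebook.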

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import joblib
import warnings

# Silence every warning for the rest of the session to keep cell output clean.
# NOTE(review): this blanket filter also hides convergence and deprecation
# warnings emitted by library code — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

## 1. Load and Split Data

In [2]:
# Read the preprocessed dataset; the 'target' column is the label and every
# remaining column is used as a feature.
df = pd.read_csv('heart_preprocessed.csv')
y = df['target']
X = df.drop(columns='target')

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## 2. Model Training

In [3]:
# Candidate classifiers, keyed by the display name used in the results table.
# All of them are fitted on the same training split with default settings
# unless noted below.
models = {
    # max_iter raised from the default of 100 so the solver has room to converge;
    # a non-converged fit would otherwise go unnoticed because warnings are
    # silenced at the top of the notebook.
    "Logistic Regression": LogisticRegression(penalty='l2', max_iter=1000),
    "KNN": KNeighborsClassifier(),
    "SVM (RBF)": SVC(kernel='rbf'),
    # Seeded like the other randomized estimators: the tree permutes features at
    # each split, so without a fixed seed its accuracy can change between runs.
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "AdaBoost": AdaBoostClassifier(random_state=42),
    "MLP": MLPClassifier(max_iter=500, random_state=42)
}

# Fit each model and record its accuracy on the held-out test split.
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    results[name] = accuracy_score(y_test, y_pred)

## 3. Hyperparameter Optimization (Random Forest)

In [4]:
# Search space for the forest: number of trees x maximum tree depth.
param_grid = {
    'n_estimators': [50, 100],
    'max_depth': [None, 10],
}

# 5-fold cross-validated grid search, run on the training split only.
grid_rf = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
)
grid_rf.fit(X_train, y_train)

# Score the best forest found by the search on the held-out split and add it
# to the same results table as the other models.
best_rf = grid_rf.best_estimator_
rf_test_pred = best_rf.predict(X_test)
results["Random Forest (Optimized)"] = accuracy_score(y_test, rf_test_pred)

print("Training complete.")

# Persist the accuracy table, the tuned forest, and the test split for the final notebook.
# NOTE: the tuned forest is saved as "best_model" without comparing it against
# the other entries in `results`; the real best model may be picked later.
joblib.dump(results, 'models_results.pkl')
joblib.dump(best_rf, 'best_model.pkl')
X_test.to_csv('X_test.csv', index=False)
y_test.to_csv('y_test.csv', index=False)
print("Results and test data saved.")

Training complete.
Results and test data saved.
