# Notebook 04 — Model Training & Evaluation

Train six scikit-learn classifiers on the extracted features, evaluate each with 5-fold
stratified cross-validation and a held-out 20% test split, and report accuracy and macro-averaged F1, precision, and recall.

**Classifiers:**
1. Support Vector Machine (SVM)
2. Random Forest
3. Decision Tree
4. K-Nearest Neighbors (KNN)
5. Logistic Regression
6. Gradient Boosting

In [None]:
import sys, os
sys.path.insert(0, os.path.abspath('../src'))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split, StratifiedKFold, cross_validate
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.metrics import (
    classification_report, confusion_matrix, accuracy_score,
    f1_score, precision_score, recall_score
)
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

from visualization import plot_confusion_matrix, plot_model_comparison

sns.set_theme(style='whitegrid')
%matplotlib inline

## 1. Load Extracted Features

In [None]:
# XPQRS feature table (written by Notebook 03); report its (rows, columns) shape
xpqrs_df = pd.read_csv('../results/tables/xpqrs_features.csv')
print(f'XPQRS features: {xpqrs_df.shape}')

# PQ Disturbances feature table; report its (rows, columns) shape
pq_df = pd.read_csv('../results/tables/pq_features.csv')
print(f'PQ Disturbances features: {pq_df.shape}')

## 2. Define Classifiers and Training Pipeline

In [None]:
# Display name -> unfitted estimator. The display names are reused as result
# keys and (lower-cased, spaces replaced by underscores) in saved file names.
# Every estimator below that is given a seed uses random_state=42.
CLASSIFIERS = {
    'SVM': SVC(
        kernel='rbf',
        C=1,
        gamma='scale',
        random_state=42,
    ),
    'Random Forest': RandomForestClassifier(
        n_estimators=100,
        max_depth=None,
        random_state=42,
        n_jobs=-1,
    ),
    'Decision Tree': DecisionTreeClassifier(
        max_depth=20,
        criterion='entropy',
        random_state=42,
    ),
    'KNN': KNeighborsClassifier(
        n_neighbors=5,
        weights='distance',
        n_jobs=-1,
    ),
    'Logistic Regression': LogisticRegression(
        C=1.0,
        solver='lbfgs',
        max_iter=2000,
        random_state=42,
    ),
    'Gradient Boosting': GradientBoostingClassifier(
        n_estimators=100,
        learning_rate=0.1,
        max_depth=5,
        random_state=42,
    ),
}


def train_and_evaluate(df, dataset_name, classifiers=CLASSIFIERS):
    """Train every classifier on one feature table and score it.

    The table is split 80/20 (stratified, seed 42). Each classifier is wrapped
    in a ``StandardScaler`` -> classifier pipeline, scored with 5-fold
    stratified cross-validation on the training part, refit on the whole
    training part, and scored once on the held-out test part. Each fitted
    pipeline is written to ``../results/models/`` with joblib.

    Args:
        df: DataFrame with a ``'label'`` column; every other column is used
            as a feature.
        dataset_name: Short name used in the printed header and as the prefix
            of the saved model file names.
        classifiers: Mapping of display name -> estimator. The estimators are
            used as templates only: each one is cloned before fitting, so the
            objects in this mapping are never fitted or mutated.

    Returns:
        Tuple ``(results, le, class_names)`` where ``results`` maps each
        display name to a dict holding the fitted ``'pipeline'``, the test
        metrics (``'accuracy'``, ``'f1_macro'``, ``'precision_macro'``,
        ``'recall_macro'``), the CV statistics (``'cv_accuracy_mean'``,
        ``'cv_accuracy_std'``, ``'cv_f1_mean'``, ``'cv_f1_std'``), plus
        ``'y_test'``, ``'y_pred'`` and ``'class_names'``; ``le`` is the fitted
        ``LabelEncoder``; ``class_names`` is ``le.classes_``.
    """
    # Imported here so this cell works without touching the import cell.
    from sklearn.base import clone

    # Prepare data
    le = LabelEncoder()
    feature_cols = [c for c in df.columns if c != 'label']
    X = df[feature_cols].values
    y = le.fit_transform(df['label'])
    class_names = le.classes_

    # Replace inf/nan
    X = np.nan_to_num(X, nan=0.0, posinf=0.0, neginf=0.0)

    # Stratified train-test split (80/20)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )
    print(f'\n{"="*60}')
    print(f'Dataset: {dataset_name}')
    print(f'Train: {X_train.shape}, Test: {X_test.shape}')
    print(f'Classes: {len(class_names)}')
    print(f'{"="*60}')

    # joblib.dump does not create missing directories; make sure the target
    # exists before any (potentially long) training starts.
    os.makedirs('../results/models', exist_ok=True)

    results = {}
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    for name, clf in classifiers.items():
        print(f'\n--- {name} ---')

        # Build pipeline around a fresh copy of the estimator. Pipeline fits
        # its steps in place, so using the shared instance directly would let
        # a later call (another dataset) refit the classifier that an earlier
        # call's results still reference.
        pipe = Pipeline([
            ('scaler', StandardScaler()),
            ('clf', clone(clf))
        ])

        # 5-fold cross-validation
        cv_results = cross_validate(
            pipe, X_train, y_train, cv=cv,
            scoring=['accuracy', 'f1_macro'],
            return_train_score=False, n_jobs=-1
        )

        cv_acc = cv_results['test_accuracy']
        cv_f1  = cv_results['test_f1_macro']
        print(f'  CV Accuracy: {cv_acc.mean():.4f} (+/- {cv_acc.std():.4f})')
        print(f'  CV F1 Macro: {cv_f1.mean():.4f} (+/- {cv_f1.std():.4f})')

        # Train on full training set, evaluate on test set
        pipe.fit(X_train, y_train)
        y_pred = pipe.predict(X_test)

        acc = accuracy_score(y_test, y_pred)
        f1  = f1_score(y_test, y_pred, average='macro')
        prec = precision_score(y_test, y_pred, average='macro')
        rec  = recall_score(y_test, y_pred, average='macro')

        print(f'  Test Accuracy : {acc:.4f}')
        print(f'  Test F1 Macro : {f1:.4f}')
        print(f'  Test Precision: {prec:.4f}')
        print(f'  Test Recall   : {rec:.4f}')

        results[name] = {
            'pipeline': pipe,
            'accuracy': acc,
            'f1_macro': f1,
            'precision_macro': prec,
            'recall_macro': rec,
            'cv_accuracy_mean': cv_acc.mean(),
            'cv_accuracy_std': cv_acc.std(),
            'cv_f1_mean': cv_f1.mean(),
            'cv_f1_std': cv_f1.std(),
            'y_test': y_test,
            'y_pred': y_pred,
            'class_names': class_names,
        }

        # Save model
        model_path = f'../results/models/{dataset_name}_{name.replace(" ", "_").lower()}.pkl'
        joblib.dump(pipe, model_path)

    return results, le, class_names

## 3. Train on XPQRS Dataset

In [None]:
%%time
# Fit and score the default classifier set on the XPQRS feature table
xpqrs_results, xpqrs_le, xpqrs_classes = train_and_evaluate(
    df=xpqrs_df,
    dataset_name='xpqrs',
)

In [None]:
# One display row per model, with every metric pre-formatted to 4 decimals
summary_rows = [
    {
        'Model': name,
        'CV Accuracy': f"{res['cv_accuracy_mean']:.4f} ± {res['cv_accuracy_std']:.4f}",
        'Test Accuracy': f"{res['accuracy']:.4f}",
        'Test F1 (Macro)': f"{res['f1_macro']:.4f}",
        'Test Precision': f"{res['precision_macro']:.4f}",
        'Test Recall': f"{res['recall_macro']:.4f}",
    }
    for name, res in xpqrs_results.items()
]

# 'Test Accuracy' holds strings, so this is a lexicographic sort; the
# fixed-width 4-decimal format makes that order match the numeric one.
xpqrs_summary = pd.DataFrame(summary_rows)
xpqrs_summary = xpqrs_summary.sort_values('Test Accuracy', ascending=False)
xpqrs_summary.to_csv('../results/tables/xpqrs_model_results.csv', index=False)
xpqrs_summary

In [None]:
# Bar chart of test accuracy per model; the helper receives a shallow copy
# of the results mapping
fig = plot_model_comparison(
    dict(xpqrs_results),
    metric='accuracy',
    title='XPQRS — Model Accuracy Comparison',
)
fig.savefig(
    '../results/figures/xpqrs_model_accuracy.png',
    dpi=150,
    bbox_inches='tight',
)
plt.show()

In [None]:
# Rank (name, result) pairs by test accuracy, best first
sorted_models = list(xpqrs_results.items())
sorted_models.sort(key=lambda item: item[1]['accuracy'], reverse=True)

# Plot and save a confusion matrix for each of the three most accurate models
for name, res in sorted_models[:3]:
    fig = plot_confusion_matrix(
        res['y_test'],
        res['y_pred'],
        xpqrs_classes,
        title=f'XPQRS — {name} (Accuracy: {res["accuracy"]:.4f})',
        figsize=(14, 12),
    )
    fig.savefig(
        f'../results/figures/xpqrs_cm_{name.replace(" ", "_").lower()}.png',
        dpi=150,
        bbox_inches='tight',
    )
    plt.show()

In [None]:
# Per-class precision/recall/F1 for the most accurate XPQRS model
best_name, best_res = sorted_models[0]
print(f'Best model: {best_name} (Accuracy: {best_res["accuracy"]:.4f})\n')
print(
    classification_report(
        best_res['y_test'],
        best_res['y_pred'],
        target_names=xpqrs_classes,
    )
)

## 4. Train on PQ Disturbances Dataset

In [None]:
%%time
# Fit and score the default classifier set on the PQ Disturbances feature table
pq_results, pq_le, pq_classes = train_and_evaluate(
    df=pq_df,
    dataset_name='pq_disturbances',
)

In [None]:
# One display row per model, with every metric pre-formatted to 4 decimals
pq_summary_rows = [
    {
        'Model': name,
        'CV Accuracy': f"{res['cv_accuracy_mean']:.4f} ± {res['cv_accuracy_std']:.4f}",
        'Test Accuracy': f"{res['accuracy']:.4f}",
        'Test F1 (Macro)': f"{res['f1_macro']:.4f}",
        'Test Precision': f"{res['precision_macro']:.4f}",
        'Test Recall': f"{res['recall_macro']:.4f}",
    }
    for name, res in pq_results.items()
]

# 'Test Accuracy' holds strings, so this is a lexicographic sort; the
# fixed-width 4-decimal format makes that order match the numeric one.
pq_summary = pd.DataFrame(pq_summary_rows)
pq_summary = pq_summary.sort_values('Test Accuracy', ascending=False)
pq_summary.to_csv('../results/tables/pq_model_results.csv', index=False)
pq_summary

In [None]:
# Bar chart of test accuracy per model; the helper receives a shallow copy
# of the results mapping
fig = plot_model_comparison(
    dict(pq_results),
    metric='accuracy',
    title='PQ Disturbances — Model Accuracy Comparison',
)
fig.savefig(
    '../results/figures/pq_model_accuracy.png',
    dpi=150,
    bbox_inches='tight',
)
plt.show()

In [None]:
# Rank (name, result) pairs by test accuracy, best first
pq_sorted = list(pq_results.items())
pq_sorted.sort(key=lambda item: item[1]['accuracy'], reverse=True)

# Plot and save a confusion matrix for each of the three most accurate models
for name, res in pq_sorted[:3]:
    fig = plot_confusion_matrix(
        res['y_test'],
        res['y_pred'],
        pq_classes,
        title=f'PQ Disturbances — {name} (Accuracy: {res["accuracy"]:.4f})',
        figsize=(12, 10),
    )
    fig.savefig(
        f'../results/figures/pq_cm_{name.replace(" ", "_").lower()}.png',
        dpi=150,
        bbox_inches='tight',
    )
    plt.show()

In [None]:
# Per-class precision/recall/F1 for the most accurate PQ Disturbances model
pq_best_name, pq_best_res = pq_sorted[0]
print(f'Best model: {pq_best_name} (Accuracy: {pq_best_res["accuracy"]:.4f})\n')
print(
    classification_report(
        pq_best_res['y_test'],
        pq_best_res['y_pred'],
        target_names=pq_classes,
    )
)

---
**Next:** [05_results_comparison.ipynb](05_results_comparison.ipynb) — Cross-dataset comparison and detailed analysis.