# Tests of different classifiers – ISCHAEMIA

## OCPC with SVD

In [20]:
import pandas as pd
import numpy as np
import joblib
import os
import cv2
import time
from sklearn.model_selection import KFold, train_test_split
from sklearn.decomposition import TruncatedSVD

from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix
)
from ocpc_py import MultiClassPC
from codecarbon import EmissionsTracker
import json

# DATA LOADING
base_path = "../data/ischaemia"
image_size = (64, 64)

# Collect (path, label) pairs: files under "Aug-Positive" get label 1,
# files under "Aug-Negative" get label 0.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep the row order -- and with it the seeded train/test split and CV folds
# computed from these rows -- the same on every run and machine.
dataset = []
for class_name, label in zip(["Aug-Positive", "Aug-Negative"], [1, 0]):
    folder = os.path.join(base_path, class_name)
    for image_name in sorted(os.listdir(folder)):
        image_path = os.path.join(folder, image_name)
        dataset.append((image_path, label))

df = pd.DataFrame(dataset, columns=["image", "label"])

# Load, resize, normalize, and flatten images
def load_images(df, image_size):
    """Read every image listed in ``df`` and return flattened pixel arrays.

    Each path in the ``"image"`` column is read with ``cv2.imread``, resized
    to ``image_size``, cast to float32 and divided by 255, then flattened.
    Paths for which ``cv2.imread`` returns ``None`` are reported on stdout
    and skipped together with their label.

    Returns:
        A ``(features, labels)`` pair of NumPy arrays covering the images
        that loaded, in the row order of ``df``.
    """
    features, targets = [], []
    for path, target in zip(df["image"], df["label"]):
        pixels = cv2.imread(path)
        if pixels is None:
            print(f"Image not loaded: {path}")
            continue
        pixels = cv2.resize(pixels, image_size)
        scaled = pixels.astype("float32") / 255.0
        features.append(scaled.flatten())
        targets.append(target)
    return np.array(features), np.array(targets)

X_all, y_all = load_images(df, image_size)

# Hold out 20% of the samples as the test set, stratified on the label.
X_train, X_test, y_train, y_test = train_test_split(
    X_all,
    y_all,
    test_size=0.2,
    stratify=y_all,
    random_state=42,
)

# CROSS-VALIDATION
# Shuffled 5-fold CV over the training split only. The SVD is re-fitted on the
# training part of every fold and merely applied to the validation part.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accs, precs, recs, f1s, aucs = [], [], [], [], []

print("\n[Cross-Validation - Training]")
for fold, (train_idx, val_idx) in enumerate(kf.split(X_train), start=1):
    X_tr, y_tr = X_train[train_idx], y_train[train_idx]
    X_val, y_val = X_train[val_idx], y_train[val_idx]

    svd = TruncatedSVD(n_components=50, random_state=80)
    X_tr_svd = svd.fit_transform(X_tr)
    X_val_svd = svd.transform(X_val)

    clf = MultiClassPC()
    clf.fit(X_tr_svd, y_tr)
    y_pred = clf.predict(X_val_svd)
    # Column 1 of predict_proba is the score fed to the AUC.
    y_proba = clf.predict_proba(X_val_svd)[:, 1]

    # Label-based metrics are computed from y_pred, the AUC from y_proba.
    for history, metric in (
        (accs, accuracy_score),
        (precs, precision_score),
        (recs, recall_score),
        (f1s, f1_score),
    ):
        history.append(metric(y_val, y_pred))
    aucs.append(roc_auc_score(y_val, y_proba))

    print(f"\n[Fold {fold}]")
    print(f"Accuracy: {accs[-1]:.4f} | Precision: {precs[-1]:.4f} | Recall: {recs[-1]:.4f} | F1: {f1s[-1]:.4f} | AUC: {aucs[-1]:.4f}")

# Final K-Fold Metrics (mean ± standard deviation over the five folds)
print("\n[Average Metrics - Cross-Validation]")
print(f"Accuracy: {np.mean(accs):.4f} ± {np.std(accs):.4f}")
print(f"Precision: {np.mean(precs):.4f} ± {np.std(precs):.4f}")
print(f"Recall:   {np.mean(recs):.4f} ± {np.std(recs):.4f}")
print(f"F1-Score: {np.mean(f1s):.4f} ± {np.std(f1s):.4f}")
print(f"AUC:      {np.mean(aucs):.4f} ± {np.std(aucs):.4f}")

# Final training and test evaluation
print("\n[Final Training and Test Evaluation]")

# The tracker spans fitting the final SVD and classifier, plus projecting the
# test split.
tracker = EmissionsTracker(log_level="ERROR")
tracker.start()

# Final SVD: fitted on the full training split, then applied to the test split
svd_final = TruncatedSVD(n_components=50, random_state=80)
X_train_svd = svd_final.fit_transform(X_train)
X_test_svd = svd_final.transform(X_test)

# Final classifier, trained on the projected training split
clf_final = MultiClassPC()
clf_final.fit(X_train_svd, y_train)

# Value returned by tracker.stop() is what gets reported and stored below
emissions = tracker.stop()

# Write the emissions figure to a small JSON report
emissions_dir = "../reports/emissions"
emissions_path = os.path.join(emissions_dir, "emissions_ischaemia.json")
os.makedirs(emissions_dir, exist_ok=True)
with open(emissions_path, "w") as f:
    f.write(json.dumps({"emissions_kgCO2eq": emissions}))

print(f"\n[Carbon Footprint]")
print(f"Estimated emissions during the experiment: {emissions:.6f} kg CO₂eq")

# Test set evaluation: hard predictions for the label-based metrics and the
# confusion matrix, column 1 of predict_proba for the AUC.
y_pred_test = clf_final.predict(X_test_svd)
y_proba_test = clf_final.predict_proba(X_test_svd)[:, 1]

print("\n[Test Set Performance]")
for title, metric, scores in (
    ("Accuracy: ", accuracy_score, y_pred_test),
    ("Precision: ", precision_score, y_pred_test),
    ("Recall:   ", recall_score, y_pred_test),
    ("F1-Score: ", f1_score, y_pred_test),
    ("AUC:      ", roc_auc_score, y_proba_test),
):
    print(f"{title}{metric(y_test, scores):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_test))

# Measuring Inference Time
# One predict() call per row of X_test_svd, so image loading and the SVD
# projection are not part of the measured interval.
# time.perf_counter() replaces time.time(): it is the monotonic,
# highest-resolution clock meant for timing short intervals, whereas
# time.time() follows the system wall clock, which can be coarse or adjusted
# while the loop runs.
inference_times = []
for x in X_test_svd:
    start = time.perf_counter()
    _ = clf_final.predict(x.reshape(1, -1))
    end = time.perf_counter()
    inference_times.append(end - start)

inference_times = np.array(inference_times)
print("\n[Inference Time]")
print(f"Average time per image: {np.mean(inference_times):.6f} s ± {np.std(inference_times):.6f} s")

# Save SVD and Model
# Both fitted objects are stored: the SVD is needed to project new inputs
# before the classifier can be applied to them.
output_dir = "../models/ischaemia"
os.makedirs(output_dir, exist_ok=True)
svd_path, clf_path = (
    os.path.join(output_dir, file_name)
    for file_name in ("OCPC_svd.pkl", "OCPC_model.pkl")
)
joblib.dump(svd_final, svd_path)
joblib.dump(clf_final, clf_path)


[Cross-Validation - Training]

[Fold 1]
Accuracy: 0.7120 | Precision: 0.7316 | Recall: 0.6675 | F1: 0.6981 | AUC: 0.7838

[Fold 2]
Accuracy: 0.7049 | Precision: 0.7384 | Recall: 0.6622 | F1: 0.6982 | AUC: 0.7713

[Fold 3]
Accuracy: 0.7226 | Precision: 0.7454 | Recall: 0.6671 | F1: 0.7041 | AUC: 0.7961

[Fold 4]
Accuracy: 0.7099 | Precision: 0.7277 | Recall: 0.6525 | F1: 0.6880 | AUC: 0.7819

[Fold 5]
Accuracy: 0.6928 | Precision: 0.7079 | Recall: 0.6587 | F1: 0.6824 | AUC: 0.7753

[Average Metrics - Cross-Validation]
Accuracy: 0.7085 ± 0.0097
Precision: 0.7302 ± 0.0127
Recall:   0.6616 ± 0.0056
F1-Score: 0.6941 ± 0.0078
AUC:      0.7817 ± 0.0085

[Final Training and Test Evaluation]

[Carbon Footprint]
Estimated emissions during the experiment: 0.000082 kg CO₂eq

[Test Set Performance]
Accuracy: 0.7082
Precision: 0.7231
Recall:   0.6748
F1-Score: 0.6981
AUC:      0.7744
Confusion Matrix:
[[732 255]
 [321 666]]

[Inference Time]
Average time per image: 0.000759 s ± 0.000132 s


['../models/ischaemia\\OCPC_model.pkl']

## OCPC with PCA

In [22]:
import pandas as pd
import numpy as np
import joblib
import os
import cv2
import time
from sklearn.model_selection import KFold, train_test_split
from sklearn.decomposition import PCA
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix
)
from ocpc_py import MultiClassPC
from codecarbon import EmissionsTracker
import json

# Configuration
base_path = "../data/ischaemia"
image_size = (64, 64)

# Load dataset: files under "Aug-Positive" get label 1, files under
# "Aug-Negative" get label 0.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep the row order -- and with it the seeded train/test split and CV folds
# computed from these rows -- the same on every run and machine.
dataset = []
for class_name, label in zip(["Aug-Positive", "Aug-Negative"], [1, 0]):
    folder = os.path.join(base_path, class_name)
    for image_name in sorted(os.listdir(folder)):
        image_path = os.path.join(folder, image_name)
        dataset.append((image_path, label))

df = pd.DataFrame(dataset, columns=["image", "label"])

# Function to load and preprocess images
def load_images(df, image_size):
    """Load the images referenced by ``df`` as flattened float32 vectors.

    For every row, the file in the ``"image"`` column is read with
    ``cv2.imread``, resized to ``image_size``, cast to float32, divided by
    255 and flattened. Files for which ``cv2.imread`` returns ``None`` are
    reported on stdout and left out, along with their label.

    Returns:
        A ``(features, labels)`` pair of NumPy arrays covering the images
        that loaded, in the row order of ``df``.
    """
    features, targets = [], []
    for path, target in zip(df["image"], df["label"]):
        pixels = cv2.imread(path)
        if pixels is None:
            print(f"Image not loaded: {path}")
            continue
        pixels = cv2.resize(pixels, image_size)
        scaled = pixels.astype("float32") / 255.0
        features.append(scaled.flatten())
        targets.append(target)
    return np.array(features), np.array(targets)

X_all, y_all = load_images(df, image_size)

# Split dataset into training and test sets (80/20, stratified on the label)
X_train, X_test, y_train, y_test = train_test_split(
    X_all, y_all, test_size=0.2, random_state=42, stratify=y_all
)

# Cross-validation setup: shuffled 5-fold CV over the training split only
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accs, precs, recs, f1s, aucs = [], [], [], [], []

print("\n[Cross-Validation - Training]")
for fold, (train_idx, val_idx) in enumerate(kf.split(X_train), 1):
    X_tr, X_val = X_train[train_idx], X_train[val_idx]
    y_tr, y_val = y_train[train_idx], y_train[val_idx]

    # PCA transformation, fitted on the training part of the fold only.
    # random_state is pinned because PCA's default solver policy can select
    # the randomized SVD solver for large inputs, and that solver's result
    # depends on the seed. The seed matches the one used for TruncatedSVD in
    # the OCPC + SVD experiment so the fold metrics are repeatable.
    pca = PCA(n_components=50, random_state=80)
    X_tr_pca = pca.fit_transform(X_tr)
    X_val_pca = pca.transform(X_val)

    # Train classifier on the projected training fold
    clf = MultiClassPC()
    clf.fit(X_tr_pca, y_tr)
    y_pred = clf.predict(X_val_pca)
    # Column 1 of predict_proba is the score fed to the AUC.
    y_proba = clf.predict_proba(X_val_pca)[:, 1]

    # Calculate metrics for this fold
    accs.append(accuracy_score(y_val, y_pred))
    precs.append(precision_score(y_val, y_pred))
    recs.append(recall_score(y_val, y_pred))
    f1s.append(f1_score(y_val, y_pred))
    aucs.append(roc_auc_score(y_val, y_proba))

    print(f"\n[Fold {fold}]")
    print(f"Accuracy: {accs[-1]:.4f} | Precision: {precs[-1]:.4f} | Recall: {recs[-1]:.4f} | F1: {f1s[-1]:.4f} | AUC: {aucs[-1]:.4f}")

# Average metrics across folds (mean ± standard deviation)
print("\n[Average Metrics - Cross-Validation]")
print(f"Accuracy: {np.mean(accs):.4f} ± {np.std(accs):.4f}")
print(f"Precision: {np.mean(precs):.4f} ± {np.std(precs):.4f}")
print(f"Recall:   {np.mean(recs):.4f} ± {np.std(recs):.4f}")
print(f"F1-Score: {np.mean(f1s):.4f} ± {np.std(f1s):.4f}")
print(f"AUC:      {np.mean(aucs):.4f} ± {np.std(aucs):.4f}")

# Final training and test evaluation
print("\n[Final Training and Test Evaluation]")
# The tracker spans fitting the final PCA and classifier, plus projecting the
# test split.
tracker = EmissionsTracker(log_level="ERROR")
tracker.start()

# Final PCA transformation, fitted on the whole training split.
# random_state is pinned because PCA's default solver policy can select the
# randomized SVD solver for large inputs; without a seed the saved projection
# and the reported test metrics would change from run to run.
pca_final = PCA(n_components=50, random_state=80)
X_train_pca = pca_final.fit_transform(X_train)
X_test_pca = pca_final.transform(X_test)

# Train final classifier on the projected training split
clf_final = MultiClassPC()
clf_final.fit(X_train_pca, y_train)

# Stop carbon tracker and save emissions.
# This experiment gets its own report file: "emissions_ischaemia.json" is
# already written by the OCPC + SVD experiment, and reusing that name here
# overwrote its result.
emissions = tracker.stop()
emissions_dir = "../reports/emissions"
os.makedirs(emissions_dir, exist_ok=True)
emissions_path = os.path.join(emissions_dir, "emissions_ischaemia_ocpc_pca.json")
with open(emissions_path, "w") as f:
    json.dump({"emissions_kgCO2eq": emissions}, f)

print(f"\n[Carbon Footprint]")
print(f"Estimated emissions during the experiment: {emissions:.6f} kg CO₂eq")

# Evaluate on test set: hard predictions for the label-based metrics and the
# confusion matrix, column 1 of predict_proba for the AUC.
y_pred_test = clf_final.predict(X_test_pca)
y_proba_test = clf_final.predict_proba(X_test_pca)[:, 1]

print("\n[Test Set Performance]")
for title, metric, scores in (
    ("Accuracy: ", accuracy_score, y_pred_test),
    ("Precision: ", precision_score, y_pred_test),
    ("Recall:   ", recall_score, y_pred_test),
    ("F1-Score: ", f1_score, y_pred_test),
    ("AUC:      ", roc_auc_score, y_proba_test),
):
    print(f"{title}{metric(y_test, scores):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_test))

# Measure inference time
# One predict() call per row of X_test_pca, so image loading and the PCA
# projection are not part of the measured interval.
# time.perf_counter() replaces time.time(): it is the monotonic,
# highest-resolution clock meant for timing short intervals, whereas
# time.time() follows the system wall clock, which can be coarse or adjusted
# while the loop runs.
inference_times = []
for x in X_test_pca:
    start = time.perf_counter()
    _ = clf_final.predict(x.reshape(1, -1))
    end = time.perf_counter()
    inference_times.append(end - start)

inference_times = np.array(inference_times)
print("\n[Inference Time]")
print(f"Average time per image: {np.mean(inference_times):.6f} s ± {np.std(inference_times):.6f} s")

# Save PCA and final model
# Both fitted objects are stored: the PCA is needed to project new inputs
# before the classifier can be applied to them.
output_dir = "../models/ischaemia"
os.makedirs(output_dir, exist_ok=True)
pca_path, clf_path = (
    os.path.join(output_dir, file_name)
    for file_name in ("OCPC_pca.pkl", "OCPC_pca_model.pkl")
)
joblib.dump(pca_final, pca_path)
joblib.dump(clf_final, clf_path)



[Cross-Validation - Training]

[Fold 1]
Accuracy: 0.7177 | Precision: 0.7388 | Recall: 0.6713 | F1: 0.7035 | AUC: 0.7873

[Fold 2]
Accuracy: 0.7049 | Precision: 0.7417 | Recall: 0.6560 | F1: 0.6962 | AUC: 0.7753

[Fold 3]
Accuracy: 0.7112 | Precision: 0.7386 | Recall: 0.6440 | F1: 0.6881 | AUC: 0.7867

[Fold 4]
Accuracy: 0.7131 | Precision: 0.7323 | Recall: 0.6537 | F1: 0.6908 | AUC: 0.7870

[Fold 5]
Accuracy: 0.7049 | Precision: 0.7229 | Recall: 0.6662 | F1: 0.6934 | AUC: 0.7818

[Average Metrics - Cross-Validation]
Accuracy: 0.7104 ± 0.0050
Precision: 0.7349 ± 0.0067
Recall:   0.6583 ± 0.0096
F1-Score: 0.6944 ± 0.0053
AUC:      0.7836 ± 0.0046

[Final Training and Test Evaluation]

[Carbon Footprint]
Estimated emissions during the experiment: 0.000075 kg CO₂eq

[Test Set Performance]
Accuracy: 0.7087
Precision: 0.7220
Recall:   0.6788
F1-Score: 0.6997
AUC:      0.7751
Confusion Matrix:
[[729 258]
 [317 670]]

[Inference Time]
Average time per image: 0.000756 s ± 0.000142 s


['../models/ischaemia\\OCPC_pca_model.pkl']

# Random Forest

## Random Forest without PCA

In [23]:
import pandas as pd
import numpy as np
import os
import cv2
import time
from sklearn.model_selection import train_test_split, KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix
)
import joblib
from codecarbon import EmissionsTracker
import json

# Configuration
base_path = "../data/ischaemia"
image_size = (64, 64)

# Load dataset: files under "Aug-Positive" get label 1, files under
# "Aug-Negative" get label 0.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep the row order -- and with it the seeded train/test split and CV folds
# computed from these rows -- the same on every run and machine.
dataset = []
for class_name, label in zip(["Aug-Positive", "Aug-Negative"], [1, 0]):
    folder = os.path.join(base_path, class_name)
    for image_name in sorted(os.listdir(folder)):
        image_path = os.path.join(folder, image_name)
        dataset.append((image_path, label))

df = pd.DataFrame(dataset, columns=["image", "label"])

# Function to load and preprocess images
def load_images(df, image_size):
    """Turn the image paths in ``df`` into flattened, scaled pixel vectors.

    Every path in the ``"image"`` column is read with ``cv2.imread``,
    resized to ``image_size``, cast to float32, divided by 255 and
    flattened. When ``cv2.imread`` returns ``None`` the path is reported on
    stdout and the row is skipped, label included.

    Returns:
        A ``(features, labels)`` pair of NumPy arrays covering the images
        that loaded, in the row order of ``df``.
    """
    features, targets = [], []
    for path, target in zip(df["image"], df["label"]):
        pixels = cv2.imread(path)
        if pixels is None:
            print(f"Image not loaded: {path}")
            continue
        pixels = cv2.resize(pixels, image_size)
        scaled = pixels.astype("float32") / 255.0
        features.append(scaled.flatten())
        targets.append(target)
    return np.array(features), np.array(targets)

X_all, y_all = load_images(df, image_size)

# Split dataset into training and test sets (80/20, stratified on the label)
X_train, X_test, y_train, y_test = train_test_split(
    X_all,
    y_all,
    test_size=0.2,
    stratify=y_all,
    random_state=42,
)

# Cross-validation setup: shuffled 5-fold CV over the training split only
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accs, precs, recs, f1s, aucs = [], [], [], [], []

# Hyper-parameters shared by the forest trained in every fold
rf_params = {
    "n_estimators": 300,
    "max_depth": 20,
    "min_samples_split": 2,
    "min_samples_leaf": 1,
    "class_weight": 'balanced',
    "random_state": 42,
    "n_jobs": -1,
}

print("\n[Cross-Validation - Training]")
for fold, (train_idx, val_idx) in enumerate(kf.split(X_train), start=1):
    X_tr, y_tr = X_train[train_idx], y_train[train_idx]
    X_val, y_val = X_train[val_idx], y_train[val_idx]

    # Train Random Forest classifier directly on the flattened pixels
    clf = RandomForestClassifier(**rf_params)
    clf.fit(X_tr, y_tr)

    y_pred = clf.predict(X_val)
    # Column 1 of predict_proba is the score fed to the AUC.
    y_proba = clf.predict_proba(X_val)[:, 1]

    # Label-based metrics are computed from y_pred, the AUC from y_proba.
    for history, metric in (
        (accs, accuracy_score),
        (precs, precision_score),
        (recs, recall_score),
        (f1s, f1_score),
    ):
        history.append(metric(y_val, y_pred))
    aucs.append(roc_auc_score(y_val, y_proba))

    print(f"\n[Fold {fold}]")
    print(f"Accuracy: {accs[-1]:.4f} | Precision: {precs[-1]:.4f} | Recall: {recs[-1]:.4f} | F1: {f1s[-1]:.4f} | AUC: {aucs[-1]:.4f}")

# Average metrics across folds (mean ± standard deviation)
print("\n[Average Metrics - Cross-Validation]")
print(f"Accuracy: {np.mean(accs):.4f} ± {np.std(accs):.4f}")
print(f"Precision: {np.mean(precs):.4f} ± {np.std(precs):.4f}")
print(f"Recall:   {np.mean(recs):.4f} ± {np.std(recs):.4f}")
print(f"F1-Score: {np.mean(f1s):.4f} ± {np.std(f1s):.4f}")
print(f"AUC:      {np.mean(aucs):.4f} ± {np.std(aucs):.4f}")

# Final training and test evaluation
print("\n[Final Training and Test Evaluation]")

# The tracker spans only the fit of the final forest.
tracker = EmissionsTracker(log_level="ERROR")
tracker.start()

# Same hyper-parameters as in cross-validation, fitted on the full training split
clf_final = RandomForestClassifier(
    n_estimators=300,
    max_depth=20,
    min_samples_split=2,
    min_samples_leaf=1,
    class_weight='balanced',
    n_jobs=-1,
    random_state=42,
)
clf_final.fit(X_train, y_train)

# Value returned by tracker.stop() is what gets reported and stored below
emissions = tracker.stop()

# Write the emissions figure to a small JSON report
emissions_dir = "../reports/emissions"
emissions_path = os.path.join(emissions_dir, "emissions_ischaemia_rf.json")
os.makedirs(emissions_dir, exist_ok=True)
with open(emissions_path, "w") as f:
    f.write(json.dumps({"emissions_kgCO2eq": emissions}))

print(f"\n[Carbon Footprint]")
print(f"Estimated emissions during training: {emissions:.6f} kg CO₂eq")

# Evaluate on test set: hard predictions for the label-based metrics and the
# confusion matrix, column 1 of predict_proba for the AUC.
y_pred_test = clf_final.predict(X_test)
y_proba_test = clf_final.predict_proba(X_test)[:, 1]

print("\n[Test Set Performance]")
for title, metric, scores in (
    ("Accuracy: ", accuracy_score, y_pred_test),
    ("Precision: ", precision_score, y_pred_test),
    ("Recall:   ", recall_score, y_pred_test),
    ("F1-Score: ", f1_score, y_pred_test),
    ("AUC:      ", roc_auc_score, y_proba_test),
):
    print(f"{title}{metric(y_test, scores):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_test))

# Measure inference time
# One predict() call per row of X_test (already loaded, flattened pixels), so
# image loading is not part of the measured interval.
# time.perf_counter() replaces time.time(): it is the monotonic,
# highest-resolution clock meant for timing short intervals, whereas
# time.time() follows the system wall clock, which can be coarse or adjusted
# while the loop runs.
inference_times = []
for x in X_test:
    start = time.perf_counter()
    # Single-row 2-D input, built the same way as in the OCPC experiments
    _ = clf_final.predict(x.reshape(1, -1))
    end = time.perf_counter()
    inference_times.append(end - start)

inference_times = np.array(inference_times)
print("\n[Inference Time]")
print(f"Average time per image: {np.mean(inference_times):.6f} s ± {np.std(inference_times):.6f} s")

# Save final model
# The fitted forest is serialized with joblib under the shared models folder.
output_dir = "../models/ischaemia"
clf_path = os.path.join(output_dir, "random_forest_ischaemia.pkl")
os.makedirs(output_dir, exist_ok=True)
joblib.dump(clf_final, clf_path)



[Cross-Validation - Training]

[Fold 1]
Accuracy: 0.8684 | Precision: 0.8607 | Recall: 0.8782 | F1: 0.8693 | AUC: 0.9446

[Fold 2]
Accuracy: 0.8740 | Precision: 0.8745 | Recall: 0.8821 | F1: 0.8783 | AUC: 0.9444

[Fold 3]
Accuracy: 0.8835 | Precision: 0.8681 | Recall: 0.9014 | F1: 0.8844 | AUC: 0.9513

[Fold 4]
Accuracy: 0.8765 | Precision: 0.8535 | Recall: 0.9031 | F1: 0.8776 | AUC: 0.9490

[Fold 5]
Accuracy: 0.8847 | Precision: 0.8745 | Recall: 0.8989 | F1: 0.8865 | AUC: 0.9566

[Average Metrics - Cross-Validation]
Accuracy: 0.8774 ± 0.0061
Precision: 0.8663 ± 0.0082
Recall:   0.8927 ± 0.0105
F1-Score: 0.8792 ± 0.0060
AUC:      0.9492 ± 0.0045

[Final Training and Test Evaluation]

[Carbon Footprint]
Estimated emissions during training: 0.000280 kg CO₂eq

[Test Set Performance]
Accuracy: 0.8906
Precision: 0.8806
Recall:   0.9037
F1-Score: 0.8920
AUC:      0.9594
Confusion Matrix:
[[866 121]
 [ 95 892]]

[Inference Time]
Average time per image: 0.051098 s ± 0.019895 s


['../models/ischaemia\\random_forest_ischaemia.pkl']