# Tests of different classifiers – ISCHAEMIA

# OCPC

## OCPC with SVD

In [3]:
import pandas as pd
import numpy as np
import joblib
import os
import cv2
import time
from sklearn.model_selection import KFold, train_test_split
from sklearn.decomposition import TruncatedSVD

from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix
)
from ocpc_py import MultiClassPC
from codecarbon import EmissionsTracker
import json

# DATA LOADING
base_path = "../data/infection"
image_size = (64, 64)

dataset = []
for class_name, label in zip(["Aug-Positive", "Aug-Negative"], [1, 0]):
    folder = os.path.join(base_path, class_name)
    for image_name in os.listdir(folder):
        image_path = os.path.join(folder, image_name)
        dataset.append((image_path, label))

df = pd.DataFrame(dataset, columns=["image", "label"])

# Load, resize, normalize, and flatten images
def load_images(df, image_size):
    images, labels = [], []
    for _, row in df.iterrows():
        img = cv2.imread(row["image"])
        if img is not None:
            img = cv2.resize(img, image_size)
            img = img.astype("float32") / 255.0
            images.append(img.flatten())
            labels.append(row["label"])
        else:
            print(f"Image not loaded: {row['image']}")
    return np.array(images), np.array(labels)

X_all, y_all = load_images(df, image_size)
X_train, X_test, y_train, y_test = train_test_split(
    X_all, y_all, test_size=0.2, random_state=42, stratify=y_all
)

# CROSS-VALIDATION
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accs, precs, recs, f1s, aucs = [], [], [], [], []

print("\n[Cross-Validation - Training]")
for fold, (train_idx, val_idx) in enumerate(kf.split(X_train), 1):
    X_tr, X_val = X_train[train_idx], X_train[val_idx]
    y_tr, y_val = y_train[train_idx], y_train[val_idx]

    svd = TruncatedSVD(n_components=50, random_state=80)
    X_tr_svd = svd.fit_transform(X_tr)
    X_val_svd = svd.transform(X_val)

    clf = MultiClassPC()
    clf.fit(X_tr_svd, y_tr)
    y_pred = clf.predict(X_val_svd)
    y_proba = clf.predict_proba(X_val_svd)[:, 1]

    accs.append(accuracy_score(y_val, y_pred))
    precs.append(precision_score(y_val, y_pred))
    recs.append(recall_score(y_val, y_pred))
    f1s.append(f1_score(y_val, y_pred))
    aucs.append(roc_auc_score(y_val, y_proba))

    print(f"\n[Fold {fold}]")
    print(f"Accuracy: {accs[-1]:.4f} | Precision: {precs[-1]:.4f} | Recall: {recs[-1]:.4f} | F1: {f1s[-1]:.4f} | AUC: {aucs[-1]:.4f}")

# Final K-Fold Metrics
print("\n[Average Metrics - Cross-Validation]")
print(f"Accuracy: {np.mean(accs):.4f} ± {np.std(accs):.4f}")
print(f"Precision: {np.mean(precs):.4f} ± {np.std(precs):.4f}")
print(f"Recall:   {np.mean(recs):.4f} ± {np.std(recs):.4f}")
print(f"F1-Score: {np.mean(f1s):.4f} ± {np.std(f1s):.4f}")
print(f"AUC:      {np.mean(aucs):.4f} ± {np.std(aucs):.4f}")

# Final training and test evaluation
print("\n[Final Training and Test Evaluation]")
tracker = EmissionsTracker(log_level="ERROR")
tracker.start()

# Final SVD
svd_final = TruncatedSVD(n_components=50, random_state=80)
X_train_svd = svd_final.fit_transform(X_train)
X_test_svd = svd_final.transform(X_test)

# Final Classifier Training
clf_final = MultiClassPC()
clf_final.fit(X_train_svd, y_train)

# Stop tracker and save emissions
emissions = tracker.stop()

# Emissions
emissions_dir = "../reports/emissions"
os.makedirs(emissions_dir, exist_ok=True)
emissions_path = os.path.join(emissions_dir, "emissions_infection.json")
with open(emissions_path, "w") as f:
    json.dump({"emissions_kgCO2eq": emissions}, f)

print(f"\n[Carbon Footprint]")
print(f"Estimated emissions during the experiment: {emissions:.6f} kg CO₂eq")

# Test set evaluation
y_pred_test = clf_final.predict(X_test_svd)
y_proba_test = clf_final.predict_proba(X_test_svd)[:, 1]

print("\n[Test Set Performance]")
print(f"Accuracy: {accuracy_score(y_test, y_pred_test):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_test):.4f}")
print(f"Recall:   {recall_score(y_test, y_pred_test):.4f}")
print(f"F1-Score: {f1_score(y_test, y_pred_test):.4f}")
print(f"AUC:      {roc_auc_score(y_test, y_proba_test):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_test))

# Measuring Inference Time
inference_times = []
for x in X_test_svd:
    start = time.time()
    _ = clf_final.predict(x.reshape(1, -1))
    end = time.time()
    inference_times.append(end - start)

inference_times = np.array(inference_times)
print("\n[Inference Time]")
print(f"Average time per image: {np.mean(inference_times):.6f} s ± {np.std(inference_times):.6f} s")

# Save SVD and Model
output_dir = "../models/infection"
os.makedirs(output_dir, exist_ok=True)
svd_path = os.path.join(output_dir, "OCPC_svd.pkl")
clf_path = os.path.join(output_dir, "OCPC_model.pkl")
joblib.dump(svd_final, svd_path)
joblib.dump(clf_final, clf_path)


[Cross-Validation - Training]

[Fold 1]
Accuracy: 0.5313 | Precision: 0.5274 | Recall: 0.4569 | F1: 0.4896 | AUC: 0.5508

[Fold 2]
Accuracy: 0.5610 | Precision: 0.5659 | Recall: 0.5275 | F1: 0.5461 | AUC: 0.5789

[Fold 3]
Accuracy: 0.5594 | Precision: 0.5461 | Recall: 0.4845 | F1: 0.5135 | AUC: 0.5781

[Fold 4]
Accuracy: 0.5372 | Precision: 0.5482 | Recall: 0.5208 | F1: 0.5342 | AUC: 0.5448





[Fold 5]
Accuracy: 0.5743 | Precision: 0.5973 | Recall: 0.5471 | F1: 0.5711 | AUC: 0.6066

[Average Metrics - Cross-Validation]
Accuracy: 0.5526 ± 0.0160
Precision: 0.5570 ± 0.0236
Recall:   0.5074 ± 0.0324
F1-Score: 0.5309 ± 0.0278
AUC:      0.5718 ± 0.0222

[Final Training and Test Evaluation]

[Carbon Footprint]
Estimated emissions during the experiment: 0.000034 kg CO₂eq

[Test Set Performance]
Accuracy: 0.5424
Precision: 0.5470
Recall:   0.4941
F1-Score: 0.5192
AUC:      0.5640
Confusion Matrix:
[[348 241]
 [298 291]]

[Inference Time]
Average time per image: 0.000838 s ± 0.000206 s


['../models/infection\\OCPC_model.pkl']

## OCPC with PCA

In [1]:
import pandas as pd
import numpy as np
import joblib
import os
import cv2
import time
from sklearn.model_selection import KFold, train_test_split
from sklearn.decomposition import PCA
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix
)
from ocpc_py import MultiClassPC
from codecarbon import EmissionsTracker
import json

# Configuration
base_path = "../data/infection"
image_size = (64, 64)

# Load dataset
dataset = []
for class_name, label in zip(["Aug-Positive", "Aug-Negative"], [1, 0]):
    folder = os.path.join(base_path, class_name)
    for image_name in os.listdir(folder):
        image_path = os.path.join(folder, image_name)
        dataset.append((image_path, label))

df = pd.DataFrame(dataset, columns=["image", "label"])

# Function to load and preprocess images
def load_images(df, image_size):
    images, labels = [], []
    for _, row in df.iterrows():
        img = cv2.imread(row["image"])
        if img is not None:
            img = cv2.resize(img, image_size)
            img = img.astype("float32") / 255.0
            images.append(img.flatten())
            labels.append(row["label"])
        else:
            print(f"Image not loaded: {row['image']}")
    return np.array(images), np.array(labels)

X_all, y_all = load_images(df, image_size)

# Split dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X_all, y_all, test_size=0.2, random_state=42, stratify=y_all
)

# Cross-validation setup
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accs, precs, recs, f1s, aucs = [], [], [], [], []

print("\n[Cross-Validation - Training]")
for fold, (train_idx, val_idx) in enumerate(kf.split(X_train), 1):
    X_tr, X_val = X_train[train_idx], X_train[val_idx]
    y_tr, y_val = y_train[train_idx], y_train[val_idx]

    # PCA transformation
    pca = PCA(n_components=50)
    X_tr_pca = pca.fit_transform(X_tr)
    X_val_pca = pca.transform(X_val)

    # Train classifier
    clf = MultiClassPC()
    clf.fit(X_tr_pca, y_tr)
    y_pred = clf.predict(X_val_pca)
    y_proba = clf.predict_proba(X_val_pca)[:, 1]

    # Calculate metrics
    accs.append(accuracy_score(y_val, y_pred))
    precs.append(precision_score(y_val, y_pred))
    recs.append(recall_score(y_val, y_pred))
    f1s.append(f1_score(y_val, y_pred))
    aucs.append(roc_auc_score(y_val, y_proba))

    print(f"\n[Fold {fold}]")
    print(f"Accuracy: {accs[-1]:.4f} | Precision: {precs[-1]:.4f} | Recall: {recs[-1]:.4f} | F1: {f1s[-1]:.4f} | AUC: {aucs[-1]:.4f}")

# Average metrics across folds
print("\n[Average Metrics - Cross-Validation]")
print(f"Accuracy: {np.mean(accs):.4f} ± {np.std(accs):.4f}")
print(f"Precision: {np.mean(precs):.4f} ± {np.std(precs):.4f}")
print(f"Recall:   {np.mean(recs):.4f} ± {np.std(recs):.4f}")
print(f"F1-Score: {np.mean(f1s):.4f} ± {np.std(f1s):.4f}")
print(f"AUC:      {np.mean(aucs):.4f} ± {np.std(aucs):.4f}")

# Final training and test evaluation
print("\n[Final Training and Test Evaluation]")
tracker = EmissionsTracker(log_level="ERROR")
tracker.start()

# Final PCA transformation
pca_final = PCA(n_components=50)
X_train_pca = pca_final.fit_transform(X_train)
X_test_pca = pca_final.transform(X_test)

# Train final classifier
clf_final = MultiClassPC()
clf_final.fit(X_train_pca, y_train)

# Stop carbon tracker and save emissions
emissions = tracker.stop()
emissions_dir = "../reports/emissions"
os.makedirs(emissions_dir, exist_ok=True)
emissions_path = os.path.join(emissions_dir, "emissions_infection.json")
with open(emissions_path, "w") as f:
    json.dump({"emissions_kgCO2eq": emissions}, f)

print(f"\n[Carbon Footprint]")
print(f"Estimated emissions during the experiment: {emissions:.6f} kg CO₂eq")

# Evaluate on test set
y_pred_test = clf_final.predict(X_test_pca)
y_proba_test = clf_final.predict_proba(X_test_pca)[:, 1]

print("\n[Test Set Performance]")
print(f"Accuracy: {accuracy_score(y_test, y_pred_test):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_test):.4f}")
print(f"Recall:   {recall_score(y_test, y_pred_test):.4f}")
print(f"F1-Score: {f1_score(y_test, y_pred_test):.4f}")
print(f"AUC:      {roc_auc_score(y_test, y_proba_test):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_test))

# Measure inference time
inference_times = []
for x in X_test_pca:
    start = time.time()
    _ = clf_final.predict(x.reshape(1, -1))
    end = time.time()
    inference_times.append(end - start)

inference_times = np.array(inference_times)
print("\n[Inference Time]")
print(f"Average time per image: {np.mean(inference_times):.6f} s ± {np.std(inference_times):.6f} s")

# Save PCA and final model
output_dir = "../models/infection"
os.makedirs(output_dir, exist_ok=True)
pca_path = os.path.join(output_dir, "OCPC_pca_infection.pkl")
clf_path = os.path.join(output_dir, "OCPC_pca_model_infection.pkl")
joblib.dump(pca_final, pca_path)
joblib.dump(clf_final, clf_path)



[Cross-Validation - Training]

[Fold 1]
Accuracy: 0.5398 | Precision: 0.5375 | Recall: 0.4634 | F1: 0.4977 | AUC: 0.5702

[Fold 2]
Accuracy: 0.5461 | Precision: 0.5468 | Recall: 0.5445 | F1: 0.5456 | AUC: 0.5565

[Fold 3]
Accuracy: 0.5998 | Precision: 0.5831 | Recall: 0.5819 | F1: 0.5825 | AUC: 0.6097

[Fold 4]
Accuracy: 0.5393 | Precision: 0.5498 | Recall: 0.5292 | F1: 0.5393 | AUC: 0.5381





[Fold 5]
Accuracy: 0.5679 | Precision: 0.5976 | Recall: 0.5082 | F1: 0.5493 | AUC: 0.5859

[Average Metrics - Cross-Validation]
Accuracy: 0.5586 ± 0.0231
Precision: 0.5630 ± 0.0232
Recall:   0.5254 ± 0.0393
F1-Score: 0.5429 ± 0.0271
AUC:      0.5721 ± 0.0245

[Final Training and Test Evaluation]

[Carbon Footprint]
Estimated emissions during the experiment: 0.000054 kg CO₂eq

[Test Set Performance]
Accuracy: 0.5238
Precision: 0.5293
Recall:   0.4295
F1-Score: 0.4742
AUC:      0.5427
Confusion Matrix:
[[364 225]
 [336 253]]

[Inference Time]
Average time per image: 0.001608 s ± 0.000600 s


['../models/infection\\OCPC_pca_model_infection.pkl']