# Tests of different classifiers – ISCHAEMIA

# OCPC

## OCPC with SVD

In [9]:
import pandas as pd
import numpy as np
import joblib
import os
import cv2
import time
from sklearn.model_selection import KFold, train_test_split
from sklearn.decomposition import TruncatedSVD

from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix
)
from ocpc_py import MultiClassPC
from codecarbon import EmissionsTracker
import json

# DATA LOADING
base_path = "../data/ischaemia"
image_size = (256, 256)

dataset = []
for class_name, label in zip(["Aug-Positive", "Aug-Negative"], [1, 0]):
    folder = os.path.join(base_path, class_name)
    for image_name in os.listdir(folder):
        image_path = os.path.join(folder, image_name)
        dataset.append((image_path, label))

df = pd.DataFrame(dataset, columns=["image", "label"])

# Load, resize, normalize, and flatten images
def load_images(df, image_size):
    images, labels = [], []
    for _, row in df.iterrows():
        img = cv2.imread(row["image"])
        if img is not None:
            img = cv2.resize(img, image_size)
            img = img.astype("float32") / 255.0
            images.append(img.flatten())
            labels.append(row["label"])
        else:
            print(f"Image not loaded: {row['image']}")
    return np.array(images), np.array(labels)

X_all, y_all = load_images(df, image_size)
X_train, X_test, y_train, y_test = train_test_split(
    X_all, y_all, test_size=0.2, random_state=42, stratify=y_all
)

# CROSS-VALIDATION
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accs, precs, recs, f1s, aucs = [], [], [], [], []

print("\n[Cross-Validation - Training]")
for fold, (train_idx, val_idx) in enumerate(kf.split(X_train), 1):
    X_tr, X_val = X_train[train_idx], X_train[val_idx]
    y_tr, y_val = y_train[train_idx], y_train[val_idx]

    svd = TruncatedSVD(n_components=100, random_state=80)
    X_tr_svd = svd.fit_transform(X_tr)
    X_val_svd = svd.transform(X_val)

    clf = MultiClassPC()
    clf.fit(X_tr_svd, y_tr)
    y_pred = clf.predict(X_val_svd)
    y_proba = clf.predict_proba(X_val_svd)[:, 1]

    accs.append(accuracy_score(y_val, y_pred))
    precs.append(precision_score(y_val, y_pred))
    recs.append(recall_score(y_val, y_pred))
    f1s.append(f1_score(y_val, y_pred))
    aucs.append(roc_auc_score(y_val, y_proba))

    print(f"\n[Fold {fold}]")
    print(f"Accuracy: {accs[-1]:.4f} | Precision: {precs[-1]:.4f} | Recall: {recs[-1]:.4f} | F1: {f1s[-1]:.4f} | AUC: {aucs[-1]:.4f}")

# Final K-Fold Metrics
print("\n[Average Metrics - Cross-Validation]")
print(f"Accuracy: {np.mean(accs):.4f} ± {np.std(accs):.4f}")
print(f"Precision: {np.mean(precs):.4f} ± {np.std(precs):.4f}")
print(f"Recall:   {np.mean(recs):.4f} ± {np.std(recs):.4f}")
print(f"F1-Score: {np.mean(f1s):.4f} ± {np.std(f1s):.4f}")
print(f"AUC:      {np.mean(aucs):.4f} ± {np.std(aucs):.4f}")

# Final training and test evaluation
print("\n[Final Training and Test Evaluation]")
tracker = EmissionsTracker(log_level="ERROR")
tracker.start()

# Final SVD
svd_final = TruncatedSVD(n_components=50, random_state=80)
X_train_svd = svd_final.fit_transform(X_train)
X_test_svd = svd_final.transform(X_test)

# Final Classifier Training
clf_final = MultiClassPC()
clf_final.fit(X_train_svd, y_train)

# Stop tracker and save emissions
emissions = tracker.stop()

# Emissions
emissions_dir = "../reports/emissions"
os.makedirs(emissions_dir, exist_ok=True)
emissions_path = os.path.join(emissions_dir, "emissions_ischaemia.json")
with open(emissions_path, "w") as f:
    json.dump({"emissions_kgCO2eq": emissions}, f)

print(f"\n[Carbon Footprint]")
print(f"Estimated emissions during the experiment: {emissions:.6f} kg CO₂eq")

# Test set evaluation
y_pred_test = clf_final.predict(X_test_svd)
y_proba_test = clf_final.predict_proba(X_test_svd)[:, 1]

print("\n[Test Set Performance]")
print(f"Accuracy: {accuracy_score(y_test, y_pred_test):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_test):.4f}")
print(f"Recall:   {recall_score(y_test, y_pred_test):.4f}")
print(f"F1-Score: {f1_score(y_test, y_pred_test):.4f}")
print(f"AUC:      {roc_auc_score(y_test, y_proba_test):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_test))

# Measuring Inference Time
inference_times = []
for x in X_test_svd:
    start = time.time()
    _ = clf_final.predict(x.reshape(1, -1))
    end = time.time()
    inference_times.append(end - start)

inference_times = np.array(inference_times)
print("\n[Inference Time]")
print(f"Average time per image: {np.mean(inference_times):.6f} s ± {np.std(inference_times):.6f} s")

# Save SVD and Model
output_dir = "../models/ischaemia"
os.makedirs(output_dir, exist_ok=True)
svd_path = os.path.join(output_dir, "OCPC_svd.pkl")
clf_path = os.path.join(output_dir, "OCPC_model.pkl")
joblib.dump(svd_final, svd_path)
joblib.dump(clf_final, clf_path)


[Cross-Validation - Training]

[Fold 1]
Accuracy: 0.7329 | Precision: 0.7622 | Recall: 0.6751 | F1: 0.7160 | AUC: 0.8018

[Fold 2]
Accuracy: 0.6966 | Precision: 0.7248 | Recall: 0.6634 | F1: 0.6928 | AUC: 0.7667

[Fold 3]
Accuracy: 0.7099 | Precision: 0.7291 | Recall: 0.6581 | F1: 0.6918 | AUC: 0.7853

[Fold 4]
Accuracy: 0.7080 | Precision: 0.7183 | Recall: 0.6654 | F1: 0.6908 | AUC: 0.7824

[Fold 5]
Accuracy: 0.7087 | Precision: 0.7270 | Recall: 0.6700 | F1: 0.6974 | AUC: 0.7855

[Average Metrics - Cross-Validation]
Accuracy: 0.7112 ± 0.0118
Precision: 0.7323 ± 0.0154
Recall:   0.6664 ± 0.0058
F1-Score: 0.6977 ± 0.0094
AUC:      0.7843 ± 0.0112

[Final Training and Test Evaluation]

[Carbon Footprint]
Estimated emissions during the experiment: 0.000042 kg CO₂eq

[Test Set Performance]
Accuracy: 0.6976
Precision: 0.7106
Recall:   0.6667
F1-Score: 0.6879
AUC:      0.7698
Confusion Matrix:
[[719 268]
 [329 658]]

[Inference Time]
Average time per image: 0.000403 s ± 0.001676 s


['../models/ischaemia\\OCPC_model.pkl']

## OCPC with PCA

In [11]:
import pandas as pd
import numpy as np
import joblib
import os
import cv2
import time
from sklearn.model_selection import KFold, train_test_split
from sklearn.decomposition import PCA
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix
)
from ocpc_py import MultiClassPC
from codecarbon import EmissionsTracker
import json

# Configuration
base_path = "../data/ischaemia"
image_size = (256, 256)

# Load dataset
dataset = []
for class_name, label in zip(["Aug-Positive", "Aug-Negative"], [1, 0]):
    folder = os.path.join(base_path, class_name)
    for image_name in os.listdir(folder):
        image_path = os.path.join(folder, image_name)
        dataset.append((image_path, label))

df = pd.DataFrame(dataset, columns=["image", "label"])

# Function to load and preprocess images
def load_images(df, image_size):
    images, labels = [], []
    for _, row in df.iterrows():
        img = cv2.imread(row["image"])
        if img is not None:
            img = cv2.resize(img, image_size)
            img = img.astype("float32") / 255.0
            images.append(img.flatten())
            labels.append(row["label"])
        else:
            print(f"Image not loaded: {row['image']}")
    return np.array(images), np.array(labels)

X_all, y_all = load_images(df, image_size)

# Split dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X_all, y_all, test_size=0.2, random_state=42, stratify=y_all
)

# Cross-validation setup
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accs, precs, recs, f1s, aucs = [], [], [], [], []

print("\n[Cross-Validation - Training]")
for fold, (train_idx, val_idx) in enumerate(kf.split(X_train), 1):
    X_tr, X_val = X_train[train_idx], X_train[val_idx]
    y_tr, y_val = y_train[train_idx], y_train[val_idx]

    # PCA transformation
    pca = PCA(n_components=50)
    X_tr_pca = pca.fit_transform(X_tr)
    X_val_pca = pca.transform(X_val)

    # Train classifier
    clf = MultiClassPC()
    clf.fit(X_tr_pca, y_tr)
    y_pred = clf.predict(X_val_pca)
    y_proba = clf.predict_proba(X_val_pca)[:, 1]

    # Calculate metrics
    accs.append(accuracy_score(y_val, y_pred))
    precs.append(precision_score(y_val, y_pred))
    recs.append(recall_score(y_val, y_pred))
    f1s.append(f1_score(y_val, y_pred))
    aucs.append(roc_auc_score(y_val, y_proba))

    print(f"\n[Fold {fold}]")
    print(f"Accuracy: {accs[-1]:.4f} | Precision: {precs[-1]:.4f} | Recall: {recs[-1]:.4f} | F1: {f1s[-1]:.4f} | AUC: {aucs[-1]:.4f}")

# Average metrics across folds
print("\n[Average Metrics - Cross-Validation]")
print(f"Accuracy: {np.mean(accs):.4f} ± {np.std(accs):.4f}")
print(f"Precision: {np.mean(precs):.4f} ± {np.std(precs):.4f}")
print(f"Recall:   {np.mean(recs):.4f} ± {np.std(recs):.4f}")
print(f"F1-Score: {np.mean(f1s):.4f} ± {np.std(f1s):.4f}")
print(f"AUC:      {np.mean(aucs):.4f} ± {np.std(aucs):.4f}")

# Final training and test evaluation
print("\n[Final Training and Test Evaluation]")
tracker = EmissionsTracker(log_level="ERROR")
tracker.start()

# Final PCA transformation
pca_final = PCA(n_components=50)
X_train_pca = pca_final.fit_transform(X_train)
X_test_pca = pca_final.transform(X_test)

# Train final classifier
clf_final = MultiClassPC()
clf_final.fit(X_train_pca, y_train)

# Stop carbon tracker and save emissions
emissions = tracker.stop()
emissions_dir = "../reports/emissions"
os.makedirs(emissions_dir, exist_ok=True)
emissions_path = os.path.join(emissions_dir, "emissions_ischaemia.json")
with open(emissions_path, "w") as f:
    json.dump({"emissions_kgCO2eq": emissions}, f)

print(f"\n[Carbon Footprint]")
print(f"Estimated emissions during the experiment: {emissions:.6f} kg CO₂eq")

# Evaluate on test set
y_pred_test = clf_final.predict(X_test_pca)
y_proba_test = clf_final.predict_proba(X_test_pca)[:, 1]

print("\n[Test Set Performance]")
print(f"Accuracy: {accuracy_score(y_test, y_pred_test):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_test):.4f}")
print(f"Recall:   {recall_score(y_test, y_pred_test):.4f}")
print(f"F1-Score: {f1_score(y_test, y_pred_test):.4f}")
print(f"AUC:      {roc_auc_score(y_test, y_proba_test):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_test))

# Measure inference time
inference_times = []
for x in X_test_pca:
    start = time.time()
    _ = clf_final.predict(x.reshape(1, -1))
    end = time.time()
    inference_times.append(end - start)

inference_times = np.array(inference_times)
print("\n[Inference Time]")
print(f"Average time per image: {np.mean(inference_times):.6f} s ± {np.std(inference_times):.6f} s")

# Save PCA and final model
output_dir = "../models/ischaemia"
os.makedirs(output_dir, exist_ok=True)
pca_path = os.path.join(output_dir, "OCPC_pca.pkl")
clf_path = os.path.join(output_dir, "OCPC_pca_model.pkl")
joblib.dump(pca_final, pca_path)
joblib.dump(clf_final, clf_path)



[Cross-Validation - Training]

[Fold 1]
Accuracy: 0.7222 | Precision: 0.7504 | Recall: 0.6637 | F1: 0.7044 | AUC: 0.7950

[Fold 2]
Accuracy: 0.6890 | Precision: 0.7271 | Recall: 0.6351 | F1: 0.6780 | AUC: 0.7653

[Fold 3]
Accuracy: 0.7144 | Precision: 0.7405 | Recall: 0.6504 | F1: 0.6926 | AUC: 0.7882

[Fold 4]
Accuracy: 0.7106 | Precision: 0.7248 | Recall: 0.6602 | F1: 0.6910 | AUC: 0.7848

[Fold 5]
Accuracy: 0.7074 | Precision: 0.7238 | Recall: 0.6726 | F1: 0.6972 | AUC: 0.7804

[Average Metrics - Cross-Validation]
Accuracy: 0.7087 ± 0.0110
Precision: 0.7333 ± 0.0104
Recall:   0.6564 ± 0.0128
F1-Score: 0.6926 ± 0.0087
AUC:      0.7828 ± 0.0100

[Final Training and Test Evaluation]

[Carbon Footprint]
Estimated emissions during the experiment: 0.000052 kg CO₂eq

[Test Set Performance]
Accuracy: 0.7107
Precision: 0.7227
Recall:   0.6839
F1-Score: 0.7028
AUC:      0.7703
Confusion Matrix:
[[728 259]
 [312 675]]

[Inference Time]
Average time per image: 0.000393 s ± 0.001662 s


['../models/ischaemia\\OCPC_pca_model.pkl']

# Random Forest

## Random Forest without PCA

In [23]:
import pandas as pd
import numpy as np
import os
import cv2
import time
from sklearn.model_selection import train_test_split, KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix
)
import joblib
from codecarbon import EmissionsTracker
import json

# Configuration
base_path = "../data/ischaemia"
image_size = (64, 64)

# Load dataset
dataset = []
for class_name, label in zip(["Aug-Positive", "Aug-Negative"], [1, 0]):
    folder = os.path.join(base_path, class_name)
    for image_name in os.listdir(folder):
        image_path = os.path.join(folder, image_name)
        dataset.append((image_path, label))

df = pd.DataFrame(dataset, columns=["image", "label"])

# Function to load and preprocess images
def load_images(df, image_size):
    images, labels = [], []
    for _, row in df.iterrows():
        img = cv2.imread(row["image"])
        if img is not None:
            img = cv2.resize(img, image_size)
            img = img.astype("float32") / 255.0
            images.append(img.flatten())
            labels.append(row["label"])
        else:
            print(f"Image not loaded: {row['image']}")
    return np.array(images), np.array(labels)

X_all, y_all = load_images(df, image_size)

# Split dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X_all, y_all, test_size=0.2, random_state=42, stratify=y_all
)

# Cross-validation setup
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accs, precs, recs, f1s, aucs = [], [], [], [], []

print("\n[Cross-Validation - Training]")
for fold, (train_idx, val_idx) in enumerate(kf.split(X_train), 1):
    X_tr, X_val = X_train[train_idx], X_train[val_idx]
    y_tr, y_val = y_train[train_idx], y_train[val_idx]

    # Train Random Forest classifier
    clf = RandomForestClassifier(
        n_estimators=300,
        max_depth=20,
        min_samples_split=2,
        min_samples_leaf=1,
        class_weight='balanced',
        random_state=42,
        n_jobs=-1
    )
    clf.fit(X_tr, y_tr)

    y_pred = clf.predict(X_val)
    y_proba = clf.predict_proba(X_val)[:, 1]

    # Compute metrics
    accs.append(accuracy_score(y_val, y_pred))
    precs.append(precision_score(y_val, y_pred))
    recs.append(recall_score(y_val, y_pred))
    f1s.append(f1_score(y_val, y_pred))
    aucs.append(roc_auc_score(y_val, y_proba))

    print(f"\n[Fold {fold}]")
    print(f"Accuracy: {accs[-1]:.4f} | Precision: {precs[-1]:.4f} | Recall: {recs[-1]:.4f} | F1: {f1s[-1]:.4f} | AUC: {aucs[-1]:.4f}")

# Average metrics across folds
print("\n[Average Metrics - Cross-Validation]")
print(f"Accuracy: {np.mean(accs):.4f} ± {np.std(accs):.4f}")
print(f"Precision: {np.mean(precs):.4f} ± {np.std(precs):.4f}")
print(f"Recall:   {np.mean(recs):.4f} ± {np.std(recs):.4f}")
print(f"F1-Score: {np.mean(f1s):.4f} ± {np.std(f1s):.4f}")
print(f"AUC:      {np.mean(aucs):.4f} ± {np.std(aucs):.4f}")

# Final training and test evaluation
print("\n[Final Training and Test Evaluation]")
tracker = EmissionsTracker(log_level="ERROR")
tracker.start()

clf_final = RandomForestClassifier(
    n_estimators=300,
    max_depth=20,
    min_samples_split=2,
    min_samples_leaf=1,
    class_weight='balanced',
    random_state=42,
    n_jobs=-1
)
clf_final.fit(X_train, y_train)

# Stop carbon tracker and save emissions
emissions = tracker.stop()
emissions_dir = "../reports/emissions"
os.makedirs(emissions_dir, exist_ok=True)
emissions_path = os.path.join(emissions_dir, "emissions_ischaemia_rf.json")
with open(emissions_path, "w") as f:
    json.dump({"emissions_kgCO2eq": emissions}, f)

print(f"\n[Carbon Footprint]")
print(f"Estimated emissions during training: {emissions:.6f} kg CO₂eq")

# Evaluate on test set
y_pred_test = clf_final.predict(X_test)
y_proba_test = clf_final.predict_proba(X_test)[:, 1]

print("\n[Test Set Performance]")
print(f"Accuracy: {accuracy_score(y_test, y_pred_test):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_test):.4f}")
print(f"Recall:   {recall_score(y_test, y_pred_test):.4f}")
print(f"F1-Score: {f1_score(y_test, y_pred_test):.4f}")
print(f"AUC:      {roc_auc_score(y_test, y_proba_test):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_test))

# Measure inference time
inference_times = []
for x in X_test:
    start = time.time()
    _ = clf_final.predict([x])
    end = time.time()
    inference_times.append(end - start)

inference_times = np.array(inference_times)
print("\n[Inference Time]")
print(f"Average time per image: {np.mean(inference_times):.6f} s ± {np.std(inference_times):.6f} s")

# Save final model
output_dir = "../models/ischaemia"
os.makedirs(output_dir, exist_ok=True)
clf_path = os.path.join(output_dir, "random_forest_ischaemia.pkl")
joblib.dump(clf_final, clf_path)



[Cross-Validation - Training]

[Fold 1]
Accuracy: 0.8684 | Precision: 0.8607 | Recall: 0.8782 | F1: 0.8693 | AUC: 0.9446

[Fold 2]
Accuracy: 0.8740 | Precision: 0.8745 | Recall: 0.8821 | F1: 0.8783 | AUC: 0.9444

[Fold 3]
Accuracy: 0.8835 | Precision: 0.8681 | Recall: 0.9014 | F1: 0.8844 | AUC: 0.9513

[Fold 4]
Accuracy: 0.8765 | Precision: 0.8535 | Recall: 0.9031 | F1: 0.8776 | AUC: 0.9490

[Fold 5]
Accuracy: 0.8847 | Precision: 0.8745 | Recall: 0.8989 | F1: 0.8865 | AUC: 0.9566

[Average Metrics - Cross-Validation]
Accuracy: 0.8774 ± 0.0061
Precision: 0.8663 ± 0.0082
Recall:   0.8927 ± 0.0105
F1-Score: 0.8792 ± 0.0060
AUC:      0.9492 ± 0.0045

[Final Training and Test Evaluation]

[Carbon Footprint]
Estimated emissions during training: 0.000280 kg CO₂eq

[Test Set Performance]
Accuracy: 0.8906
Precision: 0.8806
Recall:   0.9037
F1-Score: 0.8920
AUC:      0.9594
Confusion Matrix:
[[866 121]
 [ 95 892]]

[Inference Time]
Average time per image: 0.051098 s ± 0.019895 s


['../models/ischaemia\\random_forest_ischaemia.pkl']

## Random Forest with PCA

In [6]:
import pandas as pd
import numpy as np
import os
import cv2
from sklearn.model_selection import KFold, train_test_split
from sklearn.decomposition import PCA
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix
)
from sklearn.ensemble import RandomForestClassifier
from codecarbon import EmissionsTracker
import joblib

# Configuration
base_path = "../data/ischaemia"
image_size = (64, 64)

# Load dataset
dataset = []
for classe, label in zip(["Aug-Positive", "Aug-Negative"], [1, 0]):
    pasta = os.path.join(base_path, classe)
    for imagem in os.listdir(pasta):
        caminho_imagem = os.path.join(pasta, imagem)
        dataset.append((caminho_imagem, label))

df = pd.DataFrame(dataset, columns=["imagem", "label"])

# Function to load and preprocess images
def load_images(df, image_size):
    imagens, labels = [], []
    for _, row in df.iterrows():
        img = cv2.imread(row["imagem"])
        if img is not None:
            img = cv2.resize(img, image_size)
            img = img.astype("float32") / 255.0
            imagens.append(img.flatten())
            labels.append(row["label"])
        else:
            print(f"Imagem não carregada: {row['imagem']}")
    return np.array(imagens), np.array(labels)

X_all, y_all = load_images(df, image_size)

# Split dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X_all, y_all, test_size=0.2, random_state=42, stratify=y_all
)

# Cross-validation setup
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accs, precs, recs, f1s, aucs = [], [], [], [], []

print("\n[Cross-Validation - Training]")
for fold, (train_idx, val_idx) in enumerate(kf.split(X_train), 1):
    X_tr, X_val = X_train[train_idx], X_train[val_idx]
    y_tr, y_val = y_train[train_idx], y_train[val_idx]

    pca = PCA(n_components=50)
    X_tr_pca = pca.fit_transform(X_tr)
    X_val_pca = pca.transform(X_val)

    # Train Random Forest classifier
    clf = RandomForestClassifier(
        n_estimators=300,
        max_depth=20,
        min_samples_split=2,
        min_samples_leaf=1,
        class_weight='balanced',
        random_state=42,
        n_jobs=-1
    )
    clf.fit(X_tr_pca, y_tr)
    y_pred = clf.predict(X_val_pca)
    y_proba = clf.predict_proba(X_val_pca)[:, 1]

    # Compute metrics
    accs.append(accuracy_score(y_val, y_pred))
    precs.append(precision_score(y_val, y_pred))
    recs.append(recall_score(y_val, y_pred))
    f1s.append(f1_score(y_val, y_pred))
    aucs.append(roc_auc_score(y_val, y_proba))

    print(f"\n[Fold {fold}]")
    print(f"Acurácia: {accs[-1]:.4f} | Precisão: {precs[-1]:.4f} | Recall: {recs[-1]:.4f} | F1: {f1s[-1]:.4f} | AUC: {aucs[-1]:.4f}")

# Average metrics across folds
print("\n[Average Metrics - Cross-Validation]")
print(f"Accuracy: {np.mean(accs):.4f} ± {np.std(accs):.4f}")
print(f"Precision: {np.mean(precs):.4f} ± {np.std(precs):.4f}")
print(f"Recall:   {np.mean(recs):.4f} ± {np.std(recs):.4f}")
print(f"F1-Score: {np.mean(f1s):.4f} ± {np.std(f1s):.4f}")
print(f"AUC:      {np.mean(aucs):.4f} ± {np.std(aucs):.4f}")

# Final training and test evaluation
print("\n[Final Training and Test Evaluation]")
tracker = EmissionsTracker(log_level="ERROR")
tracker.start()

pca_final = PCA(n_components=50)
X_train_pca = pca_final.fit_transform(X_train)
X_test_pca = pca_final.transform(X_test)

clf_final = RandomForestClassifier(
    n_estimators=300,
    max_depth=20,
    min_samples_split=2,
    min_samples_leaf=1,
    class_weight='balanced',
    random_state=42,
    n_jobs=-1
)
clf_final.fit(X_train_pca, y_train)

# Stop carbon tracker and save emissions
emissions = tracker.stop()
emissions_dir = "../reports/emissions"
os.makedirs(emissions_dir, exist_ok=True)
emissions_path = os.path.join(emissions_dir, "emissions_ischaemia_rf.json")
with open(emissions_path, "w") as f:
    json.dump({"emissions_kgCO2eq": emissions}, f)

print(f"\n[Carbon Footprint]")
print(f"Estimated emissions during training: {emissions:.6f} kg CO₂eq")

# Evaluate on test set
y_pred_test = clf_final.predict(X_test_pca)
y_proba_test = clf_final.predict_proba(X_test_pca)[:, 1]

print("\n[Test Set Performance]")
print(f"Accuracy: {accuracy_score(y_test, y_pred_test):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_test):.4f}")
print(f"Recall:   {recall_score(y_test, y_pred_test):.4f}")
print(f"F1-Score: {f1_score(y_test, y_pred_test):.4f}")
print(f"AUC:      {roc_auc_score(y_test, y_proba_test):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_test))

# Measure inference time
inference_times = []
for x in X_test_pca:
    start = time.time()
    _ = clf_final.predict([x])
    end = time.time()
    inference_times.append(end - start)

inference_times = np.array(inference_times)
print("\n[Inference Time]")
print(f"Average time per image: {np.mean(inference_times):.6f} s ± {np.std(inference_times):.6f} s")

# Save final model
output_dir = "../models/ischaemia"
os.makedirs(output_dir, exist_ok=True)
pca_path = os.path.join(output_dir, "random_forest_PCA_ischaemia.pkl")
clf_path = os.path.join(output_dir, "random_forest_modelo_PCA_ischaemia.pkl")
joblib.dump(pca_final, pca_path)
joblib.dump(clf_final, clf_path)




[Cross-Validation - Training]

[Fold 1]
Acurácia: 0.8538 | Precisão: 0.8273 | Recall: 0.8934 | F1: 0.8591 | AUC: 0.9365

[Fold 2]
Acurácia: 0.8600 | Precisão: 0.8444 | Recall: 0.8931 | F1: 0.8681 | AUC: 0.9411

[Fold 3]
Acurácia: 0.8771 | Precisão: 0.8393 | Recall: 0.9296 | F1: 0.8821 | AUC: 0.9502

[Fold 4]
Acurácia: 0.8670 | Precisão: 0.8373 | Recall: 0.9044 | F1: 0.8696 | AUC: 0.9382

[Fold 5]
Acurácia: 0.8632 | Precisão: 0.8410 | Recall: 0.8963 | F1: 0.8678 | AUC: 0.9418

[Average Metrics - Cross-Validation]
Accuracy: 0.8642 ± 0.0078
Precision: 0.8379 ± 0.0058
Recall:   0.9034 ± 0.0137
F1-Score: 0.8693 ± 0.0074
AUC:      0.9416 ± 0.0047

[Final Training and Test Evaluation]

[Carbon Footprint]
Estimated emissions during training: 0.000042 kg CO₂eq

[Test Set Performance]
Accuracy: 0.8678
Precision: 0.8361
Recall:   0.9149
F1-Score: 0.8737
AUC:      0.9474
Confusion Matrix:
[[810 177]
 [ 84 903]]

[Inference Time]
Average time per image: 0.050428 s ± 0.100500 s


['../models/ischaemia\\random_forest_modelo_PCA_ischaemia.pkl']