# Tests of different classifiers – INFECTION

# OCPC

## OCPC with SVD

In [1]:
import pandas as pd
import numpy as np
import joblib
import os
import cv2
import time
from sklearn.model_selection import KFold, train_test_split
from sklearn.decomposition import TruncatedSVD

from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix
)
from ocpc_py import MultiClassPC
from codecarbon import EmissionsTracker
import json

# DATA LOADING
# Root folder of the infection images and the (width, height) passed to cv2.resize.
base_path = "../data/infection"
image_size = (256, 256)

# Build one (path, label) record per directory entry; the positive folder is
# listed first and mapped to 1, the negative folder to 0.
dataset = [
    (os.path.join(base_path, class_name, image_name), label)
    for class_name, label in (("Aug-Positive", 1), ("Aug-Negative", 0))
    for image_name in os.listdir(os.path.join(base_path, class_name))
]

df = pd.DataFrame(dataset, columns=["image", "label"])

# Read, resize, rescale and flatten every image referenced by the frame
def load_images(df, image_size):
    """Load the images listed in *df* as flat float32 feature vectors.

    Args:
        df: frame with an "image" column (file paths) and a "label" column.
        image_size: size tuple forwarded unchanged to ``cv2.resize``.

    Returns:
        A pair ``(features, targets)`` of NumPy arrays built from the images
        that could be read. Paths for which ``cv2.imread`` returns ``None``
        are reported on stdout and skipped, so both arrays stay aligned.
    """
    features, targets = [], []
    for path, target in zip(df["image"], df["label"]):
        pixels = cv2.imread(path)
        if pixels is None:
            print(f"Image not loaded: {path}")
            continue
        resized = cv2.resize(pixels, image_size)
        # Divide by 255.0 after casting (presumably 8-bit input -- TODO confirm).
        scaled = resized.astype("float32") / 255.0
        features.append(scaled.flatten())
        targets.append(target)
    return np.array(features), np.array(targets)

# Load every image once, then hold out a stratified 20 % test split.
X_all, y_all = load_images(df, image_size)
X_train, X_test, y_train, y_test = train_test_split(
    X_all,
    y_all,
    test_size=0.2,
    random_state=42,
    stratify=y_all,
)

# CROSS-VALIDATION
# 5-fold CV on the training split only. The SVD is refit inside each fold so
# the validation rows never influence the projection they are scored with.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accs, precs, recs, f1s, aucs = [], [], [], [], []

print("\n[Cross-Validation - Training]")
for fold, (train_idx, val_idx) in enumerate(kf.split(X_train), start=1):
    X_tr, y_tr = X_train[train_idx], y_train[train_idx]
    X_val, y_val = X_train[val_idx], y_train[val_idx]

    svd = TruncatedSVD(n_components=50, random_state=80)
    X_tr_svd = svd.fit_transform(X_tr)
    X_val_svd = svd.transform(X_val)

    clf = MultiClassPC()
    clf.fit(X_tr_svd, y_tr)
    y_pred = clf.predict(X_val_svd)
    # Column 1 is used as the positive-class score
    # (assumes predict_proba columns follow label order 0, 1 -- TODO confirm).
    y_proba = clf.predict_proba(X_val_svd)[:, 1]

    fold_acc = accuracy_score(y_val, y_pred)
    fold_prec = precision_score(y_val, y_pred)
    fold_rec = recall_score(y_val, y_pred)
    fold_f1 = f1_score(y_val, y_pred)
    fold_auc = roc_auc_score(y_val, y_proba)

    accs.append(fold_acc)
    precs.append(fold_prec)
    recs.append(fold_rec)
    f1s.append(fold_f1)
    aucs.append(fold_auc)

    print(f"\n[Fold {fold}]")
    print(
        f"Accuracy: {fold_acc:.4f} | Precision: {fold_prec:.4f} | "
        f"Recall: {fold_rec:.4f} | F1: {fold_f1:.4f} | AUC: {fold_auc:.4f}"
    )

# Final K-Fold Metrics: mean and (population) standard deviation over the folds
print("\n[Average Metrics - Cross-Validation]")
for heading, scores in (
    ("Accuracy: ", accs),
    ("Precision: ", precs),
    ("Recall:   ", recs),
    ("F1-Score: ", f1s),
    ("AUC:      ", aucs),
):
    print(f"{heading}{np.mean(scores):.4f} ± {np.std(scores):.4f}")

# Final training and test evaluation
print("\n[Final Training and Test Evaluation]")
# Only the SVD fit/transform and the classifier fit run inside the tracked region.
tracker = EmissionsTracker(log_level="ERROR")
tracker.start()

# Final SVD: fitted on the training split, then applied unchanged to the test split
svd_final = TruncatedSVD(n_components=50, random_state=80)
X_train_svd = svd_final.fit_transform(X_train)
X_test_svd = svd_final.transform(X_test)

# Final Classifier Training
clf_final = MultiClassPC()
clf_final.fit(X_train_svd, y_train)

# Stop tracker and save emissions
emissions = tracker.stop()

# Emissions
emissions_dir = "../reports/emissions"
os.makedirs(emissions_dir, exist_ok=True)
emissions_path = os.path.join(emissions_dir, "emissions_infection.json")
with open(emissions_path, "w") as f:
    json.dump({"emissions_kgCO2eq": emissions}, f)

print("\n[Carbon Footprint]")
print(f"Estimated emissions during the experiment: {emissions:.6f} kg CO₂eq")

# Test set evaluation
y_pred_test = clf_final.predict(X_test_svd)
# Column 1 is used as the positive-class score
# (assumes predict_proba columns follow label order 0, 1 -- TODO confirm).
y_proba_test = clf_final.predict_proba(X_test_svd)[:, 1]

print("\n[Test Set Performance]")
print(f"Accuracy: {accuracy_score(y_test, y_pred_test):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_test):.4f}")
print(f"Recall:   {recall_score(y_test, y_pred_test):.4f}")
print(f"F1-Score: {f1_score(y_test, y_pred_test):.4f}")
print(f"AUC:      {roc_auc_score(y_test, y_proba_test):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_test))

# Measuring Inference Time
# One predict() call per already-projected sample. perf_counter() is the
# monotonic, highest-resolution clock intended for short intervals; time.time()
# is a wall clock that can be coarse or jump, which distorts sub-millisecond
# measurements like these.
inference_times = []
for x in X_test_svd:
    start = time.perf_counter()
    _ = clf_final.predict(x.reshape(1, -1))
    inference_times.append(time.perf_counter() - start)

inference_times = np.array(inference_times)
print("\n[Inference Time]")
print(f"Average time per image: {np.mean(inference_times):.6f} s ± {np.std(inference_times):.6f} s")

# Save SVD and Model
output_dir = "../models/infection"
os.makedirs(output_dir, exist_ok=True)
svd_path = os.path.join(output_dir, "OCPC_svd.pkl")
clf_path = os.path.join(output_dir, "OCPC_model.pkl")
joblib.dump(svd_final, svd_path)
joblib.dump(clf_final, clf_path)


[Cross-Validation - Training]

[Fold 1]
Accuracy: 0.5302 | Precision: 0.5241 | Recall: 0.4914 | F1: 0.5072 | AUC: 0.5676

[Fold 2]
Accuracy: 0.5493 | Precision: 0.5533 | Recall: 0.5169 | F1: 0.5345 | AUC: 0.5746

[Fold 3]
Accuracy: 0.5372 | Precision: 0.5177 | Recall: 0.5177 | F1: 0.5177 | AUC: 0.5577

[Fold 4]
Accuracy: 0.5361 | Precision: 0.5451 | Recall: 0.5417 | F1: 0.5434 | AUC: 0.5371





[Fold 5]
Accuracy: 0.5605 | Precision: 0.5801 | Recall: 0.5492 | F1: 0.5642 | AUC: 0.5896

[Average Metrics - Cross-Validation]
Accuracy: 0.5427 ± 0.0109
Precision: 0.5441 ± 0.0223
Recall:   0.5234 ± 0.0205
F1-Score: 0.5334 ± 0.0199
AUC:      0.5653 ± 0.0175

[Final Training and Test Evaluation]

[Carbon Footprint]
Estimated emissions during the experiment: 0.000020 kg CO₂eq

[Test Set Performance]
Accuracy: 0.5501
Precision: 0.5543
Recall:   0.5110
F1-Score: 0.5318
AUC:      0.5817
Confusion Matrix:
[[347 242]
 [288 301]]

[Inference Time]
Average time per image: 0.000416 s ± 0.001845 s


['../models/infection\\OCPC_model.pkl']

## OCPC with PCA

In [2]:
import pandas as pd
import numpy as np
import joblib
import os
import cv2
import time
from sklearn.model_selection import KFold, train_test_split
from sklearn.decomposition import PCA
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix
)
from ocpc_py import MultiClassPC
from codecarbon import EmissionsTracker
import json

# Configuration
# Root folder of the infection images and the (width, height) passed to cv2.resize.
base_path = "../data/infection"
image_size = (256, 256)

# Load dataset
# One (path, label) record per directory entry; the positive folder is listed
# first and mapped to 1, the negative folder to 0.
dataset = [
    (os.path.join(base_path, class_name, image_name), label)
    for class_name, label in (("Aug-Positive", 1), ("Aug-Negative", 0))
    for image_name in os.listdir(os.path.join(base_path, class_name))
]

df = pd.DataFrame(dataset, columns=["image", "label"])

# Function to load and preprocess images
def load_images(df, image_size):
    """Load the images listed in *df* as flat float32 feature vectors.

    Args:
        df: frame with an "image" column (file paths) and a "label" column.
        image_size: size tuple forwarded unchanged to ``cv2.resize``.

    Returns:
        A pair ``(features, targets)`` of NumPy arrays built from the images
        that could be read. Paths for which ``cv2.imread`` returns ``None``
        are reported on stdout and skipped, so both arrays stay aligned.
    """
    features, targets = [], []
    for path, target in zip(df["image"], df["label"]):
        pixels = cv2.imread(path)
        if pixels is None:
            print(f"Image not loaded: {path}")
            continue
        resized = cv2.resize(pixels, image_size)
        # Divide by 255.0 after casting (presumably 8-bit input -- TODO confirm).
        scaled = resized.astype("float32") / 255.0
        features.append(scaled.flatten())
        targets.append(target)
    return np.array(features), np.array(targets)

X_all, y_all = load_images(df, image_size)

# Split dataset into training and test sets (stratified 80/20 hold-out)
X_train, X_test, y_train, y_test = train_test_split(
    X_all, y_all, test_size=0.2, random_state=42, stratify=y_all
)

# Cross-validation setup
# 5-fold CV on the training split only; per-fold scores are collected in the
# lists below and summarised after the loop.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accs, precs, recs, f1s, aucs = [], [], [], [], []

print("\n[Cross-Validation - Training]")
for fold, (train_idx, val_idx) in enumerate(kf.split(X_train), 1):
    X_tr, X_val = X_train[train_idx], X_train[val_idx]
    y_tr, y_val = y_train[train_idx], y_train[val_idx]

    # PCA transformation, refit inside each fold so validation rows never
    # influence the projection. random_state is fixed because PCA may select
    # a randomized SVD solver for inputs of this size; without a seed the
    # fold metrics would not be repeatable, unlike the seeded split and KFold.
    pca = PCA(n_components=50, random_state=42)
    X_tr_pca = pca.fit_transform(X_tr)
    X_val_pca = pca.transform(X_val)

    # Train classifier
    clf = MultiClassPC()
    clf.fit(X_tr_pca, y_tr)
    y_pred = clf.predict(X_val_pca)
    # Column 1 is used as the positive-class score
    # (assumes predict_proba columns follow label order 0, 1 -- TODO confirm).
    y_proba = clf.predict_proba(X_val_pca)[:, 1]

    # Calculate metrics
    accs.append(accuracy_score(y_val, y_pred))
    precs.append(precision_score(y_val, y_pred))
    recs.append(recall_score(y_val, y_pred))
    f1s.append(f1_score(y_val, y_pred))
    aucs.append(roc_auc_score(y_val, y_proba))

    print(f"\n[Fold {fold}]")
    print(f"Accuracy: {accs[-1]:.4f} | Precision: {precs[-1]:.4f} | Recall: {recs[-1]:.4f} | F1: {f1s[-1]:.4f} | AUC: {aucs[-1]:.4f}")

# Average metrics across folds (mean ± population standard deviation)
print("\n[Average Metrics - Cross-Validation]")
print(f"Accuracy: {np.mean(accs):.4f} ± {np.std(accs):.4f}")
print(f"Precision: {np.mean(precs):.4f} ± {np.std(precs):.4f}")
print(f"Recall:   {np.mean(recs):.4f} ± {np.std(recs):.4f}")
print(f"F1-Score: {np.mean(f1s):.4f} ± {np.std(f1s):.4f}")
print(f"AUC:      {np.mean(aucs):.4f} ± {np.std(aucs):.4f}")

# Final training and test evaluation
print("\n[Final Training and Test Evaluation]")
# Only the PCA fit/transform and the classifier fit run inside the tracked region.
tracker = EmissionsTracker(log_level="ERROR")
tracker.start()

# Final PCA transformation: fitted on the training split, applied unchanged to
# the test split. Seeded so the saved projection and test metrics are
# repeatable when PCA selects a randomized SVD solver.
pca_final = PCA(n_components=50, random_state=42)
X_train_pca = pca_final.fit_transform(X_train)
X_test_pca = pca_final.transform(X_test)

# Train final classifier
clf_final = MultiClassPC()
clf_final.fit(X_train_pca, y_train)

# Stop carbon tracker and save emissions
emissions = tracker.stop()
emissions_dir = "../reports/emissions"
os.makedirs(emissions_dir, exist_ok=True)
# Dedicated file name: "emissions_infection.json" is already written by the
# OCPC + SVD experiment, so reusing it here would overwrite that result.
emissions_path = os.path.join(emissions_dir, "emissions_infection_ocpc_pca.json")
with open(emissions_path, "w") as f:
    json.dump({"emissions_kgCO2eq": emissions}, f)

print("\n[Carbon Footprint]")
print(f"Estimated emissions during the experiment: {emissions:.6f} kg CO₂eq")

# Evaluate on test set
y_pred_test = clf_final.predict(X_test_pca)
# Column 1 is used as the positive-class score
# (assumes predict_proba columns follow label order 0, 1 -- TODO confirm).
y_proba_test = clf_final.predict_proba(X_test_pca)[:, 1]

print("\n[Test Set Performance]")
print(f"Accuracy: {accuracy_score(y_test, y_pred_test):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_test):.4f}")
print(f"Recall:   {recall_score(y_test, y_pred_test):.4f}")
print(f"F1-Score: {f1_score(y_test, y_pred_test):.4f}")
print(f"AUC:      {roc_auc_score(y_test, y_proba_test):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_test))

# Measure inference time
# One predict() call per already-projected sample. perf_counter() is the
# monotonic, highest-resolution clock intended for short intervals; time.time()
# is a wall clock that can be coarse or jump, which distorts sub-millisecond
# measurements like these.
inference_times = []
for x in X_test_pca:
    start = time.perf_counter()
    _ = clf_final.predict(x.reshape(1, -1))
    inference_times.append(time.perf_counter() - start)

inference_times = np.array(inference_times)
print("\n[Inference Time]")
print(f"Average time per image: {np.mean(inference_times):.6f} s ± {np.std(inference_times):.6f} s")

# Save PCA and final model
output_dir = "../models/infection"
os.makedirs(output_dir, exist_ok=True)
pca_path = os.path.join(output_dir, "OCPC_pca_infection.pkl")
clf_path = os.path.join(output_dir, "OCPC_pca_model_infection.pkl")
joblib.dump(pca_final, pca_path)
joblib.dump(clf_final, clf_path)



[Cross-Validation - Training]

[Fold 1]
Accuracy: 0.5292 | Precision: 0.5245 | Recall: 0.4612 | F1: 0.4908 | AUC: 0.5654

[Fold 2]
Accuracy: 0.5493 | Precision: 0.5535 | Recall: 0.5148 | F1: 0.5335 | AUC: 0.5740

[Fold 3]
Accuracy: 0.5531 | Precision: 0.5358 | Recall: 0.5133 | F1: 0.5243 | AUC: 0.5731

[Fold 4]
Accuracy: 0.5329 | Precision: 0.5444 | Recall: 0.5104 | F1: 0.5269 | AUC: 0.5377

[Fold 5]
Accuracy: 0.5817 | Precision: 0.6044 | Recall: 0.5574 | F1: 0.5800 | AUC: 0.6038

[Average Metrics - Cross-Validation]
Accuracy: 0.5492 ± 0.0187
Precision: 0.5525 ± 0.0277
Recall:   0.5114 ± 0.0305
F1-Score: 0.5311 ± 0.0286
AUC:      0.5708 ± 0.0211

[Final Training and Test Evaluation]

[Carbon Footprint]
Estimated emissions during the experiment: 0.000036 kg CO₂eq

[Test Set Performance]
Accuracy: 0.5407
Precision: 0.5465
Recall:   0.4788
F1-Score: 0.5104
AUC:      0.5716
Confusion Matrix:
[[355 234]
 [307 282]]

[Inference Time]
Average time per image: 0.001173 s ± 0.002565 s


['../models/infection\\OCPC_pca_model_infection.pkl']

# Random Forest

## Random Forest without PCA

In [3]:
import pandas as pd
import numpy as np
import os
import cv2
import time
from sklearn.model_selection import train_test_split, KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix
)
import joblib
from codecarbon import EmissionsTracker
import json

# Configuration
# Root folder of the infection images and the (width, height) passed to cv2.resize.
base_path = "../data/infection"
image_size = (256, 256)

# Load dataset
# One (path, label) record per directory entry; the positive folder is listed
# first and mapped to 1, the negative folder to 0.
dataset = [
    (os.path.join(base_path, class_name, image_name), label)
    for class_name, label in (("Aug-Positive", 1), ("Aug-Negative", 0))
    for image_name in os.listdir(os.path.join(base_path, class_name))
]

df = pd.DataFrame(dataset, columns=["image", "label"])

# Function to load and preprocess images
def load_images(df, image_size):
    """Load the images listed in *df* as flat float32 feature vectors.

    Args:
        df: frame with an "image" column (file paths) and a "label" column.
        image_size: size tuple forwarded unchanged to ``cv2.resize``.

    Returns:
        A pair ``(features, targets)`` of NumPy arrays built from the images
        that could be read. Paths for which ``cv2.imread`` returns ``None``
        are reported on stdout and skipped, so both arrays stay aligned.
    """
    features, targets = [], []
    for path, target in zip(df["image"], df["label"]):
        pixels = cv2.imread(path)
        if pixels is None:
            print(f"Image not loaded: {path}")
            continue
        resized = cv2.resize(pixels, image_size)
        # Divide by 255.0 after casting (presumably 8-bit input -- TODO confirm).
        scaled = resized.astype("float32") / 255.0
        features.append(scaled.flatten())
        targets.append(target)
    return np.array(features), np.array(targets)

X_all, y_all = load_images(df, image_size)

# Split dataset into training and test sets (stratified 80/20 hold-out)
X_train, X_test, y_train, y_test = train_test_split(
    X_all,
    y_all,
    test_size=0.2,
    random_state=42,
    stratify=y_all,
)

# Cross-validation setup
# 5-fold CV on the training split only; the forest is trained on raw pixels.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accs, precs, recs, f1s, aucs = [], [], [], [], []

print("\n[Cross-Validation - Training]")
for fold, (train_idx, val_idx) in enumerate(kf.split(X_train), start=1):
    X_tr, y_tr = X_train[train_idx], y_train[train_idx]
    X_val, y_val = X_train[val_idx], y_train[val_idx]

    # Train Random Forest classifier (a fresh, identically configured forest per fold)
    clf = RandomForestClassifier(
        n_estimators=300,
        max_depth=20,
        min_samples_split=2,
        min_samples_leaf=1,
        class_weight='balanced',
        random_state=42,
        n_jobs=-1,
    )
    clf.fit(X_tr, y_tr)

    y_pred = clf.predict(X_val)
    # Column 1 of predict_proba is the score for label 1 (classes_ is sorted).
    y_proba = clf.predict_proba(X_val)[:, 1]

    # Compute metrics
    fold_acc = accuracy_score(y_val, y_pred)
    fold_prec = precision_score(y_val, y_pred)
    fold_rec = recall_score(y_val, y_pred)
    fold_f1 = f1_score(y_val, y_pred)
    fold_auc = roc_auc_score(y_val, y_proba)

    accs.append(fold_acc)
    precs.append(fold_prec)
    recs.append(fold_rec)
    f1s.append(fold_f1)
    aucs.append(fold_auc)

    print(f"\n[Fold {fold}]")
    print(
        f"Accuracy: {fold_acc:.4f} | Precision: {fold_prec:.4f} | "
        f"Recall: {fold_rec:.4f} | F1: {fold_f1:.4f} | AUC: {fold_auc:.4f}"
    )

# Average metrics across folds (mean ± population standard deviation)
print("\n[Average Metrics - Cross-Validation]")
for heading, scores in (
    ("Accuracy: ", accs),
    ("Precision: ", precs),
    ("Recall:   ", recs),
    ("F1-Score: ", f1s),
    ("AUC:      ", aucs),
):
    print(f"{heading}{np.mean(scores):.4f} ± {np.std(scores):.4f}")

# Final training and test evaluation
print("\n[Final Training and Test Evaluation]")
# Only the forest fit on the full training split runs inside the tracked region.
tracker = EmissionsTracker(log_level="ERROR")
tracker.start()

clf_final = RandomForestClassifier(
    n_estimators=300,
    max_depth=20,
    min_samples_split=2,
    min_samples_leaf=1,
    class_weight='balanced',
    random_state=42,
    n_jobs=-1
)
clf_final.fit(X_train, y_train)

# Stop carbon tracker and save emissions
emissions = tracker.stop()
emissions_dir = "../reports/emissions"
os.makedirs(emissions_dir, exist_ok=True)
emissions_path = os.path.join(emissions_dir, "emissions_infection_rf.json")
with open(emissions_path, "w") as f:
    json.dump({"emissions_kgCO2eq": emissions}, f)

print("\n[Carbon Footprint]")
print(f"Estimated emissions during training: {emissions:.6f} kg CO₂eq")

# Evaluate on test set
y_pred_test = clf_final.predict(X_test)
# Column 1 of predict_proba is the score for label 1 (classes_ is sorted).
y_proba_test = clf_final.predict_proba(X_test)[:, 1]

print("\n[Test Set Performance]")
print(f"Accuracy: {accuracy_score(y_test, y_pred_test):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_test):.4f}")
print(f"Recall:   {recall_score(y_test, y_pred_test):.4f}")
print(f"F1-Score: {f1_score(y_test, y_pred_test):.4f}")
print(f"AUC:      {roc_auc_score(y_test, y_proba_test):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_test))

# Measure inference time
# One predict() call per test sample. perf_counter() is the monotonic,
# highest-resolution clock intended for short intervals; time.time() is a wall
# clock that can be coarse or jump, which distorts per-sample latencies.
inference_times = []
for x in X_test:
    start = time.perf_counter()
    _ = clf_final.predict([x])
    inference_times.append(time.perf_counter() - start)

inference_times = np.array(inference_times)
print("\n[Inference Time]")
print(f"Average time per image: {np.mean(inference_times):.6f} s ± {np.std(inference_times):.6f} s")

# Save final model
output_dir = "../models/infection"
os.makedirs(output_dir, exist_ok=True)
clf_path = os.path.join(output_dir, "random_forest_infection.pkl")
joblib.dump(clf_final, clf_path)



[Cross-Validation - Training]

[Fold 1]
Accuracy: 0.7370 | Precision: 0.7596 | Recall: 0.6810 | F1: 0.7182 | AUC: 0.8123

[Fold 2]
Accuracy: 0.7285 | Precision: 0.7741 | Recall: 0.6462 | F1: 0.7044 | AUC: 0.8101

[Fold 3]
Accuracy: 0.7537 | Precision: 0.7535 | Recall: 0.7235 | F1: 0.7381 | AUC: 0.8324

[Fold 4]
Accuracy: 0.7304 | Precision: 0.7580 | Recall: 0.6917 | F1: 0.7233 | AUC: 0.7999

[Fold 5]
Accuracy: 0.7176 | Precision: 0.7523 | Recall: 0.6783 | F1: 0.7134 | AUC: 0.8112

[Average Metrics - Cross-Validation]
Accuracy: 0.7334 ± 0.0119
Precision: 0.7595 ± 0.0078
Recall:   0.6841 ± 0.0248
F1-Score: 0.7195 ± 0.0112
AUC:      0.8132 ± 0.0106

[Final Training and Test Evaluation]

[Carbon Footprint]
Estimated emissions during training: 0.000050 kg CO₂eq

[Test Set Performance]
Accuracy: 0.7445
Precision: 0.7717
Recall:   0.6944
F1-Score: 0.7310
AUC:      0.8168
Confusion Matrix:
[[468 121]
 [180 409]]

[Inference Time]
Average time per image: 0.027872 s ± 0.004010 s


['../models/infection\\random_forest_infection.pkl']

## Random Forest with PCA

In [4]:
import json
import os
import time

import cv2
import joblib
import numpy as np
import pandas as pd
from codecarbon import EmissionsTracker
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix
)
from sklearn.model_selection import KFold, train_test_split

# Configuration
# Root folder of the infection images and the (width, height) passed to cv2.resize.
base_path = "../data/infection"
image_size = (256, 256)

# Load dataset
# One (path, label) record per directory entry; the positive folder is listed
# first and mapped to 1, the negative folder to 0.
dataset = [
    (os.path.join(base_path, class_name, image_name), label)
    for class_name, label in (("Aug-Positive", 1), ("Aug-Negative", 0))
    for image_name in os.listdir(os.path.join(base_path, class_name))
]

# The path column keeps its "imagem" key because load_images below reads it by that name.
df = pd.DataFrame(dataset, columns=["imagem", "label"])

# Function to load and preprocess images
def load_images(df, image_size):
    """Load the images listed in *df* as flat float32 feature vectors.

    Args:
        df: frame with an "imagem" column (file paths) and a "label" column.
        image_size: size tuple forwarded unchanged to ``cv2.resize``.

    Returns:
        A pair ``(features, targets)`` of NumPy arrays built from the images
        that could be read. Paths for which ``cv2.imread`` returns ``None``
        are reported on stdout and skipped, so both arrays stay aligned.
    """
    features, targets = [], []
    for path, target in zip(df["imagem"], df["label"]):
        pixels = cv2.imread(path)
        if pixels is None:
            print(f"Imagem não carregada: {path}")
            continue
        resized = cv2.resize(pixels, image_size)
        # Divide by 255.0 after casting (presumably 8-bit input -- TODO confirm).
        scaled = resized.astype("float32") / 255.0
        features.append(scaled.flatten())
        targets.append(target)
    return np.array(features), np.array(targets)

X_all, y_all = load_images(df, image_size)

# Split dataset into training and test sets (stratified 80/20 hold-out)
X_train, X_test, y_train, y_test = train_test_split(
    X_all, y_all, test_size=0.2, random_state=42, stratify=y_all
)

# Cross-validation setup
# 5-fold CV on the training split only; per-fold scores are collected in the
# lists below and summarised after the loop.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accs, precs, recs, f1s, aucs = [], [], [], [], []

print("\n[Cross-Validation - Training]")
for fold, (train_idx, val_idx) in enumerate(kf.split(X_train), 1):
    X_tr, X_val = X_train[train_idx], X_train[val_idx]
    y_tr, y_val = y_train[train_idx], y_train[val_idx]

    # PCA is refit inside each fold so validation rows never influence the
    # projection. random_state is fixed because PCA may select a randomized
    # SVD solver for inputs of this size; without a seed the fold metrics
    # would not be repeatable, unlike the seeded split, KFold and forest.
    pca = PCA(n_components=50, random_state=42)
    X_tr_pca = pca.fit_transform(X_tr)
    X_val_pca = pca.transform(X_val)

    # Train Random Forest classifier
    clf = RandomForestClassifier(
        n_estimators=300,
        max_depth=20,
        min_samples_split=2,
        min_samples_leaf=1,
        class_weight='balanced',
        random_state=42,
        n_jobs=-1
    )
    clf.fit(X_tr_pca, y_tr)
    y_pred = clf.predict(X_val_pca)
    # Column 1 of predict_proba is the score for label 1 (classes_ is sorted).
    y_proba = clf.predict_proba(X_val_pca)[:, 1]

    # Compute metrics
    accs.append(accuracy_score(y_val, y_pred))
    precs.append(precision_score(y_val, y_pred))
    recs.append(recall_score(y_val, y_pred))
    f1s.append(f1_score(y_val, y_pred))
    aucs.append(roc_auc_score(y_val, y_proba))

    print(f"\n[Fold {fold}]")
    print(f"Acurácia: {accs[-1]:.4f} | Precisão: {precs[-1]:.4f} | Recall: {recs[-1]:.4f} | F1: {f1s[-1]:.4f} | AUC: {aucs[-1]:.4f}")

# Average metrics across folds (mean ± population standard deviation)
print("\n[Average Metrics - Cross-Validation]")
print(f"Accuracy: {np.mean(accs):.4f} ± {np.std(accs):.4f}")
print(f"Precision: {np.mean(precs):.4f} ± {np.std(precs):.4f}")
print(f"Recall:   {np.mean(recs):.4f} ± {np.std(recs):.4f}")
print(f"F1-Score: {np.mean(f1s):.4f} ± {np.std(f1s):.4f}")
print(f"AUC:      {np.mean(aucs):.4f} ± {np.std(aucs):.4f}")

# Final training and test evaluation
print("\n[Final Training and Test Evaluation]")
# Only the PCA fit/transform and the forest fit run inside the tracked region.
tracker = EmissionsTracker(log_level="ERROR")
tracker.start()

# Final PCA: fitted on the training split, applied unchanged to the test split.
# Seeded so the saved projection and test metrics are repeatable when PCA
# selects a randomized SVD solver.
pca_final = PCA(n_components=50, random_state=42)
X_train_pca = pca_final.fit_transform(X_train)
X_test_pca = pca_final.transform(X_test)

clf_final = RandomForestClassifier(
    n_estimators=300,
    max_depth=20,
    min_samples_split=2,
    min_samples_leaf=1,
    class_weight='balanced',
    random_state=42,
    n_jobs=-1
)
clf_final.fit(X_train_pca, y_train)

# Stop carbon tracker and save emissions
emissions = tracker.stop()
emissions_dir = "../reports/emissions"
os.makedirs(emissions_dir, exist_ok=True)
# Dedicated file name: "emissions_infection_rf.json" is already written by the
# Random Forest experiment without PCA, so reusing it here would overwrite
# that result.
emissions_path = os.path.join(emissions_dir, "emissions_infection_rf_pca.json")
with open(emissions_path, "w") as f:
    json.dump({"emissions_kgCO2eq": emissions}, f)

print("\n[Carbon Footprint]")
print(f"Estimated emissions during training: {emissions:.6f} kg CO₂eq")

# Evaluate on test set
y_pred_test = clf_final.predict(X_test_pca)
# Column 1 of predict_proba is the score for label 1 (classes_ is sorted).
y_proba_test = clf_final.predict_proba(X_test_pca)[:, 1]

print("\n[Test Set Performance]")
print(f"Accuracy: {accuracy_score(y_test, y_pred_test):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_test):.4f}")
print(f"Recall:   {recall_score(y_test, y_pred_test):.4f}")
print(f"F1-Score: {f1_score(y_test, y_pred_test):.4f}")
print(f"AUC:      {roc_auc_score(y_test, y_proba_test):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_test))

# Measure inference time
# One predict() call per already-projected sample. perf_counter() is the
# monotonic, highest-resolution clock intended for short intervals; time.time()
# is a wall clock that can be coarse or jump, which distorts per-sample latencies.
inference_times = []
for x in X_test_pca:
    start = time.perf_counter()
    _ = clf_final.predict([x])
    inference_times.append(time.perf_counter() - start)

inference_times = np.array(inference_times)
print("\n[Inference Time]")
print(f"Average time per image: {np.mean(inference_times):.6f} s ± {np.std(inference_times):.6f} s")

# Save final model
output_dir = "../models/infection"
os.makedirs(output_dir, exist_ok=True)
pca_path = os.path.join(output_dir, "random_forest_PCA_infection.pkl")
clf_path = os.path.join(output_dir, "random_forest_modelo_PCA_infection.pkl")
joblib.dump(pca_final, pca_path)
joblib.dump(clf_final, clf_path)


[Cross-Validation - Training]

[Fold 1]
Acurácia: 0.6978 | Precisão: 0.6808 | Recall: 0.7263 | F1: 0.7028 | AUC: 0.7655

[Fold 2]
Acurácia: 0.7020 | Precisão: 0.6985 | Recall: 0.7119 | F1: 0.7051 | AUC: 0.7798

[Fold 3]
Acurácia: 0.7240 | Precisão: 0.6853 | Recall: 0.7854 | F1: 0.7320 | AUC: 0.7886

[Fold 4]
Acurácia: 0.6847 | Precisão: 0.6819 | Recall: 0.7146 | F1: 0.6979 | AUC: 0.7584

[Fold 5]
Acurácia: 0.6794 | Precisão: 0.6816 | Recall: 0.7152 | F1: 0.6980 | AUC: 0.7695

[Average Metrics - Cross-Validation]
Accuracy: 0.6976 ± 0.0156
Precision: 0.6856 ± 0.0066
Recall:   0.7307 ± 0.0278
F1-Score: 0.7072 ± 0.0127
AUC:      0.7724 ± 0.0107

[Final Training and Test Evaluation]

[Carbon Footprint]
Estimated emissions during training: 0.000023 kg CO₂eq

[Test Set Performance]
Accuracy: 0.7046
Precision: 0.6922
Recall:   0.7368
F1-Score: 0.7138
AUC:      0.7925
Confusion Matrix:
[[396 193]
 [155 434]]

[Inference Time]
Average time per image: 0.027961 s ± 0.004011 s


['../models/infection\\random_forest_modelo_PCA_infection.pkl']