<a href="https://colab.research.google.com/github/rafaelrpq/classificadores/blob/main/classificadores_v3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Instalar o EFC (Energy-Based Flow Classifier)

```bash
git clone https://github.com/EnergyBasedFlowClassifier/EFC-package
cd EFC-package
pip install -r requirements.txt
pip install .
```



In [3]:
import os
import numpy as np
import torch
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset

from sklearn.model_selection import train_test_split, KFold
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix
)

from efc import EnergyBasedFlowClassifier

from transformers import AutoModel
import warnings

from tqdm import tqdm
from collections import defaultdict

import matplotlib.pyplot as plt
import seaborn as sns
import json

# Silence FutureWarning / UserWarning messages so the notebook output stays readable.
for _ignored_category in (FutureWarning, UserWarning):
    warnings.filterwarnings("ignore", category=_ignored_category)

# Select the compute device: the GPU when CUDA is available, otherwise the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
print(f"Usando o dispositivo: {device}")

# Hyperparameter grids, keyed by the classifier's class name.
# Each entry is one dict of constructor keyword arguments to try.
hyperparameters = {
    "SVC": [dict(C=penalty) for penalty in (0.1, 1, 10, 100, 1000)],
    "MLPClassifier": [
        dict(hidden_layer_sizes=layers, max_iter=200, random_state=42)
        for layers in ((50,), (100,), (50, 50), (100, 100), (200, 200))
    ],
    "RandomForestClassifier": [
        dict(n_estimators=trees, random_state=42)
        for trees in (10, 50, 100, 200, 500)
    ],
    "KNeighborsClassifier": [
        dict(n_neighbors=neighbors) for neighbors in (1, 3, 5, 7, 9)
    ],
    "EnergyBasedFlowClassifier": [
        dict(n_bins=bins, cutoff_quantile=quantile)
        for bins, quantile in ((30, .95), (50, .97), (100, .99))
    ],
}

# Candidate fit-time settings for the EFC classifier.
fit_efc = [
    dict(n_epochs=epochs, learning_rate=rate, verbose=0)
    for epochs, rate in ((20, 1e-3), (30, 1e-3), (50, 5e-4))
]
# Image preprocessing pipeline: resize to 224x224, convert to a tensor, then
# normalize each of the three channels with mean 0.5 and std 0.5.
_channel_mean = [0.5] * 3
_channel_std = [0.5] * 3
transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_channel_mean, std=_channel_std),
])

# Load the dataset: clone the CTCB repository (IPython shell escape; output is discarded).
print("Downloading dataset...")
!git clone https://github.com/iman2693/CTCB.git > /dev/null 2>&1
# NOTE(review): hard-coded path; presumably the notebook runs in Colab, where the
# clone lands under /content - confirm before running elsewhere.
path = "/content/CTCB"


# Image directories
train_data_dir = os.path.join (path,"dataset/Train")
# Assumes a 'Test' directory exists, following the common dataset layout
test_data_dir = os.path.join (path,"dataset/Test")


# One ImageFolder per split, both using the same preprocessing pipeline.
train_dataset = datasets.ImageFolder(root=train_data_dir, transform=transform)
test_dataset = datasets.ImageFolder(root=test_data_dir, transform=transform)
# Class names are taken from the training split only.
# NOTE(review): presumably the Test split has the same class folders - confirm.
class_names = train_dataset.classes
num_classes = len(class_names)

print(f"Train dataset size: {len(train_dataset)}")
print(f"Test dataset size: {len(test_dataset)}")
print(f"Classes: {class_names}")
print(f"Number of classes: {num_classes}") # Explicitly print num_classes


# KFold setup (applied only to the initial TRAIN set); shuffled with a fixed seed.
k_folds = 5
kf = KFold (n_splits=k_folds, shuffle=True, random_state=42)


class FeatureExtractor :
    """Wrap a pretrained ``AutoModel`` checkpoint and turn image batches into feature vectors.

    The model is moved to the module-level ``device`` and put in evaluation mode.
    """

    def __init__ (self, vit_model_name) :
        """Load ``vit_model_name`` with ``AutoModel``; loading errors are reported and re-raised."""
        try:
            loaded_model = AutoModel.from_pretrained(vit_model_name)
            self.model = loaded_model.to(device)
            print(f"Loaded {vit_model_name} as AutoModel.")
        except Exception as e:
            print(f"Error loading {vit_model_name} with AutoModel: {e}")
            print("Ensure the model checkpoint is compatible or try a different loading strategy.")
            raise e

        self.model.eval()
        print("Feature extractor model loaded and set to evaluation mode.")

    def _select_embedding (self, outputs) :
        """Pick one feature vector per sample out of the model output.

        Preference order: position 0 of ``last_hidden_state`` (presumably the
        [CLS] token for ViT-style models), then ``pooler_output``, then
        position 0 of the first element of ``outputs``.
        """
        hidden_states = getattr(outputs, 'last_hidden_state', None)
        if hidden_states is not None:
            return hidden_states[:, 0, :]

        pooled = getattr(outputs, 'pooler_output', None)
        if pooled is not None:
            return pooled

        # Last resort: treat the output as an indexable sequence of tensors.
        try:
            return outputs[0][:, 0, :]
        except Exception as e:
            print(f"Error accessing fallback output structure: {e}")
            print("Model output structure is unexpected. Check model documentation or model type.")
            raise e

    def extract_features (self, dataloader):
        """Run the model over ``dataloader`` without gradients.

        Returns:
            A ``(features, labels)`` pair of numpy arrays: the per-batch
            feature arrays stacked vertically and the per-batch targets
            stacked horizontally.
        """
        print(f"Extracting features for {len(dataloader.dataset)} samples...")
        feature_batches, label_batches = [], []

        with torch.no_grad():
            for batch_inputs, batch_targets in tqdm(dataloader, desc="Extracting Features"):
                model_outputs = self.model(batch_inputs.to(device))
                embeddings = self._select_embedding(model_outputs)

                # Move to the CPU and convert to numpy before accumulating.
                feature_batches.append(embeddings.cpu().numpy())
                label_batches.append(batch_targets.numpy())

        return np.vstack(feature_batches), np.hstack(label_batches)


def _nan_to_none(value):
    """Return ``value`` as a plain float, or None when it is NaN."""
    return None if np.isnan(value) else float(value)


def evaluate_metrics(y_true, y_pred, class_names=None, num_classes=None):
    """Compute classification metrics and confusion-matrix components.

    Args:
        y_true: True labels (anything ``np.asarray`` accepts).
        y_pred: Predicted labels, same length as ``y_true``.
        class_names: Optional display names used as keys of the per-class
            counts; ignored (replaced by ``class_<i>``) when their number does
            not match the confusion-matrix size.
        num_classes: When given, the confusion matrix is built over labels
            ``0 .. num_classes - 1``; otherwise over the labels seen in
            ``y_true`` and ``y_pred``.

    Returns:
        A dict with:
        - ``accuracy``: accuracy where each sample is weighted by the inverse
          frequency of its true class (None when it cannot be computed);
        - ``precision_*`` / ``recall_*`` / ``f1_*`` for ``macro`` and
          ``weighted`` averaging (None when not computed);
        - ``confusion_matrix``: nested list, or None when it could not be built;
        - ``cm_metrics_per_class``: ``{class: {'TP', 'FN', 'FP', 'TN'}}``.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Weight every sample by 1 / (number of samples of its true class).
    unique_labels, counts = np.unique(y_true, return_counts=True)
    class_weight_map = dict(zip(unique_labels, 1.0 / counts))
    sample_weights = np.array([class_weight_map.get(label, 0.0) for label in y_true])

    if np.sum(sample_weights) > 0:
        acc_weighted_sample = accuracy_score(y_true, y_pred, sample_weight=sample_weights)
    else:
        print("Warning: Cannot calculate sample-weighted accuracy, sum of sample weights is zero.")
        acc_weighted_sample = np.nan

    observed_labels = np.unique(np.concatenate((y_true, y_pred)))

    # Averaged metrics default to NaN and are only computed when at least two
    # distinct labels appear across y_true and y_pred.
    averaged = {
        f"{metric}_{average}": np.nan
        for average in ("macro", "weighted")
        for metric in ("precision", "recall", "f1")
    }
    if len(observed_labels) >= 2:
        try:
            for average in ("macro", "weighted"):
                averaged[f"precision_{average}"] = precision_score(
                    y_true, y_pred, average=average, zero_division=0)
                averaged[f"recall_{average}"] = recall_score(
                    y_true, y_pred, average=average, zero_division=0)
                averaged[f"f1_{average}"] = f1_score(
                    y_true, y_pred, average=average, zero_division=0)
        except ValueError as e:
            # Metrics computed before the failure are kept; the rest stay NaN.
            print(f"Warning: Could not calculate average metrics (macro/weighted) due to ValueError: {e}")

    # Both alternatives are already sorted (arange / unique).
    all_possible_labels = np.arange(num_classes) if num_classes is not None else observed_labels

    try:
        cm = confusion_matrix(y_true, y_pred, labels=all_possible_labels)
    except ValueError as e:
        print(f"Error calculating confusion matrix: {e}")
        print(f"y_true unique: {np.unique(y_true)}")
        print(f"y_pred unique: {np.unique(y_pred)}")
        print(f"all_possible_labels: {all_possible_labels}")
        cm = np.array([])

    has_cm = cm.ndim == 2
    n_classes_cm = cm.shape[0] if has_cm else 0
    cm_metrics_per_class = {}

    if n_classes_cm > 0:
        if class_names is not None and len(class_names) == n_classes_cm:
            current_class_names = class_names
        else:
            current_class_names = [f'class_{i}' for i in range(n_classes_cm)]

        # One-vs-rest counts for every class, derived from the matrix margins.
        true_pos = np.diag(cm)
        false_neg = cm.sum(axis=1) - true_pos
        false_pos = cm.sum(axis=0) - true_pos
        true_neg = cm.sum() - true_pos - false_neg - false_pos

        for i, class_label in enumerate(current_class_names):
            cm_metrics_per_class[class_label] = {
                'TP': int(true_pos[i]),
                'FN': int(false_neg[i]),
                'FP': int(false_pos[i]),
                'TN': int(true_neg[i]),
            }

    # NaN is detected by value (np.isnan), not by identity with np.nan, so a
    # computed NaN is reported as None as well.
    return {
        "accuracy": _nan_to_none(acc_weighted_sample),
        "precision_macro": _nan_to_none(averaged["precision_macro"]),
        "recall_macro": _nan_to_none(averaged["recall_macro"]),
        "f1_macro": _nan_to_none(averaged["f1_macro"]),
        "precision_weighted": _nan_to_none(averaged["precision_weighted"]),
        "recall_weighted": _nan_to_none(averaged["recall_weighted"]),
        "f1_weighted": _nan_to_none(averaged["f1_weighted"]),
        "confusion_matrix": cm.tolist() if has_cm else None,
        "cm_metrics_per_class": cm_metrics_per_class
    }



# Feature extractors built from ViT checkpoints, keyed by a short display name.
# Uncomment an entry to load an additional backbone.
_vit_checkpoints = {
    "DINO": "facebook/dino-vitb8",
    # "ViT-Base": "google/vit-base-patch16-224",
    # "ViT-Large": "google/vit-large-patch16-224",
}
vit_extractors = {
    display_name: FeatureExtractor(checkpoint)
    for display_name, checkpoint in _vit_checkpoints.items()
}

# Classifiers to evaluate, stored as class objects (not instances) keyed by a
# short display name. Uncomment an entry to include that classifier.
classifiers_map = dict(
    # SVM=SVC,
    # MLP=MLPClassifier,
    # RandomForest=RandomForestClassifier,
    # KNN=KNeighborsClassifier,
    EnergyBasedFlow=EnergyBasedFlowClassifier,
)

# --- 1. Extract features for the full Train and Test sets ---
print("\n" + "="*60)
print("--- Initial Feature Extraction (Full Train and Test) ---")
print("="*60)

# extractor name -> {'X_train_full', 'y_train_full', 'X_test_full', 'y_test_full'}
feature_data = {}

# The loaders do not depend on the extractor, so they are built once up front.
# This also guarantees the names exist for the `del` below even when no
# extractor is configured. shuffle=False keeps the sample order fixed.
full_train_loader_for_extraction = DataLoader(train_dataset, batch_size=64, shuffle=False)
test_loader_for_extraction = DataLoader(test_dataset, batch_size=64, shuffle=False)

for extractor_name, extractor in vit_extractors.items():
    print(f"\nUsing extractor: {extractor_name}")

    X_train_full, y_train_full = extractor.extract_features(full_train_loader_for_extraction)
    X_test_full, y_test_full = extractor.extract_features(test_loader_for_extraction)

    feature_data[extractor_name] = {
        'X_train_full': X_train_full,
        'y_train_full': y_train_full,
        'X_test_full': X_test_full,
        'y_test_full': y_test_full,
    }
    print(f"Extracted features shape (Train): {X_train_full.shape}")
    print(f"Extracted features shape (Test): {X_test_full.shape}")


# Drop the loaders and release cached GPU memory now that extraction is done.
del full_train_loader_for_extraction, test_loader_for_extraction
torch.cuda.empty_cache()


# --- 2. K-Fold: cross-validation on the training data ---
print("\n" + "="*60)
print("--- Starting K-Fold Cross-Validation on Training Data ---")
print("="*60)

# One record per (extractor, fold, classifier, params): either with 'metrics'
# on success or with 'error' (the exception text) on failure.
kfold_results = []

for extractor_name, data in feature_data.items():
    X_train_full = data['X_train_full']
    y_train_full = data['y_train_full']

    print(f"\n--- K-Fold for Extractor: {extractor_name} ---")

    kf_split_indices = list(kf.split(X_train_full))

    for fold_idx, (train_indices_fold, val_indices_fold) in enumerate(kf_split_indices):
        print(f"\nProcessing Fold {fold_idx + 1}/{k_folds}")

        X_train_fold = X_train_full[train_indices_fold]
        y_train_fold = y_train_full[train_indices_fold]
        X_val_fold = X_train_full[val_indices_fold]
        y_val_fold = y_train_full[val_indices_fold]

        for clf_name, clf_class in classifiers_map.items():
            # Grids are keyed by the class name, not by the display name.
            clf_hyperparameters = hyperparameters.get(clf_class.__name__, [])

            # Classifiers without a configured grid are skipped.
            if not clf_hyperparameters:
                continue

            for params in tqdm(clf_hyperparameters, desc=f"  Fold {fold_idx+1} {clf_name} Params"):
                fold_record = {
                    'extractor': extractor_name,
                    'fold_idx': fold_idx + 1,
                    'classifier': clf_name,
                    'params': params,
                }
                try:
                    clf = clf_class(**params)
                    clf.fit(X_train_fold, y_train_fold)
                    y_pred_fold = clf.predict(X_val_fold)

                    metrics = evaluate_metrics(y_val_fold, y_pred_fold, class_names=class_names, num_classes=num_classes)
                except Exception as e:
                    # A failing combination must not abort the whole sweep:
                    # report it and keep the error text in the record.
                    print(f"\nError training/predicting {clf_name} with params {params} in fold {fold_idx+1}: {e}")
                    fold_record['error'] = str(e)
                else:
                    fold_record['metrics'] = metrics

                kfold_results.append(fold_record)

print("\n--- K-Fold Cross-Validation Finished ---")
print(f"Total K-Fold results stored (including errors): {len(kfold_results)}")


# --- 3. Analyze the K-Fold results and select the best hyperparameters ---
print("\n" + "="*60)
print("--- Analyzing K-Fold Results and Selecting Best Models ---")
print("="*60)

# (extractor, classifier, params_key) -> metric name -> list of per-fold values
aggregated_kfold_metrics = defaultdict(lambda: defaultdict(list))

# Serialized params key -> the original params dict it was built from
params_key_to_dict = {}

# Entries of the metrics dict that are not scalar and therefore not averaged.
_non_scalar_metric_names = ('confusion_matrix', 'cm_metrics_per_class')

for result in kfold_results:
    # Failed runs carry no metrics.
    if 'error' in result:
        continue

    extractor, classifier = result['extractor'], result['classifier']

    # Params dicts are not hashable; use a canonical JSON string as the key
    # and fall back to str() when a value is not JSON-serializable.
    try:
        params_key = json.dumps(result['params'], sort_keys=True)
    except TypeError:
        params_key = str(result['params'])
    params_key_to_dict[params_key] = result['params']

    combination = (extractor, classifier, params_key)
    for metric_name, metric_value in result['metrics'].items():
        if metric_name in _non_scalar_metric_names:
            continue
        # Only real numbers are collected; None (metric unavailable) is dropped.
        if isinstance(metric_value, (int, float)):
            aggregated_kfold_metrics[combination][metric_name].append(metric_value)

# (extractor, classifier, params_key) -> {metric name: mean over folds}
average_kfold_metrics = {}
# (extractor, classifier) -> {'params': best params dict, 'avg_score': its mean score}
best_params_per_clf_extractor = {}

selection_metric = 'accuracy'

print(f"Selecting best hyperparameters based on average '{selection_metric}' across folds.")

for combination, metrics_list_dict in aggregated_kfold_metrics.items():
    extractor, classifier, params_key = combination

    # NaN-aware mean of every collected metric.
    average_metrics = {}
    for metric_name, metric_values in metrics_list_dict.items():
        average_metrics[metric_name] = np.nanmean(metric_values) if metric_values else np.nan
    average_kfold_metrics[combination] = average_metrics

    current_combination_score = average_metrics.get(selection_metric, np.nan)
    current_best_info = best_params_per_clf_extractor.get((extractor, classifier))

    # Take this combination when there is no incumbent, when the incumbent's
    # score is NaN, or when this score is a valid number strictly above it.
    if current_best_info is None:
        is_new_best = True
    else:
        incumbent_score = current_best_info.get('avg_score', np.nan)
        is_new_best = np.isnan(incumbent_score) or (
            not np.isnan(current_combination_score)
            and current_combination_score > incumbent_score
        )

    if is_new_best:
        best_params_per_clf_extractor[(extractor, classifier)] = {
            'params': params_key_to_dict.get(params_key, {}),
            'avg_score': current_combination_score
        }


print("\n--- K-Fold Average Metrics Summary (Best per Classifier/Extractor Combo) ---")


def _kfold_ranking_score(item):
    """Sort key for (key, best_info) pairs: the average score, with NaN/missing ranked last."""
    score = item[1].get('avg_score', -float('inf'))
    return -float('inf') if np.isnan(score) else score


# Best combination per (extractor, classifier), highest average score first.
sorted_best_kfold = sorted(
    best_params_per_clf_extractor.items(),
    key=_kfold_ranking_score,
    reverse=True,
)

for (extractor, classifier), best_info in sorted_best_kfold:
    best_params, avg_score = best_info['params'], best_info['avg_score']

    # Rebuild the same key that was used when the averages were stored.
    try:
        best_params_key = json.dumps(best_params, sort_keys=True)
    except TypeError:
        best_params_key = str(best_params)

    full_avg_metrics = average_kfold_metrics.get((extractor, classifier, best_params_key), {})

    print(f"\nBest K-Fold Result for Extractor: {extractor}, Classifier: {classifier}")
    print(f"  Selected Params: {best_params}")
    if np.isnan(avg_score):
        print("  Avg Accuracy (Sample Weighted): N/A (all folds failed or metric NaN)")
    else:
        print(f"  Avg Accuracy (Sample Weighted): {avg_score:.4f}")

    # One line per averaging family; metrics that are missing print as nan.
    for family_label, family_suffix in (("Macro", "macro"), ("Weighted", "weighted")):
        avg_precision = full_avg_metrics.get(f'precision_{family_suffix}', np.nan)
        avg_recall = full_avg_metrics.get(f'recall_{family_suffix}', np.nan)
        avg_f1 = full_avg_metrics.get(f'f1_{family_suffix}', np.nan)
        print(f"  Avg Metrics ({family_label}): Precision={avg_precision:.4f}, Recall={avg_recall:.4f}, F1={avg_f1:.4f}")


# --- 4. Final evaluation on the Test set using the best hyperparameters ---
print("\n" + "="*60)
print("--- Starting Final Evaluation on Test Set (Using Selected Best Models) ---")
print("="*60)

# One record per (extractor, classifier): metrics and predictions on success,
# or an 'error' text when the model was skipped or failed.
final_test_results = []

for extractor_name, data in feature_data.items():
    X_train_full, y_train_full = data['X_train_full'], data['y_train_full']
    X_test_full, y_test_full = data['X_test_full'], data['y_test_full']

    print(f"\n--- Testing with Extractor: {extractor_name} ---")

    for clf_name, clf_class in classifiers_map.items():
        best_info = best_params_per_clf_extractor.get((extractor_name, clf_name))

        # A selection is usable only when it exists, has params and a non-NaN score.
        has_usable_selection = (
            best_info is not None
            and best_info.get('params') is not None
            and not np.isnan(best_info.get('avg_score', np.nan))
        )

        if not has_usable_selection:
            print(f"  No valid best parameters found for {clf_name} with {extractor_name} (or avg K-Fold score was NaN). Skipping final test.")

            if best_info:
                skipped_params = best_info.get('params', None)
                skip_reason = f"Skipped: No valid best parameters found from K-Fold (avg {selection_metric}={best_info.get('avg_score', np.nan)})"
            else:
                skipped_params = None
                skip_reason = "Skipped: No best parameters found"

            final_test_results.append({
                'extractor': extractor_name,
                'classifier': clf_name,
                'params': skipped_params,
                'error': skip_reason
            })
            continue

        params = best_info['params']
        print(f"  Testing {clf_name} with Selected Best Params: {params}")

        test_record = {
            'extractor': extractor_name,
            'classifier': clf_name,
            'params': params,
        }
        try:
            # Refit on the full training set, then score once on the test set.
            clf = clf_class(**params)
            clf.fit(X_train_full, y_train_full)

            y_pred_test = clf.predict(X_test_full)

            test_metrics = evaluate_metrics(y_test_full, y_pred_test, class_names=class_names, num_classes=num_classes)

            test_record.update(
                metrics=test_metrics,
                y_true=y_test_full.tolist(),
                y_pred=y_pred_test.tolist(),
            )
        except Exception as e:
            print(f"\nError training/predicting selected {clf_name} with params {params} on test set: {e}")
            test_record['error'] = str(e)

        final_test_results.append(test_record)

del X_train_full, y_train_full
torch.cuda.empty_cache()

print("\n--- Final Evaluation on Test Set Finished ---")
print(f"Total Final Test results stored (selected models): {len(final_test_results)}")


# --- 5. Display the final results ---
print("\n" + "="*60)
print("--- Summarizing Final Test Results (Selected Models) ---")
print("="*60)

# (metrics key, printed label) pairs, in display order.
_summary_metric_rows = (
    ('accuracy', 'Accuracy (Sample Weighted)'),
    ('precision_macro', 'Precision (macro)'),
    ('recall_macro', 'Recall (macro)'),
    ('f1_macro', 'F1-Score (macro)'),
    ('precision_weighted', 'Precision (weighted)'),
    ('recall_weighted', 'Recall (weighted)'),
    ('f1_weighted', 'F1-Score (weighted)'),
)


def _summary_class_order(label):
    """Sort key: the label's position in class_names when known, else the label itself."""
    if class_names and label in class_names:
        return class_names.index(label)
    return label


if not final_test_results:
    print("No final test results to display.")

for result in final_test_results:
    print("\n" + "=" * 60)
    for heading, field in (
        ("Extractor", 'extractor'),
        ("Classifier", 'classifier'),
        ("Parameters (Selected by K-Fold)", 'params'),
    ):
        print(f"{heading}: {result.get(field, 'N/A')}")
    print("-" * 60)

    # Skipped or failed runs only have an error text to show.
    if 'error' in result:
        print(f"Error during final test: {result['error']}")
        continue

    metrics = result['metrics']

    print("  Metrics on Test Set:")
    for metric_key, metric_label in _summary_metric_rows:
        metric_value = metrics.get(metric_key)
        if metric_value is None:
            print(f"    {metric_label}: N/A")
        else:
            print(f"    {metric_label}: {metric_value:.4f}")

    cm_list = metrics.get('confusion_matrix')
    if cm_list is None:
        print("\n  Confusion Matrix data not available.")
    elif class_names is None:
        print("\n  Confusion Matrix (cannot plot without class names or CM data):")
        print(np.array(cm_list) if cm_list else "N/A")
    else:
        # Figure size grows with the number of classes, with a floor of 6.
        fig_size = max(6, num_classes * 0.7)
        plt.figure(figsize=(fig_size, fig_size * 0.8))

        sns.heatmap(np.array(cm_list),
                    annot=True, fmt='d', cmap='Blues',
                    xticklabels=class_names, yticklabels=class_names)
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')

        # Keep the params part of the title to at most 50 characters.
        params_str_title = ", ".join(
            f"{k}={v}" for k, v in result.get('params', {}).items()
        )
        if len(params_str_title) > 50:
            params_str_title = params_str_title[:47] + "..."

        plt.title(f'Confusion Matrix: {result["classifier"]} ({result["extractor"]})\nParams: {params_str_title}')
        plt.tight_layout()
        plt.show()

    cm_metrics_per_class = metrics.get('cm_metrics_per_class')
    if not cm_metrics_per_class:
        print("\n  Per-Class metrics not available.")
    else:
        print("\n  Per-Class TP/TN/FP/FN on Test Set:")
        for class_label in sorted(cm_metrics_per_class.keys(), key=_summary_class_order):
            cm_vals = cm_metrics_per_class.get(class_label, {})
            tp, tn, fp, fn = (cm_vals.get(count_name, 'N/A') for count_name in ('TP', 'TN', 'FP', 'FN'))
            print(f"    {class_label}: TP={tp}, TN={tn}, FP={fp}, FN={fn}")

    print("=" * 60)

print("\n--- All Final Test Results Displayed ---")

# --- 6. Select and display the overall best result ---
print("\n" + "="*60)
print("--- Overall Best Result on Test Set ---")
print("="*60)

best_result_entry = None
highest_sample_weighted_accuracy = -1.0

# Keep the first result with the strictly highest valid accuracy.
for result in final_test_results:
    if 'error' in result or result.get('metrics') is None:
        continue

    current_accuracy = result['metrics'].get('accuracy')
    if current_accuracy is None or np.isnan(current_accuracy):
        continue

    if best_result_entry is None or current_accuracy > highest_sample_weighted_accuracy:
        highest_sample_weighted_accuracy = current_accuracy
        best_result_entry = result

if not best_result_entry:
    print("Could not determine the overall best result (perhaps all runs failed or had no valid sample-weighted accuracy metric).")
else:
    print(f"\nOverall Best Model configuration based on Test Set Sample-Weighted Accuracy ({highest_sample_weighted_accuracy:.4f}):")
    print("-" * 60)
    print(f"Extractor: {best_result_entry['extractor']}")
    print(f"Classifier: {best_result_entry['classifier']}")
    print(f"Parameters (Selected by K-Fold): {best_result_entry['params']}")
    print("-" * 60)
    print("Metrics on Test Set for the Best Model:")

    metrics = best_result_entry['metrics']

    # (metrics key, printed label) pairs, in display order.
    for best_metric_key, best_metric_label in (
        ('accuracy', 'Accuracy (Sample Weighted)'),
        ('precision_macro', 'Precision (macro)'),
        ('recall_macro', 'Recall (macro)'),
        ('f1_macro', 'F1-Score (macro)'),
        ('precision_weighted', 'Precision (weighted)'),
        ('recall_weighted', 'Recall (weighted)'),
        ('f1_weighted', 'F1-Score (weighted)'),
    ):
        best_metric_value = metrics.get(best_metric_key)
        if best_metric_value is None:
            print(f"  {best_metric_label}: N/A")
        else:
            print(f"  {best_metric_label}: {best_metric_value:.4f}")

    cm_list = metrics.get('confusion_matrix')
    if cm_list is None:
        print("\n  Confusion Matrix data not available for the best model.")
    elif class_names is None:
        print("\n  Confusion Matrix (numerical data for best model):")
        print(np.array(cm_list) if cm_list else "N/A")
    else:
        # Figure size grows with the number of classes, with a floor of 6.
        fig_size = max(6, num_classes * 0.7)
        plt.figure(figsize=(fig_size, fig_size * 0.8))

        sns.heatmap(np.array(cm_list),
                    annot=True, fmt='d', cmap='Blues',
                    xticklabels=class_names, yticklabels=class_names)
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')

        # Keep the params part of the title to at most 50 characters.
        params_str_title = ", ".join(
            f"{k}={v}" for k, v in best_result_entry.get('params', {}).items()
        )
        if len(params_str_title) > 50:
            params_str_title = params_str_title[:47] + "..."

        plt.title(f'Confusion Matrix: {best_result_entry["classifier"]} ({best_result_entry["extractor"]})\nParams: {params_str_title}\nTest Set (Best Model)')
        plt.tight_layout()
        plt.show()

print("="*60)


# --- 7. Save the results to a text file ---
_save_banner = "=" * 60
print("\n" + _save_banner)
print("--- Saving Results to File ---")
print(_save_banner)

# Name of the text report that will be written.
output_filename = "evaluation_results.txt"

try:
    with open(output_filename, "w") as f:
        f.write("="*80 + "\n")
        f.write("      ViT + Traditional Classifier Evaluation Results\n")
        f.write("="*80 + "\n\n")

        f.write("="*60 + "\n")
        f.write("--- K-Fold Cross-Validation Summary (Best Params per Combo) ---\n")
        f.write("="*60 + "\n\n")

        if not sorted_best_kfold:
            f.write("No K-Fold results available.\n\n")
        else:
            for (extractor, classifier), best_info in sorted_best_kfold:
                best_params = best_info['params']
                avg_score = best_info['avg_score']

                try:
                    best_params_key = json.dumps(best_params, sort_keys=True)
                except TypeError:
                    best_params_key = str(best_params)

                full_avg_metrics = average_kfold_metrics.get((extractor, classifier, best_params_key), {})

                f.write(f"Extractor: {extractor}\n")
                f.write(f"Classifier: {classifier}\n")
                f.write(f"  Selected Params: {best_params}\n")
                f.write(f"  Avg Accuracy (Sample Weighted): {avg_score:.4f}\n" if not np.isnan(avg_score) else f"  Avg Accuracy (Sample Weighted): N/A\n")

                f.write(f"  Avg Metrics (Macro): Precision={full_avg_metrics.get('precision_macro', np.nan):.4f}, Recall={full_avg_metrics.get('recall_macro', np.nan):.4f}, F1={full_avg_metrics.get('f1_macro', np.nan):.4f}\n")
                f.write(f"  Avg Metrics (Weighted): Precision={full_avg_metrics.get('precision_weighted', np.nan):.4f}, Recall={full_avg_metrics.get('recall_weighted', np.nan):.4f}, F1={full_avg_metrics.get('f1_weighted', np.nan):.4f}\n")
                f.write("-" * 30 + "\n")
            f.write("\n")


        f.write("="*60 + "\n")
        f.write("--- Final Evaluation Results on Test Set ---\n")
        f.write("="*60 + "\n\n")

        if not final_test_results:
            f.write("No final test results available.\n\n")
        else:
            for i, result in enumerate(final_test_results):
                f.write(f"Result {i+1}:\n")
                f.write(f"  Extractor: {result.get('extractor', 'N/A')}\n")
                f.write(f"  Classifier: {result.get('classifier', 'N/A')}\n")
                f.write(f"  Parameters: {result.get('params', 'N/A')}\n")

                if 'error' in result:
                    f.write(f"  Status: ERROR - {result['error']}\n")
                else:
                    metrics = result['metrics']
                    f.write("  Status: Success\n")
                    f.write("  Metrics on Test Set:\n")
                    acc_weighted_sample = metrics.get('accuracy')
                    prec_m = metrics.get('precision_macro')
                    rec_m = metrics.get('recall_macro')
                    f1_m = metrics.get('f1_macro')
                    prec_w = metrics.get('precision_weighted')
                    rec_w = metrics.get('recall_weighted')
                    f1_w = metrics.get('f1_weighted')

                    f.write(f"    Accuracy (Sample Weighted): {acc_weighted_sample:.4f}\n" if acc_weighted_sample is not None else "    Accuracy (Sample Weighted): N/A\n")
                    f.write(f"    Precision (macro): {prec_m:.4f}\n" if prec_m is not None else "    Precision (macro): N/A\n")
                    f.write(f"    Recall (macro): {rec_m:.4f}\n" if rec_m is not None else "    Recall (macro): N/A\n")
                    f.write(f"    F1-Score (macro): {f1_m:.4f}\n" if f1_m is not None else "    F1-Score (macro): N/A\n")
                    f.write(f"    Precision (weighted): {prec_w:.4f}\n" if prec_w is not None else "    Precision (weighted): N/A\n")
                    f.write(f"    Recall (weighted): {rec_w:.4f}\n" if rec_w is not None else "    Recall (weighted): N/A\n")
                    f.write(f"    F1-Score (weighted): {f1_w:.4f}\n" if f1_w is not None else "    F1-Score (weighted): N/A\n")


                    cm_list = metrics.get('confusion_matrix')
                    if cm_list is not None:
                        f.write("\n  Confusion Matrix (Rows: True Label, Columns: Predicted Label):\n")
                        cm_np = np.array(cm_list)
                        f.write(np.array2string(cm_np, separator=', ', threshold=np.inf, precision=0, suppress_small=True) + "\n")
                        if class_names is not None:
                             f.write("  Labels (in order): " + ", ".join(class_names) + "\n")
                        else:
                             f.write("  Labels: N/A (Class names not available)\n")
                        f.write("  (Plots are generated interactively or saved separately)\n")
                    else:
                        f.write("\n  Confusion Matrix data not available.\n")


                    # Per-class TP/TN/FP/FN section: one line per class label.
                    cm_metrics_per_class = metrics.get('cm_metrics_per_class')
                    if cm_metrics_per_class:
                        f.write("\n  Per-Class TP/TN/FP/FN on Test Set:\n")
                        # Labels found in `class_names` come first, in `class_names` order;
                        # any other label follows, ordered by its own value. The tuple key
                        # keeps the two groups apart so an index (int) is never compared
                        # against a raw label (e.g. str), which would raise TypeError and
                        # abort the whole report.
                        sorted_class_labels = sorted(
                            cm_metrics_per_class,
                            key=lambda label: (
                                (0, class_names.index(label))
                                if class_names and label in class_names
                                else (1, label)
                            ),
                        )
                        for class_label in sorted_class_labels:
                            cm_vals = cm_metrics_per_class.get(class_label, {})
                            # Missing counters are reported as 'N/A' rather than failing.
                            tp = cm_vals.get('TP', 'N/A')
                            tn = cm_vals.get('TN', 'N/A')
                            fp = cm_vals.get('FP', 'N/A')
                            fn = cm_vals.get('FN', 'N/A')
                            f.write(f"    {class_label}: TP={tp}, TN={tn}, FP={fp}, FN={fn}\n")
                    else:
                        # Entry missing, None, or empty.
                        f.write("\n  Per-Class metrics not available.\n")

                f.write("-" * 40 + "\n\n")

        f.write("="*60 + "\n")
        f.write("--- Overall Best Result on Test Set (by Sample-Weighted Accuracy) ---\n")
        f.write("="*60 + "\n\n")

        # Overall-best section: describe the winning extractor/classifier/params and
        # repeat its full test-set metrics, or note that no winner could be determined.
        if best_result_entry:
            f.write(f"Overall Best Model configuration based on Test Set Sample-Weighted Accuracy ({highest_sample_weighted_accuracy:.4f}):\n")
            f.write("-" * 60 + "\n")
            f.write(f"Extractor: {best_result_entry['extractor']}\n")
            f.write(f"Classifier: {best_result_entry['classifier']}\n")
            f.write(f"Parameters (Selected by K-Fold): {best_result_entry['params']}\n")
            f.write("-" * 60 + "\n")
            f.write("Metrics on Test Set for the Best Model:\n")

            # dict.get() returns None for any metric that was not recorded.
            metrics = best_result_entry['metrics']
            acc_weighted_sample = metrics.get('accuracy')
            prec_m = metrics.get('precision_macro')
            rec_m = metrics.get('recall_macro')
            f1_m = metrics.get('f1_macro')
            prec_w = metrics.get('precision_weighted')
            rec_w = metrics.get('recall_weighted')
            f1_w = metrics.get('f1_weighted')

            # One line per scalar metric: four decimals when present, "N/A" when None.
            f.write(f"  Accuracy (Sample Weighted): {acc_weighted_sample:.4f}\n" if acc_weighted_sample is not None else "  Accuracy (Sample Weighted): N/A\n")
            f.write(f"  Precision (macro): {prec_m:.4f}\n" if prec_m is not None else "  Precision (macro): N/A\n")
            f.write(f"  Recall (macro): {rec_m:.4f}\n" if rec_m is not None else "  Recall (macro): N/A\n")
            f.write(f"  F1-Score (macro): {f1_m:.4f}\n" if f1_m is not None else "  F1-Score (macro): N/A\n")
            f.write(f"  Precision (weighted): {prec_w:.4f}\n" if prec_w is not None else "  Precision (weighted): N/A\n")
            f.write(f"  Recall (weighted): {rec_w:.4f}\n" if rec_w is not None else "  Recall (weighted): N/A\n")
            f.write(f"  F1-Score (weighted): {f1_w:.4f}\n" if f1_w is not None else "  F1-Score (weighted): N/A\n")

            # Confusion matrix dump followed by the label order.
            cm_list = metrics.get('confusion_matrix')
            if cm_list is not None:
                f.write("\n  Confusion Matrix (Rows: True Label, Columns: Predicted Label):\n")
                cm_np = np.array(cm_list)
                # threshold=np.inf keeps NumPy from summarizing a large matrix with "...".
                f.write(np.array2string(cm_np, separator=', ', threshold=np.inf, precision=0, suppress_small=True) + "\n")
                if class_names is not None:
                    f.write("  Labels (in order): " + ", ".join(class_names) + "\n")
                else:
                    f.write("  Labels: N/A (Class names not available)\n")
                f.write("  (Plot generated interactively or saved separately)\n")
            else:
                f.write("\n  Confusion Matrix data not available.\n")


            # Per-class TP/TN/FP/FN counts.
            cm_metrics_per_class = metrics.get('cm_metrics_per_class')
            if cm_metrics_per_class:
                f.write("\n  Per-Class TP/TN/FP/FN on Test Set:\n")
                # Labels found in `class_names` come first, in `class_names` order;
                # any other label follows, ordered by its own value. The tuple key
                # keeps the two groups apart so an index (int) is never compared
                # against a raw label (e.g. str), which would raise TypeError and
                # abort the whole report.
                sorted_class_labels = sorted(
                    cm_metrics_per_class,
                    key=lambda label: (
                        (0, class_names.index(label))
                        if class_names and label in class_names
                        else (1, label)
                    ),
                )
                for class_label in sorted_class_labels:
                    cm_vals = cm_metrics_per_class.get(class_label, {})
                    # Missing counters are reported as 'N/A' rather than failing.
                    tp = cm_vals.get('TP', 'N/A')
                    tn = cm_vals.get('TN', 'N/A')
                    fp = cm_vals.get('FP', 'N/A')
                    fn = cm_vals.get('FN', 'N/A')
                    f.write(f"    {class_label}: TP={tp}, TN={tn}, FP={fp}, FN={fn}\n")
            else:
                f.write("\n  Per-Class metrics not available.\n")

        else:
            f.write("Overall best result could not be determined.\n")

        f.write("\n" + "="*80 + "\n")
        f.write("              End of Results\n")
        f.write("="*80 + "\n")


    print(f"Results successfully saved to {output_filename}")

except Exception as e:
    print(f"\nError saving results to file {output_filename}: {e}")

Usando o dispositivo: cuda
Downloading dataset...
Train dataset size: 2073
Test dataset size: 518
Classes: ['Anabaena', 'Aphanizomenon', 'Cylindrospermopsis', 'Dolichospermum', 'Microcystis', 'Nostoc', 'Oscillatoria', 'Phormidium', 'Planktothrix', 'Raphidiopsis']
Number of classes: 10


Some weights of ViTModel were not initialized from the model checkpoint at facebook/dino-vitb8 and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loaded facebook/dino-vitb8 as AutoModel.
Feature extractor model loaded and set to evaluation mode.

--- Initial Feature Extraction (Full Train and Test) ---

Using extractor: DINO
Extracting features for 2073 samples...


Extracting Features: 100%|██████████| 33/33 [00:23<00:00,  1.40it/s]


Extracting features for 518 samples...


Extracting Features: 100%|██████████| 9/9 [00:05<00:00,  1.57it/s]


Extracted features shape (Train): (2073, 768)
Extracted features shape (Test): (518, 768)

--- Starting K-Fold Cross-Validation on Training Data ---

--- K-Fold for Extractor: DINO ---

Processing Fold 1/5


  Fold 1 EnergyBasedFlow Params: 100%|██████████| 3/3 [00:00<00:00, 26829.24it/s]



Error training/predicting EnergyBasedFlow with params {'n_bins': 30, 'cutoff_quantile': 0.95, 'random_state': 42} in fold 1: EnergyBasedFlowClassifier.__init__() got an unexpected keyword argument 'random_state'

Error training/predicting EnergyBasedFlow with params {'n_bins': 50, 'cutoff_quantile': 0.97, 'random_state': 42} in fold 1: EnergyBasedFlowClassifier.__init__() got an unexpected keyword argument 'random_state'

Error training/predicting EnergyBasedFlow with params {'n_bins': 100, 'cutoff_quantile': 0.99, 'random_state': 42} in fold 1: EnergyBasedFlowClassifier.__init__() got an unexpected keyword argument 'random_state'

Processing Fold 2/5


  Fold 2 EnergyBasedFlow Params: 100%|██████████| 3/3 [00:00<00:00, 18950.17it/s]



Error training/predicting EnergyBasedFlow with params {'n_bins': 30, 'cutoff_quantile': 0.95, 'random_state': 42} in fold 2: EnergyBasedFlowClassifier.__init__() got an unexpected keyword argument 'random_state'

Error training/predicting EnergyBasedFlow with params {'n_bins': 50, 'cutoff_quantile': 0.97, 'random_state': 42} in fold 2: EnergyBasedFlowClassifier.__init__() got an unexpected keyword argument 'random_state'

Error training/predicting EnergyBasedFlow with params {'n_bins': 100, 'cutoff_quantile': 0.99, 'random_state': 42} in fold 2: EnergyBasedFlowClassifier.__init__() got an unexpected keyword argument 'random_state'

Processing Fold 3/5


  Fold 3 EnergyBasedFlow Params: 100%|██████████| 3/3 [00:00<00:00, 31378.83it/s]



Error training/predicting EnergyBasedFlow with params {'n_bins': 30, 'cutoff_quantile': 0.95, 'random_state': 42} in fold 3: EnergyBasedFlowClassifier.__init__() got an unexpected keyword argument 'random_state'

Error training/predicting EnergyBasedFlow with params {'n_bins': 50, 'cutoff_quantile': 0.97, 'random_state': 42} in fold 3: EnergyBasedFlowClassifier.__init__() got an unexpected keyword argument 'random_state'

Error training/predicting EnergyBasedFlow with params {'n_bins': 100, 'cutoff_quantile': 0.99, 'random_state': 42} in fold 3: EnergyBasedFlowClassifier.__init__() got an unexpected keyword argument 'random_state'

Processing Fold 4/5


  Fold 4 EnergyBasedFlow Params: 100%|██████████| 3/3 [00:00<00:00, 36261.99it/s]



Error training/predicting EnergyBasedFlow with params {'n_bins': 30, 'cutoff_quantile': 0.95, 'random_state': 42} in fold 4: EnergyBasedFlowClassifier.__init__() got an unexpected keyword argument 'random_state'

Error training/predicting EnergyBasedFlow with params {'n_bins': 50, 'cutoff_quantile': 0.97, 'random_state': 42} in fold 4: EnergyBasedFlowClassifier.__init__() got an unexpected keyword argument 'random_state'

Error training/predicting EnergyBasedFlow with params {'n_bins': 100, 'cutoff_quantile': 0.99, 'random_state': 42} in fold 4: EnergyBasedFlowClassifier.__init__() got an unexpected keyword argument 'random_state'

Processing Fold 5/5


  Fold 5 EnergyBasedFlow Params: 100%|██████████| 3/3 [00:00<00:00, 33734.35it/s]


Error training/predicting EnergyBasedFlow with params {'n_bins': 30, 'cutoff_quantile': 0.95, 'random_state': 42} in fold 5: EnergyBasedFlowClassifier.__init__() got an unexpected keyword argument 'random_state'

Error training/predicting EnergyBasedFlow with params {'n_bins': 50, 'cutoff_quantile': 0.97, 'random_state': 42} in fold 5: EnergyBasedFlowClassifier.__init__() got an unexpected keyword argument 'random_state'

Error training/predicting EnergyBasedFlow with params {'n_bins': 100, 'cutoff_quantile': 0.99, 'random_state': 42} in fold 5: EnergyBasedFlowClassifier.__init__() got an unexpected keyword argument 'random_state'

--- K-Fold Cross-Validation Finished ---
Total K-Fold results stored (including errors): 15

--- Analyzing K-Fold Results and Selecting Best Models ---
Selecting best hyperparameters based on average 'accuracy' across folds.

--- K-Fold Average Metrics Summary (Best per Classifier/Extractor Combo) ---

--- Starting Final Evaluation on Test Set (Using Select


