In [None]:
# Print GPU timestamp, index, used/total memory and utilization as header-less CSV rows.
# NOTE: `-l 10` makes nvidia-smi repeat the query every 10 seconds until it is
# interrupted, so this cell keeps the kernel busy (and stalls "Run All") until stopped.
!nvidia-smi --query-gpu=timestamp,index,memory.used,memory.total,utilization.gpu --format=csv,noheader,nounits -l 10

In [None]:
import numpy as np
from argparse import ArgumentParser
from omegaconf import OmegaConf
import torch
import os
import glob

from torchvision.models import ResNet18_Weights, EfficientNet_V2_S_Weights
from torchvision import transforms
from torchvision.transforms import functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from sklearn.model_selection import KFold, StratifiedKFold
from torch.utils.data import Subset
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint
from lightning import Trainer, Callback
from lightning.pytorch.loggers import TensorBoardLogger

from scripts.utils import visualize_image, letterbox_to_square
from scripts.EfficentRex import EfficentRex
from scripts.RexNet import RexNet

In [2]:

def evaluate_model(model, dataloader, idx_to_class, device="cpu"):
    """Run ``model`` over ``dataloader`` and collect per-sample predictions.

    The model is switched to eval mode and moved to ``device``; gradients are
    disabled for the whole pass.

    Args:
        model: Callable module exposing ``eval()`` and ``to(device)``. Its
            output for a batch is soft-maxed over dimension 1.
        dataloader: Iterable yielding ``(x, y)`` tensor batches.
        idx_to_class: Mapping from integer class index to class name.
        device: Device on which the model and the batches are placed.

    Returns:
        dict with the keys
            ``scores``: softmax probability of the predicted class, per sample;
            ``predicted_names``: predicted class name, per sample;
            ``real_names``: ground-truth class name, per sample;
            ``correct``: ``1.0`` / ``0.0`` per sample;
            ``overall_accuracy``: fraction of correct predictions, or
            ``float("nan")`` when the dataloader yields no samples.
    """
    model.eval()
    model.to(device)

    results = {
        "scores": [],
        "predicted_names": [],
        "real_names": [],
        "correct": [],
        "overall_accuracy": None,
    }

    total_correct = 0
    total_samples = 0

    with torch.no_grad():
        for x, y in dataloader:
            x = x.to(device)
            y = y.to(device)

            probs = model(x).softmax(1)

            # One reduction over the class dimension yields both the winning
            # probability and its class index, on the same device as `probs`.
            score, predicted_class_id = probs.max(dim=1)

            # Per-image correctness, kept as floats (1.0 / 0.0).
            correct = (predicted_class_id == y).float()

            # `.tolist()` copies to host and converts to Python scalars in one step.
            results["scores"].extend(score.tolist())
            results["predicted_names"].extend(
                idx_to_class[int(c)] for c in predicted_class_id.tolist()
            )
            results["real_names"].extend(idx_to_class[int(c)] for c in y.tolist())
            results["correct"].extend(correct.tolist())

            total_correct += int(correct.sum().item())
            total_samples += len(y)

    # An empty dataloader has no defined accuracy; report NaN instead of
    # raising ZeroDivisionError so callers can still aggregate / format it.
    if total_samples:
        results["overall_accuracy"] = total_correct / total_samples
    else:
        results["overall_accuracy"] = float("nan")

    return results

def run_evaluation(config_path, model_class, ckpt_path, evaluate_fn, device="cpu",
                   root="./dataset/dataset"):
    """Load a checkpoint and evaluate it on the ``test`` split of an image-folder dataset.

    Args:
        config_path: Path of a YAML file readable by ``OmegaConf.load``. The
            keys ``model``, ``size`` and ``training.batch_size`` are read.
        model_class: Class exposing ``load_from_checkpoint``; the loaded model
            must expose a ``base_tfms`` transform.
        ckpt_path: Path of the checkpoint file to load.
        evaluate_fn: Called as
            ``evaluate_fn(model, test_loader, idx_to_class, device=device)``;
            its return value is passed through as ``results``.
        device: Device the checkpoint is mapped to and the model is moved to.
        root: Dataset root directory; images are read from ``<root>/test``.
            Defaults to the previously hard-coded location.

    Returns:
        Tuple ``(results, idx_to_class, model)``.
    """
    config = OmegaConf.load(config_path)

    model = model_class.load_from_checkpoint(
        ckpt_path,
        # Deserialize straight onto the requested device rather than wherever
        # the checkpoint was saved (e.g. a GPU that is busy or unavailable).
        map_location=device,
        config=config.model,
        strict=False
    )
    model.eval()
    model.to(device)

    test_tfms = transforms.Compose([
        # Force a 3-channel RGB image.
        transforms.Lambda(lambda im: im.convert("RGB")),
        transforms.Lambda(lambda im: letterbox_to_square(im, size=config.size, fill=0)),
        # NOTE(review): the crop is fixed at 224 regardless of config.size —
        # confirm this matches the input size the model expects.
        transforms.CenterCrop(224),
        # Model-provided transform, applied last.
        model.base_tfms,
    ])

    test_ds = datasets.ImageFolder(root=os.path.join(root, "test"), transform=test_tfms)

    # Invert ImageFolder's name -> index mapping for reporting.
    idx_to_class = {idx: class_ for class_, idx in test_ds.class_to_idx.items()}

    test_loader = DataLoader(
        test_ds,
        batch_size=config.training.batch_size,
        shuffle=False,
        num_workers=0
    )

    results = evaluate_fn(model, test_loader, idx_to_class, device=device)

    return results, idx_to_class, model



In [None]:
# Evaluate every cross-validation checkpoint on the test split and print, per
# network, the mean and sample standard deviation of the fold accuracies.
CV_ROOT = './models/cross_validation'

# Lower-cased network directory name -> class used to load its checkpoints.
MODEL_CLASSES = {
    'rexnet': RexNet,
    'efficentrex': EfficentRex,
}

# os.listdir / glob return entries in arbitrary order; sorting keeps the
# report order and the selected checkpoint stable across runs.
for experiment_dir in sorted(os.listdir(CV_ROOT)):
    exp_path = f'{CV_ROOT}/{experiment_dir}'
    if not os.path.isdir(exp_path):
        continue
    if experiment_dir.startswith('grid'):
        # Directories whose name starts with 'grid' are not evaluated by this loop.
        continue
    print('=========================================================')
    print('=========================================================')
    print(f'- Experiment: {experiment_dir}')

    for network_dir in sorted(os.listdir(exp_path)):
        net_path = f'{exp_path}/{network_dir}'
        if not os.path.isdir(net_path):
            continue

        print('=========================================================')
        print(f'    - Network: {network_dir}')

        # Model class and config depend only on the network, not on the fold,
        # so they are resolved once here instead of once per fold.
        network_key = network_dir.lower()
        model_class = MODEL_CLASSES.get(network_key)
        if model_class is None:
            print(f"           Unknown network type '{network_dir}', skipping.")
            continue
        config_path = f'config/config_{network_key}.yaml'

        fold_accuracies = []  # per-fold test accuracies for this network

        for fold_dir in sorted(os.listdir(net_path)):
            if not fold_dir.startswith('fold'):
                continue

            print('-----------------------------------------------------')
            print(f'         - Fold: {fold_dir}')

            fold_path = f'{net_path}/{fold_dir}'
            # If several checkpoints match, the lexicographically first one is used.
            ckpts = sorted(glob.glob(f"{fold_path}/best-epoch=*"))
            if not ckpts:
                print("           No checkpoint found, skipping this fold.")
                continue

            results, _, _ = run_evaluation(
                config_path,
                model_class,
                ckpts[0],
                evaluate_model,
                device="cpu"
            )

            fold_accuracies.append(float(results["overall_accuracy"]))

        # ---- After all folds for this network, compute mean/std ----
        if fold_accuracies:
            mean_acc = float(np.mean(fold_accuracies))
            # Sample standard deviation (ddof=1) needs at least two folds.
            std_acc = float(np.std(fold_accuracies, ddof=1)) if len(fold_accuracies) > 1 else 0.0

            print('=========================================================')
            print(f"    Aggregated results for network '{network_dir}':")
            print(f"        Folds: {len(fold_accuracies)}")
            print(f"        Mean accuracy: {mean_acc:.4f}")
            print(f"        Std accuracy:  {std_acc:.4f}")
        else:
            print(f"    No valid folds found for network '{network_dir}'.")


In [6]:
import os
import pickle
import numpy

def print_all_test_accuracies(root_dir="./models/cross_validation/grid_search_lr"):
    """
    Print the stored test accuracy of every hyper-parameter combination and fold.

    Each sub-directory of ``root_dir`` is treated as one hyper-parameter
    combination; each of its sub-directories (except ``tb_logs``) is treated as
    one fold. When a fold contains ``test_results.pkl``, its
    ``overall_accuracy`` entry is printed. After the folds of a combination,
    the mean and population standard deviation (``ddof=0``) of the numeric
    accuracies are printed, or a notice when none were found.

    Args:
        root_dir: Directory holding one sub-directory per combination.

    Returns:
        None. All output goes to stdout.
    """
    if not os.path.isdir(root_dir):
        print(f"Root dir '{root_dir}' non trovata.")
        return

    # Every sub-directory of root_dir is one hyper-parameter combination.
    for model_name in sorted(os.listdir(root_dir)):
        model_dir = os.path.join(root_dir, model_name)
        if not os.path.isdir(model_dir):
            continue
        print('---------------------------------------------')

        accuracies = []
        # Each model_dir holds the fold directories plus tb_logs.
        for fold_name in sorted(os.listdir(model_dir)):
            if fold_name == "tb_logs":
                continue

            fold_dir = os.path.join(model_dir, fold_name)
            if not os.path.isdir(fold_dir):
                continue

            pkl_path = os.path.join(fold_dir, "test_results.pkl")
            if not os.path.isfile(pkl_path):
                # No results stored for this fold.
                continue

            # SECURITY: pickle.load can execute arbitrary code while
            # deserializing; only point this function at trusted directories.
            with open(pkl_path, "rb") as f:
                results = pickle.load(f)

            acc = results.get("overall_accuracy", None)
            if acc is None:
                print(f"model={model_name} | fold={fold_name} | overall_accuracy=NOT FOUND")
                continue

            try:
                acc_value = float(acc)
            except (TypeError, ValueError):
                # Not convertible to a number: print it raw and leave it out of the statistics.
                print(f"model={model_name} | fold={fold_name} | overall_accuracy={acc}")
                continue

            print(f"model={model_name} | fold={fold_name} | overall_accuracy={acc_value:.4f}")
            accuracies.append(acc_value)

        if not accuracies:
            # Mean/std of an empty array would be NaN and emit a RuntimeWarning.
            print(f"model={model_name} | no usable test results found")
            continue

        accuracies = numpy.array(accuracies)
        print(f"Mean: {accuracies.mean():.4f}, Std: {accuracies.std():.4f}")

# Report the weight-decay grid search; this overrides the function's default root (grid_search_lr).
print_all_test_accuracies("./models/cross_validation/grid_search_weight_decay")


---------------------------------------------
model=EfficentRex_firstlr0.003_fac10_wd10.0001_wd20.0001 | fold=fold_0.0 | overall_accuracy=0.9121
model=EfficentRex_firstlr0.003_fac10_wd10.0001_wd20.0001 | fold=fold_0.1 | overall_accuracy=0.9368
model=EfficentRex_firstlr0.003_fac10_wd10.0001_wd20.0001 | fold=fold_1.0 | overall_accuracy=0.9066
model=EfficentRex_firstlr0.003_fac10_wd10.0001_wd20.0001 | fold=fold_1.1 | overall_accuracy=0.9258
Mean: 0.9203, Std: 0.0118
---------------------------------------------
model=EfficentRex_firstlr0.003_fac10_wd10.0001_wd21e-05 | fold=fold_0.0 | overall_accuracy=0.9231
model=EfficentRex_firstlr0.003_fac10_wd10.0001_wd21e-05 | fold=fold_0.1 | overall_accuracy=0.9011
model=EfficentRex_firstlr0.003_fac10_wd10.0001_wd21e-05 | fold=fold_1.0 | overall_accuracy=0.9148
model=EfficentRex_firstlr0.003_fac10_wd10.0001_wd21e-05 | fold=fold_1.1 | overall_accuracy=0.8956
Mean: 0.9087, Std: 0.0109
---------------------------------------------
model=EfficentRex_firs