## Training models
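Requires a zip file of preprocessed images stored in *AN2DL_Challenge2-TheBigBatchTheory/data/preprocessed* on Google Drive (the setup cell below looks for `preprocessed_MaskTile.zip` there and falls back to the unzipped folder of the same name).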

### Preamble
Google Drive mount, fast local data-loading setup, and GitHub repository checkout.

In [1]:
from google.colab import drive

# Mount Google Drive under /content/drive so the dataset and outputs stored
# there are reachable as regular files. force_remount=True requests a fresh
# mount even if Drive is already mounted in this session.
drive.mount('/content/drive', force_remount=True)


Mounted at /content/drive


In [None]:
import cv2
import numpy as np
import shutil
import pandas as pd
from pathlib import Path
from tqdm import tqdm
import argparse
import zipfile

# =============================================================================
# --- 1. GLOBAL CONFIGURATION ---
# =============================================================================
# Side length, in pixels, of the square tile cut around each ROI centroid,
# keyed by scale name. The scale name is also the suffix of the output
# sub-directories (images_<scale> / masks_<scale>).
TILE_SIZES = {
    'context': 768,
    'detail': 256
}
# ROI contours whose cv2.contourArea is below this value are skipped.
MIN_ROI_AREA = 100

# =============================================================================
# --- 2. UTILITY AND I/O FUNCTIONS (unchanged) ---
# =============================================================================
def load_image_cv2(path: Path):
    """Decode the file at *path* into a 3-channel BGR image.

    The file is read as raw bytes with NumPy and decoded with
    ``cv2.imdecode`` using ``cv2.IMREAD_COLOR``.

    Args:
        path: Location of the encoded image file.

    Returns:
        The decoded image array, or ``None`` when the file is empty or
        ``cv2.imdecode`` returns no image.
    """
    raw_bytes = np.fromfile(str(path), dtype=np.uint8)
    # cv2.imdecode raises on an empty buffer instead of returning None, which
    # would bypass the callers' "is None" checks and abort the whole run.
    # Treat a zero-byte file like any other unreadable image.
    if raw_bytes.size == 0:
        return None
    return cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR)

def load_mask_cv2(path: Path):
    """Decode the file at *path* into a single-channel (grayscale) mask.

    The file is read as raw bytes with NumPy and decoded with
    ``cv2.imdecode`` using ``cv2.IMREAD_GRAYSCALE``.

    Args:
        path: Location of the encoded mask file.

    Returns:
        The decoded mask array, or ``None`` when the file is empty or
        ``cv2.imdecode`` returns no image.
    """
    raw_bytes = np.fromfile(str(path), dtype=np.uint8)
    # cv2.imdecode raises on an empty buffer instead of returning None, which
    # would bypass the callers' "is None" checks and abort the whole run.
    # Treat a zero-byte file like any other unreadable mask.
    if raw_bytes.size == 0:
        return None
    return cv2.imdecode(raw_bytes, cv2.IMREAD_GRAYSCALE)


# =============================================================================
# --- 3. WHOLE-SLIDE QUALITY CONTROL (your functions) ---
# =============================================================================
def contains_slime(img_bgr, threshold=50):
    """Tell whether a BGR image holds more than *threshold* green pixels.

    A pixel counts as green when its HSV value lies inside the inclusive box
    H 35-90, S 50-255, V 50-255 (checked with ``cv2.inRange``).

    Args:
        img_bgr: Image in BGR channel order.
        threshold: Number of green pixels that must be exceeded.

    Returns:
        True if the green-pixel count is strictly greater than *threshold*.
    """
    green_lo = np.array([35, 50, 50])
    green_hi = np.array([90, 255, 255])

    hsv_img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    green_mask = cv2.inRange(hsv_img, green_lo, green_hi)
    n_green = cv2.countNonZero(green_mask)
    return n_green > threshold

def analyze_image_memory(img_bgr):
    """Classify a slide as "SAFE", "SHREK" or "FAIL" from its hue statistics.

    Foreground pixels are those with saturation > 15 and value < 250. Their
    hues are counted in three bands (names follow the original author's
    labels): tissue 125-175 inclusive, ink 80-124, and "shrek" 10-79.

    Args:
        img_bgr: Image in BGR channel order, or None.

    Returns:
        "FAIL" if *img_bgr* is None; otherwise "SHREK" when the shrek band
        dominates according to the thresholds below, else "SAFE".
    """
    if img_bgr is None:
        return "FAIL"

    hsv_img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    hue, sat, val = hsv_img[:, :, 0], hsv_img[:, :, 1], hsv_img[:, :, 2]

    is_foreground = (sat > 15) & (val < 250)
    n_foreground = np.count_nonzero(is_foreground)
    # Almost no foreground: nothing to judge, let the slide through.
    if n_foreground < 100:
        return "SAFE"

    fg_hue = hue[is_foreground]

    def hue_count(low, high_exclusive):
        """Count foreground pixels with low <= hue < high_exclusive."""
        return np.count_nonzero((fg_hue >= low) & (fg_hue < high_exclusive))

    n_tissue = np.count_nonzero((fg_hue >= 125) & (fg_hue <= 175))
    n_ink = hue_count(80, 125)
    # "Skin" (20-79) and "clothes" (10-19) bands are summed into one count.
    n_shrek = hue_count(20, 80) + hue_count(10, 20)

    if n_tissue == 0:
        if n_shrek > 0:
            return "SHREK"
        return "SAFE"

    tissue_frac = n_tissue / n_foreground
    ink_frac = n_ink / n_foreground
    shrek_frac = n_shrek / n_foreground

    # Ink outweighs the shrek band and covers more than 10% of foreground.
    if ink_frac > shrek_frac and ink_frac > 0.1:
        return "SAFE"
    # Shrek band is large and more than four times the tissue count.
    if shrek_frac > 0.4 and n_shrek / n_tissue > 4.0:
        return "SHREK"
    if tissue_frac > 0.05:
        return "SAFE"
    return "SHREK" if shrek_frac > 0.3 else "SAFE"

# =============================================================================
# --- 4. TILING ENGINE (unchanged from the multi-modal version) ---
# =============================================================================
def _extract_centered_tile(src, center_x, center_y, tile_size, fill_value):
    """Cut a square window centred on (center_x, center_y) out of *src*.

    The window spans [c - tile_size // 2, c + tile_size // 2) on both axes.
    Any part that falls outside *src* is filled with *fill_value*. Bounds are
    taken from *src* itself, so the result always has the full window size.
    """
    half = tile_size // 2
    side = 2 * half
    top, left = center_y - half, center_x - half
    height, width = src.shape[:2]

    # Start from a fully padded tile, then paste the part of src that overlaps.
    tile = np.full((side, side) + src.shape[2:], fill_value, dtype=src.dtype)
    y0, y1 = max(0, top), min(height, top + side)
    x0, x1 = max(0, left), min(width, left + side)
    if y0 < y1 and x0 < x1:
        tile[y0 - top:y1 - top, x0 - left:x1 - left] = src[y0:y1, x0:x1]
    return tile


def _write_tile(path, tile):
    """Write *tile* to *path*, raising OSError if OpenCV reports a failure."""
    if not cv2.imwrite(str(path), tile):
        raise OSError(f"Could not write tile to '{path}'")


def process_single_slide_multimodal(img_bgr, roi_mask, label, img_path, output_dirs, is_test_set=False):
    """Cut one tile per scale around every ROI of a slide and save them.

    For each external contour of *roi_mask* with area >= MIN_ROI_AREA, a
    square window centred on the contour centroid is cut for every entry of
    TILE_SIZES, from both the background-whitened image and the mask. All
    scales of one ROI share the same file name, ``<stem>_roi<i>.png``.

    Args:
        img_bgr: Slide image in BGR channel order.
        roi_mask: Single-channel ROI mask for the slide.
        label: Slide label copied into each row (ignored for the test set).
        img_path: Path of the source image; its stem names the tiles.
        output_dirs: Mapping with keys ``images_<scale>`` and
            ``masks_<scale>`` pointing at existing output directories.
        is_test_set: When True, no ``label`` column is added to the rows.

    Returns:
        None if the mask has no contours; otherwise a list (possibly empty)
        of dicts with ``sample_index``, ``original_sample`` and, for training
        data, ``label``.

    Raises:
        OSError: If a tile cannot be written to disk.
    """
    contours, _ = cv2.findContours(roi_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None

    # Pixels brighter than 220 in grayscale are treated as background and
    # painted pure white so tiles carry no scanner noise.
    img_gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, tissue_mask = cv2.threshold(img_gray, 220, 255, cv2.THRESH_BINARY_INV)
    img_masked = img_bgr.copy()
    img_masked[tissue_mask == 0] = 255

    base_name = img_path.stem
    tiles_data = []
    for i, contour in enumerate(contours):
        if cv2.contourArea(contour) < MIN_ROI_AREA:
            continue
        M = cv2.moments(contour)
        if M["m00"] == 0:
            continue
        center_x, center_y = int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"])
        tile_name = f"{base_name}_roi{i}.png"

        for scale_name, tile_size in TILE_SIZES.items():
            # Image is padded with white, mask with zeros. Each array is
            # clipped against its own shape, so a mask whose size differs
            # from the image still yields a full-size tile.
            img_tile = _extract_centered_tile(img_masked, center_x, center_y, tile_size, 255)
            mask_tile = _extract_centered_tile(roi_mask, center_x, center_y, tile_size, 0)
            _write_tile(output_dirs[f'images_{scale_name}'] / tile_name, img_tile)
            _write_tile(output_dirs[f'masks_{scale_name}'] / tile_name, mask_tile)

        row = {'sample_index': tile_name, 'original_sample': img_path.name}
        if not is_test_set:
            row['label'] = label
        tiles_data.append(row)
    return tiles_data

# =============================================================================
# --- 5. MAIN WORKFLOW (with QC correctly reintegrated) ---
# =============================================================================
def _make_tile_dirs(output_dir: Path, split: str):
    """Create and return the images_<scale>/masks_<scale> folders of *split*."""
    dirs = {
        f"{kind}_{scale}": output_dir / f"{split}/{kind}_{scale}"
        for kind in ("images", "masks")
        for scale in TILE_SIZES
    }
    for directory in dirs.values():
        directory.mkdir(parents=True, exist_ok=True)
    return dirs


def _resolve_mask_path(candidate: Path):
    """Return *candidate*, else its ``.png`` sibling, else None if neither exists."""
    if candidate.exists():
        return candidate
    png_candidate = candidate.with_suffix('.png')
    return png_candidate if png_candidate.exists() else None


def run_pipeline(train_dir: Path, labels_csv: Path, output_dir: Path, test_dir: Path = None):
    """Tile the training slides (with QC) and, optionally, the test slides.

    Training slides are taken from the ``sample_index`` / ``label`` columns of
    *labels_csv*. Slides are skipped silently when the image or mask is
    missing or undecodable, or when whole-slide QC rejects them. Tiles go to
    ``<output_dir>/train`` and ``<output_dir>/test``, each with a
    ``*_patches.csv`` manifest written only if at least one tile was produced.

    Args:
        train_dir: Root folder searched recursively for training images.
        labels_csv: CSV with ``sample_index`` and ``label`` columns.
        output_dir: Destination root. Any existing content is deleted, but
            only after the training inputs have been found.
        test_dir: Optional root folder with ``img_*`` test images; QC is not
            applied to the test set.

    Returns:
        None.
    """
    print("\n>>> FASE 1: Processing TRAINING SET (Multi-Modale con QC)")
    # Validate inputs BEFORE deleting anything: a wrong path must not wipe the
    # result of a previous (possibly hours-long) preprocessing run.
    if not (train_dir.exists() and labels_csv.exists()):
        print("‚ùå ERROR: Training directory o labels CSV non trovati. Interruzione.")
        return

    if output_dir.exists():
        shutil.rmtree(output_dir)

    # --- Process TRAINING SET ---
    output_dirs_train = _make_tile_dirs(output_dir, "train")
    labels_df = pd.read_csv(labels_csv)
    all_train_tiles = []

    for _, row in tqdm(labels_df.iterrows(), total=len(labels_df), desc="Train Slides"):
        fname, label = row['sample_index'], row['label']
        # First match anywhere below train_dir.
        img_path = next(train_dir.glob(f"**/{fname}"), None)
        if img_path is None:
            continue

        # The mask sits next to the image, with "img_" swapped for "mask_".
        mask_path = _resolve_mask_path(img_path.parent / fname.replace("img_", "mask_"))
        if mask_path is None:
            continue

        img_bgr = load_image_cv2(img_path)
        roi_mask = load_mask_cv2(mask_path)
        if img_bgr is None or roi_mask is None:
            continue

        # Whole-slide quality control: drop contaminated slides.
        if contains_slime(img_bgr) or analyze_image_memory(img_bgr) == "SHREK":
            continue

        tiles = process_single_slide_multimodal(img_bgr, roi_mask, label, img_path, output_dirs_train, is_test_set=False)
        if tiles:
            all_train_tiles.extend(tiles)

    if all_train_tiles:
        out_train_csv = output_dir / "train/train_patches.csv"
        pd.DataFrame(all_train_tiles).to_csv(out_train_csv, index=False)
        print(f"‚úÖ Training Set Completo. Salvati {len(all_train_tiles)} multimodal tile sets.")

    # --- Process TEST SET ---
    if not (test_dir and test_dir.exists()):
        print("\n‚ÑπÔ∏è Nessuna directory di test fornita o trovata, salto il processing del test set.")
        return

    output_dirs_test = _make_tile_dirs(output_dir, "test")
    print("\n>>> FASE 2: Processing TEST SET (Multi-Modale)")
    all_test_tiles = []
    test_images = sorted(p for p in test_dir.rglob("img_*.*") if "mask" not in p.name)

    for img_path in tqdm(test_images, desc="Test Slides"):
        id_part = img_path.stem.replace("img_", "")
        mask_path = _resolve_mask_path(img_path.parent / f"mask_{id_part}{img_path.suffix}")
        if mask_path is None:
            continue

        img_bgr = load_image_cv2(img_path)
        roi_mask = load_mask_cv2(mask_path)
        if img_bgr is None or roi_mask is None:
            continue

        # NOTE: QC is intentionally not applied to the test set.
        tiles = process_single_slide_multimodal(img_bgr, roi_mask, None, img_path, output_dirs_test, is_test_set=True)
        if tiles:
            all_test_tiles.extend(tiles)

    if all_test_tiles:
        out_test_csv = output_dir / "test/test_patches.csv"
        pd.DataFrame(all_test_tiles).to_csv(out_test_csv, index=False)
        print(f"‚úÖ Test Set Completo. Salvati {len(all_test_tiles)} multimodal tile sets.")

def zip_directory(folder_path: Path, zip_path: Path):
    """Pack every file below *folder_path* into a deflate-compressed zip.

    Archive member names are relative to *folder_path*, so the archive does
    not contain the folder itself as a top-level entry.

    Args:
        folder_path: Directory whose files are archived recursively.
        zip_path: Destination archive; opened in write mode.
    """
    print(f"\n>>> Zipping output directory to '{zip_path}'...")
    members = list(filter(Path.is_file, folder_path.rglob('*')))
    archive = zipfile.ZipFile(zip_path, mode='w', compression=zipfile.ZIP_DEFLATED)
    with archive:
        for member in tqdm(members, desc="Zipping files"):
            archive.write(member, arcname=member.relative_to(folder_path))
    print("‚úÖ Zipping complete.")
# =============================================================================
# --- 6. ENTRY POINT (unchanged) ---
# =============================================================================
if __name__ == "__main__":
    # All inputs and outputs live under the project's data folder on Drive.
    BASE_PATH = Path('/content/drive/MyDrive/AN2DL_Challenge2-TheBigBatchTheory/data')
    TRAIN_DATA_DIR = BASE_PATH / 'dataset/train_data'
    TEST_DATA_DIR = BASE_PATH / 'dataset/test_data'
    LABELS_CSV_PATH = BASE_PATH / 'dataset/train_labels.csv'
    OUTPUT_PREPROCESSED_DIR = BASE_PATH / 'preprocessed/preprocessed_MaskTile'
    OUTPUT_ZIP_PATH = BASE_PATH / 'preprocessed/preprocessed_MaskTile.zip'

    run_pipeline(
        train_dir=TRAIN_DATA_DIR,
        labels_csv=LABELS_CSV_PATH,
        output_dir=OUTPUT_PREPROCESSED_DIR,
        test_dir=TEST_DATA_DIR
    )

    # run_pipeline returns None both on success and when it aborts, so use the
    # training manifest it writes as the success marker. Zipping without it
    # would overwrite an existing archive with an empty one.
    if (OUTPUT_PREPROCESSED_DIR / 'train/train_patches.csv').exists():
        zip_directory(OUTPUT_PREPROCESSED_DIR, OUTPUT_ZIP_PATH)
    else:
        print("\nNo training manifest was produced; skipping the zip step.")




>>> FASE 1: Processing TRAINING SET (Multi-Modale con QC)


Train Slides: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 691/691 [08:04<00:00,  1.43it/s]


‚úÖ Training Set Completo. Salvati 4573 multimodal tile sets.

>>> FASE 2: Processing TEST SET (Multi-Modale)


Test Slides: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 477/477 [05:20<00:00,  1.49it/s]


‚úÖ Test Set Completo. Salvati 3596 multimodal tile sets.

>>> Zipping output directory to '/content/drive/MyDrive/AN2DL_Challenge2-TheBigBatchTheory/data/preprocessed/preprocessed_MaskTile.zip'...


In [2]:
import os, sys, random, subprocess, shutil
from pathlib import Path
import numpy as np
import pandas as pd
import torch
import time
!pip install -q comet_ml torchsummary # Uncomment if running in a fresh cell
from comet_ml import start
from comet_ml.integration.pytorch import log_model
from torch import nn
from torchsummary import summary
from torch.utils.tensorboard import SummaryWriter
import cv2
from PIL import Image
from tqdm.notebook import tqdm

# Name used for the TensorBoard log location (presumably consumed by the
# training code; not referenced again in this cell).
logs_dir = "tensorboard"
# Optional notebook commands, left disabled; uncomment if needed:
# !pkill -f tensorboard
# %load_ext tensorboard
# !mkdir -p models

# 2. Seed and device setup
SEED = 42
os.environ['PYTHONHASHSEED'] = f"{SEED}"
# Seed Python, NumPy and PyTorch with the same value.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(SEED)
# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# 3. GitHub repository setup
REPO = "an2dl-challenges-25-26"
TARGET_FOLDER = "challenge2"

# Resolve the directory that holds (or will hold) the clone. The first run
# starts in the Colab root, but this cell ends with os.chdir into
# <root>/REPO/TARGET_FOLDER. On a re-run, step back out of the old clone
# first; otherwise the existence check misses it and a nested clone is made.
clone_root = Path.cwd()
if REPO in clone_root.parts:
    clone_root = Path(*clone_root.parts[:clone_root.parts.index(REPO)])
os.chdir(clone_root)
repo_dir = clone_root / REPO

# Always start from a fresh checkout.
if repo_dir.exists():
    print(f"Folder '{REPO}' already exists. Deleting it for a fresh clone...")
    shutil.rmtree(repo_dir)

# check=True surfaces a failed clone (network, branch name, auth) here rather
# than as a confusing FileNotFoundError at the os.chdir below.
subprocess.run(
    [
        "git", "clone", "-b", "francesco2", "--single-branch",
        f"https://github.com/asarraa/{REPO}.git", str(repo_dir),
    ],
    check=True,
)

# Make the project sub-folder importable and switch into it.
project_path = str(repo_dir / TARGET_FOLDER)
if project_path not in sys.path:
    sys.path.append(project_path)
os.chdir(project_path)

print(f"Setup Github complete.")
print(f"Current Working Directory: {os.getcwd()}")

# --------------------------------------------------------------------------
# 4. FAST DATA SETUP (DRIVE -> LOCAL SSD)
# --------------------------------------------------------------------------
DATA_VARIANT = 'preprocessed_MaskTile'
DRIVE_ROOT = Path('/content/drive/MyDrive/AN2DL_Challenge2-TheBigBatchTheory/')
DRIVE_DATA_ROOT = DRIVE_ROOT / 'data/preprocessed'

zip_path = DRIVE_DATA_ROOT / f"{DATA_VARIANT}.zip"
non_zip_path = DRIVE_DATA_ROOT / f"{DATA_VARIANT}"

LOCAL_DATA_ROOT = Path('/content/local_data')
LOCAL_DATA_ROOT.mkdir(parents=True, exist_ok=True)  # Ensure local root exists

# Source priority: zip archive -> plain folder -> error.
# Set ZIP to False to force the folder copy even when the archive exists.
ZIP = True
if zip_path.exists() and ZIP:
    print(f"Trovato zip in: {zip_path}")
    print("Copia ed estrazione in corso su SSD locale (richiede ~30-60 sec)...")

    # Copy the archive to local disk first, then unpack from the local copy.
    temp_zip = Path("/content/temp_dataset.zip")
    try:
        shutil.copy(zip_path, temp_zip)
        shutil.unpack_archive(temp_zip, LOCAL_DATA_ROOT)
    finally:
        # Remove the temporary copy even if the copy or extraction failed,
        # so a broken run does not leave it on the local disk.
        if temp_zip.exists():
            os.remove(temp_zip)
    print("Estrazione completata!")

elif non_zip_path.exists():
    print(f"Zip NON trovato. Trovata cartella in: {non_zip_path}")
    print("Copia ricorsiva in corso su SSD locale (potrebbe essere pi√π lento del zip)...")
    # The folder is copied into a sub-directory named after the variant.
    destination_dir = LOCAL_DATA_ROOT / DATA_VARIANT
    # copytree refuses to write into an existing directory, so clear it first.
    if destination_dir.exists():
        shutil.rmtree(destination_dir)
    shutil.copytree(non_zip_path, destination_dir)
    print("Copia cartella completata!")

else:
    raise FileNotFoundError(
        f"ERRORE CRITICO: Non trovo n√© {DATA_VARIANT}.zip n√© la cartella {DATA_VARIANT} "
        f"in: {DRIVE_DATA_ROOT}"
    )

# --------------------------------------------------------------------------
# 5. PATH DEFINITIONS
# --------------------------------------------------------------------------
# The data sits either in a sub-folder named after the variant (folder copy,
# or a zip that contains that folder) or directly in the local root.
_variant_dir = LOCAL_DATA_ROOT / DATA_VARIANT
BASE_PATH = _variant_dir if _variant_dir.exists() else LOCAL_DATA_ROOT

# NOTE(review): the preprocessing cell writes images_<scale> folders
# (images_context / images_detail), not a plain "images" folder. Confirm that
# these two paths are still used anywhere.
TRAIN_IMG_DIR, TEST_IMG_DIR = (BASE_PATH / split / 'images' for split in ('train', 'test'))
TRAIN_CSV_PATH = BASE_PATH / 'train' / 'train_patches.csv'

print('Using device: {}'.format(device))
print('DATASET PRONTO IN LOCALE: {}'.format(BASE_PATH))




[?25l   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/780.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m780.9/780.9 kB[0m [31m39.4 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/1.3 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.3/1.3 MB[0m [31m57.7 MB/s[0m eta [36m0:00:00[0m
[?25hSetup Github complete.
Current Working Directory: /content/an2dl-challenges-25-26/challenge2
Trovato zip in: /content/drive/MyDrive/AN2DL_Challenge2-TheBigBatchTheory/data/preprocessed/preprocessed_MaskTile

In [3]:
# Manual override: point BASE_PATH straight at the local extraction root
# instead of a variant-named sub-folder.
DATA_VARIANT = 'preprocessed_MaskTile'

LOCAL_DATA_ROOT = BASE_PATH = Path('/content') / 'local_data'


### Dataloaders

In [4]:
import lazy_loaders
import multiscale_pipeline

# Batch size passed to start_training in the training cells below.
BATCH_SIZE = 16
# Not referenced elsewhere in the visible cells.
ADD_MASK_CHANNEL = False
# Disabled: loaders are no longer built here (the training cells pass BASE_PATH
# and BATCH_SIZE to start_training instead). Kept for manual debugging.
#train_loader, val_loader, input_shape, class_weights = multiscale_pipeline.get_multiscale_loaders(batch_size=BATCH_SIZE, base_path=BASE_PATH)
#test_loader = multiscale_pipeline.get_multiscale_test_loader(batch_size=BATCH_SIZE, base_path=BASE_PATH)

### Training

In [5]:
from launch_training import start_training
from torchvision import transforms
# Ensure 'models' is importable. If imports fail, restart kernel and run Cell 1 only.

# Stage 1: train with the backbone frozen (freeze_backbone=True).
MODEL_NAME = "AttentionMIL"

TRAINING_PARAMS = dict(
    epochs=1000,          # upper bound; 'patience' is expected to stop earlier
    patience=30,
    l2_lambda=1e-2,
    learning_rate=1e-04,
    verbose=1,
)

MIL_DEFAULT = dict(num_classes=4, pretrained=True, freeze_backbone=True)

# Note: comet_ml must have been imported in the setup cell for logging.
# Both path arguments point at the same local dataset root.
trained_model, history, exp_id = start_training(
    model_name=MODEL_NAME,
    model_params=MIL_DEFAULT,
    training_params=TRAINING_PARAMS,
    batch_size=BATCH_SIZE,
    data_path=BASE_PATH,
    local_data_path=BASE_PATH,
)

Using GPU: Tesla T4
--- Starting AttentionMIL on cuda ---




Starting AttentionMIL model training...
Training Configuration:
 epochs: 1000
learning_rate: 0.0001
patience: 30
l1_lambda: 0
l2_lambda: 0.01
verbose: 1
criterion_name: CrossEntropyLoss
optimizer_name: adamw
Model Configuration:
 num_classes: 4
backbone_name: vit_small_patch16_224
pretrained: True
freeze_backbone: True


[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/asarraa/test/762e7514b13145cf9dce60913268a036

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/88.2M [00:00<?, ?B/s]

--- ATTENZIONE: Backbone √® freezato. Solo la testa di classificazione verr√† addestrata. ---




Epoch   1/1000 | Train: Loss=1.4260, F1=0.2789 | Val: Loss=1.3660, F1=0.2584

‚úÖ Improvement! val_f1 changed from -inf to 0.2584.
   - Saving best model weights to '/content/local_data/fit_models/AttentionMIL_20251216_210310_best_model.pth'




Epoch   2/1000 | Train: Loss=1.3587, F1=0.3475 | Val: Loss=1.3578, F1=0.3250

‚úÖ Improvement! val_f1 changed from 0.2584 to 0.3250.
   - Saving best model weights to '/content/local_data/fit_models/AttentionMIL_20251216_210310_best_model.pth'




Epoch   3/1000 | Train: Loss=1.3461, F1=0.3293 | Val: Loss=1.3539, F1=0.2634
   - No improvement. Patience: 1/30




Epoch   4/1000 | Train: Loss=1.3184, F1=0.3488 | Val: Loss=1.3511, F1=0.2780
   - No improvement. Patience: 2/30




Epoch   5/1000 | Train: Loss=1.2811, F1=0.3934 | Val: Loss=1.3447, F1=0.3503

‚úÖ Improvement! val_f1 changed from 0.3250 to 0.3503.
   - Saving best model weights to '/content/local_data/fit_models/AttentionMIL_20251216_210310_best_model.pth'




Epoch   6/1000 | Train: Loss=1.2813, F1=0.3958 | Val: Loss=1.3402, F1=0.3550

‚úÖ Improvement! val_f1 changed from 0.3503 to 0.3550.
   - Saving best model weights to '/content/local_data/fit_models/AttentionMIL_20251216_210310_best_model.pth'




Epoch   7/1000 | Train: Loss=1.2482, F1=0.3917 | Val: Loss=1.3468, F1=0.3601

‚úÖ Improvement! val_f1 changed from 0.3550 to 0.3601.
   - Saving best model weights to '/content/local_data/fit_models/AttentionMIL_20251216_210310_best_model.pth'




Epoch   8/1000 | Train: Loss=1.2467, F1=0.4284 | Val: Loss=1.3466, F1=0.3258
   - No improvement. Patience: 1/30




Epoch   9/1000 | Train: Loss=1.2072, F1=0.4617 | Val: Loss=1.3451, F1=0.3364
   - No improvement. Patience: 2/30




Epoch  10/1000 | Train: Loss=1.1879, F1=0.5054 | Val: Loss=1.3499, F1=0.3095
   - No improvement. Patience: 3/30




Epoch  11/1000 | Train: Loss=1.1974, F1=0.4902 | Val: Loss=1.3493, F1=0.3417
   - No improvement. Patience: 4/30




Epoch  12/1000 | Train: Loss=1.2036, F1=0.4787 | Val: Loss=1.3503, F1=0.3348
   - No improvement. Patience: 5/30




Epoch  13/1000 | Train: Loss=1.1929, F1=0.4871 | Val: Loss=1.3513, F1=0.3534
   - No improvement. Patience: 6/30




Epoch  14/1000 | Train: Loss=1.1703, F1=0.5150 | Val: Loss=1.3529, F1=0.3480
   - No improvement. Patience: 7/30




Epoch  15/1000 | Train: Loss=1.1648, F1=0.5052 | Val: Loss=1.3521, F1=0.3410
   - No improvement. Patience: 8/30




Epoch  16/1000 | Train: Loss=1.1644, F1=0.5238 | Val: Loss=1.3507, F1=0.3514
   - No improvement. Patience: 9/30




Epoch  17/1000 | Train: Loss=1.1291, F1=0.5559 | Val: Loss=1.3623, F1=0.3043
   - No improvement. Patience: 10/30




Epoch  18/1000 | Train: Loss=1.1007, F1=0.5645 | Val: Loss=1.3671, F1=0.3395
   - No improvement. Patience: 11/30




Epoch  19/1000 | Train: Loss=1.1171, F1=0.5660 | Val: Loss=1.3654, F1=0.3532
   - No improvement. Patience: 12/30




Epoch  20/1000 | Train: Loss=1.1021, F1=0.5787 | Val: Loss=1.3693, F1=0.3344
   - No improvement. Patience: 13/30




Epoch  21/1000 | Train: Loss=1.0861, F1=0.5684 | Val: Loss=1.3748, F1=0.3476
   - No improvement. Patience: 14/30




Epoch  22/1000 | Train: Loss=1.1084, F1=0.5745 | Val: Loss=1.3808, F1=0.3278
   - No improvement. Patience: 15/30




Epoch  23/1000 | Train: Loss=1.0639, F1=0.5740 | Val: Loss=1.3800, F1=0.3404
   - No improvement. Patience: 16/30




Epoch  24/1000 | Train: Loss=1.0575, F1=0.5631 | Val: Loss=1.3801, F1=0.3260
   - No improvement. Patience: 17/30




Epoch  25/1000 | Train: Loss=1.0591, F1=0.6158 | Val: Loss=1.3828, F1=0.2973
   - No improvement. Patience: 18/30




Epoch  26/1000 | Train: Loss=1.0498, F1=0.6116 | Val: Loss=1.3998, F1=0.3010
   - No improvement. Patience: 19/30




Epoch  27/1000 | Train: Loss=1.0279, F1=0.6263 | Val: Loss=1.4078, F1=0.3228
   - No improvement. Patience: 20/30




Epoch  28/1000 | Train: Loss=1.0096, F1=0.6481 | Val: Loss=1.4016, F1=0.3048
   - No improvement. Patience: 21/30




Epoch  29/1000 | Train: Loss=0.9963, F1=0.6352 | Val: Loss=1.4207, F1=0.3200
   - No improvement. Patience: 22/30




Epoch  30/1000 | Train: Loss=0.9926, F1=0.6642 | Val: Loss=1.4225, F1=0.3062
   - No improvement. Patience: 23/30




Epoch  31/1000 | Train: Loss=0.9745, F1=0.6556 | Val: Loss=1.4137, F1=0.3279
   - No improvement. Patience: 24/30




Epoch  32/1000 | Train: Loss=0.9854, F1=0.6790 | Val: Loss=1.4305, F1=0.3158
   - No improvement. Patience: 25/30




Epoch  33/1000 | Train: Loss=0.9508, F1=0.6491 | Val: Loss=1.4333, F1=0.3203
   - No improvement. Patience: 26/30




Epoch  34/1000 | Train: Loss=0.9506, F1=0.6837 | Val: Loss=1.4357, F1=0.3232
   - No improvement. Patience: 27/30




Epoch  35/1000 | Train: Loss=0.9419, F1=0.6614 | Val: Loss=1.4364, F1=0.3249
   - No improvement. Patience: 28/30




Epoch  36/1000 | Train: Loss=0.9318, F1=0.6778 | Val: Loss=1.4434, F1=0.3368
   - No improvement. Patience: 29/30




Epoch  37/1000 | Train: Loss=0.9169, F1=0.7103 | Val: Loss=1.4673, F1=0.2931
   - No improvement. Patience: 30/30

üõë Early stopping triggered after 37 epochs.

--- Training Finished ---
Restoring best model weights from epoch 7 with val_f1 of 0.3601
Model saved to: /content/local_data/experiments/models/AttentionMIL_20251216_210310.pt
Registry updated: ID AttentionMIL_20251216_210310


[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : AttentionMIL_20251216_210310
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/asarraa/test/762e7514b13145cf9dce60913268a036
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     best_val_f1 [5] : (0.258416336741525, 0.3601266422131213)
[1;38;5;39mCOMET INFO:[0m     train_f1 [37]   : (0.2789116870054049, 0.7103098496364149)
[1;38;5;39mCOMET INFO:[0m     train_loss [37] : (0.9169371025315647, 1.425970278937241)
[1;38;5;39mCOMET INFO:[0m     val_f1 [37]     : (0.258416336741525, 0.36012664

In [7]:
# Stage 2: fine-tune with the backbone unfrozen and a 10x smaller learning
# rate, starting from the best stage-1 checkpoint.
MODEL_NAME = "AttentionMIL"

TRAINING_PARAMS = dict(
    epochs=1000,
    patience=30,
    l2_lambda=1e-2,
    learning_rate=1e-05,
    verbose=1,
)

MIL_DEFAULT = dict(num_classes=4, pretrained=True, freeze_backbone=False)

# NOTE(review): the checkpoint path is hard-coded to the timestamped file of
# one specific stage-1 run. Update it after every new stage-1 training.
trained_model, history, exp_id = start_training(
    model_name=MODEL_NAME,
    model_params=MIL_DEFAULT,
    training_params=TRAINING_PARAMS,
    pretrained_model_path='/content/local_data/fit_models/AttentionMIL_20251216_210310_best_model.pth',
    batch_size=BATCH_SIZE,
    data_path=BASE_PATH,
    local_data_path=BASE_PATH,
)

Using GPU: Tesla T4
--- Starting AttentionMIL on cuda ---


[1;38;5;39mCOMET INFO:[0m An experiment with the same configuration options is already running and will be reused.


Starting AttentionMIL model training...
Training Configuration:
 epochs: 1000
learning_rate: 1e-05
patience: 30
l1_lambda: 0
l2_lambda: 0.01
verbose: 1
criterion_name: CrossEntropyLoss
optimizer_name: adamw
Model Configuration:
 num_classes: 4
backbone_name: vit_small_patch16_224
pretrained: True
freeze_backbone: False
Loading pretrained model from /content/local_data/fit_models/AttentionMIL_20251216_210310_best_model.pth...
Pretrained model loaded successfully.




Epoch   1/1000 | Train: Loss=1.2612, F1=0.4018 | Val: Loss=1.3459, F1=0.3437

‚úÖ Improvement! val_f1 changed from -inf to 0.3437.
   - Saving best model weights to '/content/local_data/fit_models/AttentionMIL_20251216_212112_best_model.pth'




Epoch   2/1000 | Train: Loss=1.2062, F1=0.4509 | Val: Loss=1.3521, F1=0.3287
   - No improvement. Patience: 1/30




Epoch   3/1000 | Train: Loss=1.1688, F1=0.5275 | Val: Loss=1.3500, F1=0.3712

‚úÖ Improvement! val_f1 changed from 0.3437 to 0.3712.
   - Saving best model weights to '/content/local_data/fit_models/AttentionMIL_20251216_212112_best_model.pth'




Epoch   4/1000 | Train: Loss=1.1279, F1=0.5249 | Val: Loss=1.3521, F1=0.3295
   - No improvement. Patience: 1/30




Epoch   5/1000 | Train: Loss=1.0744, F1=0.5641 | Val: Loss=1.3598, F1=0.3609
   - No improvement. Patience: 2/30




Epoch   6/1000 | Train: Loss=1.0360, F1=0.6248 | Val: Loss=1.3672, F1=0.3669
   - No improvement. Patience: 3/30




Epoch   7/1000 | Train: Loss=0.9853, F1=0.6633 | Val: Loss=1.4034, F1=0.3287
   - No improvement. Patience: 4/30




Epoch   8/1000 | Train: Loss=0.9431, F1=0.7093 | Val: Loss=1.4008, F1=0.3665
   - No improvement. Patience: 5/30




Epoch   9/1000 | Train: Loss=0.8643, F1=0.7652 | Val: Loss=1.4242, F1=0.3602
   - No improvement. Patience: 6/30




Epoch  10/1000 | Train: Loss=0.8080, F1=0.8064 | Val: Loss=1.4867, F1=0.3338
   - No improvement. Patience: 7/30




Epoch  11/1000 | Train: Loss=0.7045, F1=0.8831 | Val: Loss=1.4815, F1=0.3826

‚úÖ Improvement! val_f1 changed from 0.3712 to 0.3826.
   - Saving best model weights to '/content/local_data/fit_models/AttentionMIL_20251216_212112_best_model.pth'




Epoch  12/1000 | Train: Loss=0.6539, F1=0.8812 | Val: Loss=1.5793, F1=0.3318
   - No improvement. Patience: 1/30




Epoch  13/1000 | Train: Loss=0.6164, F1=0.9200 | Val: Loss=1.5819, F1=0.3603
   - No improvement. Patience: 2/30




Epoch  14/1000 | Train: Loss=0.5757, F1=0.9375 | Val: Loss=1.5835, F1=0.3485
   - No improvement. Patience: 3/30




Epoch  15/1000 | Train: Loss=0.5206, F1=0.9655 | Val: Loss=1.6042, F1=0.3609
   - No improvement. Patience: 4/30




Epoch  16/1000 | Train: Loss=0.4934, F1=0.9784 | Val: Loss=1.7038, F1=0.3609
   - No improvement. Patience: 5/30




Epoch  17/1000 | Train: Loss=0.4772, F1=0.9763 | Val: Loss=1.7121, F1=0.3667
   - No improvement. Patience: 6/30




Epoch  18/1000 | Train: Loss=0.4436, F1=0.9849 | Val: Loss=1.7077, F1=0.3703
   - No improvement. Patience: 7/30




Epoch  19/1000 | Train: Loss=0.4381, F1=0.9870 | Val: Loss=1.7329, F1=0.3160
   - No improvement. Patience: 8/30




Epoch  20/1000 | Train: Loss=0.4212, F1=1.0000 | Val: Loss=1.7516, F1=0.3605
   - No improvement. Patience: 9/30




Epoch  21/1000 | Train: Loss=0.4219, F1=0.9978 | Val: Loss=1.7529, F1=0.3534
   - No improvement. Patience: 10/30




Epoch  22/1000 | Train: Loss=0.4092, F1=0.9978 | Val: Loss=1.8052, F1=0.3528
   - No improvement. Patience: 11/30




Epoch  23/1000 | Train: Loss=0.4043, F1=0.9978 | Val: Loss=1.7516, F1=0.3656
   - No improvement. Patience: 12/30




Epoch  24/1000 | Train: Loss=0.3988, F1=1.0000 | Val: Loss=1.7256, F1=0.3410
   - No improvement. Patience: 13/30




Epoch  25/1000 | Train: Loss=0.3985, F1=0.9957 | Val: Loss=1.7709, F1=0.3392
   - No improvement. Patience: 14/30




Epoch  26/1000 | Train: Loss=0.3958, F1=0.9978 | Val: Loss=1.7315, F1=0.3956

‚úÖ Improvement! val_f1 changed from 0.3826 to 0.3956.
   - Saving best model weights to '/content/local_data/fit_models/AttentionMIL_20251216_212112_best_model.pth'




Epoch  27/1000 | Train: Loss=0.3899, F1=1.0000 | Val: Loss=1.7436, F1=0.3632
   - No improvement. Patience: 1/30




Epoch  28/1000 | Train: Loss=0.3900, F1=1.0000 | Val: Loss=1.7774, F1=0.3714
   - No improvement. Patience: 2/30




Epoch  29/1000 | Train: Loss=0.3879, F1=1.0000 | Val: Loss=1.7964, F1=0.3923
   - No improvement. Patience: 3/30




Epoch  30/1000 | Train: Loss=0.3842, F1=1.0000 | Val: Loss=1.7217, F1=0.3661
   - No improvement. Patience: 4/30




Epoch  31/1000 | Train: Loss=0.3872, F1=1.0000 | Val: Loss=1.7556, F1=0.3640
   - No improvement. Patience: 5/30




Epoch  32/1000 | Train: Loss=0.3855, F1=1.0000 | Val: Loss=1.7625, F1=0.3742
   - No improvement. Patience: 6/30




Epoch  33/1000 | Train: Loss=0.3836, F1=1.0000 | Val: Loss=1.7186, F1=0.3800
   - No improvement. Patience: 7/30




Epoch  34/1000 | Train: Loss=0.3828, F1=1.0000 | Val: Loss=1.7494, F1=0.3694
   - No improvement. Patience: 8/30




Epoch  35/1000 | Train: Loss=0.3875, F1=1.0000 | Val: Loss=1.7072, F1=0.3723
   - No improvement. Patience: 9/30




Epoch  36/1000 | Train: Loss=0.3906, F1=0.9978 | Val: Loss=1.7409, F1=0.3463
   - No improvement. Patience: 10/30




Epoch  37/1000 | Train: Loss=0.3799, F1=1.0000 | Val: Loss=1.7401, F1=0.3642
   - No improvement. Patience: 11/30




Epoch  38/1000 | Train: Loss=0.3824, F1=1.0000 | Val: Loss=1.7349, F1=0.3744
   - No improvement. Patience: 12/30




Epoch  39/1000 | Train: Loss=0.3821, F1=1.0000 | Val: Loss=1.6908, F1=0.3862
   - No improvement. Patience: 13/30




Epoch  40/1000 | Train: Loss=0.3833, F1=1.0000 | Val: Loss=1.6883, F1=0.3489
   - No improvement. Patience: 14/30




Epoch  41/1000 | Train: Loss=0.3762, F1=1.0000 | Val: Loss=1.7147, F1=0.3766
   - No improvement. Patience: 15/30




Epoch  42/1000 | Train: Loss=0.3780, F1=1.0000 | Val: Loss=1.6914, F1=0.3875
   - No improvement. Patience: 16/30




Epoch  43/1000 | Train: Loss=0.3765, F1=1.0000 | Val: Loss=1.7000, F1=0.3632
   - No improvement. Patience: 17/30




Epoch  44/1000 | Train: Loss=0.3769, F1=1.0000 | Val: Loss=1.7106, F1=0.3553
   - No improvement. Patience: 18/30




Epoch  45/1000 | Train: Loss=0.3782, F1=1.0000 | Val: Loss=1.7500, F1=0.3839
   - No improvement. Patience: 19/30




Epoch  46/1000 | Train: Loss=0.3766, F1=1.0000 | Val: Loss=1.7214, F1=0.3977

✅ Improvement! val_f1 changed from 0.3956 to 0.3977.
   - Saving best model weights to '/content/local_data/fit_models/AttentionMIL_20251216_212112_best_model.pth'




Epoch  47/1000 | Train: Loss=0.3762, F1=1.0000 | Val: Loss=1.6700, F1=0.3569
   - No improvement. Patience: 1/30




Epoch  48/1000 | Train: Loss=0.3768, F1=1.0000 | Val: Loss=1.6985, F1=0.3789
   - No improvement. Patience: 2/30




Epoch  49/1000 | Train: Loss=0.3751, F1=1.0000 | Val: Loss=1.7692, F1=0.3578
   - No improvement. Patience: 3/30




Epoch  50/1000 | Train: Loss=0.3783, F1=1.0000 | Val: Loss=1.6734, F1=0.3923
   - No improvement. Patience: 4/30




Epoch  51/1000 | Train: Loss=0.3762, F1=1.0000 | Val: Loss=1.6562, F1=0.4011

✅ Improvement! val_f1 changed from 0.3977 to 0.4011.
   - Saving best model weights to '/content/local_data/fit_models/AttentionMIL_20251216_212112_best_model.pth'




Epoch  52/1000 | Train: Loss=0.3750, F1=1.0000 | Val: Loss=1.6885, F1=0.3778
   - No improvement. Patience: 1/30




Epoch  53/1000 | Train: Loss=0.3752, F1=1.0000 | Val: Loss=1.7009, F1=0.3564
   - No improvement. Patience: 2/30




Epoch  54/1000 | Train: Loss=0.3730, F1=1.0000 | Val: Loss=1.6830, F1=0.3614
   - No improvement. Patience: 3/30




Epoch  55/1000 | Train: Loss=0.3764, F1=1.0000 | Val: Loss=1.6652, F1=0.3620
   - No improvement. Patience: 4/30




Epoch  56/1000 | Train: Loss=0.3769, F1=1.0000 | Val: Loss=1.6961, F1=0.3614
   - No improvement. Patience: 5/30




Epoch  57/1000 | Train: Loss=0.3744, F1=1.0000 | Val: Loss=1.6794, F1=0.3543
   - No improvement. Patience: 6/30




Epoch  58/1000 | Train: Loss=0.3735, F1=1.0000 | Val: Loss=1.6855, F1=0.3633
   - No improvement. Patience: 7/30




Epoch  59/1000 | Train: Loss=0.3740, F1=1.0000 | Val: Loss=1.6940, F1=0.3563
   - No improvement. Patience: 8/30




Epoch  60/1000 | Train: Loss=0.3739, F1=1.0000 | Val: Loss=1.7099, F1=0.3622
   - No improvement. Patience: 9/30




Epoch  61/1000 | Train: Loss=0.3746, F1=1.0000 | Val: Loss=1.6974, F1=0.3778
   - No improvement. Patience: 10/30




Epoch  62/1000 | Train: Loss=0.3714, F1=1.0000 | Val: Loss=1.6496, F1=0.3636
   - No improvement. Patience: 11/30




Epoch  63/1000 | Train: Loss=0.3703, F1=1.0000 | Val: Loss=1.7117, F1=0.3378
   - No improvement. Patience: 12/30




Epoch  64/1000 | Train: Loss=0.3735, F1=1.0000 | Val: Loss=1.6823, F1=0.3618
   - No improvement. Patience: 13/30




Epoch  65/1000 | Train: Loss=0.3730, F1=1.0000 | Val: Loss=1.7139, F1=0.3899
   - No improvement. Patience: 14/30




Epoch  66/1000 | Train: Loss=0.3727, F1=1.0000 | Val: Loss=1.7366, F1=0.3769
   - No improvement. Patience: 15/30




Epoch  67/1000 | Train: Loss=0.3729, F1=1.0000 | Val: Loss=1.6850, F1=0.3469
   - No improvement. Patience: 16/30




Epoch  68/1000 | Train: Loss=0.3735, F1=1.0000 | Val: Loss=1.6864, F1=0.3855
   - No improvement. Patience: 17/30




Epoch  69/1000 | Train: Loss=0.3743, F1=1.0000 | Val: Loss=1.6712, F1=0.3638
   - No improvement. Patience: 18/30




Epoch  70/1000 | Train: Loss=0.3710, F1=1.0000 | Val: Loss=1.7207, F1=0.3611
   - No improvement. Patience: 19/30




Epoch  71/1000 | Train: Loss=0.3721, F1=1.0000 | Val: Loss=1.7260, F1=0.3792
   - No improvement. Patience: 20/30




Epoch  72/1000 | Train: Loss=0.3705, F1=1.0000 | Val: Loss=1.7121, F1=0.3627
   - No improvement. Patience: 21/30




Epoch  73/1000 | Train: Loss=0.3708, F1=1.0000 | Val: Loss=1.6976, F1=0.3380
   - No improvement. Patience: 22/30




Epoch  74/1000 | Train: Loss=0.3715, F1=1.0000 | Val: Loss=1.6789, F1=0.3786
   - No improvement. Patience: 23/30




Epoch  75/1000 | Train: Loss=0.3698, F1=1.0000 | Val: Loss=1.7278, F1=0.3849
   - No improvement. Patience: 24/30




Epoch  76/1000 | Train: Loss=0.3702, F1=1.0000 | Val: Loss=1.7011, F1=0.3529
   - No improvement. Patience: 25/30




Epoch  77/1000 | Train: Loss=0.3705, F1=1.0000 | Val: Loss=1.6844, F1=0.3624
   - No improvement. Patience: 26/30




Epoch  78/1000 | Train: Loss=0.3678, F1=1.0000 | Val: Loss=1.6860, F1=0.3771
   - No improvement. Patience: 27/30




Epoch  79/1000 | Train: Loss=0.3703, F1=1.0000 | Val: Loss=1.6422, F1=0.3696
   - No improvement. Patience: 28/30




Epoch  80/1000 | Train: Loss=0.3684, F1=1.0000 | Val: Loss=1.6738, F1=0.3892
   - No improvement. Patience: 29/30




Epoch  81/1000 | Train: Loss=0.3689, F1=1.0000 | Val: Loss=1.7450, F1=0.3437
   - No improvement. Patience: 30/30

🛑 Early stopping triggered after 81 epochs.

--- Training Finished ---
Restoring best model weights from epoch 51 with val_f1 of 0.4011
Model saved to: /content/local_data/experiments/models/AttentionMIL_20251216_212112.pt
Registry updated: ID AttentionMIL_20251216_212112


[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : AttentionMIL_20251216_212112
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/asarraa/test/56af7d79438f43939a4f6fbd6706c905
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     best_val_f1 [6] : (0.3436692506459948, 0.40108315573155956)
[1;38;5;39mCOMET INFO:[0m     train_f1 [81]   : (0.401810716789928, 1.0)
[1;38;5;39mCOMET INFO:[0m     train_loss [81] : (0.36775538119776496, 1.2612053936925427)
[1;38;5;39mCOMET INFO:[0m     val_f1 [81]     : (0.31599548929119436, 0.40108315573155956)

In [None]:
# Fine-tuning run: AttentionMIL warm-started from a previously saved checkpoint,
# with a very small learning rate (1e-06) and an L2 coefficient of 1e-2.
MODEL_NAME = "AttentionMIL"

TRAINING_PARAMS = {
    "epochs": 1000,  # upper bound only; early stopping normally ends the run sooner
    "patience": 10,  # epochs without a val_f1 improvement before stopping
    "l2_lambda": 1e-2,
    "learning_rate": 1e-06,
    "verbose": 1,
}

# Model hyper-parameters: 4 output classes, pretrained backbone left trainable.
MIL_DEFAULT = {
    "num_classes": 4,
    "pretrained": True,
    "freeze_backbone": False,
}

# BASE_PATH is passed both as the dataset location and as the local data root.
trained_model, history, exp_id = start_training(
    model_name=MODEL_NAME,
    model_params=MIL_DEFAULT,
    training_params=TRAINING_PARAMS,
    # Weights to resume from (best checkpoint of an earlier AttentionMIL run).
    pretrained_model_path="/content/local_data/fit_models/AttentionMIL_20251216_174926_best_model.pth",
    batch_size=BATCH_SIZE,
    data_path=BASE_PATH,
    local_data_path=BASE_PATH,
)

Using GPU: Tesla T4
--- Starting AttentionMIL on cuda ---




Starting AttentionMIL model training...
Training Configuration:
 epochs: 1000
learning_rate: 1e-06
patience: 10
l1_lambda: 0
l2_lambda: 0.01
verbose: 1
criterion_name: CrossEntropyLoss
optimizer_name: adamw
Model Configuration:
 num_classes: 4
backbone_name: vit_small_patch16_224
pretrained: True
freeze_backbone: False


[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/asarraa/test/2e19ec1b55734495a4e5a8d802703d6e



Loading pretrained model from /content/local_data/fit_models/AttentionMIL_20251216_174926_best_model.pth...
Pretrained model loaded successfully.




Epoch   1/1000 | Train: Loss=1.2378, F1=0.4636 | Val: Loss=1.3442, F1=0.3796

✅ Improvement! val_f1 changed from -inf to 0.3796.
   - Saving best model weights to '/content/local_data/fit_models/AttentionMIL_20251216_181934_best_model.pth'




Epoch   2/1000 | Train: Loss=1.2462, F1=0.4362 | Val: Loss=1.3450, F1=0.3793
   - No improvement. Patience: 1/10




Epoch   3/1000 | Train: Loss=1.2137, F1=0.4424 | Val: Loss=1.3437, F1=0.3821

✅ Improvement! val_f1 changed from 0.3796 to 0.3821.
   - Saving best model weights to '/content/local_data/fit_models/AttentionMIL_20251216_181934_best_model.pth'




Epoch   4/1000 | Train: Loss=1.2132, F1=0.4479 | Val: Loss=1.3461, F1=0.3693
   - No improvement. Patience: 1/10




Epoch   5/1000 | Train: Loss=1.2019, F1=0.4956 | Val: Loss=1.3456, F1=0.3638
   - No improvement. Patience: 2/10




Epoch   6/1000 | Train: Loss=1.2064, F1=0.4555 | Val: Loss=1.3468, F1=0.3672
   - No improvement. Patience: 3/10




Epoch   7/1000 | Train: Loss=1.1880, F1=0.5083 | Val: Loss=1.3468, F1=0.3680
   - No improvement. Patience: 4/10




Epoch   8/1000 | Train: Loss=1.1809, F1=0.5083 | Val: Loss=1.3464, F1=0.3483
   - No improvement. Patience: 5/10




Epoch   9/1000 | Train: Loss=1.1816, F1=0.4869 | Val: Loss=1.3456, F1=0.3855

✅ Improvement! val_f1 changed from 0.3821 to 0.3855.
   - Saving best model weights to '/content/local_data/fit_models/AttentionMIL_20251216_181934_best_model.pth'




Epoch  10/1000 | Train: Loss=1.1685, F1=0.5337 | Val: Loss=1.3464, F1=0.3746
   - No improvement. Patience: 1/10




Epoch  11/1000 | Train: Loss=1.1854, F1=0.5072 | Val: Loss=1.3455, F1=0.3784
   - No improvement. Patience: 2/10




Epoch  12/1000 | Train: Loss=1.1590, F1=0.5082 | Val: Loss=1.3459, F1=0.3873

✅ Improvement! val_f1 changed from 0.3855 to 0.3873.
   - Saving best model weights to '/content/local_data/fit_models/AttentionMIL_20251216_181934_best_model.pth'




Epoch  13/1000 | Train: Loss=1.1493, F1=0.4960 | Val: Loss=1.3445, F1=0.3691
   - No improvement. Patience: 1/10




Epoch  14/1000 | Train: Loss=1.1244, F1=0.5522 | Val: Loss=1.3439, F1=0.3692
   - No improvement. Patience: 2/10




Epoch  15/1000 | Train: Loss=1.1352, F1=0.5410 | Val: Loss=1.3430, F1=0.3837
   - No improvement. Patience: 3/10




Epoch  16/1000 | Train: Loss=1.1499, F1=0.5246 | Val: Loss=1.3450, F1=0.3655
   - No improvement. Patience: 4/10




Epoch  17/1000 | Train: Loss=1.1308, F1=0.5190 | Val: Loss=1.3447, F1=0.3721
   - No improvement. Patience: 5/10




Epoch  18/1000 | Train: Loss=1.0822, F1=0.5867 | Val: Loss=1.3451, F1=0.3906

✅ Improvement! val_f1 changed from 0.3873 to 0.3906.
   - Saving best model weights to '/content/local_data/fit_models/AttentionMIL_20251216_181934_best_model.pth'




Epoch  19/1000 | Train: Loss=1.1239, F1=0.5301 | Val: Loss=1.3473, F1=0.3710
   - No improvement. Patience: 1/10




Epoch  20/1000 | Train: Loss=1.0885, F1=0.5829 | Val: Loss=1.3470, F1=0.3830
   - No improvement. Patience: 2/10




Epoch  21/1000 | Train: Loss=1.0976, F1=0.5725 | Val: Loss=1.3466, F1=0.3830
   - No improvement. Patience: 3/10




Epoch  22/1000 | Train: Loss=1.0776, F1=0.5976 | Val: Loss=1.3476, F1=0.3599
   - No improvement. Patience: 4/10




Epoch  23/1000 | Train: Loss=1.0854, F1=0.6004 | Val: Loss=1.3498, F1=0.3471
   - No improvement. Patience: 5/10




Epoch  24/1000 | Train: Loss=1.0685, F1=0.5846 | Val: Loss=1.3507, F1=0.3605
   - No improvement. Patience: 6/10




Epoch  25/1000 | Train: Loss=1.0511, F1=0.6229 | Val: Loss=1.3500, F1=0.3724
   - No improvement. Patience: 7/10




Epoch  26/1000 | Train: Loss=1.0446, F1=0.6167 | Val: Loss=1.3526, F1=0.3576
   - No improvement. Patience: 8/10




Epoch  27/1000 | Train: Loss=1.0357, F1=0.6291 | Val: Loss=1.3552, F1=0.3576
   - No improvement. Patience: 9/10




Epoch  28/1000 | Train: Loss=1.0384, F1=0.6398 | Val: Loss=1.3570, F1=0.3708
   - No improvement. Patience: 10/10

🛑 Early stopping triggered after 28 epochs.

--- Training Finished ---
Restoring best model weights from epoch 18 with val_f1 of 0.3906
Model saved to: /content/local_data/experiments/models/AttentionMIL_20251216_181934.pt
Registry updated: ID AttentionMIL_20251216_181934


[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : AttentionMIL_20251216_181934
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/asarraa/test/2e19ec1b55734495a4e5a8d802703d6e
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     best_val_f1 [5] : (0.37962314127450775, 0.3906378138681229)
[1;38;5;39mCOMET INFO:[0m     train_f1 [28]   : (0.43618726508781513, 0.6397776458322688)
[1;38;5;39mCOMET INFO:[0m     train_loss [28] : (1.035734374975336, 1.2461730293158828)
[1;38;5;39mCOMET INFO:[0m     val_f1 [28]     : (0.34711359372032735, 0.390

In [None]:
# Fine-tuning run: AttentionMIL warm-started from a previously saved checkpoint,
# with a learning rate of 5e-06 and an L2 coefficient of 0.05.
MODEL_NAME = "AttentionMIL"

TRAINING_PARAMS = {
    "epochs": 1000,  # upper bound only; early stopping normally ends the run sooner
    "patience": 10,  # epochs without a val_f1 improvement before stopping
    "l2_lambda": 0.05,
    "learning_rate": 5e-06,
    "verbose": 1,
}

# Model hyper-parameters: 4 output classes, pretrained backbone left trainable.
MIL_DEFAULT = {
    "num_classes": 4,
    "pretrained": True,
    "freeze_backbone": False,
}

# BASE_PATH is passed both as the dataset location and as the local data root.
trained_model, history, exp_id = start_training(
    model_name=MODEL_NAME,
    model_params=MIL_DEFAULT,
    training_params=TRAINING_PARAMS,
    # Weights to resume from (best checkpoint of an earlier AttentionMIL run).
    pretrained_model_path="/content/local_data/fit_models/AttentionMIL_20251216_174926_best_model.pth",
    batch_size=BATCH_SIZE,
    data_path=BASE_PATH,
    local_data_path=BASE_PATH,
)

Using GPU: Tesla T4
--- Starting AttentionMIL on cuda ---




Starting AttentionMIL model training...
Training Configuration:
 epochs: 1000
learning_rate: 5e-06
patience: 10
l1_lambda: 0
l2_lambda: 0.05
verbose: 1
criterion_name: CrossEntropyLoss
optimizer_name: adamw
Model Configuration:
 num_classes: 4
backbone_name: vit_small_patch16_224
pretrained: True
freeze_backbone: False


[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/asarraa/test/8cc557875973480db1fec8e8f8a78e26



Loading pretrained model from /content/local_data/fit_models/AttentionMIL_20251216_174926_best_model.pth...
Pretrained model loaded successfully.




Epoch   1/1000 | Train: Loss=1.2534, F1=0.4541 | Val: Loss=1.3398, F1=0.3626

✅ Improvement! val_f1 changed from -inf to 0.3626.
   - Saving best model weights to '/content/local_data/fit_models/AttentionMIL_20251216_184103_best_model.pth'




Epoch   2/1000 | Train: Loss=1.2126, F1=0.4571 | Val: Loss=1.3492, F1=0.3091
   - No improvement. Patience: 1/10




Epoch   3/1000 | Train: Loss=1.1803, F1=0.5148 | Val: Loss=1.3461, F1=0.3742

✅ Improvement! val_f1 changed from 0.3626 to 0.3742.
   - Saving best model weights to '/content/local_data/fit_models/AttentionMIL_20251216_184103_best_model.pth'




Epoch   4/1000 | Train: Loss=1.1498, F1=0.5435 | Val: Loss=1.3419, F1=0.3486
   - No improvement. Patience: 1/10




Epoch   5/1000 | Train: Loss=1.1275, F1=0.5571 | Val: Loss=1.3565, F1=0.3193
   - No improvement. Patience: 2/10




Epoch   6/1000 | Train: Loss=1.0842, F1=0.5722 | Val: Loss=1.3556, F1=0.3660
   - No improvement. Patience: 3/10




Epoch   7/1000 | Train: Loss=1.0660, F1=0.5884 | Val: Loss=1.3544, F1=0.3834

✅ Improvement! val_f1 changed from 0.3742 to 0.3834.
   - Saving best model weights to '/content/local_data/fit_models/AttentionMIL_20251216_184103_best_model.pth'




Epoch   8/1000 | Train: Loss=1.0000, F1=0.6515 | Val: Loss=1.3806, F1=0.3942

✅ Improvement! val_f1 changed from 0.3834 to 0.3942.
   - Saving best model weights to '/content/local_data/fit_models/AttentionMIL_20251216_184103_best_model.pth'




Epoch   9/1000 | Train: Loss=0.9539, F1=0.6984 | Val: Loss=1.4040, F1=0.3077
   - No improvement. Patience: 1/10




Epoch  10/1000 | Train: Loss=0.9157, F1=0.7204 | Val: Loss=1.3932, F1=0.3493
   - No improvement. Patience: 2/10




Epoch  11/1000 | Train: Loss=0.8711, F1=0.7604 | Val: Loss=1.4061, F1=0.3684
   - No improvement. Patience: 3/10




Epoch  12/1000 | Train: Loss=0.8240, F1=0.7875 | Val: Loss=1.4159, F1=0.3474
   - No improvement. Patience: 4/10




Epoch  13/1000 | Train: Loss=0.7726, F1=0.8320 | Val: Loss=1.4460, F1=0.3426
   - No improvement. Patience: 5/10




Epoch  14/1000 | Train: Loss=0.7447, F1=0.8570 | Val: Loss=1.4858, F1=0.3398
   - No improvement. Patience: 6/10




Epoch  15/1000 | Train: Loss=0.6810, F1=0.8767 | Val: Loss=1.4977, F1=0.3612
   - No improvement. Patience: 7/10




Epoch  16/1000 | Train: Loss=0.6609, F1=0.8988 | Val: Loss=1.5479, F1=0.3152
   - No improvement. Patience: 8/10




Epoch  17/1000 | Train: Loss=0.6105, F1=0.9243 | Val: Loss=1.5452, F1=0.3635
   - No improvement. Patience: 9/10




Epoch  18/1000 | Train: Loss=0.5651, F1=0.9353 | Val: Loss=1.5921, F1=0.3600
   - No improvement. Patience: 10/10

🛑 Early stopping triggered after 18 epochs.

--- Training Finished ---
Restoring best model weights from epoch 8 with val_f1 of 0.3942
Model saved to: /content/local_data/experiments/models/AttentionMIL_20251216_184103.pt
Registry updated: ID AttentionMIL_20251216_184103


[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : AttentionMIL_20251216_184103
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/asarraa/test/8cc557875973480db1fec8e8f8a78e26
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     best_val_f1 [4] : (0.3626379115415971, 0.3941631298610704)
[1;38;5;39mCOMET INFO:[0m     train_f1 [18]   : (0.4540904686388393, 0.9353160965189341)
[1;38;5;39mCOMET INFO:[0m     train_loss [18] : (0.56509212892631, 1.253370460765115)
[1;38;5;39mCOMET INFO:[0m     val_f1 [18]     : (0.3076946515860543, 0.39416312


### Inference on test set

In [8]:
from inference import make_mil_inference

# 1. Inference settings
# Checkpoint to evaluate: best-model weights saved by run AttentionMIL_20251216_212112.
MODEL_PATH = Path(
    "/content/local_data/fit_models/AttentionMIL_20251216_212112_best_model.pth"
)
# Keep the batch small for MIL inference.
# NOTE: this rebinds the notebook-level BATCH_SIZE that the training cells pass
# to start_training; re-running them after this cell would use 8.
BATCH_SIZE = 8
EXPERIMENT_ID = "mil_inference_vit_01"
# Prefer the GPU when one is visible, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# 2. Run the main inference entry point
make_mil_inference(
    experiment_id=EXPERIMENT_ID,
    model_path=MODEL_PATH,
    backbone_name="vit_small_patch16_224",
    base_path=BASE_PATH,
    batch_size=BATCH_SIZE,
    device=DEVICE,
)


[INFO] Starting MIL inference for experiment 'mil_inference_vit_01'...

--- Creating MIL Test DataLoader ---
[INFO] Loading AttentionMIL model with 4 classes and 'vit_small_patch16_224' backbone.
--- ATTENZIONE: Backbone è freezato. Solo la testa di classificazione verrà addestrata. ---
[INFO] Loading weights from: '/content/local_data/fit_models/AttentionMIL_20251216_212112_best_model.pth'
[INFO] Model loaded successfully.


Predicting on slides: 100%|██████████| 60/60 [00:19<00:00,  3.09it/s]


✅ Inference complete. Submission file saved to 'mil_inference_vit_01_submission.csv'



