# Colab Quickstart

In [1]:
!git clone --recursive https://github.com/lucap02/aml-2025-mistake-detection.git code

Cloning into 'code'...
remote: Enumerating objects: 475, done.[K
remote: Counting objects: 100% (41/41), done.[K
remote: Compressing objects: 100% (27/27), done.[K
remote: Total 475 (delta 19), reused 26 (delta 14), pack-reused 434 (from 2)[K
Receiving objects: 100% (475/475), 165.43 KiB | 1.31 MiB/s, done.
Resolving deltas: 100% (302/302), done.
Submodule 'annotations' (https://github.com/CaptainCook4D/annotations) registered for path 'annotations'
Cloning into '/content/code/annotations'...
remote: Enumerating objects: 152, done.        
remote: Counting objects: 100% (152/152), done.        
remote: Compressing objects: 100% (98/98), done.        
remote: Total 152 (delta 75), reused 108 (delta 46), pack-reused 0 (from 0)        
Receiving objects: 100% (152/152), 793.14 KiB | 3.17 MiB/s, done.
Resolving deltas: 100% (75/75), done.
Submodule path 'annotations': checked out '0e9a108be2cbcbcbd592e7418c0ab9c16232d27a'


In [2]:
import os
import shutil
import zipfile
from pathlib import Path
from typing import List

from tqdm import tqdm

def mount_google_drive() -> bool:
    """Mount Google Drive at /content/drive when running on Colab.

    Returns:
        True when the drive was mounted; False when ``google.colab`` is not
        importable (not on Colab) or when the mount call itself failed.
        The function never raises; the reason is printed instead.
    """
    # Only a failed import means "not on Colab". Keeping it separate from the
    # mount call prevents real mount errors from being misreported.
    try:
        from google.colab import drive  # type: ignore
    except ImportError:
        print("⚠ Not running on Colab; skipping drive mount")
        return False

    try:
        drive.mount("/content/drive")
    except Exception as exc:
        # Best-effort contract is preserved (still returns False), but the
        # message now says what actually went wrong.
        print(f"⚠ Google Drive mount failed: {exc}")
        return False

    print("✓ Google Drive mounted at /content/drive")
    return True


def extract_zip(zip_path: str, destination: str) -> Path:
    """Unpack every entry of a zip archive into ``destination``.

    Args:
        zip_path: Path of the archive to read.
        destination: Directory to extract into; created (with parents) when
            it does not exist yet.

    Returns:
        ``destination`` as a ``Path``.
    """
    out_dir = Path(destination)
    out_dir.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(zip_path, "r") as archive:
        entry_names = archive.namelist()
        print(f"📦 Extracting {zip_path} -> {destination} ({len(entry_names)} entries)")
        # Entries are extracted one at a time so tqdm can report progress.
        progress = tqdm(entry_names, desc="Extract", unit="file")
        for entry_name in progress:
            archive.extract(entry_name, destination)

    return out_dir


def extract_inner_zips(root: Path) -> List[Path]:
    """Extract each ``*.zip`` file found under ``root`` next to itself.

    An archive ``<dir>/name.zip`` is unpacked into ``<dir>/name``.

    Args:
        root: Directory to scan recursively for zip files.

    Returns:
        The extraction directories, one per archive, in sorted archive order.
    """
    # Snapshot the archives before extracting anything: extraction creates new
    # directories under ``root``, and walking a lazy ``rglob`` while the tree
    # changes makes the set of visited paths depend on traversal order.
    # ``is_file`` skips directories whose name merely ends in ".zip".
    archives = sorted(p for p in root.rglob("*.zip") if p.is_file())

    extracted_dirs: List[Path] = []
    for zip_file in archives:
        target_dir = zip_file.with_suffix("")
        target_dir.mkdir(parents=True, exist_ok=True)
        with zipfile.ZipFile(zip_file, "r") as zf:
            members = zf.namelist()
            print(f"📦 Extracting inner {zip_file} -> {target_dir} ({len(members)} entries)")
            for member in tqdm(members, desc=f"Extract {zip_file.name}", unit="file"):
                zf.extract(member, target_dir)
        extracted_dirs.append(target_dir)
    return extracted_dirs


def ensure_target_dirs(base: Path) -> None:
    """Create the feature directory layout under ``base``.

    Creates ``video/omnivore``, ``video/slowfast`` and ``audio`` (including
    missing parents); directories that already exist are left alone.
    """
    layout = (
        ("video", "omnivore"),
        ("video", "slowfast"),
        ("audio",),
    )
    for segments in layout:
        base.joinpath(*segments).mkdir(parents=True, exist_ok=True)


def decide_target(file_path: Path, target_root: Path) -> Path:
    """Choose the destination path for a feature file.

    Matching is case-insensitive and checked in this order:
      1. "omnivore" is a path component or a substring of the file name
         -> ``video/omnivore``
      2. "slowfast" is a path component or a substring of the file name
         -> ``video/slowfast``
      3. "audio" is a path component, or the file name ends with
         "_audio.npz" -> ``audio``
      4. anything else -> ``video/omnivore`` (fallback)

    Args:
        file_path: Location of the file being classified.
        target_root: Root of the destination layout.

    Returns:
        ``target_root/<bucket>/<original file name>``.
    """
    filename = file_path.name
    filename_lc = filename.lower()
    components_lc = {part.lower() for part in file_path.parts}

    video_root = target_root / "video"
    for backbone in ("omnivore", "slowfast"):
        if backbone in components_lc or backbone in filename_lc:
            return video_root / backbone / filename

    is_audio = "audio" in components_lc or filename_lc.endswith("_audio.npz")
    bucket = (target_root / "audio") if is_audio else (video_root / "omnivore")
    return bucket / filename


def move_npz_files(source_root: Path, target_root: Path) -> int:
    """Move every ``*.npz`` found under ``source_root`` into the target layout.

    The destination of each file is chosen by ``decide_target``. A file whose
    destination already exists is left where it is and not counted.

    Args:
        source_root: Directory scanned recursively for ``.npz`` files.
        target_root: Root of the destination layout (created if missing).

    Returns:
        Number of files actually moved.
    """
    ensure_target_dirs(target_root)

    relocated = 0
    for candidate in source_root.rglob("*.npz"):
        destination = decide_target(candidate, target_root)
        destination.parent.mkdir(parents=True, exist_ok=True)
        if destination.exists():
            # Never overwrite a file that is already in place.
            continue
        shutil.move(str(candidate), str(destination))
        relocated += 1
    return relocated


def verify(target_root: Path) -> None:
    """Print how many ``.npz`` files each feature directory contains.

    Reports ``video/omnivore``, ``video/slowfast`` and ``audio`` under
    ``target_root``; a missing directory counts as 0 files. Each line is
    marked with a check when the count is positive and a cross otherwise.
    """
    print("\n✅ Verification:")

    video_dir = target_root / "video"
    layout = {
        "Video Omnivore": video_dir / "omnivore",
        "Video SlowFast": video_dir / "slowfast",
        "Audio": target_root / "audio",
    }
    for label, folder in layout.items():
        if folder.exists():
            count = sum(1 for _ in folder.glob("*.npz"))
        else:
            count = 0
        status = "✓" if count else "✗"
        print(f"  {status} {label}: {count} files in {folder}")


def cleanup(temp_dir: Path) -> None:
    """Delete ``temp_dir`` and everything in it; do nothing if it is absent."""
    if not temp_dir.exists():
        return
    shutil.rmtree(temp_dir)
    print(f"🧹 Removed {temp_dir}")


def main():
    """Unpack the feature archive into the layout expected by the code.

    Steps: mount Google Drive (Colab only), extract the outer zip into a
    temporary directory, extract the zips nested inside it, move all ``.npz``
    files into ``<target>/video/{omnivore,slowfast}`` and ``<target>/audio``,
    print a per-directory file count, and remove the temporary directory.

    Raises:
        FileNotFoundError: if the feature archive is not at the expected path.
    """
    on_colab = mount_google_drive()

    # Pick source and destination together so both branches are usable.
    # Previously the destination was the Colab path even off Colab.
    if on_colab:
        outer_zip = Path("/content/drive/MyDrive/CaptainCook4D/features.zip")
        target_root = Path("/content/code/data")  # Direct extraction to /content/code/data
    else:
        outer_zip = Path("./features.zip")
        target_root = Path("./data")
    if not outer_zip.exists():
        raise FileNotFoundError(f"Zip not found at {outer_zip}")

    temp_root = Path("./features_temp")

    try:
        # Step 1: Extract outer zip
        extract_zip(str(outer_zip), str(temp_root))

        # Step 2: Extract inner zips (omnivore.zip, slowfast.zip, etc.)
        extract_inner_zips(temp_root)

        # Step 3: Move npz files into expected layout
        moved = move_npz_files(temp_root, target_root)
        print(f"\n📂 Moved {moved} .npz files into {target_root}")

        # Step 4: Verify
        verify(target_root)
    finally:
        # Step 5: Cleanup temp. Runs on failure too, so a half-extracted tree
        # is not left behind to leak into the next run.
        cleanup(temp_root)

    # Report the directory that was actually populated.
    print(f"\nDone. You can now run training/eval with data under {target_root}")


# Entry point: runs the whole feature-preparation pipeline. In a notebook cell
# __name__ is "__main__", so this executes as soon as the cell is run.
if __name__ == "__main__":
    main()


Mounted at /content/drive
✓ Google Drive mounted at /content/drive
📦 Extracting /content/drive/MyDrive/CaptainCook4D/features.zip -> features_temp (2 entries)


Extract: 100%|██████████| 2/2 [00:37<00:00, 18.84s/file]


📦 Extracting inner features_temp/omnivore.zip -> features_temp/omnivore (385 entries)


Extract omnivore.zip: 100%|██████████| 385/385 [00:11<00:00, 33.99file/s]


📦 Extracting inner features_temp/slowfast.zip -> features_temp/slowfast (385 entries)


Extract slowfast.zip: 100%|██████████| 385/385 [00:06<00:00, 59.58file/s]



📂 Moved 768 .npz files into /content/code/data

✅ Verification:
  ✓ Video Omnivore: 384 files in /content/code/data/video/omnivore
  ✓ Video SlowFast: 384 files in /content/code/data/video/slowfast
  ✗ Audio: 0 files in /content/code/data/audio
🧹 Removed features_temp

Done. You can now run training/eval with data under ./data


In [3]:
# !mv /content/data /content/code/ # This cell is no longer needed

In [4]:
!mkdir /content/code/checkpoints
!unzip /content/drive/MyDrive/CaptainCook4D/error_recognition_best.zip -d /content/code/checkpoints

Archive:  /content/drive/MyDrive/CaptainCook4D/error_recognition_best.zip
  inflating: /content/code/checkpoints/error_recognition_best/MLP/3dresnet/error_recognition_MLP_3dresnet_recordings_epoch_45.pt  
  inflating: /content/code/checkpoints/error_recognition_best/MLP/3dresnet/error_recognition_MLP_3dresnet_person_epoch_39.pt  
  inflating: /content/code/checkpoints/error_recognition_best/MLP/3dresnet/error_recognition_MLP_3dresnet_step_epoch_41.pt  
  inflating: /content/code/checkpoints/error_recognition_best/MLP/3dresnet/error_recognition_MLP_3dresnet_environment_epoch_11.pt  
  inflating: /content/code/checkpoints/error_recognition_best/MLP/imagebind/error_recognition_MLP_imagebind_audio_step_epoch_28.pt  
  inflating: /content/code/checkpoints/error_recognition_best/MLP/imagebind/error_recognition_MLP_imagebind_audio_recordings_epoch_2.pt  
  inflating: /content/code/checkpoints/error_recognition_best/MLP/imagebind/error_recognition_MLP_imagebind_audio_environment_epoch_50.pt  


In [5]:
!pip install torcheval

Collecting torcheval
  Downloading torcheval-0.0.7-py3-none-any.whl.metadata (8.6 kB)
Downloading torcheval-0.0.7-py3-none-any.whl (179 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/179.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m179.2/179.2 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torcheval
Successfully installed torcheval-0.0.7


In [6]:
%%bash
# Evaluate the MLP variant on omnivore features for the "step" split with a
# decision threshold of 0.6, using the step-split checkpoint (epoch 43).
# `cd code` is relative, so the cell expects to start in the directory that
# contains the clone.

cd code
python -m core.evaluate --variant MLP --backbone omnivore \
  --ckpt /content/code/checkpoints/error_recognition_best/MLP/omnivore/error_recognition_MLP_omnivore_step_epoch_43.pt \
  --split step --threshold 0.6

Loaded annotations...... 
Loading recording ids from recordings_combined_splits.json
----------------------------------------------------------------
test Sub Step Level Metrics: {'precision': 0.4096162736939436, 'recall': 0.2989708115404083, 'f1': 0.3456549302643129, 'accuracy': 0.6831416629277163, 'auc': np.float64(0.6541560352028618), 'pr_auc': tensor(0.3187)}
test Step Level Metrics: {'precision': 0.6607142857142857, 'recall': 0.14859437751004015, 'f1': 0.24262295081967214, 'accuracy': 0.7105263157894737, 'auc': np.float64(0.7573902166041213), 'pr_auc': tensor(0.3638)}
test Step Level Metrics per Category: {0: {'precision': 0.8085106382978723, 'recall': 0.05499276410998553, 'f1': 0.10298102981029811, 'accuracy': 0.17042606516290726, 'auc': np.float64(0.3165803319041887), 'pr_auc': tensor(0.8628)}, 2: {'precision': 0.14893617021276595, 'recall': 0.14285714285714285, 'f1': 0.14583333333333334, 'accuracy': 0.8972431077694235, 'auc': np.float64(0.6917522683305632), 'pr_auc': tensor(0.0

  0%|          | 0/798 [00:00<?, ?it/s]test Progress: 94/798:   0%|          | 0/798 [00:00<?, ?it/s]test Progress: 94/798:   0%|          | 1/798 [00:00<05:06,  2.60it/s]test Progress: 171/798:   0%|          | 1/798 [00:00<05:06,  2.60it/s]test Progress: 239/798:   0%|          | 1/798 [00:00<05:06,  2.60it/s]test Progress: 272/798:   0%|          | 1/798 [00:00<05:06,  2.60it/s]test Progress: 317/798:   0%|          | 1/798 [00:00<05:06,  2.60it/s]test Progress: 381/798:   0%|          | 1/798 [00:00<05:06,  2.60it/s]test Progress: 408/798:   0%|          | 1/798 [00:00<05:06,  2.60it/s]test Progress: 437/798:   0%|          | 1/798 [00:00<05:06,  2.60it/s]test Progress: 522/798:   0%|          | 1/798 [00:00<05:06,  2.60it/s]test Progress: 534/798:   0%|          | 1/798 [00:00<05:06,  2.60it/s]test Progress: 534/798:   1%|▏         | 10/798 [00:00<00:31, 25.21it/s]test Progress: 555/798:   1%|▏         | 10/798 [00:00<00:31, 25.21it/s]test Progress: 584/798:   1%|▏ 

In [7]:
%%bash
# Evaluate the MLP variant on omnivore features for the "recordings" split
# with a decision threshold of 0.4.
# NOTE(review): --split is "recordings" but --ckpt points at the *step*-split
# checkpoint (..._omnivore_step_epoch_43.pt), whereas the Transformer
# recordings run uses a ..._recordings_... checkpoint. This looks like a
# copy-paste slip; confirm and switch to the MLP recordings checkpoint if one
# exists, since a mismatched checkpoint would make these metrics misleading.

cd code
python -m core.evaluate \
  --variant MLP \
  --backbone omnivore \
  --ckpt /content/code/checkpoints/error_recognition_best/MLP/omnivore/error_recognition_MLP_omnivore_step_epoch_43.pt \
  --split recordings \
  --threshold 0.4

Loaded annotations...... 
Loading recording ids from recordings_combined_splits.json
----------------------------------------------------------------
test Sub Step Level Metrics: {'precision': 0.5234383954154728, 'recall': 0.36225906242563655, 'f1': 0.4281830114382149, 'accuracy': 0.6818466353677621, 'auc': np.float64(0.7017401173803638), 'pr_auc': tensor(0.3993)}
test Step Level Metrics: {'precision': 0.6183745583038869, 'recall': 0.7261410788381742, 'f1': 0.6679389312977099, 'accuracy': 0.7406855439642325, 'auc': np.float64(0.8147061661680981), 'pr_auc': tensor(0.5474)}
test Step Level Metrics per Category: {0: {'precision': 0.7546468401486989, 'recall': 0.3548951048951049, 'f1': 0.4827586206896552, 'accuracy': 0.35171385991058124, 'auc': np.float64(0.3052553507098961), 'pr_auc': tensor(0.8177)}, 2: {'precision': 0.12267657992565056, 'recall': 0.673469387755102, 'f1': 0.20754716981132076, 'accuracy': 0.624441132637854, 'auc': np.float64(0.7218649517684887), 'pr_auc': tensor(0.1065)},

  0%|          | 0/671 [00:00<?, ?it/s]test Progress: 102/671:   0%|          | 0/671 [00:00<?, ?it/s]test Progress: 102/671:   0%|          | 1/671 [00:00<01:32,  7.24it/s]test Progress: 155/671:   0%|          | 1/671 [00:00<01:32,  7.24it/s]test Progress: 218/671:   0%|          | 1/671 [00:00<01:32,  7.24it/s]test Progress: 277/671:   0%|          | 1/671 [00:00<01:32,  7.24it/s]test Progress: 347/671:   0%|          | 1/671 [00:00<01:32,  7.24it/s]test Progress: 381/671:   0%|          | 1/671 [00:00<01:32,  7.24it/s]test Progress: 434/671:   0%|          | 1/671 [00:00<01:32,  7.24it/s]test Progress: 510/671:   0%|          | 1/671 [00:00<01:32,  7.24it/s]test Progress: 580/671:   0%|          | 1/671 [00:00<01:32,  7.24it/s]test Progress: 695/671:   0%|          | 1/671 [00:00<01:32,  7.24it/s]test Progress: 702/671:   0%|          | 1/671 [00:00<01:32,  7.24it/s]test Progress: 709/671:   0%|          | 1/671 [00:00<01:32,  7.24it/s]test Progress: 741/671:   0%|  

In [8]:
%%bash

cd code
python -m core.evaluate --variant Transformer --backbone omnivore \
  --ckpt /content/code/checkpoints/error_recognition_best/Transformer/omnivore/error_recognition_Transformer_omnivore_step_epoch_9.pt \
  --split step --threshold 0.6

Loaded annotations...... 
Loading recording ids from recordings_combined_splits.json
----------------------------------------------------------------
test Sub Step Level Metrics: {'precision': 0.4445452483556362, 'recall': 0.6613801248523705, 'f1': 0.5317056629365887, 'accuracy': 0.6738848088412402, 'auc': np.float64(0.7461755308526944), 'pr_auc': tensor(0.3888)}
test Step Level Metrics: {'precision': 0.5155709342560554, 'recall': 0.5983935742971888, 'f1': 0.5539033457249071, 'accuracy': 0.6992481203007519, 'auc': np.float64(0.7561832027563805), 'pr_auc': tensor(0.4338)}
test Step Level Metrics per Category: {0: {'precision': 0.778169014084507, 'recall': 0.31982633863965265, 'f1': 0.4533333333333333, 'accuracy': 0.33208020050125314, 'auc': np.float64(0.3025278277452426), 'pr_auc': tensor(0.8379)}, 2: {'precision': 0.09859154929577464, 'recall': 0.5714285714285714, 'f1': 0.16816816816816818, 'accuracy': 0.6528822055137845, 'auc': np.float64(0.6967385084875072), 'pr_auc': tensor(0.0827)}

  0%|          | 0/798 [00:00<?, ?it/s]test Progress: 94/798:   0%|          | 0/798 [00:00<?, ?it/s]test Progress: 94/798:   0%|          | 1/798 [00:00<02:18,  5.75it/s]test Progress: 171/798:   0%|          | 1/798 [00:00<02:18,  5.75it/s]test Progress: 239/798:   0%|          | 1/798 [00:00<02:18,  5.75it/s]test Progress: 272/798:   0%|          | 1/798 [00:00<02:18,  5.75it/s]test Progress: 317/798:   0%|          | 1/798 [00:00<02:18,  5.75it/s]test Progress: 381/798:   0%|          | 1/798 [00:00<02:18,  5.75it/s]test Progress: 408/798:   0%|          | 1/798 [00:00<02:18,  5.75it/s]test Progress: 437/798:   0%|          | 1/798 [00:00<02:18,  5.75it/s]test Progress: 522/798:   0%|          | 1/798 [00:00<02:18,  5.75it/s]test Progress: 534/798:   0%|          | 1/798 [00:00<02:18,  5.75it/s]test Progress: 555/798:   0%|          | 1/798 [00:00<02:18,  5.75it/s]test Progress: 584/798:   0%|          | 1/798 [00:00<02:18,  5.75it/s]test Progress: 615/798:   0%|    

In [9]:
%%bash
# Evaluate the Transformer variant on omnivore features for the "recordings"
# split with a decision threshold of 0.4, using the recordings-split
# checkpoint (epoch 31).

cd code
python -m core.evaluate \
  --variant Transformer \
  --backbone omnivore \
  --ckpt /content/code/checkpoints/error_recognition_best/Transformer/omnivore/error_recognition_Transformer_omnivore_recordings_epoch_31.pt \
  --split recordings \
  --threshold 0.4

Loaded annotations...... 
Loading recording ids from recordings_combined_splits.json
----------------------------------------------------------------
test Sub Step Level Metrics: {'precision': 0.4491327720864185, 'recall': 0.35123344173871657, 'f1': 0.39419567346212053, 'accuracy': 0.645018257694314, 'auc': np.float64(0.6254427005929003), 'pr_auc': tensor(0.3711)}
test Step Level Metrics: {'precision': 0.45408163265306123, 'recall': 0.36929460580912865, 'f1': 0.4073226544622426, 'accuracy': 0.6140089418777943, 'auc': np.float64(0.6226768310334846), 'pr_auc': tensor(0.3942)}
test Step Level Metrics per Category: {0: {'precision': 0.7908163265306123, 'recall': 0.270979020979021, 'f1': 0.4036458333333333, 'accuracy': 0.3174366616989568, 'auc': np.float64(0.39113159567705025), 'pr_auc': tensor(0.8358)}, 2: {'precision': 0.08673469387755102, 'recall': 0.3469387755102041, 'f1': 0.13877551020408163, 'accuracy': 0.6855439642324889, 'auc': np.float64(0.5750377321346545), 'pr_auc': tensor(0.0778

  0%|          | 0/671 [00:00<?, ?it/s]test Progress: 102/671:   0%|          | 0/671 [00:00<?, ?it/s]test Progress: 102/671:   0%|          | 1/671 [00:00<01:30,  7.43it/s]test Progress: 155/671:   0%|          | 1/671 [00:00<01:30,  7.43it/s]test Progress: 218/671:   0%|          | 1/671 [00:00<01:30,  7.43it/s]test Progress: 277/671:   0%|          | 1/671 [00:00<01:30,  7.43it/s]test Progress: 347/671:   0%|          | 1/671 [00:00<01:30,  7.43it/s]test Progress: 381/671:   0%|          | 1/671 [00:00<01:30,  7.43it/s]test Progress: 434/671:   0%|          | 1/671 [00:00<01:30,  7.43it/s]test Progress: 510/671:   0%|          | 1/671 [00:00<01:30,  7.43it/s]test Progress: 580/671:   0%|          | 1/671 [00:00<01:30,  7.43it/s]test Progress: 695/671:   0%|          | 1/671 [00:00<01:30,  7.43it/s]test Progress: 702/671:   0%|          | 1/671 [00:00<01:30,  7.43it/s]test Progress: 709/671:   0%|          | 1/671 [00:00<01:30,  7.43it/s]test Progress: 741/671:   0%|  

In [10]:
"""
NOTA:
il parametro --threshold consigliato nei repository ufficiali è ERRATO.
Se si vogliono ottenere ESATTAMENTE i risultati della tabella nel paper,
usare --threshold 0.5 e non 0.4 come specificato nei README ufficiali.
"""

'\nNOTA:\nil parametro --threshold consigliato nei repository ufficiali è ERRATO.\nSe si vogliono ottenere ESATTAMENTE i risultati della tabella nel paper,\nusare --threshold 0.5 e non 0.4 come specificato nei README ufficiali.\n'

In [11]:
!pip install loguru

Collecting loguru
  Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)
Downloading loguru-0.7.3-py3-none-any.whl (61 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/61.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.6/61.6 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: loguru
Successfully installed loguru-0.7.3


In [12]:
# Authenticate with Weights & Biases. The command is interactive: it prompts
# for an API key and stores it in the netrc file (see the output below).
!wandb login

[34m[1mwandb[0m: Logging into https://api.wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: Find your API key here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Paste an API key from your profile and hit enter: 
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mlucaprato02[0m ([33mlucaprato02-politecnico-di-torino[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [13]:
%%bash
# Create a symbolic link at /content/data that points to /content/code/data.
# -f/-n replace an existing link instead of failing: a plain `ln -s` exits
# non-zero when the link already exists, which makes %%bash raise and aborts
# "Run all" on a re-run.
ln -sfn /content/code/data /content/data

In [16]:
%%bash
# Train the "lstm" variant on omnivore features with the "recordings" split:
# 50 epochs, batch size 32, learning rate 1e-3. ./checkpoints is passed as the
# checkpoint directory and is relative to code/ because of the `cd` below.
cd code
python train_er.py --variant lstm --backbone omnivore --split recordings --num_epochs 50 --batch_size 32 --lr 1e-3 --ckpt_directory ./checkpoints

-------------------------------------------------------------
Training step model and testing on step level
Train args: {'num_workers': 8, 'pin_memory': False, 'shuffle': True, 'batch_size': 32}
Test args: {'num_workers': 8, 'pin_memory': False, 'shuffle': False, 'batch_size': 1}
{'batch_size': 32, 'test_batch_size': 1, 'num_epochs': 50, 'lr': 0.001, 'weight_decay': 0.001, 'ckpt': None, 'seed': 42, 'backbone': 'omnivore', 'ckpt_directory': './checkpoints', 'split': 'recordings', 'variant': 'lstm', 'model_name': None, 'task_name': 'error_recognition', 'error_category': None, 'modality': ['video']}
-------------------------------------------------------------
Loaded annotations...... 
Loading recording ids from recordings_combined_splits.json
Loaded annotations...... 
Loading recording ids from recordings_combined_splits.json
Loaded annotations...... 
Loading recording ids from recordings_combined_splits.json
----------------------------------------------------------------
val Sub Step L

wandb: Currently logged in as: lucaprato02 (lucaprato02-politecnico-di-torino) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin
wandb: Tracking run with wandb version 0.23.1
wandb: Run data is saved locally in /content/code/wandb/run-20260103_144646-goatjvb9
wandb: Run `wandb offline` to turn off syncing.
wandb: Syncing run stellar-thunder-11
wandb: ⭐️ View project at https://wandb.ai/lucaprato02-politecnico-di-torino/error_recognition_recordings_omnivore_lstm_video
wandb: 🚀 View run at https://wandb.ai/lucaprato02-politecnico-di-torino/error_recognition_recordings_omnivore_lstm_video/runs/goatjvb9
  0%|          | 0/125 [00:00<?, ?it/s]Train Epoch: 1, Progress: 0/125, Loss: 0.821000:   0%|          | 0/125 [00:02<?, ?it/s]Train Epoch: 1, Progress: 0/125, Loss: 0.821000:   1%|          | 1/125 [00:02<05:46,  2.79s/it]Train Epoch: 1, Progress: 1/125, Loss: 0.998062:   1%|          | 1/125 [00:02<05:46,  2.79s/it]Train Epoch: 1, Progress: 1/125, Loss: 0.998062: 