<a href="https://colab.research.google.com/github/lahirumanulanka/ann-visual-emotion/blob/created_new_dataset/notebooks/emo_CNN_Baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [12]:
# # ===========================
# # Cell 0 — (Colab) installs
# # ===========================
# # Assumes torch/torchvision already OK in your runtime.
# # If not, install a matched triplet first (we discussed earlier).
# !pip -q install timm==1.0.9 facenet-pytorch opencv-python-headless

In [13]:
# # Smart installer: pick one build: 'cu121', 'cu126', or 'cpu'
# # Using standard PyTorch installation command for Colab to ensure compatibility
# !pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

# # Verify installation
# import torch, torchvision, torchaudio
# print("torch:", torch.__version__)
# print("torchvision:", torchvision.__version__)
# print("torchaudio:", torchaudio.__version__)
# print("CUDA available:", torch.cuda.is_available())

In [14]:
# =========================
# Cell 1 — Imports & setup
# =========================
import os, json, math, time, random
from pathlib import Path
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torch.optim import AdamW
from torch.optim.lr_scheduler import LambdaLR

import torchvision
from torchvision import transforms
from torchvision.models import resnet18, ResNet18_Weights

from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score

import matplotlib.pyplot as plt
from IPython.display import clear_output
plt.rcParams["figure.dpi"] = 120

# reproducibility
def set_seed(seed=42):
    random.seed(seed); np.random.seed(seed); torch.manual_seed(seed); torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
set_seed(42)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

Device: cuda


In [15]:

# Change this if your folder lives elsewhere (e.g., in a Shared Drive)
PROJECT_ROOTS = [
    "/content/drive/MyDrive/ann-visual-emotion",                          # common
    "/content/drive/MyDrive/Colab Notebooks/ann-visual-emotion",          # sometimes used
    "/content/drive/Shared drives/YourTeamDrive/ann-visual-emotion",      # shared drive (edit name)
]

import os
for p in PROJECT_ROOTS:
    if os.path.isdir(p):
        PROJECT_ROOT = p
        break
else:
    raise FileNotFoundError(
        "Couldn't find 'ann-visual-emotion' in the usual locations.\n"
        "Please create it or update PROJECT_ROOTS with the correct path."
    )

print("Using PROJECT_ROOT:", PROJECT_ROOT)

Using PROJECT_ROOT: /content/drive/MyDrive/ann-visual-emotion


In [16]:
# ========================
# Cell 2 — Configuration
# ========================
import os
from pathlib import Path

# ---- Base project dir on Google Drive (set by Cell 1.5) ----
assert 'PROJECT_ROOT' in globals(), "Run the Drive mount cell first to set PROJECT_ROOT."

# ---- Common subfolders (edit if your structure differs) ----
RAW_DIR        = Path(PROJECT_ROOT) / "data" / "raw" / "EmoSet"
SPLIT_DIR      = Path(PROJECT_ROOT) / "data" / "processed" / "EmoSet_splits"

# ---- Your ORIGINAL dataset (used if RUN_FACE_PREP=True) ----
IMAGES_ROOT_RAW = str(RAW_DIR)                                 # folder containing original images
TRAIN_CSV_RAW   = str(SPLIT_DIR / "train.csv")
VAL_CSV_RAW     = str(SPLIT_DIR / "val.csv")
TEST_CSV_RAW    = str(SPLIT_DIR / "test.csv")                  # set to None if you don't have it

LABEL_MAP_JSON  = str(SPLIT_DIR / "label_map.json")            # optional

# ---- Face-cropped dataset output (created if RUN_FACE_PREP=True) ----
FACES_ROOT      = str(Path(PROJECT_ROOT) / "data" / "processed" / "EmoSet_faces224")
FACES_SPLITS    = str(Path(PROJECT_ROOT) / "data" / "processed" / "EmoSet_faces_splits")

# Create output dirs if missing
Path(FACES_ROOT).mkdir(parents=True, exist_ok=True)
Path(FACES_SPLITS).mkdir(parents=True, exist_ok=True)

# ---- Whether to run the face detection/cropping step now ----
RUN_FACE_PREP   = True   # set False to train directly on RAW images

# ---- Training will read from these (set automatically if RUN_FACE_PREP=True) ----
IMAGES_ROOT     = FACES_ROOT if RUN_FACE_PREP else IMAGES_ROOT_RAW
TRAIN_CSV       = None  # set after face prep or set to raw if skipping
VAL_CSV         = None
TEST_CSV        = None

# ---- Data & model ----
IMG_MODE        = "rgb"      # after face-cropping we save RGB crops; if training on raw gray 48x48, set 'gray'
IMG_SIZE        = 224        # ResNet default
BATCH           = 64
NUM_WORKERS     = 4

# ---- Optimization ----
EPOCHS          = 30
WARMUP_EPOCHS   = 2
BASE_LR         = 3e-4
WEIGHT_DECAY    = 1e-4
LABEL_SMOOTHING = 0.05

# ---- Imbalance / loss ----
BALANCE_MODE    = "sampler"  # 'sampler' or 'none'
LOSS_MODE       = "ce"       # 'ce' or 'focal'
FOCAL_GAMMA     = 2.0

# ---- Augmentations ----
USE_MIXUP       = True
MIXUP_ALPHA     = 0.2
USE_CUTMIX      = False
CUTMIX_ALPHA    = 0.0

# ---- Early stopping ----
PATIENCE        = 5

# ---- Outputs (also on Drive so they persist) ----
OUT_DIR         = str(Path(PROJECT_ROOT) / "outputs" / "emo_training")
Path(OUT_DIR).mkdir(parents=True, exist_ok=True)

# ---- Sanity prints ----
print("IMAGES_ROOT_RAW:", IMAGES_ROOT_RAW)
print("TRAIN/VAL/TEST CSV:", TRAIN_CSV_RAW, VAL_CSV_RAW, TEST_CSV_RAW)
print("FACES_ROOT:", FACES_ROOT)
print("FACES_SPLITS:", FACES_SPLITS)
print("OUT_DIR:", OUT_DIR)

IMAGES_ROOT_RAW: /content/drive/MyDrive/ann-visual-emotion/data/raw/EmoSet
TRAIN/VAL/TEST CSV: /content/drive/MyDrive/ann-visual-emotion/data/processed/EmoSet_splits/train.csv /content/drive/MyDrive/ann-visual-emotion/data/processed/EmoSet_splits/val.csv /content/drive/MyDrive/ann-visual-emotion/data/processed/EmoSet_splits/test.csv
FACES_ROOT: /content/drive/MyDrive/ann-visual-emotion/data/processed/EmoSet_faces224
FACES_SPLITS: /content/drive/MyDrive/ann-visual-emotion/data/processed/EmoSet_faces_splits
OUT_DIR: /content/drive/MyDrive/ann-visual-emotion/outputs/emo_training


In [17]:
# =========================================
# Cell 3 — Label map & small helper funcs
# =========================================
# Definitions moved to Cell 5 to resolve NameError

In [None]:
# ======================================================
# Cell 4 — OPTIONAL: Face detection & crop with MTCNN
# (Runs once. Produces new CSVs that point to face crops)
# Skips face-cropping for files starting with 'syn_'
# ======================================================
if RUN_FACE_PREP:
    from facenet_pytorch import MTCNN
    from PIL import Image

    FACE_SIZE     = 224
    MARGIN_FRAC   = 0.20     # padding around face bbox
    MIN_FACE_SIZE = 24
    CONF_THRESH   = 0.90

    os.makedirs(FACES_ROOT, exist_ok=True)
    os.makedirs(FACES_SPLITS, exist_ok=True)

    mtcnn = MTCNN(
        image_size=FACE_SIZE, margin=0, min_face_size=MIN_FACE_SIZE,
        thresholds=[0.6, 0.7, 0.7], post_process=False,
        device=device, keep_all=True
    )

    def add_margin(box, w, h, frac=0.2):
        x1, y1, x2, y2 = box
        bw, bh = x2 - x1, y2 - y1
        m = frac * max(bw, bh)
        nx1 = max(0, int(x1 - m))
        ny1 = max(0, int(y1 - m))
        nx2 = min(w, int(x2 + m))
        ny2 = min(h, int(y2 + m))
        return [nx1, ny1, nx2, ny2]

    def largest_face_box(boxes, probs):
        if boxes is None or len(boxes) == 0: return None, None
        areas = (boxes[:,2]-boxes[:,0]) * (boxes[:,3]-boxes[:,1])
        score = areas * probs
        idx = int(np.argmax(score))
        return boxes[idx], float(probs[idx])

    def save_face_crop(pil_img, box, out_path, size=224):
        w, h = pil_img.size
        x1, y1, x2, y2 = [int(v) for v in box]
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(w, x2), min(h, y2)
        crop = pil_img.crop((x1, y1, x2, y2))
        # square pad to avoid aspect distortion
        cw, ch = crop.size
        side = max(cw, ch)
        canvas = Image.new("RGB", (side, side), (0,0,0))
        canvas.paste(crop, ((side - cw)//2, (side - ch)//2))
        canvas = canvas.resize((size, size), Image.BILINEAR)
        Path(out_path).parent.mkdir(parents=True, exist_ok=True)
        canvas.save(out_path, format="JPEG", quality=95, subsampling=0)
        return out_path

    def process_split(csv_path, images_root, faces_root, out_split_dir,
                      label_col="label", path_col="image",
                      face_size=224, margin_frac=0.2, conf_thresh=0.90,
                      fallback_to_full=True, split_name="train"):
        df = pd.read_csv(csv_path)
        out_rows = []
        misses = 0

        for i, row in df.iterrows():
            # path column handling
            rel_path = row[path_col] if path_col in df.columns else row.get("image_path", None)
            if rel_path is None:
                raise ValueError("CSV must have column 'image' or 'image_path'")

            src = resolve_path(rel_path, images_root)
            label = str(row[label_col]) if label_col in df.columns else str(row.get("label_idx"))

            base = Path(rel_path).name
            # enforce .jpg output
            out_path = str(Path(faces_root) / split_name / label / base)
            out_path = out_path.rsplit(".", 1)[0] + ".jpg"

            # ---- SKIP CROPPING for syn_ files: just resize to FACE_SIZE ----
            if base.startswith("syn_"):
                try:
                    pil = Image.open(src).convert("RGB")
                    pil = pil.resize((face_size, face_size), Image.BILINEAR)
                    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
                    pil.save(out_path, format="JPEG", quality=95)
                    out_rows.append({"image": out_path, "label": label})
                except Exception as e:
                    print(f"[{split_name}] bad syn_ image {src}: {e}")
                    misses += 1
                continue

            # ---- Normal face-cropping for non-syn_ files ----
            try:
                pil = Image.open(src).convert("RGB")
            except Exception as e:
                print(f"[{split_name}] bad image {src}: {e}")
                if fallback_to_full:
                    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
                    # save a black placeholder of correct size to keep row
                    Image.new("RGB", (face_size, face_size), (0,0,0)).save(out_path, format="JPEG", quality=95)
                    out_rows.append({"image": out_path, "label": label})
                else:
                    misses += 1
                continue

            # detect faces
            boxes, probs = mtcnn.detect(pil)
            box, score = largest_face_box(boxes, probs) if boxes is not None else (None, None)

            if box is not None and score is not None and score >= conf_thresh:
                w, h = pil.size
                mbox = add_margin(box, w, h, frac=margin_frac)
                save_face_crop(pil, mbox, out_path, size=face_size)
                out_rows.append({"image": out_path, "label": label})
            else:
                if fallback_to_full:
                    pil = pil.resize((face_size, face_size), Image.BILINEAR)
                    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
                    pil.save(out_path, format="JPEG", quality=95)
                    out_rows.append({"image": out_path, "label": label})
                else:
                    misses += 1

            if (i+1) % 1000 == 0:
                print(f"[{split_name}] processed {i+1}/{len(df)}")

        out_df = pd.DataFrame(out_rows)
        out_csv = str(Path(out_split_dir) / f"{split_name}.csv")
        out_df.to_csv(out_csv, index=False)
        print(f"[{split_name}] wrote {len(out_df)} rows → {out_csv}; misses = {misses}")
        return out_csv

    # ---- Run face extraction for each split ----
    TRAIN_CSV = process_split(TRAIN_CSV_RAW, IMAGES_ROOT_RAW, FACES_ROOT, FACES_SPLITS,
                              split_name="train", face_size=FACE_SIZE,
                              margin_frac=MARGIN_FRAC, conf_thresh=CONF_THRESH)

    VAL_CSV   = process_split(VAL_CSV_RAW,   IMAGES_ROOT_RAW, FACES_ROOT, FACES_SPLITS,
                              split_name="val", face_size=FACE_SIZE,
                              margin_frac=MARGIN_FRAC, conf_thresh=CONF_THRESH)

    TEST_CSV  = None
    if TEST_CSV_RAW and Path(TEST_CSV_RAW).exists():
        TEST_CSV = process_split(TEST_CSV_RAW, IMAGES_ROOT_RAW, FACES_ROOT, FACES_SPLITS,
                                 split_name="test", face_size=FACE_SIZE,
                                 margin_frac=MARGIN_FRAC, conf_thresh=CONF_THRESH)

    # After cropping we train on the new face dataset
    IMAGES_ROOT = FACES_ROOT
    IMG_MODE    = "rgb"
else:
    # training straight on raw images
    TRAIN_CSV, VAL_CSV, TEST_CSV = TRAIN_CSV_RAW, VAL_CSV_RAW, TEST_CSV_RAW

In [4]:
# ==================================
# Cell 5 — Dataset & DataLoaders
# (fixes transform order: ToTensor BEFORE Normalize/RandomErasing)
# ==================================
TRAIN_CSV = process_split(TRAIN_CSV_RAW, str(RAW_DIR), FACES_ROOT, FACES_SPLITS, "train")
VAL_CSV   = process_split(VAL_CSV_RAW,   str(RAW_DIR), FACES_ROOT, FACES_SPLITS, "val")
TEST_CSV  = process_split(TEST_CSV_RAW,  str(RAW_DIR), FACES_ROOT, FACES_SPLITS, "test") if (TEST_CSV_RAW and Path(TEST_CSV_RAW).exists()) else None

from torchvision import transforms
from PIL import Image
import pandas as pd
import numpy as np
from pathlib import Path
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

str2idx, idx2str = load_label_map(TRAIN_CSV, LABEL_MAP_JSON if not RUN_FACE_PREP else None)
num_classes = len(idx2str)
print("Classes:", idx2str)

class EmotionCSVDataset(Dataset):
    def __init__(self, csv_path, images_root, str2idx, img_mode="rgb", img_size=224, train=True):
        self.df = pd.read_csv(csv_path)
        self.images_root = images_root
        self.str2idx = str2idx
        self.img_mode = img_mode
        self.train = train
        self.img_size = img_size

        # path column
        self.path_col = "image" if "image" in self.df.columns else ("image_path" if "image_path" in self.df.columns else None)
        if self.path_col is None:
            raise ValueError("CSV must contain 'image' or 'image_path'")

        # label to idx
        if "label" not in self.df.columns:
            raise ValueError("CSV must contain 'label' column")
        if self.df["label"].dtype == object:
            self.df["label_idx"] = self.df["label"].map(self.str2idx).astype(int)
        else:
            self.df["label_idx"] = self.df["label"].astype(int)

        # --- IMPORTANT: ToTensor BEFORE Normalize/RandomErasing ---
        normalize = transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])

        if train:
            self.tf = transforms.Compose([
                transforms.Resize((img_size, img_size)),
                (transforms.Grayscale(num_output_channels=3) if img_mode=="gray" else transforms.Lambda(lambda x: x)),
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.RandomApply([transforms.ColorJitter(0.2,0.2,0.2,0.1)], p=0.5),
                transforms.ToTensor(),                # <-- convert to tensor first
                transforms.RandomErasing(             # <-- now OK (expects tensor)
                    p=0.3, scale=(0.02, 0.12), ratio=(0.3, 3.3)
                ),
                normalize,                            # <-- Normalize expects tensor
            ])
        else:
            self.tf = transforms.Compose([
                transforms.Resize((img_size, img_size)),
                (transforms.Grayscale(num_output_channels=3) if img_mode=="gray" else transforms.Lambda(lambda x: x)),
                transforms.ToTensor(),
                normalize,
            ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, i):
        row = self.df.iloc[i]
        img_path = resolve_path(row[self.path_col], self.images_root)
        img = Image.open(img_path).convert("RGB")
        y = int(row["label_idx"])
        return self.tf(img), y

    @property
    def labels(self):
        return self.df["label_idx"].to_numpy()

train_ds = EmotionCSVDataset(TRAIN_CSV, IMAGES_ROOT, str2idx, IMG_MODE, IMG_SIZE, train=True)
val_ds   = EmotionCSVDataset(VAL_CSV,   IMAGES_ROOT, str2idx, IMG_MODE, IMG_SIZE, train=False)
test_ds  = EmotionCSVDataset(TEST_CSV,  IMAGES_ROOT, str2idx, IMG_MODE, IMG_SIZE, train=False) if (TEST_CSV and Path(TEST_CSV).exists()) else None

counts = np.bincount(train_ds.labels, minlength=num_classes)
print("Train counts per class:", counts)

if BALANCE_MODE == "sampler":
    inv = 1.0 / np.clip(counts, 1, None)
    sample_weights = inv[train_ds.labels]
    sampler = WeightedRandomSampler(sample_weights, num_samples=len(sample_weights), replacement=True)
    train_loader = DataLoader(train_ds, batch_size=BATCH, sampler=sampler, num_workers=NUM_WORKERS, pin_memory=True)
else:
    train_loader = DataLoader(train_ds, batch_size=BATCH, shuffle=True, num_workers=NUM_WORKERS, pin_memory=True)

val_loader  = DataLoader(val_ds,   batch_size=BATCH, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)
test_loader = DataLoader(test_ds,  batch_size=BATCH, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True) if test_ds else None

NameError: name 'process_split' is not defined

In [None]:
# =========================
# Cell 6 — Model & losses
# =========================
weights = ResNet18_Weights.IMAGENET1K_V1
model = resnet18(weights=weights)
model.fc = nn.Sequential(
    nn.Dropout(0.2),
    nn.Linear(model.fc.in_features, num_classes)
)
model.to(device)

# Class weights if not using sampler
class_weights = None
if LOSS_MODE == "ce" and BALANCE_MODE != "sampler":
    inv = 1.0 / np.clip(counts, 1, None)
    class_weights = torch.tensor(inv / inv.sum() * num_classes, dtype=torch.float32, device=device)

class FocalLoss(nn.Module):
    def __init__(self, gamma=2.0, weight=None, reduction='mean'):
        super().__init__()
        self.gamma = gamma
        self.weight = weight
        self.reduction = reduction
        self.ce = nn.CrossEntropyLoss(weight=weight, reduction='none')
    def forward(self, logits, targets):
        ce = self.ce(logits, targets)
        pt = torch.exp(-ce)
        loss = ((1 - pt) ** self.gamma) * ce
        return loss.mean() if self.reduction == 'mean' else loss.sum()

criterion = FocalLoss(gamma=FOCAL_GAMMA, weight=class_weights) if LOSS_MODE=="focal" \
            else nn.CrossEntropyLoss(weight=class_weights, label_smoothing=LABEL_SMOOTHING)

optimizer = AdamW(model.parameters(), lr=BASE_LR, weight_decay=WEIGHT_DECAY)

def lr_lambda(epoch):
    if epoch < WARMUP_EPOCHS:
        return float(epoch + 1) / float(max(1, WARMUP_EPOCHS))
    progress = (epoch - WARMUP_EPOCHS) / float(max(1, EPOCHS - WARMUP_EPOCHS))
    return 0.5 * (1.0 + math.cos(math.pi * progress))
scheduler = LambdaLR(optimizer, lr_lambda=lr_lambda)

In [None]:
# ==================================
# Cell 7 — MixUp / CutMix helpers
# ==================================
def rand_bbox(W, H, lam):
    cut_rat = math.sqrt(1.0 - lam)
    cw, ch = int(W * cut_rat), int(H * cut_rat)
    cx, cy = np.random.randint(W), np.random.randint(H)
    x1 = np.clip(cx - cw // 2, 0, W); x2 = np.clip(cx + cw // 2, 0, W)
    y1 = np.clip(cy - ch // 2, 0, H); y2 = np.clip(cy + ch // 2, 0, H)
    return x1, y1, x2, y2

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

In [None]:
# ==================================
# Cell 8 — Train / Evaluate loops
# ==================================
def evaluate(model, loader):
    model.eval()
    ys, ps = [], []
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            logits = model(x)
            pred = torch.argmax(logits, dim=1)
            ys.append(y.cpu().numpy()); ps.append(pred.cpu().numpy())
    y_true = np.concatenate(ys); y_pred = np.concatenate(ps)
    return accuracy_score(y_true, y_pred), f1_score(y_true, y_pred, average='macro', zero_division=0), y_true, y_pred

class EarlyStopper:
    def __init__(self, patience=5):
        self.patience = patience; self.best = -1; self.bad_epochs = 0
        self.best_path = Path(OUT_DIR) / "best_model.pth"
    def step(self, score, model):
        improved = score > self.best
        if improved:
            self.best = score; self.bad_epochs = 0
            torch.save({"state_dict": model.state_dict(), "best_macro_f1": score}, self.best_path)
        else:
            self.bad_epochs += 1
        return improved

def confusion_matrix_fig(y_true, y_pred, title, labels):
    cm = confusion_matrix(y_true, y_pred, labels=list(range(len(labels))))
    fig, ax = plt.subplots(figsize=(5,5))
    im = ax.imshow(cm, cmap='viridis')
    ax.set_title(title); ax.set_xlabel("Predicted"); ax.set_ylabel("True")
    ax.set_xticks(range(len(labels))); ax.set_yticks(range(len(labels)))
    ax.set_xticklabels(range(len(labels))); ax.set_yticklabels(range(len(labels)))
    fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
    plt.tight_layout()
    return fig, cm

history = {"train_loss": [], "val_loss": [], "val_acc": [], "val_f1": []}
early = EarlyStopper(PATIENCE)

def plot_live():
    clear_output(wait=True)
    fig, ax = plt.subplots(1,2, figsize=(10,4))
    ax[0].plot(history["train_loss"], label="train loss")
    ax[0].plot(history["val_loss"], label="val loss")
    ax[0].set_title("Loss"); ax[0].legend()
    ax[1].plot(history["val_acc"], label="val acc")
    ax[1].plot(history["val_f1"], label="val macro-F1")
    ax[1].set_title("Validation"); ax[1].legend()
    plt.tight_layout(); plt.show()

def train_one_epoch():
    model.train()
    running = 0.0
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()

        use_mix = USE_MIXUP and MIXUP_ALPHA > 0
        use_cut = USE_CUTMIX and CUTMIX_ALPHA > 0

        if use_mix or use_cut:
            indices = torch.randperm(x.size(0)).to(device)
            y_a, y_b = y, y[indices]

            if use_mix:
                lam = np.random.beta(MIXUP_ALPHA, MIXUP_ALPHA)
                x_mixed = lam * x + (1 - lam) * x[indices]
                logits = model(x_mixed)
                loss = mixup_criterion(criterion, logits, y_a, y_b, lam)
            else:
                lam = np.random.beta(CUTMIX_ALPHA, CUTMIX_ALPHA)
                W, H = x.size(3), x.size(2)
                x1,y1,x2,y2 = rand_bbox(W,H,lam)
                x_cut = x.clone()
                x_cut[:, :, y1:y2, x1:x2] = x[indices, :, y1:y2, x1:x2]
                lam = 1 - ((x2-x1)*(y2-y1)/(W*H))
                logits = model(x_cut)
                loss = mixup_criterion(criterion, logits, y_a, y_b, lam)
        else:
            logits = model(x)
            loss = criterion(logits, y)

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        running += loss.item() * x.size(0)
    return running / len(train_ds)

In [None]:
# ===========================
# Cell 9 — Run training loop
# ===========================
best_epoch = -1
for epoch in range(EPOCHS):
    t0 = time.time()
    train_loss = train_one_epoch()

    # Validation
    model.eval()
    with torch.no_grad():
        val_running = 0.0; ys=[]; ps=[]
        for x,y in val_loader:
            x,y = x.to(device), y.to(device)
            logits = model(x)
            loss = criterion(logits, y)
            val_running += loss.item() * x.size(0)
            ys.append(y.cpu().numpy()); ps.append(torch.argmax(logits,1).cpu().numpy())
        y_true = np.concatenate(ys); y_pred = np.concatenate(ps)
        val_loss = val_running / len(val_ds)
        val_acc  = accuracy_score(y_true, y_pred)
        val_f1   = f1_score(y_true, y_pred, average="macro", zero_division=0)

    history["train_loss"].append(train_loss)
    history["val_loss"].append(val_loss)
    history["val_acc"].append(val_acc)
    history["val_f1"].append(val_f1)

    scheduler.step()

    if early.step(val_f1, model): best_epoch = epoch
    plot_live()
    print(f"Epoch {epoch+1:02d}/{EPOCHS} | "
          f"train_loss={train_loss:.4f}  val_loss={val_loss:.4f}  val_acc={val_acc:.4f}  val_macroF1={val_f1:.4f}  "
          f"lr={optimizer.param_groups[0]['lr']:.2e}  time={time.time()-t0:.1f}s")

    rep = classification_report(y_true, y_pred, output_dict=True, zero_division=0)
    per_class = [rep.get(str(i), {}).get("f1-score", 0.0) for i in range(num_classes)]
    print("Per-class F1:", np.round(per_class, 3))

    if early.bad_epochs >= PATIENCE:
        print(f"Early stopping. Best epoch was {best_epoch+1}.")
        break

# Save plots
plt.figure(figsize=(5,4))
plt.plot(history["train_loss"], label="train"); plt.plot(history["val_loss"], label="val")
plt.title("Loss"); plt.xlabel("epoch"); plt.legend(); plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "loss_curve.png"))

plt.figure(figsize=(5,4))
plt.plot(history["val_acc"], label="val acc"); plt.plot(history["val_f1"], label="val macro-F1")
plt.title("Validation"); plt.xlabel("epoch"); plt.legend(); plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "val_metrics.png"))

In [None]:
# =========================================
# Cell 10 — Best checkpoint & final reports
# =========================================
best_ckpt = torch.load(Path(OUT_DIR) / "best_model.pth", map_location=device)
model.load_state_dict(best_ckpt["state_dict"])

# VAL
val_acc, val_f1, y_true_v, y_pred_v = evaluate(model, val_loader)
print(f"[VAL]  acc={val_acc:.4f}  macro-F1={val_f1:.4f}")

fig, cm = confusion_matrix_fig(y_true_v, y_pred_v, "Confusion Matrix (VAL)", idx2str)
plt.savefig(os.path.join(OUT_DIR, "cm_val.png")); plt.show()

rep_val = classification_report(y_true_v, y_pred_v,
                                target_names=[f"{i}-{n}" for i,n in enumerate(idx2str)],
                                zero_division=0, digits=4)
print(rep_val)
Path(os.path.join(OUT_DIR, "val_classification_report.txt")).write_text(rep_val)
pd.DataFrame({"y_true": y_true_v, "y_pred": y_pred_v}).to_csv(os.path.join(OUT_DIR, "val_predictions.csv"), index=False)

# TEST (if provided)
if test_loader:
    test_acc, test_f1, y_true_t, y_pred_t = evaluate(model, test_loader)
    print(f"[TEST] acc={test_acc:.4f}  macro-F1={test_f1:.4f}")
    fig, cm = confusion_matrix_fig(y_true_t, y_pred_t, "Confusion Matrix (TEST)", idx2str)
    plt.savefig(os.path.join(OUT_DIR, "cm_test.png")); plt.show()
    rep_test = classification_report(y_true_t, y_pred_t,
                                     target_names=[f"{i}-{n}" for i,n in enumerate(idx2str)],
                                     zero_division=0, digits=4)
    print(rep_test)
    Path(os.path.join(OUT_DIR, "test_classification_report.txt")).write_text(rep_test)
    pd.DataFrame({"y_true": y_true_t, "y_pred": y_pred_t}).to_csv(os.path.join(OUT_DIR, "test_predictions.csv"), index=False)

print("Outputs saved to:", OUT_DIR)

In [None]:
# =========================================
# Cell 11 — (Optional) Export to ONNX
# =========================================
onnx_path = os.path.join(OUT_DIR, "resnet18_emotions.onnx")
model.eval()
dummy = torch.randn(1, 3, IMG_SIZE, IMG_SIZE, device=device)
torch.onnx.export(model, dummy, onnx_path,
                  input_names=["input"], output_names=["logits"],
                  dynamic_axes={"input": {0: "batch"}, "logits": {0: "batch"}},
                  opset_version=12)
print("Saved ONNX:", onnx_path)

In [None]:
# Install the onnx package
!pip install onnx