In [3]:
import kagglehub

  from .autonotebook import tqdm as notebook_tqdm


# Download latest version

In [4]:
path = kagglehub.dataset_download("astraszab/facial-expression-dataset-image-folders-fer2013")

print("Path to dataset files:", path)

Path to dataset files: C:\Users\manar\.cache\kagglehub\datasets\astraszab\facial-expression-dataset-image-folders-fer2013\versions\1


In [5]:
from pathlib import Path
path = Path(path)
DATA_DIR = path / "data"

print("FER_ROOT:", path)
print("DATA_DIR:", DATA_DIR)
print("Subfolders at DATA_DIR:", [p.name for p in DATA_DIR.iterdir() if p.is_dir()])

FER_ROOT: C:\Users\manar\.cache\kagglehub\datasets\astraszab\facial-expression-dataset-image-folders-fer2013\versions\1
DATA_DIR: C:\Users\manar\.cache\kagglehub\datasets\astraszab\facial-expression-dataset-image-folders-fer2013\versions\1\data
Subfolders at DATA_DIR: ['test', 'train', 'val']


# FER2013 numeric folder -> name

In [6]:
FER_ID2NAME = {'0':'angry','1':'disgust','2':'fear','3':'happy','4':'sad','5':'surprise','6':'neutral'}

# We keep only these folders

In [7]:
KEEP_IDS = {'3':'happy', '4':'sad', '6':'neutral'}     # folder names in source
NAME2TARGET = {'happy':0, 'neutral':1, 'sad':2}        # our label ids
print("Keeping:", KEEP_IDS)

Keeping: {'3': 'happy', '4': 'sad', '6': 'neutral'}


In [8]:
from torchvision import transforms

IMG_SIZE = 224

train_tfms = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),   # FER2013 is grayscale
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize([0.5,0.5,0.5], [0.5,0.5,0.5]),
])

val_tfms = transforms.Compose([
    transforms.Grayscale(3),
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.5,0.5,0.5], [0.5,0.5,0.5]),
])

In [9]:
KEEP_IDS     = {'3':'happy', '4':'sad', '6':'neutral'}
NAME2TARGET  = {'happy':0, 'neutral':1, 'sad':2}


In [10]:
from torchvision import transforms

IMG_SIZE = 224

train_tfms = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),   # FER2013 is grayscale
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize([0.5,0.5,0.5], [0.5,0.5,0.5]),
])

val_tfms = transforms.Compose([
    transforms.Grayscale(3),
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.5,0.5,0.5], [0.5,0.5,0.5]),
])

In [11]:
from torchvision import datasets
from pathlib import Path

KEEP_IDS     = {'3': 'happy', '4': 'sad', '6': 'neutral'}
NAME2TARGET  = {'happy': 0, 'neutral': 1, 'sad': 2}

def make_three_class_dataset(root: Path, tfms):
    ds = datasets.ImageFolder(root=str(root), transform=tfms)

    new_samples, new_targets = [], []

    # each item in ds.samples is (img_path: str, class_idx: int)
    for img_path, _ in ds.samples:
        cls_folder = Path(img_path).parent.name  # e.g. '0'..'6'
        if cls_folder in KEEP_IDS:
            label_name = KEEP_IDS[cls_folder]        # 'happy'/'sad'/'neutral'
            label_id   = NAME2TARGET[label_name]     # 0/1/2
            new_samples.append((img_path, label_id))
            new_targets.append(label_id)

    # keep ImageFolder internals consistent
    ds.samples = new_samples
    ds.imgs = new_samples           # alias used by older torchvision
    ds.targets = new_targets
    ds.classes = ['happy', 'neutral', 'sad']
    ds.class_to_idx = {'happy': 0, 'neutral': 1, 'sad': 2}

    return ds

#Make sure DATA_DIR, train_tfms, val_tfms are defined earlier
TRAIN_DIR = DATA_DIR / "train"
VAL_DIR   = DATA_DIR / "val"

train_ds = make_three_class_dataset(TRAIN_DIR, train_tfms)
val_ds   = make_three_class_dataset(VAL_DIR,   val_tfms)

from collections import Counter
print("Train counts:", Counter(train_ds.targets))
print("Val   counts:", Counter(val_ds.targets))
print("class_to_idx:", train_ds.class_to_idx)

Train counts: Counter({0: 7215, 1: 4965, 2: 4830})
Val   counts: Counter({0: 895, 2: 653, 1: 607})
class_to_idx: {'happy': 0, 'neutral': 1, 'sad': 2}


In [12]:
import torch
from torch.utils.data import DataLoader, WeightedRandomSampler
from collections import Counter
import numpy as np

#class weights for sampler & loss
counts = Counter(train_ds.targets)          # {0:..., 1:..., 2:...}
max_c = max(counts.values())
loss_weights = torch.tensor(
    [max_c / counts[i] for i in range(3)],  # order: 0=happy,1=neutral,2=sad
    dtype=torch.float32
)

#WeightedRandomSampler so each mini-batch is balanced-ish
sample_weights = torch.tensor([loss_weights[t] for t in train_ds.targets],
                              dtype=torch.float32)
sampler = WeightedRandomSampler(weights=sample_weights,
                                num_samples=len(sample_weights),
                                replacement=True)

BATCH_SIZE = 128
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE,
                          sampler=sampler, num_workers=2, pin_memory=True)
val_loader   = DataLoader(val_ds, batch_size=BATCH_SIZE,
                          shuffle=False, num_workers=2, pin_memory=True)

print("loss_weights (for CE):", loss_weights.tolist())
print("BATCH_SIZE =", BATCH_SIZE)

loss_weights (for CE): [1.0, 1.453172206878662, 1.4937888383865356]
BATCH_SIZE = 128


In [13]:
# ===== Training: MobileNetV3-Large (3 classes) =====
import torch, torch.nn as nn, torch.optim as optim
from torchvision import models

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
EPOCHS = 15
BEST_PATH = "emotion_model_best.pt"

# 1) Build model
model = models.mobilenet_v3_large(weights=models.MobileNet_V3_Large_Weights.IMAGENET1K_V1)
in_feats = model.classifier[-1].in_features
model.classifier[-1] = nn.Linear(in_feats, 3)   # happy/neutral/sad

# (optional) warmup: freeze backbone for first 2 epochs
for p in model.features.parameters():
    p.requires_grad = False

model = model.to(DEVICE)
if DEVICE == "cuda":
    model = model.to(memory_format=torch.channels_last)

# 2) Loss / optimizer / scheduler
ce_weights = (loss_weights / loss_weights.min()).to(DEVICE)  # normalize a bit
criterion = nn.CrossEntropyLoss(weight=ce_weights)

opt = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()),
                  lr=3e-4, weight_decay=1e-4)
sched = optim.lr_scheduler.CosineAnnealingLR(opt, T_max=EPOCHS)

scaler = torch.cuda.amp.GradScaler(enabled=(DEVICE == "cuda"))

def run_epoch(dl, train=True):
    model.train(train)
    tot, correct, loss_sum = 0, 0, 0.0
    for xb, yb in dl:
        xb, yb = xb.to(DEVICE, non_blocking=True), yb.to(DEVICE, non_blocking=True)
        if DEVICE == "cuda":
            xb = xb.to(memory_format=torch.channels_last)
        with torch.cuda.amp.autocast(enabled=(DEVICE == "cuda")):
            logits = model(xb)
            loss = criterion(logits, yb)
        if train:
            opt.zero_grad(set_to_none=True)
            scaler.scale(loss).backward()
            scaler.step(opt)
            scaler.update()
        loss_sum += loss.item() * yb.size(0)
        pred = logits.argmax(1)
        correct += (pred == yb).sum().item()
        tot += yb.size(0)
    return loss_sum / max(1, tot), correct / max(1, tot)

best_acc = 0.0
for epoch in range(1, EPOCHS + 1):
    # unfreeze after warmup
    if epoch == 3:
        for p in model.features.parameters():
            p.requires_grad = True
        opt = optim.AdamW(model.parameters(), lr=3e-4, weight_decay=1e-4)
        print("ðŸ”“ Unfroze backbone and reset optimizer.")

    tr_loss, tr_acc = run_epoch(train_loader, train=True)
    va_loss, va_acc = run_epoch(val_loader,   train=False)
    sched.step()

    print(f"Epoch {epoch:02d}/{EPOCHS} | "
          f"train acc {tr_acc:.3f} loss {tr_loss:.4f} | "
          f"val acc {va_acc:.3f} loss {va_loss:.4f}")

    if va_acc > best_acc:
        best_acc = va_acc
        torch.save(model.state_dict(), BEST_PATH)
        print(f"âœ… Saved best to {BEST_PATH} (val_acc={best_acc:.3f})")

print("Done. Best val acc:", best_acc)

  scaler = torch.cuda.amp.GradScaler(enabled=(DEVICE == "cuda"))
  with torch.cuda.amp.autocast(enabled=(DEVICE == "cuda")):
  with torch.cuda.amp.autocast(enabled=(DEVICE == "cuda")):


Epoch 01/15 | train acc 0.553 loss 0.9194 | val acc 0.573 loss 0.9155
âœ… Saved best to emotion_model_best.pt (val_acc=0.573)
Epoch 02/15 | train acc 0.599 loss 0.8487 | val acc 0.558 loss 0.9303
ðŸ”“ Unfroze backbone and reset optimizer.
Epoch 02/15 | train acc 0.599 loss 0.8487 | val acc 0.558 loss 0.9303
ðŸ”“ Unfroze backbone and reset optimizer.
Epoch 03/15 | train acc 0.737 loss 0.6134 | val acc 0.727 loss 0.6219
âœ… Saved best to emotion_model_best.pt (val_acc=0.727)
Epoch 03/15 | train acc 0.737 loss 0.6134 | val acc 0.727 loss 0.6219
âœ… Saved best to emotion_model_best.pt (val_acc=0.727)
Epoch 04/15 | train acc 0.815 loss 0.4649 | val acc 0.773 loss 0.5577
âœ… Saved best to emotion_model_best.pt (val_acc=0.773)
Epoch 04/15 | train acc 0.815 loss 0.4649 | val acc 0.773 loss 0.5577
âœ… Saved best to emotion_model_best.pt (val_acc=0.773)
Epoch 05/15 | train acc 0.850 loss 0.3901 | val acc 0.764 loss 0.5985
Epoch 05/15 | train acc 0.850 loss 0.3901 | val acc 0.764 loss 0.5985
Epoc

In [14]:
#==== SAVE STEP (device-safe) ====
import os, json, copy, torch
import torch.nn as nn

OUT_DIR = "artifacts"
STATE_DICT_OUT = os.path.join(OUT_DIR, "best_model.pt")
TS_OUT         = os.path.join(OUT_DIR, "emotion_mnv3_ts.pt")
ONNX_OUT       = os.path.join(OUT_DIR, "emotion_mnv3.onnx")
LABELS_OUT     = os.path.join(OUT_DIR, "labels.json")

IMG_SIZE = 224
LABELS   = ["happy", "neutral", "sad"]

os.makedirs(OUT_DIR, exist_ok=True)

if 'model' not in globals():
    raise RuntimeError("No model in memory. Run this cell right after training.")

#1) Save standard PyTorch weights (from your current device, GPU or CPU)
model.eval()
torch.save(model.state_dict(), STATE_DICT_OUT)

#2) For exports, move a COPY to CPU to avoid device mismatches
export_model = copy.deepcopy(model).to("cpu").eval()

#3) Try TorchScript (trace). If tracing has issues, fall back to scripting.
example = torch.randn(1, 3, IMG_SIZE, IMG_SIZE, device="cpu")
try:
    ts_model = torch.jit.trace(export_model, example)
    ts_model.save(TS_OUT)
    ts_mode = "trace"
except Exception as e:
    print("[-] TorchScript trace failed, trying script:", e)
    ts_model = torch.jit.script(export_model)
    ts_model.save(TS_OUT)
    ts_mode = "script"

#4) Export ONNX from the CPU copy
torch.onnx.export(
    export_model, example, ONNX_OUT,
    input_names=["input"], output_names=["logits"],
    dynamic_axes={"input": {0: "batch"}, "logits": {0: "batch"}},
    opset_version=12
)

#5) Save labels
with open(LABELS_OUT, "w") as f:
    json.dump(LABELS, f)

#6) Sanity forward on CPU copy
with torch.no_grad():
    y = export_model(example)
print(f"âœ… Saved:\n - state_dict : {STATE_DICT_OUT}\n - TorchScript({ts_mode}): {TS_OUT}\n - ONNX       : {ONNX_OUT}\n - labels     : {LABELS_OUT}")
print("Sanity forward OK. logits shape:", tuple(y.shape))

âœ… Saved:
 - state_dict : artifacts\best_model.pt
 - TorchScript(trace): artifacts\emotion_mnv3_ts.pt
 - ONNX       : artifacts\emotion_mnv3.onnx
 - labels     : artifacts\labels.json
Sanity forward OK. logits shape: (1, 3)
