In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)


Mounted at /content/drive


In [2]:
!ls -la "/content/drive/MyDrive/Dataset"


total 8
drwx------ 2 root root 4096 Dec 21 06:59 Mirror_COLAB
drwx------ 2 root root 4096 Dec 20 16:31 Output


In [3]:
!pip install torch torchvision --index-url https://download.pytorch.org/whl/cu126

Looking in indexes: https://download.pytorch.org/whl/cu126


In [4]:
!pip install mediapipe==0.10.20

Collecting mediapipe==0.10.20
  Downloading mediapipe-0.10.20-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting numpy<2 (from mediapipe==0.10.20)
  Downloading numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Collecting protobuf<5,>=4.25.3 (from mediapipe==0.10.20)
  Downloading protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting sounddevice>=0.4.4 (from mediapipe==0.10.20)
  Downloading sounddevice-0.5.3-py3-none-any.whl.metadata (1.6 kB)
INFO: pip is looking at multiple versions of jax to determine which version is compatible with other requirements. This could take a while.
Collecting jax (from mediapipe==0.10.20)
  Downloading jax-0.8.2-py3-none-any.whl.metadata (13 kB)
Collecting jaxlib (from mediapipe==0.10.20)
  Downloading jaxlib-0.8.2-cp312-cp312-manylinux_2_27_x86_64.whl.

In [1]:
!pip install -U pillow torchvision opencv-python


Collecting pillow
  Using cached pillow-12.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (8.8 kB)
Collecting torchvision
  Using cached torchvision-0.24.1-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (5.9 kB)
Collecting torch==2.9.1 (from torchvision)
  Using cached torch-2.9.1-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (30 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.8.93 (from torch==2.9.1->torchvision)
  Using cached nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl.metadata (1.7 kB)
Collecting nvidia-cuda-runtime-cu12==12.8.90 (from torch==2.9.1->torchvision)
  Using cached nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.7 kB)
Collecting nvidia-cuda-cupti-cu12==12.8.90 (from torch==2.9.1->torchvision)
  Using cached nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.7 kB)
Collecting nvidia-cublas-cu12==12.8.4.1 (from

In [1]:
import os
import cv2
import math
import pandas as pd
import numpy as np
import mediapipe as mp
from tqdm import tqdm
import torch
from torch.utils.data import Dataset, DataLoader




In [2]:
# Block 1: Paths + small helper utilities (project config)

import os
import cv2

# Input (read-only)
DATASET_ROOT = "/content/drive/MyDrive/Dataset/Mirror_COLAB"

# Output (writable)
OUTPUT_ROOT = "/content/drive/MyDrive/Dataset/Output"
os.makedirs(OUTPUT_ROOT, exist_ok=True)

# Derived output paths
VIDEOS_INFO_PATH = os.path.join(OUTPUT_ROOT, "videos_info.csv")

FRAMES_ROOT      = os.path.join(OUTPUT_ROOT, "frames")
FRAMES_INFO_PATH = os.path.join(OUTPUT_ROOT, "frames_info.csv")

CROPS_ROOT       = os.path.join(OUTPUT_ROOT, "crops")
CROPS_INFO_PATH  = os.path.join(OUTPUT_ROOT, "crops_labeled.csv")

SPLIT_DIR        = os.path.join(OUTPUT_ROOT, "splits")

# EMake nsure folders exist
for d in [FRAMES_ROOT, CROPS_ROOT, SPLIT_DIR]:
    os.makedirs(d, exist_ok=True)

# Helpers
def safe_imread(path: str):
    """
    Read an image from disk. Raises a clear error if the file can't be read.
    Note: OpenCV returns BGR images by default.
    """
    path = str(path)
    img = cv2.imread(path)
    if img is None:
        raise FileNotFoundError(
            f"Could not read image (missing file or unsupported format): {path}"
        )
    return img


In [30]:
# Block 2: Scan dataset and create videos_info.csv

import os
import pandas as pd

print("Scanning dataset folder:")
print("  DATASET_ROOT:", DATASET_ROOT)



VIDEO_EXTS = (".mp4", ".avi", ".mov", ".mkv")

video_rows = []
for root, _, files in os.walk(DATASET_ROOT):
    for fname in files:
        if not fname.lower().endswith(VIDEO_EXTS):
            continue

        full_path = os.path.join(root, fname)

        group = os.path.basename(root)

        # video_id is filename without extension
        video_id = os.path.splitext(fname)[0]

        # Treat each video as one "subject" to reduce leakage when you later split.
        subject = video_id

        video_rows.append(
            {
                "video_path": full_path.replace("\\", "/"),  # normalize paths for Colab
                "video_name": fname,
                "video_id": video_id,
                "subject": subject,
                "group": group,
            }
        )

if len(video_rows) == 0:
    raise RuntimeError(
        f"No videos found under DATASET_ROOT={DATASET_ROOT} with extensions {VIDEO_EXTS}"
    )

videos_df = pd.DataFrame(video_rows).sort_values(["group", "video_name"]).reset_index(drop=True)
videos_df.to_csv(VIDEOS_INFO_PATH, index=False)

print(f"Saved {len(videos_df)} video entries → {VIDEOS_INFO_PATH}")
display(videos_df.head(10))

print("Unique subjects:", videos_df["subject"].nunique())
print("Unique groups:", videos_df["group"].nunique())


Scanning dataset folder:
  DATASET_ROOT: /content/Dataset_Local/Mirror_COLAB
Saved 320 video entries → /content/drive/MyDrive/Dataset/Output/videos_info.csv


Unnamed: 0,video_path,video_name,video_id,subject,group
0,/content/Dataset_Local/Mirror_COLAB/Female_mir...,1-FemaleNoGlasses-Normal.avi,1-FemaleNoGlasses-Normal,1-FemaleNoGlasses-Normal,Female_mirror
1,/content/Dataset_Local/Mirror_COLAB/Female_mir...,1-FemaleNoGlasses-Talking.avi,1-FemaleNoGlasses-Talking,1-FemaleNoGlasses-Talking,Female_mirror
2,/content/Dataset_Local/Mirror_COLAB/Female_mir...,1-FemaleNoGlasses-Yawning.avi,1-FemaleNoGlasses-Yawning,1-FemaleNoGlasses-Yawning,Female_mirror
3,/content/Dataset_Local/Mirror_COLAB/Female_mir...,10-FemaleNoGlasses-Normal.avi,10-FemaleNoGlasses-Normal,10-FemaleNoGlasses-Normal,Female_mirror
4,/content/Dataset_Local/Mirror_COLAB/Female_mir...,10-FemaleNoGlasses-Talking.avi,10-FemaleNoGlasses-Talking,10-FemaleNoGlasses-Talking,Female_mirror
5,/content/Dataset_Local/Mirror_COLAB/Female_mir...,10-FemaleNoGlasses-Yawning.avi,10-FemaleNoGlasses-Yawning,10-FemaleNoGlasses-Yawning,Female_mirror
6,/content/Dataset_Local/Mirror_COLAB/Female_mir...,11-FemaleNoGlasses-Normal.avi,11-FemaleNoGlasses-Normal,11-FemaleNoGlasses-Normal,Female_mirror
7,/content/Dataset_Local/Mirror_COLAB/Female_mir...,11-FemaleNoGlasses-Talking.avi,11-FemaleNoGlasses-Talking,11-FemaleNoGlasses-Talking,Female_mirror
8,/content/Dataset_Local/Mirror_COLAB/Female_mir...,11-FemaleNoGlasses-Yawning.avi,11-FemaleNoGlasses-Yawning,11-FemaleNoGlasses-Yawning,Female_mirror
9,/content/Dataset_Local/Mirror_COLAB/Female_mir...,12-FemaleNoGlasses-Normal.avi,12-FemaleNoGlasses-Normal,12-FemaleNoGlasses-Normal,Female_mirror


Unique subjects: 320
Unique groups: 2


In [4]:
# Block 2.5: Optional – copy dataset to local Colab storage
# Reading videos from Google Drive is slow. Copying the dataset once to
# Colab's local disk (/content) speeds up frame extraction
# and training.

import os
import shutil

# Toggle
COPY_TO_LOCAL = True  # Set to False if you want to read directly from Drive

# Paths
DRIVE_DATASET_ROOT = "/content/drive/MyDrive/Dataset"
LOCAL_DATASET_ROOT = "/content/Dataset_Local"

SRC_DIR = os.path.join(DRIVE_DATASET_ROOT, "Mirror_COLAB")
DST_DIR = os.path.join(LOCAL_DATASET_ROOT, "Mirror_COLAB")

# Copy logic
if COPY_TO_LOCAL:
    print("\nPreparing local dataset copy (for speed)...")

    if not os.path.isdir(SRC_DIR):
        raise FileNotFoundError(
            f"Source dataset not found in Google Drive: {SRC_DIR}"
        )

    # Create parent directory if needed
    os.makedirs(LOCAL_DATASET_ROOT, exist_ok=True)

    if os.path.exists(DST_DIR):
        print("Local dataset already exists. Skipping copy.")
    else:
        print("Copying dataset from Drive to local storage...")
        print("This may take a few minutes, but it will speed up all later steps.")
        shutil.copytree(SRC_DIR, DST_DIR)
        print("Copy completed successfully.")

    # IMPORTANT:
    # If you copy locally, update DATASET_ROOT so downstream blocks
    # automatically use the faster local path.
    DATASET_ROOT = DST_DIR
    print("DATASET_ROOT updated to local path:")
    print(" ", DATASET_ROOT)
else:
    print("\nCOPY_TO_LOCAL is False → using dataset directly from Google Drive.")
    print("DATASET_ROOT remains:")
    print(" ", DATASET_ROOT)



Preparing local dataset copy (for speed)...
Copying dataset from Drive to local storage...
This may take a few minutes, but it will speed up all later steps.
Copy completed successfully.
DATASET_ROOT updated to local path:
  /content/Dataset_Local/Mirror_COLAB


In [5]:
# Block 3: Extract frames from videos (target = 10 FPS)

import os
import cv2
import pandas as pd
from tqdm import tqdm

# Toggle
SAVE_TO_DRIVE = False  # set True if you want frames + CSV saved to Google Drive


# Paths
LOCAL_FRAMES_ROOT = "/content/CropsLabeled/frames_tmp"
DRIVE_FRAMES_ROOT = "/content/drive/MyDrive/Dataset/Output/frames"

FRAMES_INFO_PATH = (
    "/content/drive/MyDrive/Dataset/Output/frames_info.csv"
    if SAVE_TO_DRIVE
    else "/content/CropsLabeled/frames_info.csv"
)

os.makedirs(LOCAL_FRAMES_ROOT, exist_ok=True)
if SAVE_TO_DRIVE:
    os.makedirs(DRIVE_FRAMES_ROOT, exist_ok=True)

FPS_TARGET = 10
frames_info = []

# Frame extraction
for _, row in tqdm(videos_df.iterrows(), total=len(videos_df), desc="Extracting frames"):
    video_path = row["video_path"]
    subject = row["subject"]
    video_name = row["video_name"]

    if not os.path.isfile(video_path):
        print(f"Skipping missing video: {video_path}")
        continue

    base_name = f"{subject}_{os.path.splitext(video_name)[0]}"

    local_dir = os.path.join(LOCAL_FRAMES_ROOT, base_name)
    os.makedirs(local_dir, exist_ok=True)

    drive_dir = None
    if SAVE_TO_DRIVE:
        drive_dir = os.path.join(DRIVE_FRAMES_ROOT, base_name)
        os.makedirs(drive_dir, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Could not open video: {video_path}")
        continue

    fps = cap.get(cv2.CAP_PROP_FPS)
    if fps is None or fps <= 0:
        fps = FPS_TARGET

    step = max(1, int(round(fps / FPS_TARGET)))

    frame_id = 0
    saved = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        if frame_id % step == 0:
            fname = f"frame_{saved:05d}.jpg"

            local_path = os.path.join(local_dir, fname)
            cv2.imwrite(local_path, frame)

            drive_path = None
            if SAVE_TO_DRIVE:
                drive_path = os.path.join(drive_dir, fname)
                cv2.imwrite(drive_path, frame)

            frames_info.append(
                {
                    "frame_path": drive_path if SAVE_TO_DRIVE else local_path,
                    "local_frame_path": local_path,
                    "subject": subject,
                    "video_name": video_name,
                    "frame_index": saved,
                }
            )

            saved += 1

        frame_id += 1

    cap.release()

# Save metadata
frames_df = pd.DataFrame(frames_info)
frames_df.to_csv(FRAMES_INFO_PATH, index=False)

print(f"Saved {len(frames_df)} frames")
print(f"frames_info.csv saved to: {FRAMES_INFO_PATH}")

frames_df.head()


Extracting frames: 100%|██████████| 320/320 [04:02<00:00,  1.32it/s]


Saved 74631 frames
frames_info.csv saved to: /content/CropsLabeled/frames_info.csv


Unnamed: 0,frame_path,local_frame_path,subject,video_name,frame_index
0,/content/CropsLabeled/frames_tmp/1-FemaleNoGla...,/content/CropsLabeled/frames_tmp/1-FemaleNoGla...,1-FemaleNoGlasses-Normal,1-FemaleNoGlasses-Normal.avi,0
1,/content/CropsLabeled/frames_tmp/1-FemaleNoGla...,/content/CropsLabeled/frames_tmp/1-FemaleNoGla...,1-FemaleNoGlasses-Normal,1-FemaleNoGlasses-Normal.avi,1
2,/content/CropsLabeled/frames_tmp/1-FemaleNoGla...,/content/CropsLabeled/frames_tmp/1-FemaleNoGla...,1-FemaleNoGlasses-Normal,1-FemaleNoGlasses-Normal.avi,2
3,/content/CropsLabeled/frames_tmp/1-FemaleNoGla...,/content/CropsLabeled/frames_tmp/1-FemaleNoGla...,1-FemaleNoGlasses-Normal,1-FemaleNoGlasses-Normal.avi,3
4,/content/CropsLabeled/frames_tmp/1-FemaleNoGla...,/content/CropsLabeled/frames_tmp/1-FemaleNoGla...,1-FemaleNoGlasses-Normal,1-FemaleNoGlasses-Normal.avi,4


In [6]:
# Block 4: Label frames using MAR (MediaPipe FaceMesh)

import os
import cv2
import numpy as np
import pandas as pd
import mediapipe as mp
from tqdm import tqdm

# MUST match Block 3
SAVE_TO_DRIVE = False  # True: frames were saved to Drive; False: local-only frames

# Where the CSV will be saved (keep this on Drive so you don't lose it on runtime reset)
OUTPUT_ROOT = "/content/drive/MyDrive/Dataset/Output"
os.makedirs(OUTPUT_ROOT, exist_ok=True)

# Frame roots (must match Block 3)
LOCAL_FRAMES_ROOT = "/content/CropsLabeled/frames_tmp"
DRIVE_FRAMES_ROOT = "/content/drive/MyDrive/Dataset/Output/frames"
FRAMES_DIR = DRIVE_FRAMES_ROOT if SAVE_TO_DRIVE else LOCAL_FRAMES_ROOT

if not os.path.isdir(FRAMES_DIR):
    raise FileNotFoundError(f"FRAMES_DIR not found: {FRAMES_DIR}")

print("Using frames from:", FRAMES_DIR)

# MediaPipe FaceMesh
mp_face_mesh = mp.solutions.face_mesh

# Mouth landmark indices (MediaPipe FaceMesh)
LIP_L = 61
LIP_R = 291
LIP_TOP = 13
LIP_BOT = 14

def mar_from_landmarks(landmarks, w, h):
    def pt(i):
        return np.array([landmarks[i].x * w, landmarks[i].y * h], dtype=np.float32)

    left = pt(LIP_L)
    right = pt(LIP_R)
    top = pt(LIP_TOP)
    bottom = pt(LIP_BOT)

    mouth_width = np.linalg.norm(right - left) + 1e-6
    mouth_open = np.linalg.norm(bottom - top)
    return float(mouth_open / mouth_width)

def smooth_1d(x, k=7):
    x = np.asarray(x, dtype=np.float32).copy()
    if x.size == 0:
        return x
    if np.all(np.isnan(x)):
        return x
    fill = np.nanmean(x)
    x[np.isnan(x)] = fill
    kernel = np.ones(int(k), dtype=np.float32) / float(k)
    return np.convolve(x, kernel, mode="same")

def detect_yawn_frames(mar, mar_thresh=0.35, min_consecutive=8):
    mar = np.asarray(mar, dtype=np.float32)
    above = mar > float(mar_thresh)
    yawn_mask = np.zeros_like(above, dtype=bool)

    run_start = None
    for i, v in enumerate(above):
        if v and run_start is None:
            run_start = i

        last = (i == len(above) - 1)
        if (not v or last) and run_start is not None:
            run_end = i if (not v) else (i + 1)
            if (run_end - run_start) >= int(min_consecutive):
                yawn_mask[run_start:run_end] = True
            run_start = None

    return yawn_mask

def compute_mar(img_bgr, face_mesh):
    h, w = img_bgr.shape[:2]
    rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    res = face_mesh.process(rgb)
    if not res.multi_face_landmarks:
        return np.nan
    lm = res.multi_face_landmarks[0].landmark
    return mar_from_landmarks(lm, w, h)

video_folders = sorted(
    d for d in os.listdir(FRAMES_DIR)
    if os.path.isdir(os.path.join(FRAMES_DIR, d))
)

if len(video_folders) == 0:
    raise RuntimeError(f"No frame folders found inside: {FRAMES_DIR}")

all_rows = []

with mp_face_mesh.FaceMesh(
    static_image_mode=False,
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
) as face_mesh:

    for vid in tqdm(video_folders, desc="Labeling frames (MAR)"):
        vdir = os.path.join(FRAMES_DIR, vid)

        frame_files = sorted(
            f for f in os.listdir(vdir)
            if f.lower().endswith((".jpg", ".jpeg", ".png"))
        )

        if len(frame_files) == 0:
            continue

        mar_list = []
        frame_paths = []

        for fname in frame_files:
            fp = os.path.join(vdir, fname)
            img = cv2.imread(fp)

            if img is None:
                mar_list.append(np.nan)
            else:
                mar_list.append(compute_mar(img, face_mesh))

            frame_paths.append(fp)

        mar_arr = np.array(mar_list, dtype=np.float32)
        mar_s = smooth_1d(mar_arr, k=7)

        # If face not detected -> treat as closed mouth (not yawning)
        mar_s = np.nan_to_num(mar_s, nan=0.0)

        yawn_mask = detect_yawn_frames(mar_s, mar_thresh=0.35, min_consecutive=8)

        for fp, mar_v, is_yawn in zip(frame_paths, mar_s, yawn_mask):
            all_rows.append(
                {
                    "video_id": vid,
                    "frame_path": fp.replace("\\", "/"),
                    "mar": float(mar_v),
                    "label": 1 if bool(is_yawn) else 0,  # 0=Normal, 1=Yawm
                }
            )

labels_df = pd.DataFrame(all_rows)

LABELS_CSV = os.path.join(OUTPUT_ROOT, "frames_labels.csv")
labels_df.to_csv(LABELS_CSV, index=False)

print("Saved frame labels to:", LABELS_CSV)
print("Label distribution:")
print(labels_df["label"].value_counts(dropna=False))
print("Total labeled frames:", len(labels_df))


Using frames from: /content/CropsLabeled/frames_tmp


Labeling frames (MAR): 100%|██████████| 320/320 [07:53<00:00,  1.48s/it]


Saved frame labels to: /content/drive/MyDrive/Dataset/Output/frames_labels.csv
Label distribution:
label
0    70378
1     4253
Name: count, dtype: int64
Total labeled frames: 74631


In [7]:
# Block 5: Detect face (MediaPipe) -> crop mouth region -> save crops + crops_labeled.csv

import os
import cv2
import pandas as pd
import mediapipe as mp
from tqdm import tqdm

# Toggles
SAVE_CROPS_TO_DRIVE = False  # True: save crops to Drive as well, False: local only (faster)

# Roots / inputs
OUTPUT_ROOT = "/content/drive/MyDrive/Dataset/Output"
LABELS_CSV = os.path.join(OUTPUT_ROOT, "frames_labels.csv")

FRAMES_INFO_CANDIDATES = [
    os.path.join(OUTPUT_ROOT, "frames_info.csv"),
    "/content/CropsLabeled/frames_info.csv",
]
FRAMES_INFO_PATH = next((p for p in FRAMES_INFO_CANDIDATES if os.path.exists(p)), None)
if FRAMES_INFO_PATH is None:
    raise FileNotFoundError("frames_info.csv not found in expected locations.")
if not os.path.exists(LABELS_CSV):
    raise FileNotFoundError(f"frames_labels.csv not found: {LABELS_CSV}")

# Crops output (local + optional Drive)
LOCAL_CROPS_ROOT = "/content/CropsLabeled/crops"
DRIVE_CROPS_ROOT = os.path.join(OUTPUT_ROOT, "crops")

CROPS_ROOT = DRIVE_CROPS_ROOT if SAVE_CROPS_TO_DRIVE else LOCAL_CROPS_ROOT
CROPS_INFO_PATH = (
    os.path.join(OUTPUT_ROOT, "crops_labeled.csv")
    if SAVE_CROPS_TO_DRIVE
    else "/content/CropsLabeled/crops_labeled.csv"
)

os.makedirs(LOCAL_CROPS_ROOT, exist_ok=True)
os.makedirs(CROPS_ROOT, exist_ok=True)

print("Using FRAMES_INFO_PATH:", FRAMES_INFO_PATH)
print("Using LABELS_CSV:", LABELS_CSV)
print("Saving crops to:", CROPS_ROOT)
print("Saving crops CSV to:", CROPS_INFO_PATH)

# Load frames info + labels
frames_df = pd.read_csv(FRAMES_INFO_PATH)
labels_df = pd.read_csv(LABELS_CSV)

def norm_path(p):
    return str(p).replace("\\", "/")

frames_df["frame_path"] = frames_df["frame_path"].map(norm_path)
labels_df["frame_path"] = labels_df["frame_path"].map(norm_path)

# Prefer local_frame_path for faster reads if available
if "local_frame_path" in frames_df.columns:
    frames_df["local_frame_path"] = frames_df["local_frame_path"].map(norm_path)
else:
    frames_df["local_frame_path"] = frames_df["frame_path"]

# Merge labels safely
def make_key(p):
    p = norm_path(p)
    parts = p.split("/")
    fname = parts[-1]
    parent = parts[-2] if len(parts) >= 2 else ""
    return f"{parent}/{fname}"

frames_df["k"] = frames_df["frame_path"].map(make_key)
labels_df["k"] = labels_df["frame_path"].map(make_key)

labels_small = labels_df[["k", "label", "mar"]].drop_duplicates("k")
frames_df = frames_df.merge(labels_small, on="k", how="left")

missing = int(frames_df["label"].isna().sum())
print("Frames loaded:", len(frames_df))
print("Missing labels after merge:", missing)

if missing > 0:
    print(frames_df.loc[frames_df["label"].isna(), ["frame_path", "local_frame_path", "k"]].head(10))
    raise ValueError(
        "Some frames have no labels. This usually means Block 4 labeled a different FRAMES_DIR "
        "than the one used in Block 3."
    )

frames_df["label"] = frames_df["label"].astype(int)

print("Label distribution in frames_df:")
print(frames_df["label"].value_counts())

# MediaPipe FaceMesh
mp_face_mesh = mp.solutions.face_mesh

LIP_L, LIP_R, LIP_TOP, LIP_BOT = 61, 291, 13, 14

def extract_mouth(img_bgr, landmarks, pad=10):
    h, w = img_bgr.shape[:2]

    def pt(i):
        return int(landmarks[i].x * w), int(landmarks[i].y * h)

    xL, yL = pt(LIP_L)
    xR, yR = pt(LIP_R)
    xT, yT = pt(LIP_TOP)
    xB, yB = pt(LIP_BOT)

    x_min = max(0, min(xL, xR) - pad)
    x_max = min(w, max(xL, xR) + pad)
    y_min = max(0, min(yT, yB) - pad)
    y_max = min(h, max(yT, yB) + pad)

    if x_max <= x_min or y_max <= y_min:
        return None

    mouth = img_bgr[y_min:y_max, x_min:x_max]
    if mouth.size == 0:
        return None
    return mouth

# Main loop
crop_rows = []

with mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5
) as face_mesh:

    for _, row in tqdm(frames_df.iterrows(), total=len(frames_df), desc="Cropping mouths"):
        frame_path = row["local_frame_path"]
        subject = str(row["subject"])
        video_name = str(row["video_name"])
        frame_index = int(row["frame_index"])
        label = int(row["label"])

        if not os.path.exists(frame_path):
            continue

        img = cv2.imread(frame_path)
        if img is None:
            continue

        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        result = face_mesh.process(rgb)
        if not result.multi_face_landmarks:
            continue

        landmarks = result.multi_face_landmarks[0].landmark
        mouth = extract_mouth(img, landmarks, pad=10)
        if mouth is None:
            continue

        subj_dir = os.path.join(CROPS_ROOT, subject)
        os.makedirs(subj_dir, exist_ok=True)

        crop_name = f"{os.path.splitext(video_name)[0]}_{frame_index:05d}.jpg"
        crop_path = os.path.join(subj_dir, crop_name).replace("\\", "/")

        ok = cv2.imwrite(crop_path, mouth)
        if not ok:
            continue

        crop_rows.append(
            {
                "crop_path": crop_path,
                "subject": subject,
                "video_name": video_name,
                "frame_index": frame_index,
                "label": label,
            }
        )

# Save crops CSV
crops_df = pd.DataFrame(crop_rows)
crops_df.to_csv(CROPS_INFO_PATH, index=False)

print("Total crops saved:", len(crops_df))
print("Saved crops CSV to:", CROPS_INFO_PATH)

crops_df.head()

Using FRAMES_INFO_PATH: /content/CropsLabeled/frames_info.csv
Using LABELS_CSV: /content/drive/MyDrive/Dataset/Output/frames_labels.csv
Saving crops to: /content/CropsLabeled/crops
Saving crops CSV to: /content/CropsLabeled/crops_labeled.csv
Frames loaded: 74631
Missing labels after merge: 0
Label distribution in frames_df:
label
0    70378
1     4253
Name: count, dtype: int64


Cropping mouths: 100%|██████████| 74631/74631 [11:47<00:00, 105.51it/s]


Total crops saved: 74435
Saved crops CSV to: /content/CropsLabeled/crops_labeled.csv


Unnamed: 0,crop_path,subject,video_name,frame_index,label
0,/content/CropsLabeled/crops/1-FemaleNoGlasses-...,1-FemaleNoGlasses-Normal,1-FemaleNoGlasses-Normal.avi,0,0
1,/content/CropsLabeled/crops/1-FemaleNoGlasses-...,1-FemaleNoGlasses-Normal,1-FemaleNoGlasses-Normal.avi,1,0
2,/content/CropsLabeled/crops/1-FemaleNoGlasses-...,1-FemaleNoGlasses-Normal,1-FemaleNoGlasses-Normal.avi,2,0
3,/content/CropsLabeled/crops/1-FemaleNoGlasses-...,1-FemaleNoGlasses-Normal,1-FemaleNoGlasses-Normal.avi,3,0
4,/content/CropsLabeled/crops/1-FemaleNoGlasses-...,1-FemaleNoGlasses-Normal,1-FemaleNoGlasses-Normal.avi,4,0


In [8]:
# Block 6: Check for conflicting labels per frame

import pandas as pd

CROPS_CSV = "/content/CropsLabeled/crops_labeled.csv"

if not os.path.exists(CROPS_CSV):
    raise FileNotFoundError(f"File not found: {CROPS_CSV}")

crops_df = pd.read_csv(CROPS_CSV)

# A frame should be uniquely identified by (subject, video_name, frame_index)
label_counts = (
    crops_df
    .groupby(["subject", "video_name", "frame_index"])["label"]
    .nunique()
)

conflicts = (label_counts > 1).sum()

print("Frames with conflicting labels:", conflicts)


Frames with conflicting labels: 0


In [9]:
import os
import json
import time
import random
import numpy as np
import pandas as pd
import cv2

import torch
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
    confusion_matrix
)


In [10]:
# Block 8: Reproducibility

import random
import numpy as np
import torch

def set_seed(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


In [11]:
# Block 9: Training configuration

import os
import torch

SEED = 42
set_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)
if device.type == "cuda":
    print("GPU:", torch.cuda.get_device_name(0))

CSV_PATH = "/content/CropsLabeled/crops_labeled.csv"
if not os.path.exists(CSV_PATH):
    raise FileNotFoundError(f"Dataset CSV not found: {CSV_PATH}")

OUTPUT_ROOT = "/content/drive/MyDrive/Dataset/Output"

# Enable models
USE_RESNET18        = True
USE_RESNET50        = False
USE_MOBILENETV2     = False
USE_EFFICIENTNET_B0 = False

if not (USE_RESNET18 or USE_RESNET50 or USE_MOBILENETV2 or USE_EFFICIENTNET_B0):
    raise ValueError("Enable at least one model.")

# MobileNetV2 head params
MOBILENET_HIDDEN  = 128
MOBILENET_DROPOUT = 0.2

# Training hyperparameters
IMG_SIZE = 224
BATCH_SIZE = 32
NUM_WORKERS = 0
EPOCHS = 15
LR = 1e-4
WEIGHT_DECAY = 1e-4

EARLY_STOPPING_PATIENCE = 3

splits_dir = os.path.join(OUTPUT_ROOT, "splits")
ckpt_dir   = os.path.join(OUTPUT_ROOT, "checkpoints")
logs_dir   = os.path.join(OUTPUT_ROOT, "logs")

for d in [OUTPUT_ROOT, splits_dir, ckpt_dir, logs_dir]:
    os.makedirs(d, exist_ok=True)


Device: cuda
GPU: Tesla T4


In [22]:
# Block 10: Load dataset CSV, validate columns

import pandas as pd

df = pd.read_csv(CSV_PATH)

# Required columns for training
required_cols = {"crop_path", "label", "subject"}
missing = required_cols - set(df.columns)
if missing:
    raise ValueError(
        f"Missing required columns: {missing}\n"
        f"Found columns: {list(df.columns)}"
    )

# Normalize paths
df["crop_path"] = df["crop_path"].astype(str).str.replace("\\", "/", regex=False)

# Ensure labels are integers
df["label"] = pd.to_numeric(df["label"], errors="raise").astype(int)

print("Total rows:", len(df))
print("Unique subjects:", df["subject"].nunique())
print("Label distribution after flipping:")
print(df["label"].value_counts())


Total rows: 74435
Unique subjects: 320
Label distribution after flipping:
label
0    70184
1     4251
Name: count, dtype: int64


In [23]:
# Block 11: Subject-safe fixed-size split (10000 train / 3000 val / 3000 test)

import numpy as np
import pandas as pd

def subject_safe_fixed_rows_split(df, n_train=10000, n_val=3000, n_test=3000, seed=42):
    rng = np.random.default_rng(seed)

    required = {"subject", "label"}
    missing = required - set(df.columns)
    if missing:
        raise ValueError(f"df is missing columns: {missing}. Found: {list(df.columns)}")

    total_needed = int(n_train) + int(n_val) + int(n_test)
    if len(df) < total_needed:
        raise ValueError(f"Not enough rows: have {len(df)}, need {total_needed}")

    # Shuffle subjects and assign them to splits until each split has enough rows
    subjects = df["subject"].dropna().unique().copy()
    rng.shuffle(subjects)

    subj_sizes = df.groupby("subject").size().to_dict()

    split_subjects = {"train": [], "val": [], "test": []}
    split_rows = {"train": 0, "val": 0, "test": 0}
    targets = {"train": int(n_train), "val": int(n_val), "test": int(n_test)}

    order = ["train", "val", "test"]
    cur = 0

    for s in subjects:
        while cur < len(order) and split_rows[order[cur]] >= targets[order[cur]]:
            cur += 1
        if cur >= len(order):
            break

        k = order[cur]
        split_subjects[k].append(s)
        split_rows[k] += int(subj_sizes.get(s, 0))

    # Build pools from selected subjects
    train_pool = df[df["subject"].isin(split_subjects["train"])].copy()
    val_pool   = df[df["subject"].isin(split_subjects["val"])].copy()
    test_pool  = df[df["subject"].isin(split_subjects["test"])].copy()

    # If any pool is still short, top it up using unused subjects
    used = set(split_subjects["train"]) | set(split_subjects["val"]) | set(split_subjects["test"])
    remaining = [s for s in subjects if s not in used]

    def top_up_pool(pool, need, name):
        nonlocal remaining
        while len(pool) < need and remaining:
            s = remaining.pop(0)
            pool = pd.concat([pool, df[df["subject"] == s]], ignore_index=True)

        if len(pool) < need:
            raise ValueError(f"Could not top up {name}: pool has {len(pool)} rows, need {need}")

        return pool

    train_pool = top_up_pool(train_pool, targets["train"], "train")
    val_pool   = top_up_pool(val_pool,   targets["val"],   "val")
    test_pool  = top_up_pool(test_pool,  targets["test"],  "test")

    # Sample exactly n rows from each pool, roughly preserving label proportions
    def stratified_exact_sample(pool, n, seed_local):
        rng_local = np.random.default_rng(seed_local)

        # Ensure labels are valid and pool is large enough
        if len(pool) < n:
            raise ValueError(f"Pool too small: have {len(pool)}, need {n}")

        counts = pool["label"].value_counts()
        props = counts / counts.sum()

        desired = (props * n).round().astype(int)

        # Fix rounding so desired sums exactly to n
        diff = int(n - desired.sum())
        if diff != 0:
            labs_order = props.sort_values(ascending=False).index.tolist()
            i = 0
            while diff != 0:
                lab = labs_order[i % len(labs_order)]
                if diff > 0:
                    desired.loc[lab] += 1
                    diff -= 1
                else:
                    if desired.loc[lab] > 0:
                        desired.loc[lab] -= 1
                        diff += 1
                i += 1

        parts = []
        used_idx = set()

        # Sample per label
        for lab, g in pool.groupby("label"):
            take = int(desired.get(lab, 0))
            take = min(take, len(g))
            if take > 0:
                samp = g.sample(n=take, replace=False, random_state=seed_local)
                parts.append(samp)
                used_idx.update(samp.index.tolist())

        out = pd.concat(parts, axis=0) if parts else pool.iloc[0:0].copy()

        # Fill any shortfall (if some label had fewer rows than desired)
        if len(out) < n:
            remaining_rows = pool.drop(index=list(used_idx), errors="ignore")
            need = n - len(out)
            filler = remaining_rows.sample(n=need, replace=False, random_state=seed_local)
            out = pd.concat([out, filler], axis=0)

        # Final shuffle
        out = out.sample(frac=1.0, random_state=seed_local).reset_index(drop=True)
        return out

    train_df = stratified_exact_sample(train_pool, targets["train"], seed)
    val_df   = stratified_exact_sample(val_pool,   targets["val"],   seed + 1)
    test_df  = stratified_exact_sample(test_pool,  targets["test"],  seed + 2)

    trS, vaS, teS = set(train_df["subject"]), set(val_df["subject"]), set(test_df["subject"])
    if (trS & vaS) or (trS & teS) or (vaS & teS):
        raise RuntimeError("Subject leakage detected between splits (should not happen).")

    if len(train_df) != targets["train"] or len(val_df) != targets["val"] or len(test_df) != targets["test"]:
        raise RuntimeError(
            f"Split size mismatch: "
            f"train={len(train_df)} val={len(val_df)} test={len(test_df)} "
            f"(expected {targets})"
        )

    return train_df, val_df, test_df


In [24]:
# Block 12: Create a fixed subset split and save (subject-safe, label-aware)

# Fixed subset sizes (total = 16,000)
N_TRAIN = 10000
N_VAL   = 3000
N_TEST  = 3000

train_df, val_df, test_df = subject_safe_fixed_rows_split(
    df, n_train=N_TRAIN, n_val=N_VAL, n_test=N_TEST, seed=SEED
)

train_path = os.path.join(splits_dir, "train.csv")
val_path   = os.path.join(splits_dir, "val.csv")
test_path  = os.path.join(splits_dir, "test.csv")

train_df.to_csv(train_path, index=False)
val_df.to_csv(val_path, index=False)
test_df.to_csv(test_path, index=False)

print("Total rows in full dataset:", len(df))
print("Subset rows (train/val/test):", len(train_df), len(val_df), len(test_df))
print()

print("Label distribution (train):")
print(train_df["label"].value_counts(normalize=True))
print()

print("Label distribution (val):")
print(val_df["label"].value_counts(normalize=True))
print()

print("Label distribution (test):")
print(test_df["label"].value_counts(normalize=True))


Total rows in full dataset: 74435
Subset rows (train/val/test): 10000 3000 3000

Label distribution (train):
label
0    0.9563
1    0.0437
Name: proportion, dtype: float64

Label distribution (val):
label
0    0.975667
1    0.024333
Name: proportion, dtype: float64

Label distribution (test):
label
0    0.960333
1    0.039667
Name: proportion, dtype: float64


In [25]:
# Block 13: Dataset / DataLoader

import os
import cv2
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# Transforms
train_tfms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std =[0.229, 0.224, 0.225]),
])

eval_tfms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std =[0.229, 0.224, 0.225]),
])

class MouthDataset(Dataset):
    def __init__(self, dataframe: pd.DataFrame, transform=None, label_col="label"):
        self.df = dataframe.reset_index(drop=True).copy()
        self.transform = transform
        self.label_col = label_col

        if "crop_path" not in self.df.columns:
            raise ValueError("DataFrame must contain a 'crop_path' column.")
        if self.label_col not in self.df.columns:
            raise ValueError(f"DataFrame must contain a '{self.label_col}' column.")

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = str(row["crop_path"]).replace("\\", "/")
        y = int(row[self.label_col])

        if not os.path.exists(img_path):
            raise FileNotFoundError(f"Missing image file: {img_path}")

        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            img = self.transform(img)

        return img, y

# We no longer need the "y" column if labels are already 0/1
train_ds = MouthDataset(train_df, transform=train_tfms, label_col="label")
val_ds   = MouthDataset(val_df,   transform=eval_tfms,  label_col="label")
test_ds  = MouthDataset(test_df,  transform=eval_tfms,  label_col="label")

train_loader = DataLoader(
    train_ds,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=(device.type == "cuda"),
    persistent_workers=(NUM_WORKERS > 0),
)

val_loader = DataLoader(
    val_ds,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=(device.type == "cuda"),
    persistent_workers=(NUM_WORKERS > 0),
)

test_loader = DataLoader(
    test_ds,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=(device.type == "cuda"),
    persistent_workers=(NUM_WORKERS > 0),
)


In [26]:
# Block 14: Model factory

import torch
import torch.nn as nn
import torchvision

def make_model(name: str, num_classes: int) -> nn.Module:
    name = name.lower().strip()

    if name == "resnet18":
        model = torchvision.models.resnet18(
            weights=torchvision.models.ResNet18_Weights.IMAGENET1K_V1
        )
        model.fc = nn.Linear(model.fc.in_features, num_classes)
        return model

    if name == "resnet50":
        model = torchvision.models.resnet50(
            weights=torchvision.models.ResNet50_Weights.IMAGENET1K_V2
        )
        model.fc = nn.Linear(model.fc.in_features, num_classes)
        return model

    if name in ["mobilenetv2", "mobilenet_v2"]:
        model = torchvision.models.mobilenet_v2(
            weights=torchvision.models.MobileNet_V2_Weights.IMAGENET1K_V1
        )
        in_features = model.classifier[1].in_features
        model.classifier = nn.Sequential(
            nn.Dropout(MOBILENET_DROPOUT),
            nn.Linear(in_features, MOBILENET_HIDDEN),
            nn.ReLU(inplace=True),
            nn.Dropout(MOBILENET_DROPOUT),
            nn.Linear(MOBILENET_HIDDEN, num_classes),
        )
        return model

    if name in ["efficientnet_b0", "efficientnetb0"]:
        model = torchvision.models.efficientnet_b0(
            weights=torchvision.models.EfficientNet_B0_Weights.IMAGENET1K_V1
        )
        in_features = model.classifier[1].in_features
        model.classifier[1] = nn.Linear(in_features, num_classes)
        return model

    raise ValueError(f"Unknown model name: {name}")


In [27]:
# Block 15: Train / Eval helpers

import numpy as np
import torch
import torch.nn as nn
from tqdm.auto import tqdm
from sklearn.metrics import accuracy_score, confusion_matrix, precision_recall_fscore_support

criterion = nn.CrossEntropyLoss()

@torch.no_grad()
def evaluate(model: nn.Module, loader: DataLoader, epoch=None, model_name="", split_name="Val"):
    model.eval()

    all_y = []
    all_p = []

    pbar = tqdm(loader, desc=f"[{model_name}] {split_name} E{epoch}", leave=False)
    for x, y in pbar:
        x = x.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)

        logits = model(x)
        preds = torch.argmax(logits, dim=1)

        all_y.append(y.detach().cpu().numpy())
        all_p.append(preds.detach().cpu().numpy())

    y_true = np.concatenate(all_y) if len(all_y) else np.array([], dtype=np.int64)
    y_pred = np.concatenate(all_p) if len(all_p) else np.array([], dtype=np.int64)

    if y_true.size == 0:
        return 0.0, 0.0, 0.0, 0.0, np.zeros((0, 0), dtype=int)

    acc = accuracy_score(y_true, y_pred)
    prec, rec, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average="macro", zero_division=0
    )
    cm = confusion_matrix(y_true, y_pred)

    return acc, prec, rec, f1, cm

def train_one_epoch(model: nn.Module, loader: DataLoader, optimizer, scheduler=None, epoch=None, model_name=""):
    model.train()

    running_loss = 0.0
    n = 0

    pbar = tqdm(loader, desc=f"[{model_name}] Train E{epoch}", leave=False)
    for x, y in pbar:
        x = x.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)

        logits = model(x)
        loss = criterion(logits, y)

        loss.backward()
        optimizer.step()

        # OneCycleLR is stepped per batch
        if scheduler is not None:
            scheduler.step()

        bs = x.size(0)
        running_loss += loss.item() * bs
        n += bs

        pbar.set_postfix(
            loss=f"{(running_loss / max(1, n)):.4f}",
            lr=f"{optimizer.param_groups[0]['lr']:.2e}"
        )

    return running_loss / max(1, n)


In [28]:
# Block 16: Run training for selected models (prints LR + weight decay)

import time
import json
import pandas as pd
import torch

model_names = []
if USE_RESNET18:
    model_names.append("resnet18")
if USE_RESNET50:
    model_names.append("resnet50")
if USE_MOBILENETV2:
    model_names.append("mobilenetv2")
if USE_EFFICIENTNET_B0:
    model_names.append("efficientnet_b0")

if not model_names:
    raise ValueError("No models enabled. Enable at least one toggle in Block 9.")

orig_labels = sorted(df["label"].unique().tolist())
num_classes = len(orig_labels)
if num_classes < 2:
    raise ValueError(f"Need at least 2 classes. Found labels: {orig_labels}")

summary_rows = []

for MODEL_NAME in model_names:
    print("\n" + "-" * 60)
    print(f"Training: {MODEL_NAME}")
    print(f"LR = {LR:.1e} | Weight Decay = {WEIGHT_DECAY:.1e}")
    print("-" * 60)

    model = make_model(MODEL_NAME, num_classes).to(device)

    optimizer = torch.optim.AdamW(
        model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY
    )

    steps_per_epoch = len(train_loader)
    if steps_per_epoch == 0:
        raise RuntimeError("train_loader is empty. Check your split CSVs and dataset paths.")

    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=LR,
        epochs=EPOCHS,
        steps_per_epoch=steps_per_epoch,
        pct_start=0.1,
    )

    best_val_f1 = -1.0
    best_epoch = -1
    no_improve = 0
    history = []

    best_ckpt_path = os.path.join(ckpt_dir, f"{MODEL_NAME}_best.pt")
    hist_csv_path  = os.path.join(logs_dir, f"{MODEL_NAME}_history.csv")
    metrics_path   = os.path.join(logs_dir, f"{MODEL_NAME}_metrics.json")

    t_start = time.time()

    for epoch in range(1, EPOCHS + 1):
        t0 = time.time()

        train_loss = train_one_epoch(
            model, train_loader, optimizer,
            scheduler=scheduler, epoch=epoch, model_name=MODEL_NAME
        )

        val_acc, val_prec, val_rec, val_f1, val_cm = evaluate(
            model, val_loader,
            epoch=epoch, model_name=MODEL_NAME, split_name="Val"
        )

        elapsed = time.time() - t0

        history.append({
            "epoch": int(epoch),
            "train_loss": float(train_loss),
            "val_acc": float(val_acc),
            "val_macro_prec": float(val_prec),
            "val_macro_rec": float(val_rec),
            "val_macro_f1": float(val_f1),
            "epoch_sec": float(elapsed),
            "lr": float(LR),
            "weight_decay": float(WEIGHT_DECAY),
        })

        print(
            f"Epoch {epoch:02d}/{EPOCHS} | "
            f"loss {train_loss:.4f} | "
            f"val_macro_f1 {val_f1:.4f} | "
            f"val_acc {val_acc:.4f} | "
            f"lr {LR:.1e} | wd {WEIGHT_DECAY:.1e} | "
            f"{elapsed:.1f}s"
        )

        if val_f1 > best_val_f1:
            best_val_f1 = float(val_f1)
            best_epoch = int(epoch)
            no_improve = 0

            torch.save(
                {
                    "model_name": MODEL_NAME,
                    "epoch": int(epoch),
                    "state_dict": model.state_dict(),
                    "num_classes": int(num_classes),
                    "orig_labels": [int(x) for x in orig_labels],
                    "best_val_macro_f1": float(best_val_f1),
                    "lr": float(LR),
                    "weight_decay": float(WEIGHT_DECAY),
                },
                best_ckpt_path,
            )
        else:
            no_improve += 1

        if no_improve >= EARLY_STOPPING_PATIENCE:
            print(
                f"Early stopping (no improvement for {EARLY_STOPPING_PATIENCE} epochs)."
            )
            break

    total_time = time.time() - t_start

    pd.DataFrame(history).to_csv(hist_csv_path, index=False)

    ckpt = torch.load(best_ckpt_path, map_location=device, weights_only=False)
    model.load_state_dict(ckpt["state_dict"])

    test_acc, test_prec, test_rec, test_f1, test_cm = evaluate(
        model, test_loader,
        epoch="best", model_name=MODEL_NAME, split_name="Test"
    )

    metrics = {
        "model": MODEL_NAME,
        "lr": float(LR),
        "weight_decay": float(WEIGHT_DECAY),
        "best_epoch": int(best_epoch),
        "best_val_macro_f1": float(best_val_f1),
        "test_accuracy": float(test_acc),
        "test_macro_precision": float(test_prec),
        "test_macro_recall": float(test_rec),
        "test_macro_f1": float(test_f1),
        "train_rows": int(len(train_df)),
        "val_rows": int(len(val_df)),
        "test_rows": int(len(test_df)),
        "num_classes": int(num_classes),
        "orig_labels": [int(x) for x in orig_labels],
        "total_train_time_sec": float(total_time),
        "checkpoint_path": best_ckpt_path,
        "history_csv_path": hist_csv_path,
    }

    with open(metrics_path, "w", encoding="utf-8") as f:
        json.dump(metrics, f, indent=2)

    print("\nBest checkpoint:", best_ckpt_path)
    print("History CSV:", hist_csv_path)
    print("Metrics JSON:", metrics_path)
    print(
        f"TEST | acc {test_acc:.4f} | "
        f"macro_f1 {test_f1:.4f} | "
        f"macro_prec {test_prec:.4f} | "
        f"macro_rec {test_rec:.4f}"
    )

    summary_rows.append(metrics)



------------------------------------------------------------
Training: resnet18
LR = 1.0e-04 | Weight Decay = 1.0e-04
------------------------------------------------------------


[resnet18] Train E1:   0%|          | 0/313 [00:00<?, ?it/s]

[resnet18] Val E1:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch 01/15 | loss 0.2960 | val_macro_f1 0.9826 | val_acc 0.9983 | lr 1.0e-04 | wd 1.0e-04 | 67.5s


[resnet18] Train E2:   0%|          | 0/313 [00:00<?, ?it/s]

[resnet18] Val E2:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch 02/15 | loss 0.0238 | val_macro_f1 0.9658 | val_acc 0.9967 | lr 1.0e-04 | wd 1.0e-04 | 67.0s


[resnet18] Train E3:   0%|          | 0/313 [00:00<?, ?it/s]

[resnet18] Val E3:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch 03/15 | loss 0.0122 | val_macro_f1 0.9621 | val_acc 0.9963 | lr 1.0e-04 | wd 1.0e-04 | 67.2s


[resnet18] Train E4:   0%|          | 0/313 [00:00<?, ?it/s]

[resnet18] Val E4:   0%|          | 0/94 [00:00<?, ?it/s]

Epoch 04/15 | loss 0.0100 | val_macro_f1 0.9649 | val_acc 0.9967 | lr 1.0e-04 | wd 1.0e-04 | 67.2s
Early stopping (no improvement for 3 epochs).


[resnet18] Test Ebest:   0%|          | 0/94 [00:00<?, ?it/s]


Best checkpoint: /content/drive/MyDrive/Dataset/Output/checkpoints/resnet18_best.pt
History CSV: /content/drive/MyDrive/Dataset/Output/logs/resnet18_history.csv
Metrics JSON: /content/drive/MyDrive/Dataset/Output/logs/resnet18_metrics.json
TEST | acc 0.9923 | macro_f1 0.9522 | macro_prec 0.9309 | macro_rec 0.9759


In [29]:
# Block 17: Inference interface (model selection + image upload)

import os
import torch
import torch.nn.functional as F
import numpy as np
import cv2
from torchvision import transforms
from IPython.display import display
import ipywidgets as widgets

# image size and checkpoints folder
IMG_SIZE = IMG_SIZE
ckpt_dir = ckpt_dir

# label meaning (make sure this matches your dataset)
IDX_TO_NAME = {
    0: "Normal",
    1: "Yawning",
}

# same preprocessing used during validation
eval_tfms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    ),
])

def load_model(model_name, num_classes):
    model = make_model(model_name, num_classes).to(device)

    ckpt_path = os.path.join(ckpt_dir, f"{model_name}_best.pt")
    if not os.path.exists(ckpt_path):
        raise FileNotFoundError(f"Checkpoint not found: {ckpt_path}")

    ckpt = torch.load(ckpt_path, map_location=device, weights_only=False)
    model.load_state_dict(ckpt["state_dict"])
    model.eval()
    return model

@torch.no_grad()
def predict_image(model, img_bgr):
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    x = eval_tfms(img_rgb).unsqueeze(0).to(device)

    logits = model(x)
    probs = F.softmax(logits, dim=1).cpu().numpy()[0]

    pred = int(np.argmax(probs))
    conf = float(probs[pred])
    return pred, conf, probs

# UI elements
model_selector = widgets.Dropdown(
    options=["resnet18", "resnet50", "mobilenetv2", "efficientnet_b0"],
    value="resnet18",
    description="Model",
)

image_uploader = widgets.FileUpload(
    accept=".jpg,.jpeg,.png",
    multiple=False,
)

predict_btn = widgets.Button(description="Predict")
output_box = widgets.Output()

def run_prediction(_):
    output_box.clear_output()

    if len(image_uploader.value) == 0:
        with output_box:
            print("Please upload a cropped mouth image.")
        return

    uploaded = list(image_uploader.value.values())[0]
    img_array = np.frombuffer(uploaded["content"], dtype=np.uint8)
    img_bgr = cv2.imdecode(img_array, cv2.IMREAD_COLOR)

    if img_bgr is None:
        with output_box:
            print("Invalid image.")
        return

    num_classes = len(df["label"].unique())
    model = load_model(model_selector.value, num_classes)

    pred_idx, conf, probs = predict_image(model, img_bgr)

    with output_box:
        import matplotlib.pyplot as plt
        plt.figure(figsize=(4, 4))
        plt.imshow(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
        plt.axis("off")
        plt.title(f"Prediction: {IDX_TO_NAME[pred_idx]} (confidence = {conf:.3f})")
        plt.show()

        print("Class probabilities:")
        for i, p in enumerate(probs):
            print(f"{IDX_TO_NAME[i]}: {p:.3f}")

predict_btn.on_click(run_prediction)

display(model_selector, image_uploader, predict_btn, output_box)


Dropdown(description='Model', options=('resnet18', 'resnet50', 'mobilenetv2', 'efficientnet_b0'), value='resne…

FileUpload(value={}, accept='.jpg,.jpeg,.png', description='Upload')

Button(description='Predict', style=ButtonStyle())

Output()

In [None]:
# Block 18: Learning curves (training + validation vs epochs) for best run of each architecture

import os
import pandas as pd
import matplotlib.pyplot as plt

logs_dir = logs_dir  # already defined earlier

models = ["resnet18"]
#"resnet18", "resnet50", "mobilenetv2", "efficientnet_b0"

for m in models:
    hist_path = os.path.join(logs_dir, f"{m}_history.csv")
    if not os.path.exists(hist_path):
        print(f"[Skip] {m}: history file not found -> {hist_path}")
        continue

    h = pd.read_csv(hist_path)

    if "epoch" not in h.columns:
        print(f"[Skip] {m}: missing 'epoch' column")
        continue

    # You always have train_loss + val_macro_f1 in your saved history
    # Plot 1: train loss vs epoch
    plt.figure(figsize=(7, 4))
    plt.plot(h["epoch"], h["train_loss"])
    plt.xlabel("Epoch")
    plt.ylabel("Training Loss")
    plt.title(f"{m}: Training Loss vs Epoch")
    plt.grid(True)
    plt.show()

    # Plot 2: validation macro-F1 vs epoch
    if "val_macro_f1" in h.columns:
        plt.figure(figsize=(7, 4))
        plt.plot(h["epoch"], h["val_macro_f1"])
        plt.xlabel("Epoch")
        plt.ylabel("Validation Macro-F1")
        plt.title(f"{m}: Validation Macro-F1 vs Epoch")
        plt.grid(True)
        plt.show()
    else:
        print(f"[Skip] {m}: missing 'val_macro_f1' column")


In [None]:
# Block 19: Best architecture (ResNet18) — performance vs dataset size
# Trains ResNet18 on different TRAIN sizes, keeps the SAME val/test sets, and plots performance vs size.

import os
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

# Choose best architecture + best hyperparameters (use your best combo)
BEST_MODEL = "resnet18"
BEST_LR = LR
BEST_WD = WEIGHT_DECAY


TRAIN_FRACTIONS = [0.25, 0.50, 0.75, 1.00]

# Helper: build a stratified subset of train_df by label
# (keeps class balance roughly the same)
def stratified_subset(df_in, frac, seed=42):
    if frac >= 1.0:
        return df_in.sample(frac=1.0, random_state=seed).reset_index(drop=True)

    parts = []
    for lab, g in df_in.groupby("label"):
        n_take = max(1, int(round(len(g) * frac)))
        n_take = min(n_take, len(g))
        parts.append(g.sample(n=n_take, random_state=seed))
    out = pd.concat(parts, axis=0).sample(frac=1.0, random_state=seed).reset_index(drop=True)
    return out

# Optional: compute macro-F1 on a loader (train/val)
@torch.no_grad()
def eval_macro_f1(model, loader):
    model.eval()
    ys, ps = [], []
    for x, y in loader:
        x = x.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)
        logits = model(x)
        pred = torch.argmax(logits, dim=1)
        ys.append(y.detach().cpu().numpy())
        ps.append(pred.detach().cpu().numpy())
    y_true = np.concatenate(ys)
    y_pred = np.concatenate(ps)

    from sklearn.metrics import precision_recall_fscore_support
    _, _, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average="macro", zero_division=0
    )
    return float(f1)

results = []

# Fixed val/test loaders from your full split
fixed_val_loader = val_loader
fixed_test_loader = test_loader

for frac in TRAIN_FRACTIONS:
    sub_train_df = stratified_subset(train_df, frac, seed=SEED)
    n_train = len(sub_train_df)

    # Make a new loader for the subset (keep same transforms)
    sub_train_ds = MouthDataset(sub_train_df, transform=train_tfms)
    sub_train_loader = DataLoader(
        sub_train_ds,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=NUM_WORKERS,
        pin_memory=(device.type == "cuda"),
        persistent_workers=False
    )

    # Fresh model each time
    model = make_model(BEST_MODEL, num_classes).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=BEST_LR, weight_decay=BEST_WD)

    steps_per_epoch = len(sub_train_loader)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=BEST_LR,
        epochs=EPOCHS,
        steps_per_epoch=max(1, steps_per_epoch),
        pct_start=0.1
    )

    best_val = -1.0
    best_state = None
    no_improve = 0

    t0 = time.time()

    for epoch in range(1, EPOCHS + 1):
        _ = train_one_epoch(model, sub_train_loader, optimizer, scheduler=scheduler, epoch=epoch, model_name=f"{BEST_MODEL}-{int(frac*100)}%")

        val_acc, val_prec, val_rec, val_f1, _ = evaluate(model, fixed_val_loader, epoch=epoch, model_name=BEST_MODEL, split_name="Val")

        if val_f1 > best_val:
            best_val = float(val_f1)
            best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
            no_improve = 0
        else:
            no_improve += 1

        if no_improve >= EARLY_STOPPING_PATIENCE:
            break

    train_f1 = eval_macro_f1(model, sub_train_loader)

    # Load best state for final val score (and optionally test, but requirement is train/val)
    if best_state is not None:
        model.load_state_dict(best_state)

    val_f1_final = eval_macro_f1(model, fixed_val_loader)

    elapsed = time.time() - t0

    results.append({
        "train_fraction": float(frac),
        "train_rows": int(n_train),
        "train_macro_f1": float(train_f1),
        "val_macro_f1": float(val_f1_final),
        "best_val_macro_f1_during_training": float(best_val),
        "time_sec": float(elapsed),
        "lr": float(BEST_LR),
        "weight_decay": float(BEST_WD),
        "model": BEST_MODEL,
    })

    print(f"{BEST_MODEL} | train_rows={n_train} | train_f1={train_f1:.4f} | val_f1={val_f1_final:.4f} | time={elapsed:.1f}s")

res_df = pd.DataFrame(results)
display(res_df)

# Plot: performance vs dataset size
plt.figure(figsize=(7, 4))
plt.plot(res_df["train_rows"], res_df["train_macro_f1"], marker="o", label="Train Macro-F1")
plt.plot(res_df["train_rows"], res_df["val_macro_f1"], marker="o", label="Val Macro-F1")
plt.xlabel("Training set size (rows)")
plt.ylabel("Macro-F1")
plt.title(f"{BEST_MODEL}: Performance vs Training Set Size")
plt.grid(True)
plt.legend()
plt.show()

out_path = os.path.join(logs_dir, f"{BEST_MODEL}_perf_vs_size.csv")
res_df.to_csv(out_path, index=False)
print("Saved:", out_path)
