<a href="https://www.kaggle.com/code/khalednabilfathy/01-sign-to-text?scriptVersionId=277857081" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
# ---------- Cell 1: Setup Environment (Kaggle) ----------
import os
import shutil
import sys

# Set base directory (Kaggle has /kaggle/working as workspace)
BASE_DIR = "/kaggle/working/WASL"
os.makedirs(BASE_DIR, exist_ok=True)

# Create subdirectories
os.makedirs(os.path.join(BASE_DIR, "videos"), exist_ok=True)
os.makedirs(os.path.join(BASE_DIR, "manifests"), exist_ok=True)
os.makedirs(os.path.join(BASE_DIR, "preprocessed"), exist_ok=True)
os.makedirs(os.path.join(BASE_DIR, "models", "checkpoints"), exist_ok=True)

print("‚úÖ Base directory:", BASE_DIR)
print("‚úÖ Created subdirectories:")
print("   - videos/")
print("   - manifests/")
print("   - preprocessed/")
print("   - models/checkpoints/")

# Check GPU availability
import torch

if torch.cuda.is_available():
    print(f"\n‚úÖ GPU available: {torch.cuda.get_device_name(0)}")
    print(f"   GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
else:
    print("\n‚ö†Ô∏è No GPU detected - make sure GPU accelerator is enabled")
    print("   (Settings ‚Üí Accelerator ‚Üí GPU)")

# Load pre-trained WLASL100 model (FULL MODEL with architecture)
print("\n" + "="*60)
print("LOADING PRE-TRAINED WLASL100 FULL MODEL (75.15%)")
print("="*60)

# Source paths (NEW - full model with architecture!)
source_model = "/kaggle/input/wlasl-finetuned-full-model/wlasl100_best_model_75.15pct_FULL.pth"
source_label_map = "/kaggle/input/wlasl-finetuned-model/working/label_mapping.json"

# Destination paths
dest_model = os.path.join(BASE_DIR, "models", "checkpoints", "best_model_FULL.pth")
dest_label_map = os.path.join(BASE_DIR, "manifests", "label_mapping.json")

# Copy files
try:
    if os.path.exists(source_model):
        shutil.copy(source_model, dest_model)
        print("‚úÖ Loaded best_model_FULL.pth (75.15% WLASL100)")
        print(f"   From: {source_model}")
        print(f"   To:   {dest_model}")
        print("   ‚≠ê This is the FULL model (architecture + weights)")
    else:
        print(f"‚ùå Model not found at: {source_model}")
        print("   Please add 'wlasl-finetuned-full-model' dataset to your notebook inputs")
        
    if os.path.exists(source_label_map):
        shutil.copy(source_label_map, dest_label_map)
        print("‚úÖ Loaded label_mapping.json")
        print(f"   From: {source_label_map}")
        print(f"   To:   {dest_label_map}")
    else:
        print(f"‚ùå Label mapping not found at: {source_label_map}")
        
except Exception as e:
    print(f"‚ö†Ô∏è Error copying files: {e}")
    print("\n‚ÑπÔ∏è Make sure you added the Kaggle datasets:")
    print("   1. 'wlasl-finetuned-full-model' (NEW - contains full model)")
    print("   2. 'wlasl-finetuned-model' (for label mapping)")

print("\n‚úÖ Ready to start Citizen 100 training!")
print("   Next: Run Cell 9 ‚Üí Cell 14 ‚Üí Cell 16 ‚Üí Cell 17")

‚úÖ Base directory: /kaggle/working/WASL
‚úÖ Created subdirectories:
   - videos/
   - manifests/
   - preprocessed/
   - models/checkpoints/

‚úÖ GPU available: Tesla P100-PCIE-16GB
   GPU Memory: 17.06 GB

LOADING PRE-TRAINED WLASL100 FULL MODEL (75.15%)
‚úÖ Loaded best_model_FULL.pth (75.15% WLASL100)
   From: /kaggle/input/wlasl-finetuned-full-model/wlasl100_best_model_75.15pct_FULL.pth
   To:   /kaggle/working/WASL/models/checkpoints/best_model_FULL.pth
   ‚≠ê This is the FULL model (architecture + weights)
‚úÖ Loaded label_mapping.json
   From: /kaggle/input/wlasl-finetuned-model/working/label_mapping.json
   To:   /kaggle/working/WASL/manifests/label_mapping.json

‚úÖ Ready to start Citizen 100 training!
   Next: Run Cell 9 ‚Üí Cell 14 ‚Üí Cell 16 ‚Üí Cell 17


In [2]:
# ---------- Cell 2: Download WLASL Processed Dataset ----------
import shutil
import zipfile
from pathlib import Path

# Check if dataset is already added as input
KAGGLE_INPUT = "/kaggle/input/wlasl-processed"
if os.path.exists(KAGGLE_INPUT):
    print("‚úÖ WLASL dataset found in Kaggle inputs!")
    print(f"   Path: {KAGGLE_INPUT}")
    
    # List available files
    print("\nüìÇ Available files:")
    for item in os.listdir(KAGGLE_INPUT):
        item_path = os.path.join(KAGGLE_INPUT, item)
        if os.path.isdir(item_path):
            print(f"   üìÅ {item}/")
        else:
            size_mb = os.path.getsize(item_path) / (1024 * 1024)
            print(f"   üìÑ {item} ({size_mb:.2f} MB)")
    
    # Check if videos folder exists
    videos_input = os.path.join(KAGGLE_INPUT, "videos")
    if os.path.exists(videos_input):
        video_count = len([f for f in os.listdir(videos_input) if f.endswith('.mp4')])
        print(f"\n‚úÖ Found {video_count} videos in dataset")
    
    VIDEOS_SOURCE = videos_input
    
else:
    print("‚ö†Ô∏è Dataset not found in inputs!")
    print("\nüìù To add the dataset:")
    print("   1. Click 'Add Data' button (top right)")
    print("   2. Search for 'wlasl-processed'")
    print("   3. Add 'risangbaskoro/wlasl-processed' dataset")
    print("   4. Rerun this cell")
    sys.exit(1)

print("\n‚úÖ Dataset ready - proceed to next cell")

‚úÖ WLASL dataset found in Kaggle inputs!
   Path: /kaggle/input/wlasl-processed

üìÇ Available files:
   üìÑ nslt_2000.json (1.08 MB)
   üìÅ videos/
   üìÑ nslt_1000.json (0.67 MB)
   üìÑ WLASL_v0.3.json (11.38 MB)
   üìÑ wlasl_class_list.txt (0.02 MB)
   üìÑ nslt_300.json (0.26 MB)
   üìÑ missing.txt (0.05 MB)
   üìÑ nslt_100.json (0.10 MB)

‚úÖ Found 11980 videos in dataset

‚úÖ Dataset ready - proceed to next cell


In [3]:
# ---------- Cell 3: Load WLASL Manifest & Filter WLASL100 ----------
import json

import pandas as pd

# Load the main WLASL manifest
manifest_path = os.path.join(KAGGLE_INPUT, "WLASL_v0.3.json")
with open(manifest_path, "r") as f:
    wlasl_data = json.load(f)

print(f"‚úÖ Loaded WLASL manifest: {len(wlasl_data)} total glosses")

# Filter for WLASL100 (first 100 glosses by frequency)
wlasl100_data = wlasl_data[:100]
print(f"‚úÖ Filtered to WLASL100: {len(wlasl100_data)} glosses")

# Extract all video instances for WLASL100
video_records = []
for gloss_entry in wlasl100_data:
    gloss = gloss_entry['gloss']
    for instance in gloss_entry['instances']:
        video_id = instance['video_id']
        bbox = instance.get('bbox', None)
        fps = instance.get('fps', 25)
        frame_start = instance.get('frame_start', None)
        frame_end = instance.get('frame_end', None)
        split = instance.get('split', 'train')  # train/val/test
        
        video_records.append({
            'video_id': video_id,
            'gloss': gloss,
            'split': split,
            'bbox': bbox,
            'fps': fps,
            'frame_start': frame_start,
            'frame_end': frame_end
        })

# Create DataFrame
df = pd.DataFrame(video_records)
print(f"\n‚úÖ Created dataset with {len(df)} video instances")

# Check split distribution
print("\nüìä Split distribution:")
print(df['split'].value_counts())

print("\nüìä Top 10 glosses:")
print(df['gloss'].value_counts().head(10))

# Save manifest to working directory
manifest_save_path = os.path.join(BASE_DIR, "manifests", "wlasl100_manifest.csv")
df.to_csv(manifest_save_path, index=False)
print(f"\n‚úÖ Saved manifest to: {manifest_save_path}")

print("\n‚úÖ Ready for next cell - video verification")

‚úÖ Loaded WLASL manifest: 2000 total glosses
‚úÖ Filtered to WLASL100: 100 glosses

‚úÖ Created dataset with 2038 video instances

üìä Split distribution:
split
train    1442
val       338
test      258
Name: count, dtype: int64

üìä Top 10 glosses:
gloss
book        40
drink       35
computer    30
before      26
chair       26
go          26
clothes     25
who         25
candy       24
cousin      23
Name: count, dtype: int64

‚úÖ Saved manifest to: /kaggle/working/WASL/manifests/wlasl100_manifest.csv

‚úÖ Ready for next cell - video verification


In [4]:
# ---------- Cell 4: Analyze Split Distribution Per Gloss ----------

# Check how one gloss (e.g., "book") is distributed
book_df = df[df['gloss'] == 'book']
print("üìä Example: 'book' gloss distribution:")
print(book_df['split'].value_counts())
print(f"   Total: {len(book_df)} videos\n")

# Check another example
drink_df = df[df['gloss'] == 'drink']
print("üìä Example: 'drink' gloss distribution:")
print(drink_df['split'].value_counts())
print(f"   Total: {len(drink_df)} videos\n")

# Summary statistics
print("üìä Overall statistics:")
print(f"   ‚Ä¢ Total glosses: {df['gloss'].nunique()}")
print(f"   ‚Ä¢ Total videos: {len(df)}")
print(f"   ‚Ä¢ Avg videos per gloss: {len(df) / df['gloss'].nunique():.1f}")
print(f"\n   ‚Ä¢ Train videos: {len(df[df['split']=='train'])} ({len(df[df['split']=='train'])/len(df)*100:.1f}%)")
print(f"   ‚Ä¢ Val videos: {len(df[df['split']=='val'])} ({len(df[df['split']=='val'])/len(df)*100:.1f}%)")
print(f"   ‚Ä¢ Test videos: {len(df[df['split']=='test'])} ({len(df[df['split']=='test'])/len(df)*100:.1f}%)")

print("\n‚úÖ Each of the 100 glosses has its videos split across train/val/test")
print("‚úÖ This ensures the model sees each sign during training and is tested on unseen examples")

üìä Example: 'book' gloss distribution:
split
train    30
val       6
test      4
Name: count, dtype: int64
   Total: 40 videos

üìä Example: 'drink' gloss distribution:
split
train    25
val       6
test      4
Name: count, dtype: int64
   Total: 35 videos

üìä Overall statistics:
   ‚Ä¢ Total glosses: 100
   ‚Ä¢ Total videos: 2038
   ‚Ä¢ Avg videos per gloss: 20.4

   ‚Ä¢ Train videos: 1442 (70.8%)
   ‚Ä¢ Val videos: 338 (16.6%)
   ‚Ä¢ Test videos: 258 (12.7%)

‚úÖ Each of the 100 glosses has its videos split across train/val/test
‚úÖ This ensures the model sees each sign during training and is tested on unseen examples


In [5]:
# ---------- Cell 5: Verify Available Videos & Match with Manifest ----------
import os

# Get list of all available video files
available_videos = set()
for video_file in os.listdir(VIDEOS_SOURCE):
    if video_file.endswith('.mp4'):
        # Extract video_id (filename without extension)
        video_id = video_file.replace('.mp4', '')
        available_videos.add(video_id)

print(f"‚úÖ Found {len(available_videos)} available videos in dataset")

# Check which videos from manifest are actually available
df['video_available'] = df['video_id'].isin(available_videos)
df['video_path'] = df['video_id'].apply(
    lambda vid: os.path.join(VIDEOS_SOURCE, f"{vid}.mp4") if vid in available_videos else None
)

# Statistics
total_required = len(df)
total_available = df['video_available'].sum()
missing_count = total_required - total_available

print("\nüìä Video Availability:")
print(f"   ‚Ä¢ Required by manifest: {total_required}")
print(f"   ‚Ä¢ Available: {total_available} ({total_available/total_required*100:.1f}%)")
print(f"   ‚Ä¢ Missing: {missing_count} ({missing_count/total_required*100:.1f}%)")

# Check availability by split
print("\nüìä Availability by split:")
for split_name in ['train', 'val', 'test']:
    split_df = df[df['split'] == split_name]
    available = split_df['video_available'].sum()
    total = len(split_df)
    print(f"   ‚Ä¢ {split_name}: {available}/{total} ({available/total*100:.1f}%)")

# Filter to only available videos
df_available = df[df['video_available'] == True].copy()
print(f"\n‚úÖ Working dataset: {len(df_available)} videos across {df_available['gloss'].nunique()} glosses")

# Save filtered manifest
filtered_manifest_path = os.path.join(BASE_DIR, "manifests", "wlasl100_available.csv")
df_available.to_csv(filtered_manifest_path, index=False)
print(f"‚úÖ Saved available videos manifest to: {filtered_manifest_path}")

# Check if any glosses lost all videos
glosses_with_videos = df_available['gloss'].value_counts()
original_glosses = df['gloss'].nunique()
remaining_glosses = len(glosses_with_videos)

if remaining_glosses < original_glosses:
    print(f"\n‚ö†Ô∏è Warning: {original_glosses - remaining_glosses} glosses have no available videos")
else:
    print(f"\n‚úÖ All {remaining_glosses} glosses have at least one video")

print("\n‚úÖ Ready for next cell - video preprocessing")

‚úÖ Found 11980 available videos in dataset

üìä Video Availability:
   ‚Ä¢ Required by manifest: 2038
   ‚Ä¢ Available: 1013 (49.7%)
   ‚Ä¢ Missing: 1025 (50.3%)

üìä Availability by split:
   ‚Ä¢ train: 748/1442 (51.9%)
   ‚Ä¢ val: 165/338 (48.8%)
   ‚Ä¢ test: 100/258 (38.8%)

‚úÖ Working dataset: 1013 videos across 100 glosses
‚úÖ Saved available videos manifest to: /kaggle/working/WASL/manifests/wlasl100_available.csv

‚úÖ All 100 glosses have at least one video

‚úÖ Ready for next cell - video preprocessing


In [6]:
# ---------- Cell 6: Video Preprocessing - Setup & Utilities ----------
import cv2
import numpy as np
import torch
from tqdm import tqdm

print("‚úÖ OpenCV version:", cv2.__version__)
print("‚úÖ PyTorch version:", torch.__version__)
print("‚úÖ CUDA available:", torch.cuda.is_available())

# Define preprocessing parameters
PREPROCESS_CONFIG = {
    'target_fps': 25,           # Resample all videos to 25 fps
    'target_frames': 32,        # Extract 32 frames per video (standard for I3D)
    'target_size': (224, 224),  # Resize frames to 224x224 (I3D input size)
    'normalize': True,          # Normalize pixel values to [0, 1]
}

print("\nüìã Preprocessing Configuration:")
for key, value in PREPROCESS_CONFIG.items():
    print(f"   ‚Ä¢ {key}: {value}")

# Video loading utility function
def load_video_frames(video_path, target_frames=32, target_size=(224, 224)):
    """
    Load video and extract uniformly sampled frames.
    
    Args:
        video_path: Path to video file
        target_frames: Number of frames to extract
        target_size: Target spatial size (H, W)
    
    Returns:
        frames: numpy array of shape (T, H, W, C) - T=target_frames, C=3 (RGB)
    """
    cap = cv2.VideoCapture(video_path)
    
    if not cap.isOpened():
        raise ValueError(f"Cannot open video: {video_path}")
    
    # Get video properties
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    
    # Calculate frame indices to sample uniformly
    if total_frames < target_frames:
        # If video has fewer frames, repeat last frame
        indices = np.linspace(0, total_frames - 1, target_frames, dtype=int)
    else:
        # Sample uniformly across video duration
        indices = np.linspace(0, total_frames - 1, target_frames, dtype=int)
    
    frames = []
    for idx in indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
        ret, frame = cap.read()
        
        if ret:
            # Convert BGR (OpenCV) to RGB
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Resize to target size
            frame = cv2.resize(frame, target_size)
            frames.append(frame)
        else:
            # If frame read fails, repeat last valid frame
            if len(frames) > 0:
                frames.append(frames[-1])
            else:
                # Create blank frame if no valid frames yet
                frames.append(np.zeros((target_size[0], target_size[1], 3), dtype=np.uint8))
    
    cap.release()
    
    # Stack frames into single array: (T, H, W, C)
    frames = np.stack(frames, axis=0)
    
    return frames, total_frames, fps


# Test the function on one video
print("\nüß™ Testing video loader on sample video...")
sample_row = df_available.iloc[0]
sample_video_path = sample_row['video_path']
sample_gloss = sample_row['gloss']

try:
    frames, original_frames, original_fps = load_video_frames(
        sample_video_path, 
        target_frames=PREPROCESS_CONFIG['target_frames'],
        target_size=PREPROCESS_CONFIG['target_size']
    )
    
    print(f"‚úÖ Successfully loaded video for gloss: '{sample_gloss}'")
    print(f"   ‚Ä¢ Original: {original_frames} frames @ {original_fps:.1f} fps")
    print(f"   ‚Ä¢ Processed: {frames.shape[0]} frames @ {PREPROCESS_CONFIG['target_fps']} fps")
    print(f"   ‚Ä¢ Frame shape: {frames.shape[1:]} (H, W, C)")
    print(f"   ‚Ä¢ Pixel value range: [{frames.min()}, {frames.max()}]")
    print(f"   ‚Ä¢ Memory size: {frames.nbytes / 1024 / 1024:.2f} MB")
    
except Exception as e:
    print(f"‚ùå Error loading video: {e}")

print("\n‚úÖ Video preprocessing utilities ready")
print("‚úÖ Ready for next cell - batch preprocessing")

‚úÖ OpenCV version: 4.12.0
‚úÖ PyTorch version: 2.6.0+cu124
‚úÖ CUDA available: True

üìã Preprocessing Configuration:
   ‚Ä¢ target_fps: 25
   ‚Ä¢ target_frames: 32
   ‚Ä¢ target_size: (224, 224)
   ‚Ä¢ normalize: True

üß™ Testing video loader on sample video...
‚úÖ Successfully loaded video for gloss: 'book'
   ‚Ä¢ Original: 75 frames @ 30.0 fps
   ‚Ä¢ Processed: 32 frames @ 25 fps
   ‚Ä¢ Frame shape: (224, 224, 3) (H, W, C)
   ‚Ä¢ Pixel value range: [0, 255]
   ‚Ä¢ Memory size: 4.59 MB

‚úÖ Video preprocessing utilities ready
‚úÖ Ready for next cell - batch preprocessing


In [None]:
# ---------- Cell 7: Batch Preprocess All Videos ----------
# SMART SKIP: This cell automatically skips already-processed videos!
# Only new/missing videos will be processed.


# Create preprocessed data directories
PREPROCESSED_DIR = os.path.join(BASE_DIR, "preprocessed")
os.makedirs(os.path.join(PREPROCESSED_DIR, "train"), exist_ok=True)
os.makedirs(os.path.join(PREPROCESSED_DIR, "val"), exist_ok=True)
os.makedirs(os.path.join(PREPROCESSED_DIR, "test"), exist_ok=True)

print("ÔøΩ Preprocessed data will be saved to:")
print(f"   {PREPROCESSED_DIR}")

# Check existing preprocessed videos
existing_counts = {}
for split_name in ['train', 'val', 'test']:
    split_dir = os.path.join(PREPROCESSED_DIR, split_name)
    existing_counts[split_name] = len(list(Path(split_dir).glob("*.npz")))

total_existing = sum(existing_counts.values())
print(f"\nÔøΩ Found {total_existing} already preprocessed videos:")
for split_name, count in existing_counts.items():
    print(f"   ‚Ä¢ {split_name}: {count} videos")

if total_existing > 0:
    print("\nüí° These videos will be SKIPPED (fast!)")
    print("   Only new/missing videos will be processed.")

# Function to preprocess and save videos for one split
def preprocess_split(df_split, split_name):
    """
    Preprocess all videos in a split and save as compressed .npz files (uint8).
    
    STORAGE OPTIMIZATION:
    - Store as uint8 (0-255) instead of float32: 75% less space
    - Use .npz compression: additional 40% savings
    - Normalize on-the-fly during training (no speed loss)
    
    Expected storage: ~2.5 GB (vs 9.5 GB with old method)
    
    Args:
        df_split: DataFrame containing videos for this split
        split_name: 'train', 'val', or 'test'
    """
    print(f"\n{'='*60}")
    print(f"Processing {split_name.upper()} split: {len(df_split)} videos")
    print(f"{'='*60}")
    
    split_dir = os.path.join(PREPROCESSED_DIR, split_name)
    
    processed_records = []
    failed_videos = []
    skipped_videos = 0
    
    for idx, row in tqdm(df_split.iterrows(), total=len(df_split), desc=f"{split_name}"):
        video_id = row['video_id']
        video_path = row['video_path']
        gloss = row['gloss']
        
        # Check if video is already preprocessed
        save_path = os.path.join(split_dir, f"{video_id}.npz")
        
        if os.path.exists(save_path):
            # Skip processing - load metadata from existing file
            try:
                data = np.load(save_path)
                frames = data['frames']
                
                processed_records.append({
                    'video_id': video_id,
                    'gloss': gloss,
                    'split': split_name,
                    'save_path': save_path,
                    'original_frames': -1,  # Unknown (not saved in .npz)
                    'original_fps': -1,     # Unknown
                    'processed_frames': frames.shape[0],
                    'frame_shape': frames.shape[1:],
                    'dataset': 'wlasl'
                })
                skipped_videos += 1
                continue  # Skip to next video
                
            except Exception:
                # If can't load existing file, reprocess it
                print(f"\n‚ö†Ô∏è Corrupted file {video_id}, reprocessing...")
        
        try:
            # Load and preprocess video
            frames, orig_frames, orig_fps = load_video_frames(
                video_path,
                target_frames=PREPROCESS_CONFIG['target_frames'],
                target_size=PREPROCESS_CONFIG['target_size']
            )
            
            # DO NOT normalize here - keep as uint8 (0-255) for storage efficiency
            # Normalization will happen on-the-fly during training
            # frames stays as uint8 dtype
            
            # Save as compressed .npz file (uint8 + gzip compression)
            np.savez_compressed(save_path, frames=frames)
            
            # Record metadata
            processed_records.append({
                'video_id': video_id,
                'gloss': gloss,
                'split': split_name,
                'save_path': save_path,
                'original_frames': orig_frames,
                'original_fps': orig_fps,
                'processed_frames': frames.shape[0],
                'frame_shape': frames.shape[1:],
                'dataset': 'wlasl'
            })
            
        except Exception as e:
            failed_videos.append({
                'video_id': video_id,
                'gloss': gloss,
                'error': str(e)
            })
            print(f"\n‚ö†Ô∏è Failed to process {video_id} ({gloss}): {e}")
    
    # Summary statistics
    print(f"\n‚úÖ {split_name.upper()} split complete:")
    print(f"   ‚Ä¢ Total videos: {len(df_split)}")
    print(f"   ‚Ä¢ Skipped (already exists): {skipped_videos}")
    print(f"   ‚Ä¢ Newly processed: {len(processed_records) - skipped_videos}")
    print(f"   ‚Ä¢ Failed: {len(failed_videos)}")
    
    if len(failed_videos) > 0:
        print(f"\n‚ö†Ô∏è Failed videos saved to: {PREPROCESSED_DIR}/{split_name}_failed.txt")
        with open(os.path.join(PREPROCESSED_DIR, f"{split_name}_failed.txt"), 'w') as f:
            for fail in failed_videos:
                f.write(f"{fail['video_id']},{fail['gloss']},{fail['error']}\n")
    
    return processed_records, failed_videos


# Process each split
all_processed = {}
all_failed = {}

for split_name in ['train', 'val', 'test']:
    df_split = df_available[df_available['split'] == split_name].copy()
    processed, failed = preprocess_split(df_split, split_name)
    all_processed[split_name] = processed
    all_failed[split_name] = failed

# Create final preprocessed manifest
final_records = []
for split_name in ['train', 'val', 'test']:
    final_records.extend(all_processed[split_name])

df_preprocessed = pd.DataFrame(final_records)

# Save preprocessed manifest
preprocessed_manifest_path = os.path.join(BASE_DIR, "manifests", "wlasl100_preprocessed.csv")
df_preprocessed.to_csv(preprocessed_manifest_path, index=False)

print("\n" + "="*60)
print("PREPROCESSING COMPLETE")
print("="*60)
print("\nüìä Final Statistics:")
print(f"   ‚Ä¢ Total videos processed: {len(df_preprocessed)}")
print(f"   ‚Ä¢ Train: {len(all_processed['train'])}")
print(f"   ‚Ä¢ Val: {len(all_processed['val'])}")
print(f"   ‚Ä¢ Test: {len(all_processed['test'])}")
print(f"\n   ‚Ä¢ Total failed: {sum(len(all_failed[s]) for s in ['train', 'val', 'test'])}")

# Calculate disk space used
total_size = 0
for split_name in ['train', 'val', 'test']:
    split_dir = os.path.join(PREPROCESSED_DIR, split_name)
    for npz_file in Path(split_dir).glob("*.npz"):
        total_size += npz_file.stat().st_size

print(f"\nüíæ Disk space used: {total_size / (1024**3):.2f} GB")
print(f"   ‚Ä¢ Average per video: {total_size / len(df_preprocessed) / (1024**2):.2f} MB")

print("\n‚úÖ Preprocessed manifest saved to:")
print(f"   {preprocessed_manifest_path}")

print("\n‚úÖ Ready for next cell - verify preprocessed data")

In [None]:
# ---------- Cell 8: Verify Preprocessed Data & Splits ----------
import pandas as pd

print("="*60)
print("VERIFYING PREPROCESSED DATA & SPLITS")
print("="*60)

# Load preprocessed manifest
df_preprocessed = pd.read_csv(os.path.join(BASE_DIR, "manifests", "wlasl100_preprocessed.csv"))

print(f"\n‚úÖ Loaded preprocessed manifest: {len(df_preprocessed)} videos")

# Verify splits
print("\nüìä Split Distribution:")
for split in ['train', 'val', 'test']:
    split_df = df_preprocessed[df_preprocessed['split'] == split]
    print(f"   ‚Ä¢ {split.capitalize()}: {len(split_df)} videos")
    print(f"      - Unique glosses: {split_df['gloss'].nunique()}")

# Verify all glosses have videos in each split
print("\nüîç Checking gloss coverage across splits...")
unique_glosses = df_preprocessed['gloss'].unique()
print(f"   Total glosses: {len(unique_glosses)}")

for split in ['train', 'val', 'test']:
    split_glosses = df_preprocessed[df_preprocessed['split']==split]['gloss'].unique()
    missing = set(unique_glosses) - set(split_glosses)
    if len(missing) > 0:
        print(f"   ‚ö†Ô∏è {split.capitalize()} missing {len(missing)} glosses:")
        for gloss in list(missing)[:5]:
            print(f"      - {gloss}")
    else:
        print(f"   ‚úÖ {split.capitalize()} has all {len(unique_glosses)} glosses")

# Sample a few preprocessed files to verify
print("\nüß™ Testing preprocessed file loading...")
sample_records = df_preprocessed.sample(min(3, len(df_preprocessed)))

for idx, row in sample_records.iterrows():
    try:
        data = np.load(row['save_path'])
        frames = data['frames']
        print(f"   ‚úÖ {row['video_id']}: {frames.shape} ({frames.dtype}, range [{frames.min()}, {frames.max()}])")
    except Exception as e:
        print(f"   ‚ùå {row['video_id']}: Error loading - {e}")

print("\n" + "="*60)
print("‚úÖ PREPROCESSED DATA VERIFIED")
print("‚úÖ Ready for model loading (Cell 8)")
print("="*60)

In [7]:
# ---------- Cell 9: Define WLASLDataset Class ----------
import cv2
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset

print("="*60)
print("DEFINING WLASLDATASET CLASS")
print("="*60)
print("\nüí° Model already loaded in Cell 1 - skipping model loading")
print("   Just defining the Dataset class for preprocessing...")

# Dataset class with IMPROVED AUGMENTATION
class WLASLDataset(Dataset):
    """
    PyTorch Dataset for WLASL/Citizen preprocessed videos.
    
    Loads preprocessed .npz files and applies augmentation.
    """
    
    def __init__(self, df, augment=False):
        """
        Args:
            df: DataFrame with columns ['video_id', 'gloss', 'label', 'save_path', 'dataset']
            augment: Whether to apply data augmentation
        """
        self.df = df.reset_index(drop=True)
        self.augment = augment
        
        print(f"\n{'='*60}")
        print("Creating WLASLDataset:")
        print(f"{'='*60}")
        print(f"   ‚Ä¢ Total videos: {len(self.df)}")
        print(f"   ‚Ä¢ Augmentation: {'ENABLED (6 techniques)' if augment else 'DISABLED'}")
        print(f"   ‚Ä¢ Unique glosses: {self.df['gloss'].nunique()}")
        
        # Show dataset composition
        if 'dataset' in self.df.columns:
            dataset_counts = self.df['dataset'].value_counts()
            print("   ‚Ä¢ Dataset sources:")
            for dataset_name, count in dataset_counts.items():
                print(f"      - {dataset_name}: {count} videos")
        
        print(f"{'='*60}")
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        
        # Load preprocessed frames
        npz_path = row['save_path']
        data = np.load(npz_path)
        frames = data['frames']  # (T, H, W, C), uint8, [0, 255]
        
        # Apply augmentation if enabled
        if self.augment:
            frames = self._augment_video(frames)
        
        # Convert to float32 and normalize to [0, 1]
        frames = frames.astype(np.float32) / 255.0
        
        # Convert to PyTorch tensor: (T, H, W, C) ‚Üí (C, T, H, W)
        frames = torch.from_numpy(frames).permute(3, 0, 1, 2)
        
        return {
            'frames': frames,
            'label': torch.tensor(row['label'], dtype=torch.long),
            'video_id': row['video_id'],
            'gloss': row['gloss']
        }
    
    def _augment_video(self, frames):
        """
        Apply IMPROVED data augmentation to video frames (uint8 format).
        
        6 augmentation techniques:
        1. Horizontal flip (30% chance - reduced for sign language)
        2. Temporal cropping (75-100% frames - more aggressive) + RESAMPLE to 32 frames
        3. Brightness (0.85-1.15√ó - wider range)
        4. Contrast (0.85-1.15√ó - NEW)
        5. Rotation (¬±3¬∞ - NEW, conservative for sign language)
        6. Spatial crop + resize (85-100% - NEW)
        7. Gaussian noise (œÉ=5, 20% chance - NEW)
        
        CRITICAL: Always returns exactly T frames (no shape mismatch in batching)
        
        Args:
            frames: (T, H, W, C) numpy array, uint8, range [0, 255]
        
        Returns:
            Augmented frames (T, H, W, C), uint8, range [0, 255]
        """
        T, H, W, C = frames.shape
        original_T = T  # Save original frame count to restore at the end
        
        # 1. Horizontal flip (30% chance - REDUCED for sign language)
        if np.random.rand() < 0.3:
            frames = np.flip(frames, axis=2).copy()
        
        # 2. Temporal cropping (75-100% frames - MORE AGGRESSIVE)
        # CRITICAL FIX: After cropping, resample back to original_T frames
        crop_ratio = np.random.uniform(0.75, 1.0)
        num_frames = max(int(T * crop_ratio), 16)  # At least 16 frames
        
        if num_frames < T:
            start_idx = np.random.randint(0, T - num_frames + 1)
            cropped_frames = frames[start_idx:start_idx + num_frames]
            
            # Resample back to original T frames using uniform sampling
            indices = np.linspace(0, num_frames - 1, original_T, dtype=int)
            frames = cropped_frames[indices]
            T = original_T  # Restore original frame count
        
        # 3. Brightness adjustment (0.85-1.15√ó - WIDER RANGE)
        brightness_factor = np.random.uniform(0.85, 1.15)
        frames = np.clip(frames.astype(np.float32) * brightness_factor, 0, 255).astype(np.uint8)
        
        # 4. Contrast adjustment (0.85-1.15√ó - NEW)
        contrast_factor = np.random.uniform(0.85, 1.15)
        mean = frames.mean()
        frames = np.clip((frames.astype(np.float32) - mean) * contrast_factor + mean, 0, 255).astype(np.uint8)
        
        # 5. Rotation (¬±3¬∞ - NEW, very conservative for sign language)
        angle = np.random.uniform(-3, 3)
        center = (W // 2, H // 2)
        M = cv2.getRotationMatrix2D(center, angle, 1.0)
        
        rotated_frames = np.zeros_like(frames)
        for t in range(T):
            rotated_frames[t] = cv2.warpAffine(frames[t], M, (W, H), borderMode=cv2.BORDER_REPLICATE)
        frames = rotated_frames
        
        # 6. Spatial crop + resize (85-100% - NEW)
        crop_ratio_spatial = np.random.uniform(0.85, 1.0)
        crop_h = int(H * crop_ratio_spatial)
        crop_w = int(W * crop_ratio_spatial)
        
        top = np.random.randint(0, H - crop_h + 1) if crop_h < H else 0
        left = np.random.randint(0, W - crop_w + 1) if crop_w < W else 0
        
        cropped_frames = np.zeros_like(frames)
        for t in range(T):
            cropped = frames[t, top:top+crop_h, left:left+crop_w]
            cropped_frames[t] = cv2.resize(cropped, (W, H), interpolation=cv2.INTER_LINEAR)
        frames = cropped_frames
        
        # 7. Gaussian noise (œÉ=5, 20% chance - NEW)
        if np.random.rand() < 0.2:
            noise = np.random.normal(0, 5, frames.shape).astype(np.float32)
            frames = np.clip(frames.astype(np.float32) + noise, 0, 255).astype(np.uint8)
        
        # FINAL SAFETY CHECK: Ensure output has exactly original_T frames
        if frames.shape[0] != original_T:
            # Resample to original_T frames if somehow we lost/gained frames
            indices = np.linspace(0, frames.shape[0] - 1, original_T, dtype=int)
            frames = frames[indices]
        
        return frames

print("\n‚úÖ WLASLDataset class defined with IMPROVED augmentation!")
print("   ‚Ä¢ 6 augmentation techniques (vs 3 before)")
print("   ‚Ä¢ Horizontal flip: 30% (reduced from 50%)")
print("   ‚Ä¢ Temporal crop: 75-100% (more aggressive)")
print("   ‚Ä¢ Brightness: 0.85-1.15√ó (wider range)")
print("   ‚Ä¢ Contrast: 0.85-1.15√ó (NEW)")
print("   ‚Ä¢ Rotation: ¬±3¬∞ (NEW, conservative)")
print("   ‚Ä¢ Spatial crop: 85-100% + resize (NEW)")
print("   ‚Ä¢ Gaussian noise: œÉ=5, 20% chance (NEW)")

print("\n" + "="*60)
print("‚úÖ MODEL + DATASET READY")
print("="*60)


DEFINING WLASLDATASET CLASS

üí° Model already loaded in Cell 1 - skipping model loading
   Just defining the Dataset class for preprocessing...

‚úÖ WLASLDataset class defined with IMPROVED augmentation!
   ‚Ä¢ 6 augmentation techniques (vs 3 before)
   ‚Ä¢ Horizontal flip: 30% (reduced from 50%)
   ‚Ä¢ Temporal crop: 75-100% (more aggressive)
   ‚Ä¢ Brightness: 0.85-1.15√ó (wider range)
   ‚Ä¢ Contrast: 0.85-1.15√ó (NEW)
   ‚Ä¢ Rotation: ¬±3¬∞ (NEW, conservative)
   ‚Ä¢ Spatial crop: 85-100% + resize (NEW)
   ‚Ä¢ Gaussian noise: œÉ=5, 20% chance (NEW)

‚úÖ MODEL + DATASET READY


In [8]:
# ---------- Cell 10: Create WLASL100 DataLoaders ----------
import json

import pandas as pd
import torch
from torch.utils.data import DataLoader

print("="*60)
print("CREATING WLASL100 LABEL MAPPING")
print("="*60)

# Check if we have preprocessed manifest (full pipeline)
manifest_path = os.path.join(BASE_DIR, "manifests", "wlasl100_preprocessed.csv")

if os.path.exists(manifest_path):
    # Full pipeline: Load from preprocessed manifest
    print("\n‚úÖ Found preprocessed manifest - loading from there...")
    df_preprocessed = pd.read_csv(manifest_path)
    
    print(f"\n‚úÖ Loaded manifest: {len(df_preprocessed)} videos")
    print(f"   ‚Ä¢ Train: {len(df_preprocessed[df_preprocessed['split']=='train'])}")
    print(f"   ‚Ä¢ Val: {len(df_preprocessed[df_preprocessed['split']=='val'])}")
    print(f"   ‚Ä¢ Test: {len(df_preprocessed[df_preprocessed['split']=='test'])}")
    
    # Create label mapping from preprocessed data
    unique_glosses = sorted(df_preprocessed['gloss'].unique())
    
else:
    # Quick pipeline: Load from WLASL manifest (Cell 3)
    print("\n‚ö° Preprocessed manifest not found - creating label mapping from WLASL manifest...")
    print("   (This is expected when using pre-trained model)")
    
    # Load the WLASL manifest from Cell 3
    manifest_cell3_path = os.path.join(BASE_DIR, "manifests", "wlasl100_manifest.csv")
    
    if not os.path.exists(manifest_cell3_path):
        print("\n‚ùå Error: WLASL manifest not found!")
        print("   Please run Cell 3 first to load the WLASL manifest.")
        import sys
        sys.exit(1)
    
    df_wlasl = pd.read_csv(manifest_cell3_path)
    print(f"\n‚úÖ Loaded WLASL manifest: {len(df_wlasl)} videos")
    
    # Create label mapping from all glosses in WLASL100
    unique_glosses = sorted(df_wlasl['gloss'].unique())
    print(f"   ‚Ä¢ Found {len(unique_glosses)} unique classes")

# Create label mapping (gloss name ‚Üí integer label)
gloss_to_label = {gloss: idx for idx, gloss in enumerate(unique_glosses)}
label_to_gloss = {idx: gloss for gloss, idx in gloss_to_label.items()}

print(f"\n‚úÖ Created label mapping for {len(unique_glosses)} classes")
print("\nüìã Sample labels:")
for i, (gloss, label) in enumerate(list(gloss_to_label.items())[:10]):
    print(f"   {label:2d} ‚Üí {gloss}")

# Save label mapping
label_map_path = os.path.join(BASE_DIR, "manifests", "label_mapping.json")
with open(label_map_path, 'w') as f:
    json.dump({
        'gloss_to_label': gloss_to_label,
        'label_to_gloss': label_to_gloss,
        'num_classes': len(gloss_to_label)
    }, f, indent=2)

print(f"\nüíæ Saved label mapping to: {label_map_path}")

# Only create DataLoaders if we have preprocessed data
if os.path.exists(manifest_path):
    # Add numeric labels to dataframe
    df_preprocessed['label'] = df_preprocessed['gloss'].map(gloss_to_label)
    
    # Create split DataFrames
    train_df = df_preprocessed[df_preprocessed['split'] == 'train'].copy()
    val_df = df_preprocessed[df_preprocessed['split'] == 'val'].copy()
    test_df = df_preprocessed[df_preprocessed['split'] == 'test'].copy()
    
    print("\nüìä Split distribution:")
    print(f"   ‚Ä¢ Train: {len(train_df)} videos")
    print(f"   ‚Ä¢ Val: {len(val_df)} videos")
    print(f"   ‚Ä¢ Test: {len(test_df)} videos")
    
    # Create datasets
    train_dataset = WLASLDataset(train_df, augment=True)
    val_dataset = WLASLDataset(val_df, augment=False)
    test_dataset = WLASLDataset(test_df, augment=False)
    
    print("\n‚úÖ Created datasets:")
    print(f"   ‚Ä¢ Train: {len(train_dataset)} samples (augmented)")
    print(f"   ‚Ä¢ Val: {len(val_dataset)} samples")
    print(f"   ‚Ä¢ Test: {len(test_dataset)} samples")
    
    # Create DataLoaders
    batch_size = 8
    num_workers = 2
    
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=True
    )
    
    val_loader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True
    )
    
    test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True
    )
    
    print(f"\n‚úÖ Created DataLoaders (batch_size={batch_size}):")
    print(f"   ‚Ä¢ Train: {len(train_loader)} batches")
    print(f"   ‚Ä¢ Val: {len(val_loader)} batches")
    print(f"   ‚Ä¢ Test: {len(test_loader)} batches")
    
    print("\n‚úÖ Ready for training (Cell 11)")
else:
    print("\n‚ö° Skipping DataLoader creation (not needed for Citizen training)")

print("\n" + "="*60)
print("‚úÖ CELL 10 COMPLETE")
print("="*60)

CREATING WLASL100 LABEL MAPPING

‚ö° Preprocessed manifest not found - creating label mapping from WLASL manifest...
   (This is expected when using pre-trained model)

‚úÖ Loaded WLASL manifest: 2038 videos
   ‚Ä¢ Found 100 unique classes

‚úÖ Created label mapping for 100 classes

üìã Sample labels:
    0 ‚Üí accident
    1 ‚Üí africa
    2 ‚Üí all
    3 ‚Üí apple
    4 ‚Üí basketball
    5 ‚Üí bed
    6 ‚Üí before
    7 ‚Üí bird
    8 ‚Üí birthday
    9 ‚Üí black

üíæ Saved label mapping to: /kaggle/working/WASL/manifests/label_mapping.json

‚ö° Skipping DataLoader creation (not needed for Citizen training)

‚úÖ CELL 10 COMPLETE


In [None]:
# ---------- Cell 11: Training Loop (Official WLASL Config) ----------

import os
import time

import torch
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Check if model exists
if 'model' not in dir():
    raise RuntimeError("‚ùå ERROR: Model not found! Run Cell 8 (Load I3D Model) first!")

# Check if dataloaders exist
if 'train_loader' not in dir() or 'val_loader' not in dir():
    raise RuntimeError("‚ùå ERROR: DataLoaders not found! Run Cell 9 (Create DataLoaders) first!")

print("‚úÖ Model and DataLoaders found")

# Clear CUDA cache
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print("üîÑ CUDA cache cleared")

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Training config - OFFICIAL WLASL SETTINGS
config = {
    'num_epochs': 100,
    'learning_rate': 1e-4,  # Official WLASL
    'weight_decay': 1e-8,   # Official WLASL 
    'adam_eps': 1e-3,       # Official WLASL
    'patience': 10,
    'grad_clip': 1.0,
    'use_amp': False,      
}

print("="*60)
print("TRAINING SETUP (OFFICIAL WLASL CONFIG)")
print("="*60)
print(f"Device: {device}")
print(f"Epochs: {config['num_epochs']}")
print(f"LR: {config['learning_rate']} | Weight Decay: {config['weight_decay']}")
print(f"Train batches: {len(train_loader)} | Val batches: {len(val_loader)}")

# Optimizer - OFFICIAL WLASL USES ADAM (NOT AdamW) with SAME LR for all layers
optimizer = optim.Adam(
    model.parameters(),
    lr=config['learning_rate'],
    weight_decay=config['weight_decay'],
    eps=config['adam_eps']
)

# Learning rate scheduler - OFFICIAL WLASL USES ReduceLROnPlateau

scheduler = ReduceLROnPlateau(
    optimizer,
    mode='min',
    patience=5,
    factor=0.3,
    verbose=True
)

criterion = nn.CrossEntropyLoss()

# Training tracking
history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': [], 'learning_rates': []}
best_val_loss = float('inf')
best_val_acc = 0.0
patience_counter = 0
best_epoch = 0
checkpoint_dir = os.path.join(BASE_DIR, "models", "checkpoints")
os.makedirs(checkpoint_dir, exist_ok=True)


def train_epoch(model, dataloader, criterion, optimizer, device, epoch):
    """Train for one epoch - Official WLASL style (no FP16, standard PyTorch)."""
    model.train()
    
    running_loss = 0.0
    correct = 0
    total = 0
    
    pbar = tqdm(dataloader, desc=f"Epoch {epoch+1} [Train]")
    
    for batch_idx, batch in enumerate(pbar):
        frames = batch['frames'].to(device)
        labels = batch['label'].to(device)
        
        optimizer.zero_grad()
        
        outputs = model(frames)
        
        # I3D outputs: [batch, classes, time] - average over time
        if outputs.dim() == 3:
            outputs = outputs.mean(dim=2)
        
        loss = criterion(outputs, labels)
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), config['grad_clip'])
        optimizer.step()
        
        # Calculate accuracy (move to CPU for computation)
        with torch.no_grad():
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
            running_loss += loss.item()
        
        # Update progress bar
        avg_loss = running_loss / (batch_idx + 1)
        avg_acc = 100 * correct / total
        pbar.set_postfix({
            'loss': f'{avg_loss:.4f}',
            'acc': f'{avg_acc:.2f}%'
        })
    
    epoch_loss = running_loss / len(dataloader)
    epoch_acc = 100 * correct / total
    
    return epoch_loss, epoch_acc


def validate(model, dataloader, criterion, device, epoch):
    """Validate on validation set - Official WLASL style."""
    model.eval()
    
    running_loss = 0.0
    correct = 0
    total = 0
    
    pbar = tqdm(dataloader, desc=f"Epoch {epoch+1} [Val]  ")
    
    with torch.no_grad():
        for batch_idx, batch in enumerate(pbar):
            frames = batch['frames'].to(device)
            labels = batch['label'].to(device)
            
            outputs = model(frames)
            
            # I3D outputs: [batch, classes, time] - average over time
            if outputs.dim() == 3:
                outputs = outputs.mean(dim=2)
            
            loss = criterion(outputs, labels)
            
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
            running_loss += loss.item()
            
            avg_loss = running_loss / (batch_idx + 1)
            avg_acc = 100 * correct / total
            pbar.set_postfix({
                'loss': f'{avg_loss:.4f}',
                'acc': f'{avg_acc:.2f}%'
            })
    
    epoch_loss = running_loss / len(dataloader)
    epoch_acc = 100 * correct / total
    
    return epoch_loss, epoch_acc

# Main training loop
print("\n" + "="*60)
print("STARTING TRAINING")
print("="*60 + "\n")

start_time = time.time()

for epoch in range(config['num_epochs']):
    epoch_start = time.time()
    
    # Train
    train_loss, train_acc = train_epoch(
        model, train_loader, criterion, optimizer, device, epoch
    )
    
    # Validate
    val_loss, val_acc = validate(
        model, val_loader, criterion, device, epoch
    )
    
    # Update scheduler (ReduceLROnPlateau - needs val_loss)
    scheduler.step(val_loss)
    current_lr = optimizer.param_groups[0]['lr']
    
    # Save history
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)
    history['learning_rates'].append(current_lr)
    
    # Calculate times
    epoch_time = time.time() - epoch_start
    elapsed_time = time.time() - start_time
    
    # Print summary
    print(f"\n{'='*60}")
    print(f"Epoch {epoch+1}/{config['num_epochs']} Summary:")
    print(f"{'='*60}")
    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
    print(f"Val Loss:   {val_loss:.4f} | Val Acc:   {val_acc:.2f}%")
    print(f"Learning Rate: {current_lr:.6f}")
    print(f"Epoch Time: {epoch_time:.1f}s | Total Time: {elapsed_time/60:.1f}min")
    
    # GPU memory stats
    if epoch == 0:
        allocated = torch.cuda.memory_allocated(0) / 1e9
        reserved = torch.cuda.memory_reserved(0) / 1e9
        print(f"GPU Memory: {allocated:.2f} GB allocated, {reserved:.2f} GB reserved")
    
    # Check if best model
    is_best = val_acc > best_val_acc
    
    if is_best:
        best_val_acc = val_acc
        best_val_loss = val_loss
        best_epoch = epoch + 1
        patience_counter = 0
        
        # Save best model
        best_model_path = os.path.join(checkpoint_dir, "best_model.pth")
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'train_loss': train_loss,
            'train_acc': train_acc,
            'val_loss': val_loss,
            'val_acc': val_acc,
            'history': history
        }, best_model_path)
        
        print(f"‚úÖ New best model saved! (Val Acc: {val_acc:.2f}%)")
    else:
        patience_counter += 1
        print(f"‚è≥ No improvement. Patience: {patience_counter}/{config['patience']}")
    
    print(f"üèÜ Best Val Acc so far: {best_val_acc:.2f}% (Epoch {best_epoch})")
    print("="*60 + "\n")
    
    # Early stopping
    if patience_counter >= config['patience']:
        print(f"\nüõë Early stopping triggered after {epoch+1} epochs")
        print(f"   No improvement for {config['patience']} epochs")
        print(f"   Best model from epoch {best_epoch} will be used")
        break
    
    # Save checkpoint every 5 epochs
    if (epoch + 1) % 5 == 0:
        checkpoint_path = os.path.join(checkpoint_dir, f"checkpoint_epoch_{epoch+1}.pth")
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'history': history
        }, checkpoint_path)
        print(f"üíæ Checkpoint saved: epoch_{epoch+1}.pth\n")
    
    # Clear cache every 3 epochs (helps with memory fragmentation)
    if (epoch + 1) % 3 == 0:
        torch.cuda.empty_cache()

# Training complete
total_time = time.time() - start_time

print("\n" + "="*60)
print("TRAINING COMPLETE")
print("="*60)
print("\nüìä Final Results:")
print(f"   ‚Ä¢ Total epochs: {len(history['train_loss'])}")
print(f"   ‚Ä¢ Total time: {total_time/60:.1f} minutes")
print(f"   ‚Ä¢ Best validation accuracy: {best_val_acc:.2f}% (Epoch {best_epoch})")
print(f"   ‚Ä¢ Best validation loss: {best_val_loss:.4f}")
print(f"\n   ‚Ä¢ Final train accuracy: {history['train_acc'][-1]:.2f}%")
print(f"   ‚Ä¢ Final val accuracy: {history['val_acc'][-1]:.2f}%")

# Load best model
print(f"\nüîÑ Loading best model (epoch {best_epoch})...")
best_checkpoint = torch.load(os.path.join(checkpoint_dir, "best_model.pth"))
model.load_state_dict(best_checkpoint['model_state_dict'])
print("‚úÖ Best model loaded")

# Save training history
history_path = os.path.join(BASE_DIR, "models", "training_history.json")
with open(history_path, 'w') as f:
    json.dump(history, f, indent=2)
print(f"\nüíæ Training history saved to: {history_path}")
print("\n‚úÖ Ready for next cell - evaluation on test set")

In [9]:
# ---------- Cell 12: Load ASL Citizen Dataset ----------
import json
import os

import pandas as pd

print("="*60)
print("LOADING ASL CITIZEN DATASET")
print("="*60)

# Check if ASL Citizen dataset is added as Kaggle input
CITIZEN_INPUT = "/kaggle/input/asl-citizen"

if os.path.exists(CITIZEN_INPUT):
    print("‚úÖ ASL Citizen dataset found!")
    print(f"   Path: {CITIZEN_INPUT}")
    
    # Check for ASL_Citizen subdirectory
    asl_citizen_subdir = os.path.join(CITIZEN_INPUT, "ASL_Citizen")
    if os.path.exists(asl_citizen_subdir):
        CITIZEN_ROOT = asl_citizen_subdir
        print("‚úÖ Found ASL_Citizen subdirectory!")
    else:
        CITIZEN_ROOT = CITIZEN_INPUT
        print("‚ö†Ô∏è No ASL_Citizen subdirectory, using root")
    
    # Check videos and splits folders
    CITIZEN_VIDEOS = os.path.join(CITIZEN_ROOT, "videos")
    CITIZEN_SPLITS = os.path.join(CITIZEN_ROOT, "splits")
    
    if os.path.exists(CITIZEN_VIDEOS) and os.path.exists(CITIZEN_SPLITS):
        video_count = len([f for f in os.listdir(CITIZEN_VIDEOS) if f.endswith(('.mp4', '.MP4'))])
        print(f"\n‚úÖ Found {video_count} videos")
        print("‚úÖ ASL Citizen paths configured:")
        print(f"   ‚Ä¢ Videos: {CITIZEN_VIDEOS}")
        print(f"   ‚Ä¢ Splits: {CITIZEN_SPLITS}")
    else:
        print("\n‚ùå ERROR: Required folders not found!")
        import sys
        sys.exit(1)
else:
    print("‚ö†Ô∏è ASL Citizen dataset not found in inputs!")
    print("\nüìù To add the dataset:")
    print("   1. Click 'Add Data' button")
    print("   2. Search for 'asl-citizen'")
    print("   3. Add and rerun")
    import sys
    sys.exit(1)

print("\n" + "="*60)
print("‚úÖ ASL Citizen dataset loaded")
print("="*60)

LOADING ASL CITIZEN DATASET
‚úÖ ASL Citizen dataset found!
   Path: /kaggle/input/asl-citizen
‚úÖ Found ASL_Citizen subdirectory!

‚úÖ Found 83399 videos
‚úÖ ASL Citizen paths configured:
   ‚Ä¢ Videos: /kaggle/input/asl-citizen/ASL_Citizen/videos
   ‚Ä¢ Splits: /kaggle/input/asl-citizen/ASL_Citizen/splits

‚úÖ ASL Citizen dataset loaded


In [10]:
# ---------- Cell 13: Parse Citizen & Prepare 100 Labels (OPTIMIZED) ----------
import json
import os

import pandas as pd

print("="*60)
print("PREPARING CITIZEN 100 LABEL DATASET (OPTIMIZED)")
print("="*60)

# Load WLASL100 labels (REQUIRED for this strategy)
wlasl_label_path = os.path.join(BASE_DIR, "manifests", "label_mapping.json")
if os.path.exists(wlasl_label_path):
    with open(wlasl_label_path, 'r') as f:
        wlasl_mapping = json.load(f)
    wlasl100_glosses = set(wlasl_mapping['gloss_to_label'].keys())
    print(f"\n‚úÖ Loaded WLASL100 labels: {len(wlasl100_glosses)} glosses")
else:
    print("\n‚ùå Error: WLASL100 labels not found!")
    print("   Please run Cell 3 and Cell 10 first.")
    import sys
    sys.exit(1)

# Citizen dataset paths
CITIZEN_BASE = "/kaggle/input/asl-citizen/ASL_Citizen"
CITIZEN_VIDEOS = os.path.join(CITIZEN_BASE, "videos")
CITIZEN_SPLITS = os.path.join(CITIZEN_BASE, "splits")

print("\nüîç Building video filename index...")

# Build video filename index: gloss -> list of video filenames
# Format: "000017451997373907346-LIBRARY.mp4" -> gloss = "library"
from collections import defaultdict

video_files_by_gloss = defaultdict(list)

for video_file in os.listdir(CITIZEN_VIDEOS):
    if not video_file.endswith(('.mp4', '.MP4')):
        continue
    
    # Extract gloss from filename: "000017451997373907346-LIBRARY.mp4" -> "library"
    if '-' in video_file:
        gloss_part = video_file.split('-')[-1].replace('.mp4', '').replace('.MP4', '')
        gloss = ''.join([c for c in gloss_part if not c.isdigit()]).strip().lower()
        video_files_by_gloss[gloss].append(video_file)

print(f"‚úÖ Indexed {len(video_files_by_gloss)} unique glosses")
print(f"   ‚Ä¢ Total videos: {sum(len(v) for v in video_files_by_gloss.values())}")

# Parse splits and match with videos
print("\nüîç Matching CSV splits with videos...")

from collections import defaultdict

citizen_inventory = defaultdict(list)

for split_file in ['train.csv', 'val.csv', 'test.csv']:
    split_path = os.path.join(CITIZEN_SPLITS, split_file)
    if not os.path.exists(split_path):
        continue
    
    split_name = split_file.replace('.csv', '')
    df_split = pd.read_csv(split_path)
    
    print(f"\nüìã Parsing {split_file}: {len(df_split)} rows")
    
    gloss_column = 'Gloss' if 'Gloss' in df_split.columns else df_split.columns[0]
    
    for idx, row in df_split.iterrows():
        gloss_raw = str(row[gloss_column])
        
        # Clean: "APPLE" -> "apple", "SOCCER2" -> "soccer"
        gloss = ''.join([c for c in gloss_raw if not c.isdigit()]).strip().lower()
        
        # Check if we have videos for this gloss
        if gloss in video_files_by_gloss and len(video_files_by_gloss[gloss]) > 0:
            # Take first available video
            video_filename = video_files_by_gloss[gloss].pop(0)
            video_path = os.path.join(CITIZEN_VIDEOS, video_filename)
            video_id = video_filename.replace('.mp4', '').replace('.MP4', '')
            
            citizen_inventory[gloss].append({
                'video_id': video_id,
                'video_filename': video_filename,
                'video_path': video_path,
                'split': split_name,
                'gloss': gloss
            })

# Count videos per gloss
gloss_counts = {gloss: len(videos) for gloss, videos in citizen_inventory.items()}
print(f"\n‚úÖ Matched {len(gloss_counts)} unique glosses")
print(f"   ‚Ä¢ Total videos: {sum(gloss_counts.values())}")

# STRATEGY: WLASL intersect + top non-intersect = 100
print("\n" + "="*60)
print("OPTIMIZED STRATEGY: WLASL INTERSECT + TOP NON-INTERSECT")
print("="*60)

# 1. Find WLASL100 intersection
citizen_glosses = set(gloss_counts.keys())
intersection = wlasl100_glosses.intersection(citizen_glosses)

print(f"\n1Ô∏è‚É£ WLASL100 ‚à© Citizen: {len(intersection)} glosses")
intersection_videos = sum([gloss_counts[g] for g in intersection])
print(f"   ‚Ä¢ Total videos: {intersection_videos}")

# 2. Get top non-intersect glosses by video count
non_intersect_glosses = [g for g in citizen_glosses if g not in intersection]
non_intersect_sorted = sorted(non_intersect_glosses, key=lambda g: gloss_counts[g], reverse=True)

# Select to fill to 100
num_to_add = 100 - len(intersection)
if num_to_add > 0:
    additional_glosses = non_intersect_sorted[:num_to_add]
    print(f"\n2Ô∏è‚É£ Adding top {num_to_add} non-intersect glosses:")
    for g in additional_glosses[:5]:
        print(f"   ‚Ä¢ {g}: {gloss_counts[g]} videos")
    if num_to_add > 5:
        print(f"   ‚Ä¢ ... and {num_to_add - 5} more")
else:
    additional_glosses = []
    print("\n2Ô∏è‚É£ No additional glosses needed")

# 3. Combine final selection
selected_glosses = list(intersection) + additional_glosses

print(f"\n‚úÖ FINAL SELECTION: {len(selected_glosses)} glosses")
print(f"   ‚Ä¢ WLASL intersect: {len(intersection)}")
print(f"   ‚Ä¢ Additional: {len(additional_glosses)}")
print(f"   ‚Ä¢ WLASL overlap: {len(intersection)}/{len(wlasl100_glosses)} ({len(intersection)/len(wlasl100_glosses)*100:.1f}%)")

# 4. Extract all videos for selected glosses
citizen_records = []
for gloss in selected_glosses:
    for video_info in citizen_inventory[gloss]:
        citizen_records.append({
            'video_id': video_info['video_id'],
            'video_filename': video_info['video_filename'],
            'gloss': gloss,
            'split': video_info['split'],
            'video_path': video_info['video_path'],
            'dataset': 'citizen'
        })

df_citizen_100 = pd.DataFrame(citizen_records)

print("\nüìä Citizen 100 dataset:")
print(f"   ‚Ä¢ Total videos: {len(df_citizen_100)}")
print(f"   ‚Ä¢ Train: {len(df_citizen_100[df_citizen_100['split']=='train'])}")
print(f"   ‚Ä¢ Val: {len(df_citizen_100[df_citizen_100['split']=='val'])}")
print(f"   ‚Ä¢ Test: {len(df_citizen_100[df_citizen_100['split']=='test'])}")

# Create label mapping
unique_glosses_final = sorted(df_citizen_100['gloss'].unique())
gloss_to_label_final = {gloss: idx for idx, gloss in enumerate(unique_glosses_final)}
label_to_gloss_final = {idx: gloss for gloss, idx in gloss_to_label_final.items()}

df_citizen_100['label'] = df_citizen_100['gloss'].map(gloss_to_label_final)

# Save manifest and label mapping
citizen_manifest = os.path.join(BASE_DIR, "manifests", "citizen_100.csv")
df_citizen_100.to_csv(citizen_manifest, index=False)

label_map_path = os.path.join(BASE_DIR, "manifests", "label_mapping_100.json")
with open(label_map_path, 'w') as f:
    json.dump({
        'gloss_to_label': gloss_to_label_final,
        'label_to_gloss': label_to_gloss_final,
        'num_classes': len(gloss_to_label_final),
        'intersection_labels': list(intersection),
        'additional_labels': additional_glosses,
        'total_wlasl_overlap': len(intersection)
    }, f, indent=2)

# Save intersection analysis
intersection_analysis = {
    'wlasl100_total': len(wlasl100_glosses),
    'citizen_total': len(citizen_glosses),
    'intersection_count': len(intersection),
    'intersection_labels': sorted(list(intersection)),
    'missing_from_citizen': sorted(list(wlasl100_glosses - citizen_glosses)),
    'citizen_final_count': len(selected_glosses),
    'citizen_final_wlasl_overlap': len(intersection)
}

intersection_path = os.path.join(BASE_DIR, "manifests", "wlasl_citizen_intersection.json")
with open(intersection_path, 'w') as f:
    json.dump(intersection_analysis, f, indent=2)

print("\nüíæ Saved:")
print(f"   ‚Ä¢ {citizen_manifest}")
print(f"   ‚Ä¢ {label_map_path}")
print(f"   ‚Ä¢ {intersection_path}")

print("\n" + "="*60)
print("‚úÖ OPTIMIZED STRATEGY APPLIED!")
print(f"‚úÖ {len(selected_glosses)} total glosses")
print(f"‚úÖ {len(intersection)}/{len(wlasl100_glosses)} WLASL100 glosses included")
print("‚úÖ MAXIMUM transfer learning overlap!")
print("="*60)

PREPARING CITIZEN 100 LABEL DATASET (OPTIMIZED)

‚úÖ Loaded WLASL100 labels: 100 glosses

üîç Building video filename index...
‚úÖ Indexed 4593 unique glosses
   ‚Ä¢ Total videos: 83399

üîç Matching CSV splits with videos...

üìã Parsing train.csv: 40154 rows

üìã Parsing val.csv: 10304 rows

üìã Parsing test.csv: 32941 rows

‚úÖ Matched 2018 unique glosses
   ‚Ä¢ Total videos: 71943

OPTIMIZED STRATEGY: WLASL INTERSECT + TOP NON-INTERSECT

1Ô∏è‚É£ WLASL100 ‚à© Citizen: 98 glosses
   ‚Ä¢ Total videos: 4129

2Ô∏è‚É£ Adding top 2 non-intersect glosses:
   ‚Ä¢ shave: 148 videos
   ‚Ä¢ erase: 147 videos

‚úÖ FINAL SELECTION: 100 glosses
   ‚Ä¢ WLASL intersect: 98
   ‚Ä¢ Additional: 2
   ‚Ä¢ WLASL overlap: 98/100 (98.0%)

üìä Citizen 100 dataset:
   ‚Ä¢ Total videos: 4424
   ‚Ä¢ Train: 2192
   ‚Ä¢ Val: 560
   ‚Ä¢ Test: 1672

üíæ Saved:
   ‚Ä¢ /kaggle/working/WASL/manifests/citizen_100.csv
   ‚Ä¢ /kaggle/working/WASL/manifests/label_mapping_100.json
   ‚Ä¢ /kaggle/working/WASL/manife

In [12]:
# ---------- Cell 14: Preprocess Citizen 100 Videos ----------
from pathlib import Path

import cv2
import numpy as np

print("="*60)
print("PREPROCESSING CITIZEN 100 VIDEOS")
print("="*60)

# Define preprocessing config (SAME AS WLASL - 32 FRAMES!)
PREPROCESS_CONFIG = {
    'target_fps': 25,
    'target_frames': 32,  # SAME as WLASL and I3D pretraining!
    'target_size': (224, 224),
    'normalize': True,
}

print("\nüí° Using SAME config as WLASL (32 frames):")
print(f"   ‚Ä¢ Frames: {PREPROCESS_CONFIG['target_frames']}")
print(f"   ‚Ä¢ Size: {PREPROCESS_CONFIG['target_size']}")
print("   ‚Ä¢ Perfect consistency with I3D pretraining & WLASL100!")

# Load video utility function
def load_video_frames(video_path, target_frames=32, target_size=(224, 224)):
    """
    Load video and extract uniformly sampled frames.
    
    Args:
        video_path: Path to video file
        target_frames: Number of frames to extract
        target_size: Target spatial size (H, W)
    
    Returns:
        frames: numpy array of shape (T, H, W, C) - uint8 [0-255]
        original_frames: original frame count
        original_fps: original FPS
    """
    cap = cv2.VideoCapture(video_path)
    
    if not cap.isOpened():
        raise ValueError(f"Cannot open video: {video_path}")
    
    # Get video properties
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    
    # Calculate frame indices to sample uniformly
    if total_frames < target_frames:
        indices = np.linspace(0, total_frames - 1, target_frames, dtype=int)
    else:
        indices = np.linspace(0, total_frames - 1, target_frames, dtype=int)
    
    frames = []
    for idx in indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
        ret, frame = cap.read()
        
        if not ret:
            if len(frames) > 0:
                frame = frames[-1].copy()
            else:
                raise ValueError(f"Cannot read frame {idx} from {video_path}")
        
        # Convert BGR to RGB
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        # Resize to target size
        frame = cv2.resize(frame, target_size, interpolation=cv2.INTER_LINEAR)
        
        frames.append(frame)
    
    cap.release()
    
    # Stack frames: (T, H, W, C)
    frames = np.stack(frames, axis=0).astype(np.uint8)
    
    return frames, total_frames, fps

# Create directories (ONLY train and val - no test to save storage!)
CITIZEN_PREPROCESSED = os.path.join(BASE_DIR, "preprocessed_citizen_100")
for split in ['train', 'val']:
    os.makedirs(os.path.join(CITIZEN_PREPROCESSED, split), exist_ok=True)

print(f"\nüìÅ Output: {CITIZEN_PREPROCESSED}")
print("‚ö†Ô∏è ONLY processing train + val (test skipped to save ~2GB storage)")

# Check existing preprocessed videos (SMART SKIP)
existing_counts = {}
for split_name in ['train', 'val']:
    split_dir = os.path.join(CITIZEN_PREPROCESSED, split_name)
    existing_counts[split_name] = len(list(Path(split_dir).glob("*.npz")))

total_existing = sum(existing_counts.values())
print(f"\nüìä Found {total_existing} already preprocessed videos:")
for split_name, count in existing_counts.items():
    print(f"   ‚Ä¢ {split_name}: {count} videos")

if total_existing > 0:
    print("\nüí° These videos will be SKIPPED (fast!)")

# Process ONLY train and val splits
processed_records = []
failed_videos = []

for split in ['train', 'val']:
    df_split = df_citizen_100[df_citizen_100['split'] == split].copy()
    
    if len(df_split) == 0:
        continue
    
    print(f"\n{'='*60}")
    print(f"Processing {split.upper()} split: {len(df_split)} videos")
    print(f"{'='*60}")
    
    split_dir = os.path.join(CITIZEN_PREPROCESSED, split)
    skipped_videos = 0
    
    for idx, row in tqdm(df_split.iterrows(), total=len(df_split), desc=f"{split}"):
        video_id = row['video_id']
        video_path = row['video_path']
        gloss = row['gloss']
        
        # Smart skip: check if already preprocessed
        save_path = os.path.join(split_dir, f"{video_id}.npz")
        
        if os.path.exists(save_path):
            # Skip processing - load metadata
            try:
                data = np.load(save_path)
                frames = data['frames']
                
                processed_records.append({
                    'video_id': video_id,
                    'gloss': gloss,
                    'split': split,
                    'save_path': save_path,
                    'original_frames': -1,
                    'original_fps': -1,
                    'processed_frames': frames.shape[0],
                    'frame_shape': frames.shape[1:],
                    'dataset': 'citizen'
                })
                skipped_videos += 1
                continue
                
            except Exception:
                print(f"\n‚ö†Ô∏è Corrupted file {video_id}, reprocessing...")
        
        try:
            # Load and preprocess video (32 frames)
            frames, orig_frames, orig_fps = load_video_frames(
                video_path,
                target_frames=PREPROCESS_CONFIG['target_frames'],
                target_size=PREPROCESS_CONFIG['target_size']
            )
            
            # Save as compressed .npz (uint8 for storage efficiency)
            np.savez_compressed(save_path, frames=frames)
            
            # Record metadata
            processed_records.append({
                'video_id': video_id,
                'gloss': gloss,
                'split': split,
                'save_path': save_path,
                'original_frames': orig_frames,
                'original_fps': orig_fps,
                'processed_frames': frames.shape[0],
                'frame_shape': frames.shape[1:],
                'dataset': 'citizen'
            })
            
        except Exception as e:
            failed_videos.append({
                'video_id': video_id,
                'gloss': gloss,
                'error': str(e)
            })
            print(f"\n‚ö†Ô∏è Failed: {video_id} ({gloss}): {e}")
    
    # Summary
    print(f"\n‚úÖ {split.upper()} complete:")
    print(f"   ‚Ä¢ Total: {len(df_split)}")
    print(f"   ‚Ä¢ Skipped: {skipped_videos}")
    print(f"   ‚Ä¢ Newly processed: {len([r for r in processed_records if r['split']==split]) - skipped_videos}")
    print(f"   ‚Ä¢ Failed: {len([f for f in failed_videos if f.get('split')==split])}")

# Create preprocessed manifest
df_preprocessed_citizen = pd.DataFrame(processed_records)

# Save manifest
citizen_preprocessed_manifest = os.path.join(BASE_DIR, "manifests", "citizen_100_preprocessed.csv")
df_preprocessed_citizen.to_csv(citizen_preprocessed_manifest, index=False)

# Calculate storage (train + val only)
total_size = 0
for split_name in ['train', 'val']:
    split_dir = os.path.join(CITIZEN_PREPROCESSED, split_name)
    for npz_file in Path(split_dir).glob("*.npz"):
        total_size += npz_file.stat().st_size

print("\n" + "="*60)
print("PREPROCESSING COMPLETE (Train + Val only)")
print("="*60)
print("\nüìä Statistics:")
print(f"   ‚Ä¢ Total processed: {len(df_preprocessed_citizen)}")
print(f"   ‚Ä¢ Train: {len([r for r in processed_records if r['split']=='train'])}")
print(f"   ‚Ä¢ Val: {len([r for r in processed_records if r['split']=='val'])}")
print(f"   ‚Ä¢ Failed: {len(failed_videos)}")

print(f"\nüíæ Storage: {total_size / (1024**3):.2f} GB")
print(f"   ‚Ä¢ Avg per video: {total_size / len(df_preprocessed_citizen) / (1024**2):.2f} MB")

print(f"\n‚úÖ Manifest saved: {citizen_preprocessed_manifest}")
print("\n‚úÖ Ready for Cell 15 - Create DataLoaders")

PREPROCESSING CITIZEN 100 VIDEOS

üí° Using SAME config as WLASL (32 frames):
   ‚Ä¢ Frames: 32
   ‚Ä¢ Size: (224, 224)
   ‚Ä¢ Perfect consistency with I3D pretraining & WLASL100!

üìÅ Output: /kaggle/working/WASL/preprocessed_citizen_100
‚ö†Ô∏è ONLY processing train + val (test skipped to save ~2GB storage)

üìä Found 1698 already preprocessed videos:
   ‚Ä¢ train: 1698 videos
   ‚Ä¢ val: 0 videos

üí° These videos will be SKIPPED (fast!)

Processing TRAIN split: 2192 videos


train: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2192/2192 [12:19<00:00,  2.96it/s]



‚úÖ TRAIN complete:
   ‚Ä¢ Total: 2192
   ‚Ä¢ Skipped: 1698
   ‚Ä¢ Newly processed: 494
   ‚Ä¢ Failed: 0

Processing VAL split: 560 videos


val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 560/560 [12:48<00:00,  1.37s/it]


‚úÖ VAL complete:
   ‚Ä¢ Total: 560
   ‚Ä¢ Skipped: 0
   ‚Ä¢ Newly processed: 560
   ‚Ä¢ Failed: 0

PREPROCESSING COMPLETE (Train + Val only)

üìä Statistics:
   ‚Ä¢ Total processed: 2752
   ‚Ä¢ Train: 2192
   ‚Ä¢ Val: 560
   ‚Ä¢ Failed: 0

üíæ Storage: 8.17 GB
   ‚Ä¢ Avg per video: 3.04 MB

‚úÖ Manifest saved: /kaggle/working/WASL/manifests/citizen_100_preprocessed.csv

‚úÖ Ready for Cell 15 - Create DataLoaders





In [13]:
# ---------- VERIFY Cell 14 Output ----------
import os
from pathlib import Path

print("="*60)
print("VERIFYING CELL 14 OUTPUT")
print("="*60)

# Check manifests directory
manifests_dir = os.path.join(BASE_DIR, "manifests")
print(f"\nüìÅ Manifests directory: {manifests_dir}")
print(f"   Exists: {os.path.exists(manifests_dir)}")

if os.path.exists(manifests_dir):
    print("\nüìÑ Files in manifests:")
    for f in os.listdir(manifests_dir):
        file_path = os.path.join(manifests_dir, f)
        size_mb = os.path.getsize(file_path) / (1024**2)
        print(f"   ‚Ä¢ {f} ({size_mb:.2f} MB)")

# Check preprocessed directory
preprocessed_dir = os.path.join(BASE_DIR, "preprocessed_citizen_100")
print(f"\nüìÅ Preprocessed directory: {preprocessed_dir}")
print(f"   Exists: {os.path.exists(preprocessed_dir)}")

if os.path.exists(preprocessed_dir):
    for split in ['train', 'val']:
        split_dir = os.path.join(preprocessed_dir, split)
        if os.path.exists(split_dir):
            count = len(list(Path(split_dir).glob("*.npz")))
            print(f"   ‚Ä¢ {split}: {count} videos")

# Check specific files Cell 15 needs
required_files = [
    "citizen_100_preprocessed.csv",
    "label_mapping_100.json"
]

print("\nüîç Files Cell 15 needs:")
for req_file in required_files:
    file_path = os.path.join(manifests_dir, req_file)
    exists = os.path.exists(file_path)
    status = "‚úÖ" if exists else "‚ùå"
    print(f"   {status} {req_file}")
    if exists:
        size_kb = os.path.getsize(file_path) / 1024
        print(f"      Size: {size_kb:.2f} KB")

print("\n" + "="*60)

VERIFYING CELL 14 OUTPUT

üìÅ Manifests directory: /kaggle/working/WASL/manifests
   Exists: True

üìÑ Files in manifests:
   ‚Ä¢ label_mapping_100.json (0.00 MB)
   ‚Ä¢ wlasl100_available.csv (0.10 MB)
   ‚Ä¢ label_mapping.json (0.00 MB)
   ‚Ä¢ wlasl100_manifest.csv (0.09 MB)
   ‚Ä¢ citizen_100.csv (0.62 MB)
   ‚Ä¢ wlasl_citizen_intersection.json (0.00 MB)
   ‚Ä¢ citizen_100_preprocessed.csv (0.40 MB)

üìÅ Preprocessed directory: /kaggle/working/WASL/preprocessed_citizen_100
   Exists: True
   ‚Ä¢ train: 2192 videos
   ‚Ä¢ val: 560 videos

üîç Files Cell 15 needs:
   ‚úÖ citizen_100_preprocessed.csv
      Size: 411.14 KB
   ‚úÖ label_mapping_100.json
      Size: 4.89 KB



In [14]:
# ---------- Cell 15: Create Citizen 100 DataLoaders ----------
import json

import pandas as pd
import torch
from torch.utils.data import DataLoader

print("="*60)
print("CREATING CITIZEN 100 DATALOADERS")
print("="*60)

# Load preprocessed manifest
citizen_prep_manifest = os.path.join(BASE_DIR, "manifests", "citizen_100_preprocessed.csv")
df_citizen_100_prep = pd.read_csv(citizen_prep_manifest)

# Load label mapping
with open(os.path.join(BASE_DIR, "manifests", "label_mapping_100.json"), 'r') as f:
    label_mapping_100 = json.load(f)
    num_classes_100 = label_mapping_100['num_classes']
    gloss_to_label = label_mapping_100['gloss_to_label']

print(f"\n‚úÖ Loaded Citizen 100: {len(df_citizen_100_prep)} videos")
print(f"‚úÖ Classes: {num_classes_100}")

# CRITICAL FIX: Add 'label' column using gloss_to_label mapping
df_citizen_100_prep['label'] = df_citizen_100_prep['gloss'].map(gloss_to_label)

# Verify all labels were mapped successfully
if df_citizen_100_prep['label'].isna().any():
    print(f"\n‚ö†Ô∏è WARNING: {df_citizen_100_prep['label'].isna().sum()} videos have missing labels!")
    missing_glosses = df_citizen_100_prep[df_citizen_100_prep['label'].isna()]['gloss'].unique()
    print(f"   Missing glosses: {missing_glosses}")
else:
    print(f"‚úÖ All {len(df_citizen_100_prep)} videos successfully labeled")

# Create splits (ONLY train and val - no test)
train_df_100 = df_citizen_100_prep[df_citizen_100_prep['split'] == 'train'].copy()
val_df_100 = df_citizen_100_prep[df_citizen_100_prep['split'] == 'val'].copy()

print("\nüìä Split distribution:")
print(f"   ‚Ä¢ Train: {len(train_df_100)}")
print(f"   ‚Ä¢ Val: {len(val_df_100)}")

# Create datasets (with improved augmentation!)
train_dataset_100 = WLASLDataset(train_df_100, augment=True)
val_dataset_100 = WLASLDataset(val_df_100, augment=False)

# Create dataloaders
BATCH_SIZE_100 = 8

train_loader_100 = DataLoader(
    train_dataset_100,
    batch_size=BATCH_SIZE_100,
    shuffle=True,
    num_workers=2,
    pin_memory=True
)

val_loader_100 = DataLoader(
    val_dataset_100,
    batch_size=BATCH_SIZE_100,
    shuffle=False,
    num_workers=2,
    pin_memory=True
)

print("\n‚úÖ Created DataLoaders:")
print(f"   ‚Ä¢ Train: {len(train_loader_100)} batches")
print(f"   ‚Ä¢ Val: {len(val_loader_100)} batches")

print("\n‚úÖ Ready for Cell 16 - Train on Citizen 100")
print("="*60)

CREATING CITIZEN 100 DATALOADERS

‚úÖ Loaded Citizen 100: 2752 videos
‚úÖ Classes: 100
‚úÖ All 2752 videos successfully labeled

üìä Split distribution:
   ‚Ä¢ Train: 2192
   ‚Ä¢ Val: 560

Creating WLASLDataset:
   ‚Ä¢ Total videos: 2192
   ‚Ä¢ Augmentation: ENABLED (6 techniques)
   ‚Ä¢ Unique glosses: 100
   ‚Ä¢ Dataset sources:
      - citizen: 2192 videos

Creating WLASLDataset:
   ‚Ä¢ Total videos: 560
   ‚Ä¢ Augmentation: DISABLED
   ‚Ä¢ Unique glosses: 100
   ‚Ä¢ Dataset sources:
      - citizen: 560 videos

‚úÖ Created DataLoaders:
   ‚Ä¢ Train: 274 batches
   ‚Ä¢ Val: 70 batches

‚úÖ Ready for Cell 16 - Train on Citizen 100


In [15]:
# ---------- I3D Model Architecture ----------
# WLASL I3D implementation - needed for unpickling the saved model
# This MUST be defined before loading the checkpoint

import torch
import torch.nn as nn
import torch.nn.functional as F


class MaxPool3dSamePadding(nn.MaxPool3d):
    def compute_pad(self, dim, s):
        if s % self.stride[dim] == 0:
            return max(self.kernel_size[dim] - self.stride[dim], 0)
        else:
            return max(self.kernel_size[dim] - (s % self.stride[dim]), 0)

    def forward(self, x):
        (batch, channel, t, h, w) = x.size()
        pad_t = self.compute_pad(0, t)
        pad_h = self.compute_pad(1, h)
        pad_w = self.compute_pad(2, w)

        pad_t_f = pad_t // 2
        pad_t_b = pad_t - pad_t_f
        pad_h_f = pad_h // 2
        pad_h_b = pad_h - pad_h_f
        pad_w_f = pad_w // 2
        pad_w_b = pad_w - pad_w_f

        pad = (pad_w_f, pad_w_b, pad_h_f, pad_h_b, pad_t_f, pad_t_b)
        x = F.pad(x, pad)
        return super(MaxPool3dSamePadding, self).forward(x)


class Unit3D(nn.Module):
    def __init__(self, in_channels, output_channels, kernel_shape=(1, 1, 1),
                 stride=(1, 1, 1), padding=0, activation_fn=F.relu, use_batch_norm=True,
                 use_bias=False, name='unit_3d'):
        super(Unit3D, self).__init__()
        
        self._output_channels = output_channels
        self._kernel_shape = kernel_shape
        self._stride = stride
        self._use_batch_norm = use_batch_norm
        self._activation_fn = activation_fn
        self._use_bias = use_bias
        self.name = name
        self.padding = padding
        
        self.conv3d = nn.Conv3d(in_channels=in_channels,
                                out_channels=self._output_channels,
                                kernel_size=self._kernel_shape,
                                stride=self._stride,
                                padding=0,
                                bias=self._use_bias)
        
        if self._use_batch_norm:
            self.bn = nn.BatchNorm3d(self._output_channels, eps=0.001, momentum=0.01)

    def compute_pad(self, dim, s):
        if s % self._stride[dim] == 0:
            return max(self._kernel_shape[dim] - self._stride[dim], 0)
        else:
            return max(self._kernel_shape[dim] - (s % self._stride[dim]), 0)

    def forward(self, x):
        (batch, channel, t, h, w) = x.size()
        pad_t = self.compute_pad(0, t)
        pad_h = self.compute_pad(1, h)
        pad_w = self.compute_pad(2, w)

        pad_t_f = pad_t // 2
        pad_t_b = pad_t - pad_t_f
        pad_h_f = pad_h // 2
        pad_h_b = pad_h - pad_h_f
        pad_w_f = pad_w // 2
        pad_w_b = pad_w - pad_w_f

        pad = (pad_w_f, pad_w_b, pad_h_f, pad_h_b, pad_t_f, pad_t_b)
        x = F.pad(x, pad)
        
        x = self.conv3d(x)
        if self._use_batch_norm:
            x = self.bn(x)
        if self._activation_fn is not None:
            x = self._activation_fn(x)
        return x


class InceptionModule(nn.Module):
    def __init__(self, in_channels, out_channels, name):
        super(InceptionModule, self).__init__()

        self.b0 = Unit3D(in_channels=in_channels, output_channels=out_channels[0],
                         kernel_shape=[1, 1, 1], padding=0, name=name+'/Branch_0/Conv3d_0a_1x1')
        self.b1a = Unit3D(in_channels=in_channels, output_channels=out_channels[1],
                          kernel_shape=[1, 1, 1], padding=0, name=name+'/Branch_1/Conv3d_0a_1x1')
        self.b1b = Unit3D(in_channels=out_channels[1], output_channels=out_channels[2],
                          kernel_shape=[3, 3, 3], name=name+'/Branch_1/Conv3d_0b_3x3')
        self.b2a = Unit3D(in_channels=in_channels, output_channels=out_channels[3],
                          kernel_shape=[1, 1, 1], padding=0, name=name+'/Branch_2/Conv3d_0a_1x1')
        self.b2b = Unit3D(in_channels=out_channels[3], output_channels=out_channels[4],
                          kernel_shape=[3, 3, 3], name=name+'/Branch_2/Conv3d_0b_3x3')
        self.b3a = MaxPool3dSamePadding(kernel_size=[3, 3, 3], stride=(1, 1, 1), padding=0)
        self.b3b = Unit3D(in_channels=in_channels, output_channels=out_channels[5],
                          kernel_shape=[1, 1, 1], padding=0, name=name+'/Branch_3/Conv3d_0b_1x1')
        self.name = name

    def forward(self, x):
        b0 = self.b0(x)
        b1 = self.b1b(self.b1a(x))
        b2 = self.b2b(self.b2a(x))
        b3 = self.b3b(self.b3a(x))
        return torch.cat([b0, b1, b2, b3], dim=1)


class InceptionI3d(nn.Module):
    """Inception-v1 I3D architecture."""

    def __init__(self, num_classes=400, spatial_squeeze=True,
                 final_endpoint='Logits', name='inception_i3d', in_channels=3, dropout_keep_prob=0.5):
        super(InceptionI3d, self).__init__()

        self._num_classes = num_classes
        self._spatial_squeeze = spatial_squeeze
        self._final_endpoint = final_endpoint
        self.logits = None

        if self._final_endpoint not in ['Conv3d_1a_7x7', 'MaxPool3d_2a_3x3', 'Conv3d_2b_1x1', 'Conv3d_2c_3x3',
                                          'MaxPool3d_3a_3x3', 'Mixed_3b', 'Mixed_3c', 'MaxPool3d_4a_3x3',
                                          'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_4f',
                                          'MaxPool3d_5a_2x2', 'Mixed_5b', 'Mixed_5c', 'Logits', 'Predictions']:
            raise ValueError('Unknown final endpoint %s' % self._final_endpoint)

        # Build network
        self.end_points = {}
        end_point = 'Conv3d_1a_7x7'
        self.end_points[end_point] = Unit3D(in_channels=in_channels, output_channels=64, kernel_shape=[7, 7, 7],
                                             stride=(2, 2, 2), padding=(3,3,3),  name=end_point)
        if self._final_endpoint == end_point: return

        end_point = 'MaxPool3d_2a_3x3'
        self.end_points[end_point] = MaxPool3dSamePadding(kernel_size=[1, 3, 3], stride=(1, 2, 2), padding=0)
        if self._final_endpoint == end_point: return

        end_point = 'Conv3d_2b_1x1'
        self.end_points[end_point] = Unit3D(in_channels=64, output_channels=64, kernel_shape=[1, 1, 1], padding=0,
                                             name=end_point)
        if self._final_endpoint == end_point: return

        end_point = 'Conv3d_2c_3x3'
        self.end_points[end_point] = Unit3D(in_channels=64, output_channels=192, kernel_shape=[3, 3, 3], padding=1,
                                             name=end_point)
        if self._final_endpoint == end_point: return

        end_point = 'MaxPool3d_3a_3x3'
        self.end_points[end_point] = MaxPool3dSamePadding(kernel_size=[1, 3, 3], stride=(1, 2, 2), padding=0)
        if self._final_endpoint == end_point: return

        end_point = 'Mixed_3b'
        self.end_points[end_point] = InceptionModule(192, [64, 96, 128, 16, 32, 32], name=end_point)
        if self._final_endpoint == end_point: return

        end_point = 'Mixed_3c'
        self.end_points[end_point] = InceptionModule(256, [128, 128, 192, 32, 96, 64], name=end_point)
        if self._final_endpoint == end_point: return

        end_point = 'MaxPool3d_4a_3x3'
        self.end_points[end_point] = MaxPool3dSamePadding(kernel_size=[3, 3, 3], stride=(2, 2, 2), padding=0)
        if self._final_endpoint == end_point: return

        end_point = 'Mixed_4b'
        self.end_points[end_point] = InceptionModule(128+192+96+64, [192, 96, 208, 16, 48, 64], name=end_point)
        if self._final_endpoint == end_point: return

        end_point = 'Mixed_4c'
        self.end_points[end_point] = InceptionModule(192+208+48+64, [160, 112, 224, 24, 64, 64], name=end_point)
        if self._final_endpoint == end_point: return

        end_point = 'Mixed_4d'
        self.end_points[end_point] = InceptionModule(160+224+64+64, [128, 128, 256, 24, 64, 64], name=end_point)
        if self._final_endpoint == end_point: return

        end_point = 'Mixed_4e'
        self.end_points[end_point] = InceptionModule(128+256+64+64, [112, 144, 288, 32, 64, 64], name=end_point)
        if self._final_endpoint == end_point: return

        end_point = 'Mixed_4f'
        self.end_points[end_point] = InceptionModule(112+288+64+64, [256, 160, 320, 32, 128, 128], name=end_point)
        if self._final_endpoint == end_point: return

        end_point = 'MaxPool3d_5a_2x2'
        self.end_points[end_point] = MaxPool3dSamePadding(kernel_size=[2, 2, 2], stride=(2, 2, 2), padding=0)
        if self._final_endpoint == end_point: return

        end_point = 'Mixed_5b'
        self.end_points[end_point] = InceptionModule(256+320+128+128, [256, 160, 320, 32, 128, 128], name=end_point)
        if self._final_endpoint == end_point: return

        end_point = 'Mixed_5c'
        self.end_points[end_point] = InceptionModule(256+320+128+128, [384, 192, 384, 48, 128, 128], name=end_point)
        if self._final_endpoint == end_point: return

        end_point = 'Logits'
        self.avg_pool = nn.AvgPool3d(kernel_size=[2, 7, 7], stride=(1, 1, 1))
        self.dropout = nn.Dropout(dropout_keep_prob)
        self.logits = Unit3D(in_channels=384+384+128+128, output_channels=self._num_classes,
                             kernel_shape=[1, 1, 1],
                             padding=0,
                             activation_fn=None,
                             use_batch_norm=False,
                             use_bias=True,
                             name='logits')

        self.build()

    def replace_logits(self, num_classes):
        self._num_classes = num_classes
        self.logits = Unit3D(in_channels=384+384+128+128, output_channels=self._num_classes,
                             kernel_shape=[1, 1, 1],
                             padding=0,
                             activation_fn=None,
                             use_batch_norm=False,
                             use_bias=True,
                             name='logits')

    def build(self):
        for k in self.end_points.keys():
            self.add_module(k, self.end_points[k])

    def forward(self, x):
        for end_point in self.end_points:
            if end_point in self.end_points:
                x = self._modules[end_point](x)

        x = self.logits(self.dropout(self.avg_pool(x)))
        if self._spatial_squeeze:
            x = x.squeeze(3).squeeze(3)
        
        x = x.mean(2)
        return x

# Register the classes in sys.modules so PyTorch can find them when unpickling
# The checkpoint was saved with CV.models.i3d namespace
import sys
from types import ModuleType

# Create dummy module structure
CV = ModuleType('CV')
CV_models = ModuleType('CV.models')
CV_models_i3d = ModuleType('CV.models.i3d')

# Add our I3D classes to the fake module
CV_models_i3d.MaxPool3dSamePadding = MaxPool3dSamePadding
CV_models_i3d.Unit3D = Unit3D
CV_models_i3d.InceptionModule = InceptionModule
CV_models_i3d.InceptionI3d = InceptionI3d

# Register in sys.modules
sys.modules['CV'] = CV
sys.modules['CV.models'] = CV_models
sys.modules['CV.models.i3d'] = CV_models_i3d

print("‚úÖ I3D Model Architecture Loaded")
print("‚úÖ Module registered as CV.models.i3d for checkpoint loading")

‚úÖ I3D Model Architecture Loaded
‚úÖ Module registered as CV.models.i3d for checkpoint loading


In [16]:
# ---------- Cell 19: Train on Citizen 100 Labels ----------
import sys
import time

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm

print("="*60)
print("TRAINING ON CITIZEN 100 LABELS")
print("="*60)

# Load FULL pre-trained model (architecture + weights)
checkpoint_path = os.path.join(BASE_DIR, "models", "checkpoints", "best_model_FULL.pth")

if not os.path.exists(checkpoint_path):
    print("‚ùå Error: WLASL100 FULL checkpoint not found!")
    print("   Please run Cell 1 to load the pre-trained model.")
    sys.exit(1)

print(f"‚úÖ Loading FULL checkpoint: {checkpoint_path}")
print("   (Contains complete I3D model with architecture + weights)")

# Load the complete checkpoint (MUST use weights_only=False for full model)
# InceptionI3d class is already defined in the previous cell
checkpoint = torch.load(checkpoint_path, map_location='cpu', weights_only=False)

# Check what's in the checkpoint
print("\nüì¶ Checkpoint contents:")
for key in checkpoint.keys():
    print(f"   ‚Ä¢ {key}")

# Extract the FULL model object
if 'model' in checkpoint:
    model = checkpoint['model']
    print("\n‚úÖ Loaded FULL model object!")
    print(f"   ‚Ä¢ Model type: {type(model).__name__}")
    print(f"   ‚Ä¢ Validation accuracy: {checkpoint.get('val_acc', 75.15):.2f}%")
    print("   ‚Ä¢ Classes: 100")
else:
    print("\n‚ùå Error: 'model' key not found in checkpoint!")
    print(f"   Available keys: {list(checkpoint.keys())}")
    print("   Please make sure you uploaded the FULL model file.")
    import sys
    sys.exit(1)

# Move to device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
print(f"\n‚úÖ Model moved to: {device}")
print("   ‚Ä¢ Ready to train on Citizen 100!")

# Training config
config_100 = {
    'num_epochs': 50,
    'learning_rate': 1e-4,
    'weight_decay': 1e-8,
    'adam_eps': 1e-3,
    'patience': 10,
    'grad_clip': 1.0,
}

print("\nüìã Training configuration:")
for key, value in config_100.items():
    print(f"   ‚Ä¢ {key}: {value}")

# Optimizer
optimizer_100 = optim.Adam(
    model.parameters(),
    lr=config_100['learning_rate'],
    weight_decay=config_100['weight_decay'],
    eps=config_100['adam_eps']
)

scheduler_100 = ReduceLROnPlateau(
    optimizer_100,
    mode='min',
    factor=0.5,
    patience=5,
    verbose=True
)

criterion = nn.CrossEntropyLoss()

# Checkpoint directory
checkpoint_dir_100 = os.path.join(BASE_DIR, "models", "checkpoints")
os.makedirs(checkpoint_dir_100, exist_ok=True)

# Training history
history_100 = {
    'train_loss': [],
    'train_acc': [],
    'val_loss': [],
    'val_acc': [],
    'lr': []
}

best_val_acc_100 = 0.0
best_val_loss_100 = float('inf')
best_epoch_100 = 0
patience_counter_100 = 0

print(f"\n{'='*60}")
print("STARTING TRAINING")
print(f"{'='*60}\n")

start_time_100 = time.time()

for epoch in range(config_100['num_epochs']):
    epoch_start = time.time()
    
    # TRAINING
    model.train()
    train_loss = 0.0
    train_correct = 0
    train_total = 0
    
    train_pbar = tqdm(train_loader_100, desc=f"Epoch {epoch+1}/{config_100['num_epochs']} [Train]")
    for batch_idx, batch in enumerate(train_pbar):
        # FIX: Dataset already returns (C, T, H, W) and normalized [0, 1]
        videos = batch['frames'].to(device)  # Already (B, C, T, H, W), float32, [0, 1]
        labels = batch['label'].to(device)
        
        optimizer_100.zero_grad()
        outputs = model(videos)
        loss = criterion(outputs, labels)
        
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), config_100['grad_clip'])
        optimizer_100.step()
        
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        train_correct += predicted.eq(labels).sum().item()
        train_total += labels.size(0)
        
        train_pbar.set_postfix({
            'loss': f"{loss.item():.4f}",
            'acc': f"{100.0 * train_correct / train_total:.2f}%"
        })
    
    train_loss /= len(train_loader_100)
    train_acc = 100.0 * train_correct / train_total
    
    # VALIDATION
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0
    
    with torch.no_grad():
        val_pbar = tqdm(val_loader_100, desc=f"Epoch {epoch+1}/{config_100['num_epochs']} [Val]")
        for batch in val_pbar:
            # FIX: Dataset already returns (C, T, H, W) and normalized [0, 1]
            videos = batch['frames'].to(device)  # Already (B, C, T, H, W), float32, [0, 1]
            labels = batch['label'].to(device)
            
            outputs = model(videos)
            loss = criterion(outputs, labels)
            
            val_loss += loss.item()
            _, predicted = outputs.max(1)
            val_correct += predicted.eq(labels).sum().item()
            val_total += labels.size(0)
            
            val_pbar.set_postfix({
                'loss': f"{loss.item():.4f}",
                'acc': f"{100.0 * val_correct / val_total:.2f}%"
            })
    
    val_loss /= len(val_loader_100)
    val_acc = 100.0 * val_correct / val_total
    
    # Update scheduler
    scheduler_100.step(val_loss)
    current_lr = optimizer_100.param_groups[0]['lr']
    
    # Record history
    history_100['train_loss'].append(train_loss)
    history_100['train_acc'].append(train_acc)
    history_100['val_loss'].append(val_loss)
    history_100['val_acc'].append(val_acc)
    history_100['lr'].append(current_lr)
    
    epoch_time = time.time() - epoch_start
    elapsed_time = time.time() - start_time_100
    
    # Print summary
    print(f"\n{'='*60}")
    print(f"Citizen 100 - Epoch {epoch+1}/{config_100['num_epochs']}:")
    print(f"{'='*60}")
    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
    print(f"Val Loss:   {val_loss:.4f} | Val Acc:   {val_acc:.2f}%")
    print(f"Learning Rate: {current_lr:.6f}")
    print(f"Time: {epoch_time:.1f}s | Total: {elapsed_time/60:.1f}min")
    
    # Check if best
    is_best = val_acc > best_val_acc_100
    
    if is_best:
        best_val_acc_100 = val_acc
        best_val_loss_100 = val_loss
        best_epoch_100 = epoch + 1
        patience_counter_100 = 0
        
        # Save best model
        best_model_path_100 = os.path.join(checkpoint_dir_100, "best_model_100.pth")
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer_100.state_dict(),
            'scheduler_state_dict': scheduler_100.state_dict(),
            'train_loss': train_loss,
            'train_acc': train_acc,
            'val_loss': val_loss,
            'val_acc': val_acc,
            'history': history_100,
            'num_classes': num_classes_100
        }, best_model_path_100)
        
        print(f"‚úÖ New best model saved! Val acc: {val_acc:.2f}%")
    else:
        patience_counter_100 += 1
        print(f"‚è≥ Patience: {patience_counter_100}/{config_100['patience']}")
    
    # Early stopping
    if patience_counter_100 >= config_100['patience']:
        print(f"\n‚ö†Ô∏è Early stopping triggered after {epoch+1} epochs")
        break
    
    print()

total_time = time.time() - start_time_100

print("="*60)
print("TRAINING COMPLETE")
print("="*60)
print("\nüìä Best Results:")
print(f"   ‚Ä¢ Best Epoch: {best_epoch_100}")
print(f"   ‚Ä¢ Best Val Acc: {best_val_acc_100:.2f}%")
print(f"   ‚Ä¢ Best Val Loss: {best_val_loss_100:.4f}")
print(f"   ‚Ä¢ Total Time: {total_time/60:.1f} minutes")

print("\nüíæ Best model saved to:")
print(f"   {best_model_path_100}")

print("="*60)


TRAINING ON CITIZEN 100 LABELS
‚úÖ Loading FULL checkpoint: /kaggle/working/WASL/models/checkpoints/best_model_FULL.pth
   (Contains complete I3D model with architecture + weights)

üì¶ Checkpoint contents:
   ‚Ä¢ model
   ‚Ä¢ val_acc
   ‚Ä¢ epoch
   ‚Ä¢ train_loss
   ‚Ä¢ train_acc
   ‚Ä¢ val_loss
   ‚Ä¢ history

‚úÖ Loaded FULL model object!
   ‚Ä¢ Model type: InceptionI3d
   ‚Ä¢ Validation accuracy: 75.15%
   ‚Ä¢ Classes: 100

‚úÖ Model moved to: cuda
   ‚Ä¢ Ready to train on Citizen 100!

üìã Training configuration:
   ‚Ä¢ num_epochs: 50
   ‚Ä¢ learning_rate: 0.0001
   ‚Ä¢ weight_decay: 1e-08
   ‚Ä¢ adam_eps: 0.001
   ‚Ä¢ patience: 10
   ‚Ä¢ grad_clip: 1.0





STARTING TRAINING



Epoch 1/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:48<00:00,  1.20it/s, loss=3.2499, acc=35.45%]
Epoch 1/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.05it/s, loss=2.9058, acc=46.25%]



Citizen 100 - Epoch 1/50:
Train Loss: 2.8318 | Train Acc: 35.45%
Val Loss:   2.0787 | Val Acc:   46.25%
Learning Rate: 0.000100
Time: 251.0s | Total: 4.2min
‚úÖ New best model saved! Val acc: 46.25%



Epoch 2/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:47<00:00,  1.20it/s, loss=2.5946, acc=50.41%]
Epoch 2/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:23<00:00,  3.03it/s, loss=2.1401, acc=59.64%]



Citizen 100 - Epoch 2/50:
Train Loss: 2.1128 | Train Acc: 50.41%
Val Loss:   1.5362 | Val Acc:   59.64%
Learning Rate: 0.000100
Time: 251.1s | Total: 8.4min
‚úÖ New best model saved! Val acc: 59.64%



Epoch 3/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:48<00:00,  1.20it/s, loss=2.1010, acc=58.35%]
Epoch 3/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.07it/s, loss=1.3801, acc=66.43%]



Citizen 100 - Epoch 3/50:
Train Loss: 1.7176 | Train Acc: 58.35%
Val Loss:   1.2752 | Val Acc:   66.43%
Learning Rate: 0.000100
Time: 251.5s | Total: 12.6min
‚úÖ New best model saved! Val acc: 66.43%



Epoch 4/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:43<00:00,  1.22it/s, loss=1.3566, acc=66.38%]
Epoch 4/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.08it/s, loss=1.1442, acc=70.18%]



Citizen 100 - Epoch 4/50:
Train Loss: 1.4109 | Train Acc: 66.38%
Val Loss:   1.0561 | Val Acc:   70.18%
Learning Rate: 0.000100
Time: 246.6s | Total: 16.7min
‚úÖ New best model saved! Val acc: 70.18%



Epoch 5/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:44<00:00,  1.22it/s, loss=0.6068, acc=70.71%]
Epoch 5/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.10it/s, loss=1.1868, acc=73.75%]



Citizen 100 - Epoch 5/50:
Train Loss: 1.2130 | Train Acc: 70.71%
Val Loss:   0.9464 | Val Acc:   73.75%
Learning Rate: 0.000100
Time: 247.4s | Total: 20.8min
‚úÖ New best model saved! Val acc: 73.75%



Epoch 6/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:49<00:00,  1.20it/s, loss=1.0778, acc=76.64%]
Epoch 6/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.09it/s, loss=0.9747, acc=77.68%]



Citizen 100 - Epoch 6/50:
Train Loss: 1.0312 | Train Acc: 76.64%
Val Loss:   0.8519 | Val Acc:   77.68%
Learning Rate: 0.000100
Time: 251.9s | Total: 25.0min
‚úÖ New best model saved! Val acc: 77.68%



Epoch 7/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:46<00:00,  1.21it/s, loss=0.3352, acc=79.24%]
Epoch 7/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.07it/s, loss=0.9116, acc=78.04%]



Citizen 100 - Epoch 7/50:
Train Loss: 0.8798 | Train Acc: 79.24%
Val Loss:   0.7809 | Val Acc:   78.04%
Learning Rate: 0.000100
Time: 249.2s | Total: 29.2min
‚úÖ New best model saved! Val acc: 78.04%



Epoch 8/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:48<00:00,  1.20it/s, loss=0.2656, acc=82.07%]
Epoch 8/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.07it/s, loss=0.7904, acc=79.29%]



Citizen 100 - Epoch 8/50:
Train Loss: 0.7734 | Train Acc: 82.07%
Val Loss:   0.7244 | Val Acc:   79.29%
Learning Rate: 0.000100
Time: 250.9s | Total: 33.4min
‚úÖ New best model saved! Val acc: 79.29%



Epoch 9/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:45<00:00,  1.21it/s, loss=0.8334, acc=84.58%]
Epoch 9/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.08it/s, loss=0.7459, acc=80.71%]



Citizen 100 - Epoch 9/50:
Train Loss: 0.6884 | Train Acc: 84.58%
Val Loss:   0.6930 | Val Acc:   80.71%
Learning Rate: 0.000100
Time: 248.4s | Total: 37.5min
‚úÖ New best model saved! Val acc: 80.71%



Epoch 10/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:46<00:00,  1.21it/s, loss=0.4962, acc=87.09%]
Epoch 10/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.05it/s, loss=0.8757, acc=80.00%]



Citizen 100 - Epoch 10/50:
Train Loss: 0.6028 | Train Acc: 87.09%
Val Loss:   0.6792 | Val Acc:   80.00%
Learning Rate: 0.000100
Time: 249.2s | Total: 41.7min
‚è≥ Patience: 1/10



Epoch 11/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:50<00:00,  1.19it/s, loss=0.3849, acc=87.59%]
Epoch 11/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.10it/s, loss=0.6377, acc=82.32%]



Citizen 100 - Epoch 11/50:
Train Loss: 0.5292 | Train Acc: 87.59%
Val Loss:   0.6255 | Val Acc:   82.32%
Learning Rate: 0.000100
Time: 253.3s | Total: 45.9min
‚úÖ New best model saved! Val acc: 82.32%



Epoch 12/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:44<00:00,  1.22it/s, loss=0.6928, acc=89.96%]
Epoch 12/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.08it/s, loss=0.6828, acc=83.75%]



Citizen 100 - Epoch 12/50:
Train Loss: 0.4767 | Train Acc: 89.96%
Val Loss:   0.5932 | Val Acc:   83.75%
Learning Rate: 0.000100
Time: 247.6s | Total: 50.0min
‚úÖ New best model saved! Val acc: 83.75%



Epoch 13/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:51<00:00,  1.18it/s, loss=0.4397, acc=91.20%]
Epoch 13/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.06it/s, loss=0.4956, acc=83.21%]



Citizen 100 - Epoch 13/50:
Train Loss: 0.4085 | Train Acc: 91.20%
Val Loss:   0.6083 | Val Acc:   83.21%
Learning Rate: 0.000100
Time: 254.4s | Total: 54.3min
‚è≥ Patience: 1/10



Epoch 14/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:50<00:00,  1.19it/s, loss=0.5955, acc=91.10%]
Epoch 14/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:23<00:00,  3.03it/s, loss=0.5065, acc=84.82%]



Citizen 100 - Epoch 14/50:
Train Loss: 0.3955 | Train Acc: 91.10%
Val Loss:   0.5487 | Val Acc:   84.82%
Learning Rate: 0.000100
Time: 253.7s | Total: 58.5min
‚úÖ New best model saved! Val acc: 84.82%



Epoch 15/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:47<00:00,  1.20it/s, loss=0.4110, acc=92.47%]
Epoch 15/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.07it/s, loss=0.4116, acc=84.82%]



Citizen 100 - Epoch 15/50:
Train Loss: 0.3368 | Train Acc: 92.47%
Val Loss:   0.5284 | Val Acc:   84.82%
Learning Rate: 0.000100
Time: 250.7s | Total: 62.7min
‚è≥ Patience: 1/10



Epoch 16/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:48<00:00,  1.20it/s, loss=0.2153, acc=92.93%]
Epoch 16/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.08it/s, loss=0.6145, acc=85.71%]



Citizen 100 - Epoch 16/50:
Train Loss: 0.3079 | Train Acc: 92.93%
Val Loss:   0.5266 | Val Acc:   85.71%
Learning Rate: 0.000100
Time: 251.1s | Total: 66.9min
‚úÖ New best model saved! Val acc: 85.71%



Epoch 17/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:49<00:00,  1.19it/s, loss=0.1137, acc=93.98%]
Epoch 17/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.09it/s, loss=0.6122, acc=84.82%]



Citizen 100 - Epoch 17/50:
Train Loss: 0.2816 | Train Acc: 93.98%
Val Loss:   0.5541 | Val Acc:   84.82%
Learning Rate: 0.000100
Time: 252.6s | Total: 71.1min
‚è≥ Patience: 1/10



Epoch 18/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:49<00:00,  1.19it/s, loss=0.0608, acc=94.98%]
Epoch 18/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:23<00:00,  3.03it/s, loss=0.6184, acc=83.21%]



Citizen 100 - Epoch 18/50:
Train Loss: 0.2466 | Train Acc: 94.98%
Val Loss:   0.5526 | Val Acc:   83.21%
Learning Rate: 0.000100
Time: 252.4s | Total: 75.3min
‚è≥ Patience: 2/10



Epoch 19/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:47<00:00,  1.20it/s, loss=0.0470, acc=95.39%]
Epoch 19/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.05it/s, loss=0.5005, acc=85.18%]



Citizen 100 - Epoch 19/50:
Train Loss: 0.2182 | Train Acc: 95.39%
Val Loss:   0.5313 | Val Acc:   85.18%
Learning Rate: 0.000100
Time: 250.6s | Total: 79.5min
‚è≥ Patience: 3/10



Epoch 20/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:48<00:00,  1.20it/s, loss=0.0532, acc=95.85%]
Epoch 20/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.07it/s, loss=0.5248, acc=83.93%]



Citizen 100 - Epoch 20/50:
Train Loss: 0.1894 | Train Acc: 95.85%
Val Loss:   0.5569 | Val Acc:   83.93%
Learning Rate: 0.000100
Time: 251.8s | Total: 83.7min
‚è≥ Patience: 4/10



Epoch 21/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:46<00:00,  1.21it/s, loss=0.1355, acc=96.94%]
Epoch 21/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.09it/s, loss=0.3448, acc=85.36%]



Citizen 100 - Epoch 21/50:
Train Loss: 0.1658 | Train Acc: 96.94%
Val Loss:   0.5234 | Val Acc:   85.36%
Learning Rate: 0.000100
Time: 249.3s | Total: 87.8min
‚è≥ Patience: 5/10



Epoch 22/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:46<00:00,  1.21it/s, loss=0.0201, acc=97.08%]
Epoch 22/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.11it/s, loss=0.2390, acc=85.00%]



Citizen 100 - Epoch 22/50:
Train Loss: 0.1525 | Train Acc: 97.08%
Val Loss:   0.5007 | Val Acc:   85.00%
Learning Rate: 0.000100
Time: 249.4s | Total: 92.0min
‚è≥ Patience: 6/10



Epoch 23/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:48<00:00,  1.20it/s, loss=0.0851, acc=97.13%]
Epoch 23/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:23<00:00,  3.04it/s, loss=0.4539, acc=86.25%]



Citizen 100 - Epoch 23/50:
Train Loss: 0.1382 | Train Acc: 97.13%
Val Loss:   0.5131 | Val Acc:   86.25%
Learning Rate: 0.000100
Time: 251.1s | Total: 96.2min
‚úÖ New best model saved! Val acc: 86.25%



Epoch 24/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:47<00:00,  1.20it/s, loss=0.1448, acc=97.31%]
Epoch 24/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:23<00:00,  3.04it/s, loss=0.6382, acc=84.82%]



Citizen 100 - Epoch 24/50:
Train Loss: 0.1230 | Train Acc: 97.31%
Val Loss:   0.5396 | Val Acc:   84.82%
Learning Rate: 0.000100
Time: 250.8s | Total: 100.4min
‚è≥ Patience: 1/10



Epoch 25/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:46<00:00,  1.21it/s, loss=0.1872, acc=98.04%]
Epoch 25/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.12it/s, loss=0.4000, acc=86.43%]



Citizen 100 - Epoch 25/50:
Train Loss: 0.1082 | Train Acc: 98.04%
Val Loss:   0.5088 | Val Acc:   86.43%
Learning Rate: 0.000100
Time: 248.8s | Total: 104.5min
‚úÖ New best model saved! Val acc: 86.43%



Epoch 26/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:44<00:00,  1.22it/s, loss=0.1337, acc=97.67%]
Epoch 26/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.07it/s, loss=0.5886, acc=87.14%]



Citizen 100 - Epoch 26/50:
Train Loss: 0.1052 | Train Acc: 97.67%
Val Loss:   0.5204 | Val Acc:   87.14%
Learning Rate: 0.000100
Time: 247.2s | Total: 108.6min
‚úÖ New best model saved! Val acc: 87.14%



Epoch 27/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:43<00:00,  1.23it/s, loss=0.0065, acc=97.99%]
Epoch 27/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:23<00:00,  3.02it/s, loss=0.5678, acc=85.00%]



Citizen 100 - Epoch 27/50:
Train Loss: 0.1010 | Train Acc: 97.99%
Val Loss:   0.5614 | Val Acc:   85.00%
Learning Rate: 0.000100
Time: 246.5s | Total: 112.7min
‚è≥ Patience: 1/10



Epoch 28/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:48<00:00,  1.20it/s, loss=0.0851, acc=98.40%]
Epoch 28/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.08it/s, loss=0.4465, acc=86.61%]



Citizen 100 - Epoch 28/50:
Train Loss: 0.0848 | Train Acc: 98.40%
Val Loss:   0.5209 | Val Acc:   86.61%
Learning Rate: 0.000050
Time: 251.4s | Total: 116.9min
‚è≥ Patience: 2/10



Epoch 29/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:47<00:00,  1.20it/s, loss=0.3476, acc=98.45%]
Epoch 29/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.10it/s, loss=0.3031, acc=87.14%]



Citizen 100 - Epoch 29/50:
Train Loss: 0.0754 | Train Acc: 98.45%
Val Loss:   0.5143 | Val Acc:   87.14%
Learning Rate: 0.000050
Time: 250.3s | Total: 121.1min
‚è≥ Patience: 3/10



Epoch 30/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:46<00:00,  1.21it/s, loss=0.0350, acc=98.40%]
Epoch 30/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.12it/s, loss=0.2500, acc=86.43%]



Citizen 100 - Epoch 30/50:
Train Loss: 0.0717 | Train Acc: 98.40%
Val Loss:   0.5340 | Val Acc:   86.43%
Learning Rate: 0.000050
Time: 248.8s | Total: 125.2min
‚è≥ Patience: 4/10



Epoch 31/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:42<00:00,  1.23it/s, loss=0.0148, acc=98.91%]
Epoch 31/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.08it/s, loss=0.2130, acc=87.32%]



Citizen 100 - Epoch 31/50:
Train Loss: 0.0591 | Train Acc: 98.91%
Val Loss:   0.5281 | Val Acc:   87.32%
Learning Rate: 0.000050
Time: 245.5s | Total: 129.3min
‚úÖ New best model saved! Val acc: 87.32%



Epoch 32/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:44<00:00,  1.22it/s, loss=0.0060, acc=98.45%]
Epoch 32/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.06it/s, loss=0.2607, acc=86.61%]



Citizen 100 - Epoch 32/50:
Train Loss: 0.0684 | Train Acc: 98.45%
Val Loss:   0.5275 | Val Acc:   86.61%
Learning Rate: 0.000050
Time: 247.6s | Total: 133.5min
‚è≥ Patience: 1/10



Epoch 33/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:45<00:00,  1.21it/s, loss=0.0185, acc=98.95%]
Epoch 33/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:23<00:00,  3.04it/s, loss=0.2053, acc=87.32%]



Citizen 100 - Epoch 33/50:
Train Loss: 0.0542 | Train Acc: 98.95%
Val Loss:   0.5145 | Val Acc:   87.32%
Learning Rate: 0.000050
Time: 249.0s | Total: 137.6min
‚è≥ Patience: 2/10



Epoch 34/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:47<00:00,  1.20it/s, loss=0.0297, acc=99.09%]
Epoch 34/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.10it/s, loss=0.1552, acc=87.50%]



Citizen 100 - Epoch 34/50:
Train Loss: 0.0525 | Train Acc: 99.09%
Val Loss:   0.5156 | Val Acc:   87.50%
Learning Rate: 0.000025
Time: 250.6s | Total: 141.8min
‚úÖ New best model saved! Val acc: 87.50%



Epoch 35/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:47<00:00,  1.21it/s, loss=0.3577, acc=98.81%]
Epoch 35/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:23<00:00,  3.02it/s, loss=0.2357, acc=86.79%]



Citizen 100 - Epoch 35/50:
Train Loss: 0.0561 | Train Acc: 98.81%
Val Loss:   0.5186 | Val Acc:   86.79%
Learning Rate: 0.000025
Time: 250.5s | Total: 146.0min
‚è≥ Patience: 1/10



Epoch 36/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:45<00:00,  1.22it/s, loss=0.0122, acc=99.32%]
Epoch 36/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.05it/s, loss=0.2078, acc=86.43%]



Citizen 100 - Epoch 36/50:
Train Loss: 0.0480 | Train Acc: 99.32%
Val Loss:   0.5183 | Val Acc:   86.43%
Learning Rate: 0.000025
Time: 248.0s | Total: 150.1min
‚è≥ Patience: 2/10



Epoch 37/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:48<00:00,  1.20it/s, loss=0.0185, acc=99.09%]
Epoch 37/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.06it/s, loss=0.2767, acc=86.96%]



Citizen 100 - Epoch 37/50:
Train Loss: 0.0404 | Train Acc: 99.09%
Val Loss:   0.5222 | Val Acc:   86.96%
Learning Rate: 0.000025
Time: 251.1s | Total: 154.3min
‚è≥ Patience: 3/10



Epoch 38/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:50<00:00,  1.19it/s, loss=0.0087, acc=99.22%]
Epoch 38/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.05it/s, loss=0.2937, acc=86.96%]



Citizen 100 - Epoch 38/50:
Train Loss: 0.0443 | Train Acc: 99.22%
Val Loss:   0.5039 | Val Acc:   86.96%
Learning Rate: 0.000025
Time: 253.9s | Total: 158.5min
‚è≥ Patience: 4/10



Epoch 39/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:47<00:00,  1.21it/s, loss=0.0076, acc=99.13%]
Epoch 39/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.10it/s, loss=0.2729, acc=86.79%]



Citizen 100 - Epoch 39/50:
Train Loss: 0.0444 | Train Acc: 99.13%
Val Loss:   0.5136 | Val Acc:   86.79%
Learning Rate: 0.000025
Time: 250.0s | Total: 162.7min
‚è≥ Patience: 5/10



Epoch 40/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:50<00:00,  1.19it/s, loss=0.0110, acc=99.13%]
Epoch 40/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.10it/s, loss=0.3249, acc=86.61%]



Citizen 100 - Epoch 40/50:
Train Loss: 0.0433 | Train Acc: 99.13%
Val Loss:   0.5081 | Val Acc:   86.61%
Learning Rate: 0.000013
Time: 253.0s | Total: 166.9min
‚è≥ Patience: 6/10



Epoch 41/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:48<00:00,  1.20it/s, loss=0.0079, acc=99.54%]
Epoch 41/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.08it/s, loss=0.2300, acc=86.96%]



Citizen 100 - Epoch 41/50:
Train Loss: 0.0375 | Train Acc: 99.54%
Val Loss:   0.5034 | Val Acc:   86.96%
Learning Rate: 0.000013
Time: 251.2s | Total: 171.1min
‚è≥ Patience: 7/10



Epoch 42/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:42<00:00,  1.23it/s, loss=0.0179, acc=99.18%]
Epoch 42/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.08it/s, loss=0.2282, acc=86.25%]



Citizen 100 - Epoch 42/50:
Train Loss: 0.0415 | Train Acc: 99.18%
Val Loss:   0.5079 | Val Acc:   86.25%
Learning Rate: 0.000013
Time: 245.2s | Total: 175.2min
‚è≥ Patience: 8/10



Epoch 43/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:41<00:00,  1.24it/s, loss=0.0075, acc=99.18%]
Epoch 43/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.08it/s, loss=0.2149, acc=86.43%]



Citizen 100 - Epoch 43/50:
Train Loss: 0.0450 | Train Acc: 99.18%
Val Loss:   0.5017 | Val Acc:   86.43%
Learning Rate: 0.000013
Time: 244.3s | Total: 179.3min
‚è≥ Patience: 9/10



Epoch 44/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:41<00:00,  1.24it/s, loss=0.0636, acc=99.41%]
Epoch 44/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:23<00:00,  3.02it/s, loss=0.2173, acc=87.68%]



Citizen 100 - Epoch 44/50:
Train Loss: 0.0382 | Train Acc: 99.41%
Val Loss:   0.4974 | Val Acc:   87.68%
Learning Rate: 0.000013
Time: 244.3s | Total: 183.3min
‚úÖ New best model saved! Val acc: 87.68%



Epoch 45/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:50<00:00,  1.19it/s, loss=0.0159, acc=99.32%]
Epoch 45/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.09it/s, loss=0.2431, acc=87.50%]



Citizen 100 - Epoch 45/50:
Train Loss: 0.0416 | Train Acc: 99.32%
Val Loss:   0.5028 | Val Acc:   87.50%
Learning Rate: 0.000013
Time: 253.5s | Total: 187.6min
‚è≥ Patience: 1/10



Epoch 46/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:44<00:00,  1.22it/s, loss=0.0338, acc=99.50%]
Epoch 46/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.12it/s, loss=0.2475, acc=87.50%]



Citizen 100 - Epoch 46/50:
Train Loss: 0.0331 | Train Acc: 99.50%
Val Loss:   0.5070 | Val Acc:   87.50%
Learning Rate: 0.000013
Time: 247.2s | Total: 191.7min
‚è≥ Patience: 2/10



Epoch 47/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:45<00:00,  1.22it/s, loss=0.0106, acc=99.41%]
Epoch 47/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:23<00:00,  3.04it/s, loss=0.1915, acc=87.32%]



Citizen 100 - Epoch 47/50:
Train Loss: 0.0359 | Train Acc: 99.41%
Val Loss:   0.5026 | Val Acc:   87.32%
Learning Rate: 0.000013
Time: 248.2s | Total: 195.8min
‚è≥ Patience: 3/10



Epoch 48/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:43<00:00,  1.23it/s, loss=0.0063, acc=99.32%]
Epoch 48/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.10it/s, loss=0.2139, acc=87.14%]



Citizen 100 - Epoch 48/50:
Train Loss: 0.0340 | Train Acc: 99.32%
Val Loss:   0.5004 | Val Acc:   87.14%
Learning Rate: 0.000013
Time: 246.1s | Total: 199.9min
‚è≥ Patience: 4/10



Epoch 49/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:48<00:00,  1.20it/s, loss=0.0068, acc=99.32%]
Epoch 49/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:23<00:00,  3.03it/s, loss=0.1874, acc=87.50%]



Citizen 100 - Epoch 49/50:
Train Loss: 0.0368 | Train Acc: 99.32%
Val Loss:   0.4962 | Val Acc:   87.50%
Learning Rate: 0.000013
Time: 252.0s | Total: 204.1min
‚è≥ Patience: 5/10



Epoch 50/50 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 274/274 [03:47<00:00,  1.20it/s, loss=0.0716, acc=99.41%]
Epoch 50/50 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [00:22<00:00,  3.06it/s, loss=0.1894, acc=87.32%]


Citizen 100 - Epoch 50/50:
Train Loss: 0.0391 | Train Acc: 99.41%
Val Loss:   0.4996 | Val Acc:   87.32%
Learning Rate: 0.000013
Time: 250.3s | Total: 208.3min
‚è≥ Patience: 6/10

TRAINING COMPLETE

üìä Best Results:
   ‚Ä¢ Best Epoch: 44
   ‚Ä¢ Best Val Acc: 87.68%
   ‚Ä¢ Best Val Loss: 0.4974
   ‚Ä¢ Total Time: 208.3 minutes

üíæ Best model saved to:
   /kaggle/working/WASL/models/checkpoints/best_model_100.pth

‚úÖ Ready for Cell 18 - Fine-tune Citizen 100 ‚Üí WLASL100!





In [17]:
# copy to output root
import os
import shutil

print("="*60)
print("COPYING MODEL TO OUTPUT")
print("="*60)

source = '/kaggle/working/WASL/models/checkpoints/best_model_100.pth'

if os.path.exists(source):
    destination = '/kaggle/working/best_model_citizen100_87pct.pth'
    shutil.copy(source, destination)
    
    print("‚úÖ Model copied to output!")
    print(f"üì¶ Size: {os.path.getsize(destination) / 1024 / 1024:.2f} MB")
    print("üì• Ready to download!")
else:
    print("‚ùå Model not found!")

print("="*60)

COPYING MODEL TO OUTPUT
‚úÖ Model copied to output!
üì¶ Size: 142.10 MB
üì• Ready to download!


In [18]:
# compress model to zip
import os

print("="*60)
print("COMPRESSING MODEL")
print("="*60)

source_file = '/kaggle/working/best_model_citizen100_87pct.pth'
zip_file = '/kaggle/working/best_model_citizen100_87pct.zip'

if os.path.exists(source_file):
    # Create zip file
    with zipfile.ZipFile(zip_file, 'w', zipfile.ZIP_DEFLATED, compresslevel=9) as zipf:
        zipf.write(source_file, os.path.basename(source_file))
    
    original_size = os.path.getsize(source_file) / 1024 / 1024
    compressed_size = os.path.getsize(zip_file) / 1024 / 1024
    compression_ratio = (1 - compressed_size / original_size) * 100
    
    print("‚úÖ Model compressed successfully!")
    print(f"üì¶ Original:   {original_size:.2f} MB")
    print(f"üóúÔ∏è  Compressed: {compressed_size:.2f} MB")
    print(f"üíæ Saved:      {compression_ratio:.1f}%")
    print(f"üì• Download: {zip_file}")
else:
    print("‚ùå Model file not found!")

print("="*60)

COMPRESSING MODEL
‚úÖ Model compressed successfully!
üì¶ Original:   142.10 MB
üóúÔ∏è  Compressed: 126.24 MB
üíæ Saved:      11.2%
üì• Download: /kaggle/working/best_model_citizen100_87pct.zip
