## Installation

In [None]:
# !pip install -U transformers accelerate torch opencv-python pillow av scikit-learn
!pip install av

## Imports

In [None]:
import wandb
import os
import cv2
import av
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from pathlib import Path
from tqdm import tqdm
import pickle
import shutil
import warnings
warnings.filterwarnings('ignore')

In [None]:
from transformers import AutoProcessor, AutoModelForVision2Seq
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
from sklearn.model_selection import train_test_split

## WandB Setup

**Important**: Set your WandB API key before running the training cells.

In [None]:
import os

# Set your WandB API key here, or leave it empty and provide the key through the
# WANDB_API_KEY environment variable so the secret never has to be stored in the notebook.
WANDB_API_KEY = ""

# Fall back to a key that is already present in the environment.
if not WANDB_API_KEY:
    WANDB_API_KEY = os.environ.get("WANDB_API_KEY", "")

if WANDB_API_KEY:
    # Export the key so wandb.login() can pick it up without prompting.
    os.environ["WANDB_API_KEY"] = WANDB_API_KEY
    wandb.login()
    print("‚úì WandB API key set and logged in")
else:
    print("‚ö† WARNING: WandB API key not set. Please update WANDB_API_KEY variable.")

# WandB configuration
WANDB_PROJECT = "daisee-embedding-classifier"  # Change this to your project name
WANDB_ENTITY = None  # Set to your WandB username/team if needed
WANDB_ENABLED = True  # Set to False to disable WandB tracking

## Configuration

In [None]:
# Paths - DAiSEE Dataset
BASE_PATH = "/kaggle/input/daisee/DAiSEE"
TRAIN_DATA_PATH = f"{BASE_PATH}/DataSet/Train"
VAL_DATA_PATH = f"{BASE_PATH}/DataSet/Validation"
TEST_DATA_PATH = f"{BASE_PATH}/DataSet/Test"

TRAIN_LABELS_PATH = f"{BASE_PATH}/Labels/TrainLabels.csv"
VAL_LABELS_PATH = f"{BASE_PATH}/Labels/ValidationLabels.csv"
TEST_LABELS_PATH = f"{BASE_PATH}/Labels/TestLabels.csv"

# Paths - Facial Data (for addressing class imbalance)
# UPDATE THIS PATH to your facial data input directory
FACIAL_DATA_PATH = "/kaggle/input/facial-data-mendeley"
FACIAL_DATA_ENABLED = True  # Set to False to use the DAiSEE dataset only

# Input paths for embeddings produced by a previous run
EXISTING_EMBEDDINGS_DIR = Path("/kaggle/input/qwen-daisee-embeddings/embeddings")
EXISTING_FACIAL_EMBEDDINGS_DIR = Path("/kaggle/input/facial-data-embeddings/facial_embeddings")

# Output paths
# parents=True so this cell also works when the working root does not exist yet.
EMBEDDINGS_DIR = Path("/kaggle/working/embeddings")
EMBEDDINGS_DIR.mkdir(parents=True, exist_ok=True)

MODEL_DIR = Path("/kaggle/working/models")
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# Model configuration
MODEL_NAME = "Qwen/Qwen2.5-VL-7B-Instruct"
FPS = 1  # Sample 1 frame per second (same as inference notebook)

# Label categories
# These strings are used verbatim as label-DataFrame column keys and are .strip()-ed
# for display, so the trailing space in "Frustration " is deliberate.
# NOTE(review): presumably it mirrors the header in the label CSVs - confirm.
CATEGORIES = ["Boredom", "Engagement", "Confusion", "Frustration "]
NUM_CLASSES = 4  # Levels 0-3

# NOTE(review): the consumer of this flag is not visible in this part of the notebook.
TRAIN_TRANSFORMER = False

# Training configuration
BATCH_SIZE = 32
LEARNING_RATE = 1e-3
NUM_EPOCHS = 50
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

HIDDEN_DIM = 512
DROPOUT = 0.3

# ============================================================================
# WORKFLOW CONFIGURATION
# ============================================================================
SKIP_EXTRACTION = False  # Set to True to skip ALL extraction and only train
                        # Set to False to enable extraction (configure what to extract below)

# Extraction targets (only used when SKIP_EXTRACTION = False)
EXTRACT_DAISEE = False      # Extract DAiSEE video embeddings
EXTRACT_FACIAL = True     # Extract facial data embeddings (requires FACIAL_DATA_ENABLED = True)

# ============================================================================
# FACIAL EMBEDDINGS DIRECTORY CONFIGURATION
# ============================================================================
# Resolve where facial embeddings live:
#   - facial data disabled             -> None
#   - extracting facial embeddings     -> working directory (output)
#   - reusing embeddings, input exists -> the existing input directory
#   - reusing embeddings, input absent -> warn, then working directory
if not FACIAL_DATA_ENABLED:
    FACIAL_EMBEDDINGS_DIR = None
else:
    extracting_facial = EXTRACT_FACIAL and not SKIP_EXTRACTION
    if not extracting_facial and EXISTING_FACIAL_EMBEDDINGS_DIR.exists():
        # Reuse the embeddings shipped as a notebook input
        FACIAL_EMBEDDINGS_DIR = EXISTING_FACIAL_EMBEDDINGS_DIR
    else:
        if not extracting_facial:
            # Nothing to reuse: tell the user before falling back
            print(f"‚ö† WARNING: EXISTING_FACIAL_EMBEDDINGS_DIR not found: {EXISTING_FACIAL_EMBEDDINGS_DIR}")
            print(f"  Falling back to working directory")
        FACIAL_EMBEDDINGS_DIR = Path("/kaggle/working/facial_embeddings")
        FACIAL_EMBEDDINGS_DIR.mkdir(exist_ok=True)

# ============================================================================
# CONFIGURATION OUTPUT
# ============================================================================
# Print a summary of the active configuration and sanity-check the input directories.
print(f"Device: {DEVICE}")
print(f"Workflow Mode: {'TRAINING ONLY' if SKIP_EXTRACTION else 'EXTRACTION + TRAINING'}")
print(f"Facial Data Augmentation: {'ENABLED' if FACIAL_DATA_ENABLED else 'DISABLED'}")

if not SKIP_EXTRACTION:
    print(f"\nExtraction Targets:")
    print(f"  - DAiSEE videos: {'YES' if EXTRACT_DAISEE else 'NO'}")
    print(f"  - Facial data: {'YES' if EXTRACT_FACIAL and FACIAL_DATA_ENABLED else 'NO' if FACIAL_DATA_ENABLED else 'N/A (disabled)'}")

print(f"\nDAiSEE Embeddings input: {EXISTING_EMBEDDINGS_DIR}")
if FACIAL_DATA_ENABLED:
    print(f"Facial Data input: {FACIAL_DATA_PATH}")
    if not SKIP_EXTRACTION and EXTRACT_FACIAL:
        print(f"Facial Embeddings output: {FACIAL_EMBEDDINGS_DIR}")
    else:
        print(f"Facial Embeddings input: {FACIAL_EMBEDDINGS_DIR}")
print(f"New DAiSEE embeddings output: {EMBEDDINGS_DIR}")
print(f"Models will be saved to: {MODEL_DIR}")

# Check if existing embeddings directory exists
if EXISTING_EMBEDDINGS_DIR.exists():
    print(f"\n‚úì Found existing DAiSEE embeddings directory")
    for split in ['train', 'validation', 'test']:
        split_dir = EXISTING_EMBEDDINGS_DIR / split
        if split_dir.exists():
            num_files = len(list(split_dir.glob('*.npy')))
            embedding_map_exists = (split_dir / 'embedding_map.pkl').exists()
            print(f"  - {split}: {num_files} embedding files, map: {'‚úì' if embedding_map_exists else '‚úó'}")
    
    if SKIP_EXTRACTION:
        print(f"\n‚ö† SKIP_EXTRACTION=True: Will use existing embeddings for training")
        print(f"  Make sure all splits have embedding_map.pkl files!")
else:
    if SKIP_EXTRACTION:
        print(f"\n‚ö† WARNING: SKIP_EXTRACTION=True but no existing embeddings found!")
        print(f"  Please set SKIP_EXTRACTION=False to extract embeddings first")
    else:
        print(f"\n‚ö† Existing embeddings directory not found, will extract all from scratch")

# Check facial data if enabled
if FACIAL_DATA_ENABLED:
    facial_data_path = Path(FACIAL_DATA_PATH)
    if facial_data_path.exists():
        print(f"\n‚úì Facial data directory found")
    else:
        print(f"\n‚ö† WARNING: Facial data directory not found at {FACIAL_DATA_PATH}")
        print(f"  Please update FACIAL_DATA_PATH or set FACIAL_DATA_ENABLED=False")
    
    # Check facial embeddings directory (only relevant when embeddings are reused, not extracted)
    if SKIP_EXTRACTION or not EXTRACT_FACIAL:
        if FACIAL_EMBEDDINGS_DIR and FACIAL_EMBEDDINGS_DIR.exists():
            print(f"‚úì Existing facial embeddings directory found")
            # Facial embeddings are written to and read from sub-directories named after
            # the stripped, lower-cased CATEGORIES entries, so inspect exactly those
            # instead of a separately maintained list of emotion names.
            for category in [name.strip().lower() for name in CATEGORIES]:
                cat_dir = FACIAL_EMBEDDINGS_DIR / category
                if cat_dir.exists():
                    num_files = len(list(cat_dir.glob('*.npy')))
                    map_exists = (cat_dir / 'embedding_map.pkl').exists()
                    if num_files > 0 or map_exists:
                        print(f"  - {category}: {num_files} files, map: {'‚úì' if map_exists else '‚úó'}")
        else:
            print(f"‚ö† WARNING: Facial embeddings directory not found at {FACIAL_EMBEDDINGS_DIR}")
            print(f"  Please update EXISTING_FACIAL_EMBEDDINGS_DIR or set EXTRACT_FACIAL=True")

## Stage 1: Feature Extraction

### Load Labels

In [None]:
# Read the three DAiSEE label tables (train / validation / test), in that order
train_df, val_df, test_df = [
    pd.read_csv(labels_path)
    for labels_path in (TRAIN_LABELS_PATH, VAL_LABELS_PATH, TEST_LABELS_PATH)
]

n_train, n_val, n_test = len(train_df), len(val_df), len(test_df)

print("DAiSEE Dataset:")
print(f"Train samples: {n_train}")
print(f"Validation samples: {n_val}")
print(f"Test samples: {n_test}")
print(f"Total: {n_train + n_val + n_test}")

# Per-category breakdown of intensity levels in the training split
banner = "=" * 80
print("\n" + banner)
print("DAiSEE CLASS DISTRIBUTION")
print(banner)
for category in CATEGORIES:
    # The raw category string is the column key; the stripped form is only for display
    print(f"\n{category.strip()}:")
    level_counts = train_df[category].value_counts().sort_index()
    for level in range(NUM_CLASSES):
        count = level_counts.get(level, 0)  # levels absent from the data count as 0
        pct = count / n_train * 100
        print(f"  Level {level}: {count:4d} samples ({pct:5.2f}%)")
print(banner)

### Load Facial Data (Optional - for Class Imbalance)

In [None]:
# category name (stripped) -> DataFrame of facial samples; stays empty when disabled
facial_data_dfs = {}

if not FACIAL_DATA_ENABLED:
    print("\nFacial data augmentation disabled (FACIAL_DATA_ENABLED=False)")
    print("Using DAiSEE dataset only")
else:
    print("\n" + "="*80)
    print("LOADING FACIAL DATA FOR CLASS IMBALANCE AUGMENTATION")
    print("="*80)

    facial_data_path = Path(FACIAL_DATA_PATH)

    # Candidate CSV file names per DAiSEE category, tried in order inside
    # FACIAL_DATA_PATH; the first one that exists is used. Each CSV is read
    # without a header row and every column is kept as image data.
    emotion_mapping = {
        'Boredom': ['boring.csv', 'boredom.csv'],
        'Engagement': ['happiness.csv', 'happy.csv', 'engaged.csv'],
        'Confusion': ['confused.csv', 'confusion.csv'],
        'Frustration': ['surprise.csv', 'frustrated.csv', 'frustration.csv']
    }

    for category in CATEGORIES:
        category_clean = category.strip()
        candidate_names = emotion_mapping.get(category_clean, [])

        # First candidate present on disk, or None when there is no match
        csv_found = next(
            (
                facial_data_path / name
                for name in candidate_names
                if (facial_data_path / name).exists()
            ),
            None,
        )

        if csv_found is None:
            print(f"\n‚ö† No facial data found for {category_clean}")
            print(f"  Tried: {candidate_names}")
            continue

        try:
            # Header-less CSV of raw pixel values
            df = pd.read_csv(csv_found, header=None)

            # Prepend three metadata columns ahead of the pixel columns:
            #   image_id - running row number, emotion - this category,
            #   label    - fixed intensity level 2 for every facial sample
            df.insert(0, 'image_id', range(len(df)))
            df.insert(1, 'emotion', category_clean)
            df.insert(2, 'label', 2)

            facial_data_dfs[category_clean] = df

            print(f"\n‚úì Loaded {category_clean} from {csv_found.name}")
            print(f"  Total samples: {len(df)}")
            print(f"  Image columns: {len(df.columns) - 3}")  # metadata columns excluded

        except Exception as e:
            # Report the failure and carry on with the remaining categories
            print(f"\n‚úó Error loading {category_clean}: {e}")
            import traceback
            traceback.print_exc()

    print("\n" + "="*80)
    print(f"Loaded facial data for {len(facial_data_dfs)}/{len(CATEGORIES)} categories")
    print("="*80)

### Video Processing Utilities

In [None]:
def find_video_path(clip_id, data_path):
    """
    Build the full path to a video file from its ClipID.

    Layout: <data_path>/<subject_id>/<clip_name>/<clip_id>, where clip_name is
    the ClipID without its file extension and subject_id is the first six
    characters of clip_name.

    The extension is removed with ``Path.stem`` rather than by replacing the
    literal '.avi', so clip IDs with another extension (e.g. '.mp4') resolve
    to the right folder as well.

    Args:
        clip_id: Video file name, e.g. '1100011002.avi'
        data_path: Root directory of the split (str or Path)

    Returns:
        pathlib.Path to the video file (existence is not checked here)
    """
    clip_name = Path(clip_id).stem
    subject_id = clip_name[:6]
    return Path(data_path) / subject_id / clip_name / clip_id


# Smoke check: resolve the first training clip and report whether the file is present
test_clip = train_df['ClipID'].iloc[0]
test_path = find_video_path(test_clip, TRAIN_DATA_PATH)
path_found = test_path.exists()
print(f"Test video: {test_clip}")
print(f"Path exists: {path_found}")

### Load Qwen2.5-VL Model for Embedding Extraction

**Note**: Only needed if `SKIP_EXTRACTION = False`

In [None]:
# The vision model is only needed when at least one extraction target is active
extraction_requested = (not SKIP_EXTRACTION) and (EXTRACT_DAISEE or EXTRACT_FACIAL)

if not extraction_requested:
    # Leave both handles defined (as None) so later cells can test for them
    processor = None
    vision_model = None
    if SKIP_EXTRACTION:
        print("Skipping model loading (SKIP_EXTRACTION=True)")
    else:
        print("Skipping model loading (no extraction targets enabled)")
else:
    print("Loading Qwen2.5-VL model...")
    processor = AutoProcessor.from_pretrained(MODEL_NAME)

    # Half precision and automatic device placement on GPU; float32 with no device map on CPU
    if torch.cuda.is_available():
        load_kwargs = {"torch_dtype": torch.float16, "device_map": "auto"}
    else:
        load_kwargs = {"torch_dtype": torch.float32, "device_map": None}

    vision_model = AutoModelForVision2Seq.from_pretrained(MODEL_NAME, **load_kwargs)
    vision_model.eval()
    print(f"Model loaded on {DEVICE}")

### Extract Embeddings Function

**Note**: Only needed if `SKIP_EXTRACTION = False`

In [None]:
if not SKIP_EXTRACTION:
    def extract_video_embeddings(video_path, processor, model, fps=1):
        """
        Extract embeddings for one video using the Qwen model.

        The video is wrapped in a single-turn chat message and passed to
        ``processor.apply_chat_template`` together with ``fps`` (the same
        approach as the inference notebook), then run through the model with
        ``output_hidden_states=True``.

        Args:
            video_path: Path to the video file
            processor: Qwen processor
            model: Qwen model
            fps: Frames per second for temporal sampling (same as inference notebook)

        Returns:
            numpy array holding the last hidden-state layer with the batch
            dimension squeezed out, i.e. shape (sequence_length, hidden_dim).
            The first axis runs over the positions of the processed input
            sequence (presumably prompt text tokens as well as video tokens),
            not over one row per sampled frame.
            Returns None if anything fails; the error and traceback are printed.
        """
        try:
            # Create a dummy message structure to use apply_chat_template for video processing
            # This ensures fps parameter is handled correctly like in inference notebook
            messages = [
                {
                    "role": "user",
                    "content": [
                        {"type": "video", "video": str(video_path)},
                        {"type": "text", "text": "Analyze this video."}
                    ]
                }
            ]
            
            # Process video with fps parameter (same as inference notebook)
            with torch.no_grad():
                inputs = processor.apply_chat_template(
                    messages,
                    fps=fps,
                    add_generation_prompt=True,
                    tokenize=True,
                    return_dict=True,
                    return_tensors="pt",
                )
                
                # Move every input tensor to the device the model lives on
                inputs = {k: v.to(model.device) for k, v in inputs.items()}
                
                # Forward pass through model to get hidden states
                outputs = model(**inputs, output_hidden_states=True)
                
                # Last hidden-state layer
                # Shape: (batch_size, sequence_length, hidden_dim)
                hidden_states = outputs.hidden_states[-1]
                
                # Remove batch dimension but keep the sequence structure
                # This gives us (sequence_length, hidden_dim) for temporal modeling
                embeddings = hidden_states.squeeze(0)
                
                # Convert to CPU numpy
                embeddings = embeddings.cpu().numpy()
            
            return embeddings
            
        except Exception as e:
            # Best effort: report the failure and let the caller skip this video
            print(f"Error extracting embeddings from {video_path}: {e}")
            import traceback
            traceback.print_exc()
            return None


    # Smoke test on the first training clip. Only meaningful when the model was
    # actually loaded; calling the extractor with processor=None would just print
    # a spurious traceback.
    if processor is None or vision_model is None:
        print("Skipping embedding extraction test (model not loaded)")
    elif test_path.exists():
        print("\nTesting embedding extraction...")
        test_embeddings = extract_video_embeddings(test_path, processor, vision_model, fps=FPS)
        if test_embeddings is not None:
            print(f"Embeddings shape: {test_embeddings.shape}")
            # Axis 0 is the sequence axis of the hidden states, not a frame count
            print(f"Sequence length: {test_embeddings.shape[0]}")
            print(f"Embedding dimension: {test_embeddings.shape[1]}")
        else:
            print("Failed to extract embeddings")
else:
    print("Skipping embedding extraction function (SKIP_EXTRACTION=True)")

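### Extract Facial Data Embeddings

**Note**: Extraction runs only if `FACIAL_DATA_ENABLED = True`, `SKIP_EXTRACTION = False` and `EXTRACT_FACIAL = True`; otherwise existing facial embedding maps are loaded.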

In [None]:
def extract_facial_embeddings(facial_data_dfs, processor, model, image_shape=(256, 256, 3)):
    """
    Extract one embedding per facial image from DataFrames of raw pixel rows.

    Each row is expected to carry three metadata columns (image_id, emotion,
    label) followed by the flattened pixel values of one image. Every image is
    written to a temporary JPEG, run through the model, and the last hidden
    state is averaged over the sequence axis to a single vector, which is
    saved as ``<FACIAL_EMBEDDINGS_DIR>/<category lower-cased>/<image_id>.npy``.
    A per-category ``embedding_map.pkl`` is written next to the embeddings.

    Rows with an unexpected pixel count, and rows whose processing raises, are
    reported and skipped.

    Args:
        facial_data_dfs: Dictionary mapping category -> DataFrame with pixel data
        processor: Qwen processor
        model: Qwen vision model
        image_shape: Shape each flattened pixel row is reshaped to before it is
            turned into an image; defaults to 256x256 RGB

    Returns:
        Dictionary mapping category -> {image_id -> embedding file path};
        an empty dictionary when there is no facial data.
    """
    if not facial_data_dfs:
        print("No facial data to process")
        return {}

    # Imported once here (after the early return) instead of once per image in the loop
    from PIL import Image

    expected_pixels = int(np.prod(image_shape))

    print("\n" + "="*80)
    print("EXTRACTING FACIAL DATA EMBEDDINGS")
    print("="*80)

    facial_embedding_maps = {}

    for category, df in facial_data_dfs.items():
        print(f"\nProcessing {category} ({len(df):,} images)...")

        category_dir = FACIAL_EMBEDDINGS_DIR / category.lower()
        category_dir.mkdir(parents=True, exist_ok=True)

        embedding_map = {}

        for idx, row in tqdm(df.iterrows(), total=len(df), desc=category):
            try:
                # Pixel columns start after the three metadata columns
                # (image_id, emotion, label)
                pixels = row.iloc[3:].values.astype(np.uint8)

                # Verify we have the right number of pixels
                if len(pixels) != expected_pixels:
                    print(f"\nWarning: Image {row['image_id']} has {len(pixels)} pixels, expected {expected_pixels}")
                    continue

                # Rebuild the image from the flat pixel row
                image = pixels.reshape(image_shape)
                pil_image = Image.fromarray(image)

                # The image is handed to the processor by file path, so it is
                # written to a temporary file first
                temp_image_path = FACIAL_EMBEDDINGS_DIR / f"temp_{idx}.jpg"

                try:
                    # Saving happens inside the try so a partially written file
                    # is removed by the finally block as well
                    pil_image.save(temp_image_path)

                    # Process with Qwen using message-based API (same as video extraction)
                    messages = [
                        {
                            "role": "user",
                            "content": [
                                {"type": "image", "image": str(temp_image_path)},
                                {"type": "text", "text": "Analyze this image."}
                            ]
                        }
                    ]

                    # Extract embeddings
                    with torch.no_grad():
                        inputs = processor.apply_chat_template(
                            messages,
                            add_generation_prompt=True,
                            tokenize=True,
                            return_dict=True,
                            return_tensors="pt",
                        )

                        # Move to device
                        inputs = {k: v.to(model.device) for k, v in inputs.items()}

                        # Forward pass
                        outputs = model(**inputs, output_hidden_states=True)
                        hidden_states = outputs.hidden_states[-1]

                        # Average over the sequence axis (dim=1), then drop the batch axis
                        # Shape: (batch, seq_len, hidden_dim) -> (hidden_dim,)
                        embedding = hidden_states.mean(dim=1).squeeze(0)
                        embedding = embedding.cpu().numpy()
                finally:
                    # Clean up temporary image
                    if temp_image_path.exists():
                        temp_image_path.unlink()

                # Save embedding
                image_id = row['image_id']
                save_path = category_dir / f"{image_id}.npy"
                np.save(save_path, embedding)

                embedding_map[image_id] = str(save_path)

                # Clear cache periodically
                if (idx + 1) % 50 == 0:
                    torch.cuda.empty_cache()

            except Exception as e:
                # Best effort: report and move on to the next image
                print(f"\nError processing {category} image {idx}: {e}")
                continue

        facial_embedding_maps[category] = embedding_map
        print(f"‚úì Extracted {len(embedding_map):,} embeddings for {category}")

        # Save embedding map
        map_path = category_dir / "embedding_map.pkl"
        with open(map_path, 'wb') as f:
            pickle.dump(embedding_map, f)
        print(f"  Saved to {category_dir}")

    print("\n" + "="*80)
    print(f"Facial embeddings extraction complete")
    print(f"Total categories: {len(facial_embedding_maps)}")
    total_embeddings = sum(len(m) for m in facial_embedding_maps.values())
    print(f"Total embeddings: {total_embeddings:,}")
    print("="*80)

    return facial_embedding_maps


# Build `facial_embedding_maps` (category -> {image_id -> embedding path}) by either
# extracting facial embeddings now or loading the maps written by an earlier run.
facial_embedding_maps = {}

if not FACIAL_DATA_ENABLED:
    print("\nSkipping facial embeddings (FACIAL_DATA_ENABLED=False)")
elif SKIP_EXTRACTION or not EXTRACT_FACIAL:
    # Load existing facial embeddings
    print("\n" + "="*80)
    print("LOADING EXISTING FACIAL EMBEDDINGS")
    print("="*80)
    
    # Only categories whose raw facial data was loaded are looked up
    for category in facial_data_dfs.keys():
        category_dir = FACIAL_EMBEDDINGS_DIR / category.lower()
        map_path = category_dir / "embedding_map.pkl"
        
        if map_path.exists():
            try:
                # NOTE: pickle.load can execute arbitrary code; only point
                # FACIAL_EMBEDDINGS_DIR at maps you produced yourself.
                with open(map_path, 'rb') as f:
                    embedding_map = pickle.load(f)
                facial_embedding_maps[category] = embedding_map
                print(f"‚úì Loaded {len(embedding_map):,} embeddings for {category}")
            except Exception as e:
                print(f"‚ö† Error loading {category} embeddings: {e}")
        else:
            print(f"‚ö† No embedding map found for {category} at {map_path}")
    
    if facial_embedding_maps:
        total = sum(len(m) for m in facial_embedding_maps.values())
        print(f"\nTotal facial embeddings loaded: {total:,}")
    print("="*80)
elif facial_data_dfs:
    # Extraction requested and raw facial data is available
    if processor is not None and vision_model is not None:
        facial_embedding_maps = extract_facial_embeddings(facial_data_dfs, processor, vision_model)
    else:
        print("\n‚ö† WARNING: Cannot extract facial embeddings - model not loaded")
        print("  Set SKIP_EXTRACTION=False and EXTRACT_FACIAL=True to extract embeddings")
else:
    # Extraction requested but no facial CSV could be loaded
    print("\nSkipping facial embeddings (no data loaded)")
### Extract and Save DAiSEE Video Embeddings

**Note**: Only needed if `SKIP_EXTRACTION = False` and `EXTRACT_DAISEE = True`

In [None]:
def extract_and_save_embeddings(df, data_path, split_name):
    """
    Extract embeddings for all videos in a dataset split and save to disk.

    Resumes from existing embeddings if available: entries of the existing
    embedding map are copied into the output directory and not re-extracted.
    A stored path that no longer exists is also looked up by file name inside
    the existing split directory (and reused if the file is already in the
    output directory), so a map written during an earlier session stays usable.

    Args:
        df: DataFrame with video labels (must contain a 'ClipID' column)
        data_path: Path to video data
        split_name: Sub-directory name of the split, e.g. 'train',
            'validation' or 'test'

    Returns:
        Dictionary mapping ClipID to embedding filepath
    """
    existing_split_dir = EXISTING_EMBEDDINGS_DIR / split_name
    output_split_dir = EMBEDDINGS_DIR / split_name
    output_split_dir.mkdir(parents=True, exist_ok=True)

    embedding_map = {}
    failed_videos = []
    skipped_count = 0

    # Load existing embedding map if available
    existing_map_path = existing_split_dir / "embedding_map.pkl"
    if existing_map_path.exists():
        try:
            # NOTE: pickle.load can execute arbitrary code; only use maps you produced yourself.
            with open(existing_map_path, 'rb') as f:
                existing_map = pickle.load(f)
            print(f"‚úì Loaded existing embedding map with {len(existing_map)} entries")

            # Copy existing embeddings to output directory and update paths
            for clip_id, old_path in existing_map.items():
                old_path = Path(old_path)
                new_path = output_split_dir / old_path.name

                if not new_path.exists():
                    # Prefer the stored path; otherwise look for the same file
                    # name inside the existing split directory
                    if old_path.exists():
                        source_path = old_path
                    else:
                        candidate = existing_split_dir / old_path.name
                        source_path = candidate if candidate.exists() else None

                    if source_path is None:
                        # Not available anywhere: leave it out so it is re-extracted below
                        continue
                    shutil.copy2(source_path, new_path)

                embedding_map[clip_id] = str(new_path)
                skipped_count += 1

            print(f"‚úì Copied {skipped_count} existing embeddings to working directory")
        except Exception as e:
            print(f"‚ö† Error loading existing embeddings: {e}")
            print(f"  Will extract all embeddings from scratch")

    print(f"\nExtracting embeddings for {split_name} set ({len(df)} videos)...")
    if skipped_count > 0:
        print(f"Resuming from {skipped_count} existing embeddings")

    extracted_count = 0

    for idx, row in tqdm(df.iterrows(), total=len(df)):
        clip_id = row['ClipID']

        # Skip if already in embedding map
        if clip_id in embedding_map:
            continue

        video_path = find_video_path(clip_id, data_path)

        if not video_path.exists():
            failed_videos.append(clip_id)
            continue

        # Extract embeddings directly from video (using fps parameter like inference notebook)
        try:
            embeddings = extract_video_embeddings(video_path, processor, vision_model, fps=FPS)

            if embeddings is None:
                failed_videos.append(clip_id)
                continue

            # Save embeddings
            save_path = output_split_dir / f"{clip_id.replace('.avi', '')}.npy"
            np.save(save_path, embeddings)

            embedding_map[clip_id] = str(save_path)
            extracted_count += 1

            # Clear memory after every 50 successful extractions
            if extracted_count % 50 == 0:
                torch.cuda.empty_cache()

        except Exception as e:
            print(f"\nError processing {clip_id}: {e}")
            failed_videos.append(clip_id)

    total_embeddings = len(embedding_map)
    print(f"\n‚úì Total embeddings: {total_embeddings}/{len(df)} videos")
    print(f"  - Existing: {skipped_count}")
    print(f"  - Newly extracted: {extracted_count}")
    if failed_videos:
        print(f"  - Failed: {len(failed_videos)} videos")

    # Save embedding map
    map_path = output_split_dir / "embedding_map.pkl"
    with open(map_path, 'wb') as f:
        pickle.dump(embedding_map, f)
    print(f"‚úì Embedding map saved to {map_path}")

    return embedding_map

In [None]:
if SKIP_EXTRACTION or not EXTRACT_DAISEE:
    # Skip extraction and copy existing embeddings to working directory
    print("="*80)
    if SKIP_EXTRACTION:
        print("SKIPPING EXTRACTION - USING EXISTING EMBEDDINGS")
    else:
        print("SKIPPING DAISEE EXTRACTION - USING EXISTING EMBEDDINGS")
    print("="*80)
    
    # Print at most this many "embedding not found" warnings per split to avoid spam
    MAX_MISSING_WARNINGS = 5
    
    # Copy existing embeddings to working directory, one split at a time
    for split in ('train', 'validation', 'test'):
        existing_split_dir = EXISTING_EMBEDDINGS_DIR / split
        output_split_dir = EMBEDDINGS_DIR / split
        output_split_dir.mkdir(parents=True, exist_ok=True)
        
        if not existing_split_dir.exists():
            print(f"\n‚úó {split}: Existing embeddings not found at {existing_split_dir}")
            continue
        
        # Load existing embedding map
        existing_map_path = existing_split_dir / "embedding_map.pkl"
        if not existing_map_path.exists():
            print(f"\n‚úó {split}: No embedding map found at {existing_map_path}")
            continue
        
        # NOTE: pickle.load can execute arbitrary code; only use maps you produced yourself.
        with open(existing_map_path, 'rb') as f:
            existing_map = pickle.load(f)
        
        print(f"\n{split.upper()}: Processing {len(existing_map)} embeddings...")
        
        # Copy embeddings and create new map
        new_map = {}
        copied_count = 0
        skipped_count = 0
        missing_count = 0
        
        for clip_id, old_path in tqdm(existing_map.items(), desc=f"Copying {split}"):
            old_path = Path(old_path)
            new_path = output_split_dir / old_path.name
            
            # Check if file already exists in working directory
            if new_path.exists():
                new_map[clip_id] = str(new_path)
                skipped_count += 1
                continue
            
            # Locate the source file: first the path stored in the map (might be the
            # input or a previous working dir), then the same file name inside the
            # input split directory
            if old_path.exists():
                source_path = old_path
            else:
                potential_source = existing_split_dir / old_path.name
                source_path = potential_source if potential_source.exists() else None
            
            if source_path is not None:
                shutil.copy2(source_path, new_path)
                new_map[clip_id] = str(new_path)
                copied_count += 1
            else:
                # File not found anywhere - skip this embedding
                missing_count += 1
                if missing_count <= MAX_MISSING_WARNINGS:
                    print(f"\n  ‚ö† Warning: Embedding not found for {clip_id}")
                    print(f"     Tried: {old_path} and {existing_split_dir / old_path.name}")
        
        # Save new embedding map
        new_map_path = output_split_dir / "embedding_map.pkl"
        with open(new_map_path, 'wb') as f:
            pickle.dump(new_map, f)
        
        print(f"  ‚úì Total embeddings in map: {len(new_map)}")
        print(f"  ‚úì Newly copied: {copied_count}")
        print(f"  ‚úì Already existed: {skipped_count}")
        if missing_count:
            print(f"  - Missing (skipped): {missing_count}")
        print(f"  ‚úì Saved map to {new_map_path}")
    
    print("\n" + "="*80)
else:
    # Reached only when SKIP_EXTRACTION is False and EXTRACT_DAISEE is True:
    # run DAiSEE extraction

    print("DAISEE EMBEDDING EXTRACTION")
    # Run extraction
    print("="*80)
    print("EMBEDDING EXTRACTION")
    print("="*80)

    train_embedding_map = extract_and_save_embeddings(train_df, TRAIN_DATA_PATH, 'train')
    val_embedding_map = extract_and_save_embeddings(val_df, VAL_DATA_PATH, 'validation')
    # NOTE(review): the 'test' split is copied in the branch above but is not
    # extracted here - confirm whether test embeddings come from a separate run.
    print("\nDAISEE EXTRACTION COMPLETE")
    print("\n" + "="*80)
    print("EXTRACTION COMPLETE")
    print("="*80)

### Free GPU Memory (optional - can restart kernel if needed)

In [None]:
# Release the large vision model once extraction is finished.
# The names are rebound to None instead of deleted, so this cell can be re-run
# (and later cells can still test `vision_model is None`) without a NameError.
if not SKIP_EXTRACTION and vision_model is not None:
    import gc

    vision_model = None
    processor = None
    gc.collect()              # drop the now-unreferenced model objects
    torch.cuda.empty_cache()  # hand cached GPU blocks back to the driver
    print("Vision model removed from memory")
else:
    print("No vision model to remove (was never loaded)")

## Stage 2: Classifier Training

### Dataset Class for Embeddings

In [None]:
class EmbeddingDataset(Dataset):
    """
    Dataset for loading pre-computed embeddings.
    Supports both DAiSEE video embeddings and facial data image embeddings.

    Samples are ordered DAiSEE first, then facial data: indices below
    ``len(valid_indices)`` address DAiSEE rows, the remaining indices address
    facial rows.
    """
    def __init__(self, df, embedding_map, category, facial_data_df=None, facial_embedding_map=None):
        """
        Args:
            df: DataFrame with labels (DAiSEE)
            embedding_map: Dictionary mapping ClipID to embedding filepath (DAiSEE)
            category: Category to predict (e.g., 'Boredom'). Used verbatim as the
                DAiSEE label column; compared whitespace-insensitively against the
                facial data 'emotion' column.
            facial_data_df: Optional DataFrame with facial pixel data (columns: image_id, emotion, label, ...)
            facial_embedding_map: Optional dictionary mapping facial image_id to embedding paths
        """
        self.df = df
        self.embedding_map = embedding_map
        self.category = category
        self.facial_data_df = facial_data_df
        self.facial_embedding_map = facial_embedding_map

        # Filter DAiSEE to only include videos with embeddings.
        # Iterating the ClipID column directly avoids materialising a full row
        # Series per sample (df.iloc[i]), which is slow for large frames.
        clip_ids = df['ClipID'] if len(df) > 0 else []
        self.valid_indices = [
            i for i, clip_id in enumerate(clip_ids)
            if clip_id in embedding_map
        ]

        # Debug: Check if no valid samples found
        if len(self.valid_indices) == 0 and len(df) > 0:
            print(f"  ‚ö† WARNING: No embeddings found for any videos!")
            print(f"     DataFrame has {len(df)} rows")
            print(f"     Embedding map has {len(embedding_map)} entries")
            if len(df) > 0 and len(embedding_map) > 0:
                print(f"     Sample ClipID from DataFrame: {df.iloc[0]['ClipID']}")
                print(f"     Sample key from embedding map: {list(embedding_map.keys())[0]}")

        # Add facial data indices if available
        self.facial_indices = []
        if facial_data_df is not None and facial_embedding_map is not None:
            # The label column name may carry stray whitespace, while callers
            # select facial data by the stripped name. Compare stripped values
            # so facial samples are not silently dropped.
            target_emotion = category.strip()

            if 'emotion' in facial_data_df.columns:
                emotions = facial_data_df['emotion']
                image_ids = facial_data_df['image_id']
                for i, (emotion, img_id) in enumerate(zip(emotions, image_ids)):
                    if isinstance(emotion, str):
                        emotion = emotion.strip()
                    # Keep only rows of this category that have an embedding
                    if emotion == target_emotion and img_id in facial_embedding_map:
                        self.facial_indices.append(i)

        total_samples = len(self.valid_indices) + len(self.facial_indices)
        print(f"Dataset: {total_samples} total samples for {category}")
        print(f"  - DAiSEE: {len(self.valid_indices)} samples")
        if self.facial_indices:
            print(f"  - Facial Data: {len(self.facial_indices)} samples")

    def __len__(self):
        """Total number of usable samples (DAiSEE + facial)."""
        return len(self.valid_indices) + len(self.facial_indices)

    def __getitem__(self, idx):
        """
        Load one sample.

        Returns:
            (embedding, label): float32 tensor read from the mapped .npy file
            and a 0-dim int64 label tensor.
        """
        num_daisee = len(self.valid_indices)

        if idx < num_daisee:
            # DAiSEE sample: label comes from the category column
            row_pos = self.valid_indices[idx]
            clip_id = self.df['ClipID'].iloc[row_pos]
            embedding_path = self.embedding_map[clip_id]
            label = int(self.df[self.category].iloc[row_pos])
        else:
            # Facial data sample: label was assigned during loading ('label' column)
            row_pos = self.facial_indices[idx - num_daisee]
            img_id = self.facial_data_df['image_id'].iloc[row_pos]
            embedding_path = self.facial_embedding_map[img_id]
            label = int(self.facial_data_df['label'].iloc[row_pos])

        # Convert to tensors
        embedding = torch.as_tensor(np.load(embedding_path), dtype=torch.float32)
        label = torch.tensor(label, dtype=torch.long)

        return embedding, label

### Classifier Architectures

#### 1. Simple MLP Classifier

In [None]:
class MLPClassifier(nn.Module):
    """
    Feed-forward classification head over a single embedding vector.

    Architecture: Linear -> ReLU -> Dropout -> Linear (half width) -> ReLU ->
    Dropout -> Linear (num_classes). Intended for pooled (1D) embeddings from
    videos or single frames.
    """
    def __init__(self, input_dim, hidden_dim=HIDDEN_DIM, num_classes=4, dropout=DROPOUT):
        super().__init__()

        narrow_dim = hidden_dim // 2

        # Layers are instantiated in forward order and stored under the
        # `classifier` attribute so positions 0/3/6 hold the Linear layers.
        stages = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, narrow_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(narrow_dim, num_classes),
        ]
        self.classifier = nn.Sequential(*stages)

    def forward(self, x):
        """Return class logits for `x` (the stack is applied to `x` as-is)."""
        # Embeddings arrive already averaged over video frames, so there is
        # no temporal dimension to reduce here.
        return self.classifier(x)

#### 2. Transformer Encoder Classifier

In [None]:
class TransformerClassifier(nn.Module):
    """
    Compatibility stand-in for a temporal (multi-frame) transformer classifier.

    With pooled embeddings (one vector per video) there is no sequence to
    attend over, so this model is a plain feed-forward stack just like the MLP.
    `num_heads` and `num_layers` are accepted for interface compatibility but
    are not used.

    For true temporal modeling, use unpooled embeddings with shape (num_frames, embedding_dim).
    """
    def __init__(self, input_dim, num_heads=8, num_layers=2, hidden_dim=512, num_classes=4, dropout=0.3):
        super().__init__()

        narrow_dim = hidden_dim // 2

        # Pooled embeddings reduce this to an MLP; layers are created in
        # forward order and kept under the `classifier` attribute.
        stages = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, narrow_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(narrow_dim, num_classes),
        ]
        self.classifier = nn.Sequential(*stages)

    def forward(self, x):
        """Return class logits for `x` (the stack is applied to `x` as-is)."""
        # Temporal information is already pooled away, so no attention is applied.
        return self.classifier(x)

### Training Functions

In [None]:
def train_epoch(model, dataloader, criterion, optimizer, device):
    """
    Run a single optimisation pass over `dataloader`.

    Args:
        model: Network to train (switched to train mode here).
        dataloader: Iterable of (embeddings, labels) batches supporting len().
        criterion: Loss callable applied to (logits, labels).
        optimizer: Optimizer stepping the model parameters.
        device: Device the batches are moved to.

    Returns:
        (mean_loss, accuracy_pct): loss averaged over the number of batches
        and the percentage of correctly classified samples.
    """
    model.train()
    running_loss = 0
    num_correct = 0
    num_seen = 0

    for batch_embeddings, batch_labels in dataloader:
        batch_embeddings = batch_embeddings.to(device)
        batch_labels = batch_labels.to(device)

        # Forward pass (gradients from the previous step are cleared first)
        optimizer.zero_grad()
        logits = model(batch_embeddings)
        batch_loss = criterion(logits, batch_labels)

        # Backward pass and parameter update
        batch_loss.backward()
        optimizer.step()

        # Running statistics
        running_loss += batch_loss.item()
        predicted = logits.max(1)[1]
        num_seen += batch_labels.size(0)
        num_correct += (predicted == batch_labels).sum().item()

    mean_loss = running_loss / len(dataloader)
    accuracy_pct = 100. * num_correct / num_seen
    return mean_loss, accuracy_pct


def evaluate(model, dataloader, criterion, device):
    """
    Score `model` on a validation/test dataloader without updating weights.

    Args:
        model: Network to evaluate (switched to eval mode here).
        dataloader: Iterable of (embeddings, labels) batches supporting len().
        criterion: Loss callable applied to (logits, labels).
        device: Device the batches are moved to.

    Returns:
        Dict with 'loss' (averaged over batches), 'accuracy', macro-averaged
        'precision' / 'recall' / 'f1', and the raw 'predictions' and 'labels'
        lists collected over the whole dataloader.
    """
    model.eval()
    running_loss = 0
    predictions = []
    targets = []

    with torch.no_grad():
        for batch_embeddings, batch_labels in dataloader:
            batch_embeddings = batch_embeddings.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_embeddings)
            running_loss += criterion(logits, batch_labels).item()

            batch_predictions = logits.max(1)[1]
            predictions.extend(batch_predictions.cpu().numpy())
            targets.extend(batch_labels.cpu().numpy())

    # Aggregate metrics; zero_division=0 scores classes with no predictions as 0
    accuracy = accuracy_score(targets, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(
        targets, predictions, average='macro', zero_division=0
    )

    results = {
        'loss': running_loss / len(dataloader),
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'predictions': predictions,
        'labels': targets,
    }
    return results

### Train Classifiers for Each Category

In [None]:
def train_classifier(category, model_type='mlp'):
    """
    Train a classifier for a specific category and evaluate it on the test set.

    The model with the best validation macro-F1 is checkpointed to
    MODEL_DIR / "{model_type}_{category}_best.pth", reloaded after training and
    scored on the test split.

    Args:
        category: Emotion category to predict (DAiSEE label column name)
        model_type: 'mlp' or 'transformer'

    Returns:
        Dict of test metrics as produced by `evaluate`.

    Raises:
        FileNotFoundError: If any of the train/validation/test embedding maps
            is missing.
    """
    print(f"\n{'='*80}")
    print(f"Training {model_type.upper()} classifier for {category}")
    print(f"{'='*80}")
    
    # Load DAiSEE embedding maps
    train_map_path = EMBEDDINGS_DIR / 'train' / 'embedding_map.pkl'
    val_map_path = EMBEDDINGS_DIR / 'validation' / 'embedding_map.pkl'
    test_map_path = EMBEDDINGS_DIR / 'test' / 'embedding_map.pkl'
    
    # Validate embedding maps exist
    if not train_map_path.exists():
        raise FileNotFoundError(f"Training embedding map not found: {train_map_path}")
    if not val_map_path.exists():
        raise FileNotFoundError(f"Validation embedding map not found: {val_map_path}")
    if not test_map_path.exists():
        raise FileNotFoundError(f"Test embedding map not found: {test_map_path}")
    
    with open(train_map_path, 'rb') as f:
        train_map = pickle.load(f)
    with open(val_map_path, 'rb') as f:
        val_map = pickle.load(f)
    with open(test_map_path, 'rb') as f:
        test_map = pickle.load(f)
    
    print(f"Loaded embedding maps:")
    print(f"  Train: {len(train_map)} embeddings")
    print(f"  Val: {len(val_map)} embeddings")
    print(f"  Test: {len(test_map)} embeddings")
    
    # Validate that embedding files actually exist
    missing_train = [k for k, v in train_map.items() if not Path(v).exists()]
    if missing_train:
        print(f"  ‚ö† WARNING: {len(missing_train)} train embeddings not found on disk!")
        # Always show a few examples; previously nothing was listed when more
        # than five files were missing, which is when examples matter most.
        for k in missing_train[:5]:
            print(f"     Missing: {k} -> {train_map[k]}")
    
    # Load facial data embedding maps if available
    facial_train_map = None
    facial_train_df = None
    
    category_clean = category.strip()
    if FACIAL_DATA_ENABLED and category_clean in facial_data_dfs:
        if category_clean in facial_embedding_maps:
            facial_train_map = facial_embedding_maps[category_clean]
            facial_train_df = facial_data_dfs[category_clean]
            print(f"‚úì Using facial data augmentation for {category_clean}")
            print(f"  Facial samples: {len(facial_train_map):,}")
        else:
            print(f"‚ö† No facial embeddings found for {category_clean}")
    
    # Create datasets (facial augmentation is applied to the training split only)
    train_dataset = EmbeddingDataset(
        train_df, train_map, category,
        facial_data_df=facial_train_df,
        facial_embedding_map=facial_train_map
    )
    val_dataset = EmbeddingDataset(val_df, val_map, category)
    test_dataset = EmbeddingDataset(test_df, test_map, category)
    
    # Create dataloaders
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)
    
    # Get embedding dimension from first sample
    sample_embedding, _ = train_dataset[0]
    
    # Check if embeddings are 1D (pooled) or 2D (temporal)
    if sample_embedding.dim() == 1:
        embedding_dim = sample_embedding.shape[0]
        print(f"Using pooled embeddings (1D)")
        print(f"Embedding dimension: {embedding_dim}")
    else:
        embedding_dim = sample_embedding.shape[1]
        print(f"Using temporal embeddings (2D)")
        print(f"Shape: {sample_embedding.shape}")
        print(f"Embedding dimension: {embedding_dim}")
    
    # Create model
    if model_type == 'mlp':
        model = MLPClassifier(input_dim=embedding_dim, num_classes=NUM_CLASSES)
    else:
        model = TransformerClassifier(input_dim=embedding_dim, num_classes=NUM_CLASSES)
    
    model = model.to(DEVICE)
    print(f"Model parameters: {sum(p.numel() for p in model.parameters())/1e6:.2f}M")
    
    # Initialize WandB run
    if WANDB_ENABLED:
        wandb.init(
            project=WANDB_PROJECT,
            entity=WANDB_ENTITY,
            name=f"{model_type}_{category_clean}_{FACIAL_DATA_ENABLED}_{HIDDEN_DIM}_{DROPOUT}",
            config={
                "category": category_clean,
                "model_type": model_type,
                "batch_size": BATCH_SIZE,
                "learning_rate": LEARNING_RATE,
                "num_epochs": NUM_EPOCHS,
                "embedding_dim": embedding_dim,
                "num_classes": NUM_CLASSES,
                "facial_data_enabled": FACIAL_DATA_ENABLED,
                "hidden_dim": HIDDEN_DIM,
                "dropout": DROPOUT,
                "train_samples": len(train_dataset),
                "val_samples": len(val_dataset),
                "test_samples": len(test_dataset),
            },
            reinit=True
        )
        wandb.watch(model, log="all", log_freq=10)
    
    # Non-zero until the run completes, so an interrupted run is marked as
    # failed when it is closed in the `finally` block below.
    wandb_exit_code = 1
    try:
        # Loss and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
        # mode='max' because the scheduler is stepped with validation F1
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=5, factor=0.5)
        
        # Training loop
        # Start below any reachable F1 so the first epoch always writes a
        # checkpoint. With an initial value of 0 and a strict '>' comparison,
        # a run whose validation F1 stayed at 0 never saved, and the reload
        # below either failed or picked up a stale file from an earlier run.
        best_val_f1 = float('-inf')
        best_epoch = 0
        save_path = MODEL_DIR / f"{model_type}_{category_clean}_best.pth"
        
        print(f"\nTraining for {NUM_EPOCHS} epochs...")
        for epoch in range(NUM_EPOCHS):
            train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, DEVICE)
            val_metrics = evaluate(model, val_loader, criterion, DEVICE)
            
            scheduler.step(val_metrics['f1'])
            
            # Log metrics to WandB after every epoch
            if WANDB_ENABLED:
                wandb.log({
                    "epoch": epoch + 1,
                    "train_loss": train_loss,
                    "train_accuracy": train_acc,
                    "val_loss": val_metrics['loss'],
                    "val_accuracy": val_metrics['accuracy'] * 100,
                    "val_f1": val_metrics['f1'],
                    "val_precision": val_metrics['precision'],
                    "val_recall": val_metrics['recall'],
                    "learning_rate": optimizer.param_groups[0]['lr'],
                })
            
            # Print progress every 10 epochs
            if (epoch + 1) % 10 == 0 or epoch == 0:
                print(f"Epoch {epoch+1}/{NUM_EPOCHS}:")
                print(f"  Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
                print(f"  Val Loss: {val_metrics['loss']:.4f} | Val Acc: {val_metrics['accuracy']*100:.2f}%")
                print(f"  Val F1: {val_metrics['f1']:.4f} | Val Precision: {val_metrics['precision']:.4f}")
            
            # Save best model
            if val_metrics['f1'] > best_val_f1:
                best_val_f1 = val_metrics['f1']
                best_epoch = epoch + 1
                
                torch.save({
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'val_f1': best_val_f1,
                }, save_path)
        
        print(f"\nBest model from epoch {best_epoch} with Val F1: {best_val_f1:.4f}")
        
        # Load best model and evaluate on test set
        checkpoint = torch.load(save_path, map_location='cpu', weights_only=False)
        model.load_state_dict(checkpoint['model_state_dict'])
        
        test_metrics = evaluate(model, test_loader, criterion, DEVICE)
        
        print(f"\nTest Set Results:")
        print(f"  Accuracy: {test_metrics['accuracy']*100:.2f}%")
        print(f"  Precision: {test_metrics['precision']:.4f}")
        print(f"  Recall: {test_metrics['recall']:.4f}")
        print(f"  F1 Score: {test_metrics['f1']:.4f}")
        
        # Record the final test metrics alongside the per-epoch curves
        if WANDB_ENABLED:
            wandb.log({
                "test_loss": test_metrics['loss'],
                "test_accuracy": test_metrics['accuracy'] * 100,
                "test_f1": test_metrics['f1'],
                "test_precision": test_metrics['precision'],
                "test_recall": test_metrics['recall'],
            })
        
        # Confusion matrix (one row/column per class, even if absent from the test set)
        cm = confusion_matrix(
            test_metrics['labels'], test_metrics['predictions'],
            labels=list(range(NUM_CLASSES))
        )
        print(f"\nConfusion Matrix:")
        print(cm)
        
        wandb_exit_code = 0
    finally:
        # Close the run explicitly so the last run of a notebook session does
        # not stay open, and failed runs are not left dangling.
        if WANDB_ENABLED:
            wandb.finish(exit_code=wandb_exit_code)
    
    return test_metrics

### Train MLP Classifiers

In [None]:
# Test-set metrics of the MLP classifier, keyed by category
mlp_results = {}

for category in CATEGORIES:
    mlp_results[category] = train_classifier(category, model_type='mlp')
    # Release cached GPU memory before the next category is trained
    torch.cuda.empty_cache()

### Train Transformer Classifiers

In [None]:
if TRAIN_TRANSFORMER:
    # Test-set metrics of the transformer-style classifier, keyed by category
    transformer_results = {}

    for category in CATEGORIES:
        transformer_results[category] = train_classifier(category, model_type='transformer')
        # Release cached GPU memory before the next category is trained
        torch.cuda.empty_cache()

## Results Summary

In [None]:
# Create summary comparison
# `transformer_results` is only created when TRAIN_TRANSFORMER is enabled.
# Fall back to an empty mapping so this cell also runs for MLP-only
# experiments instead of failing with a NameError.
available_transformer_results = transformer_results if TRAIN_TRANSFORMER else {}

summary_data = []

for category in CATEGORIES:
    mlp_metrics = mlp_results[category]
    trans_metrics = available_transformer_results.get(category)
    
    # Columns are kept even without transformer results so the CSV schema is stable
    summary_data.append({
        'Category': category.strip(),
        'MLP Accuracy': f"{mlp_metrics['accuracy']*100:.2f}%",
        'MLP F1': f"{mlp_metrics['f1']:.4f}",
        'Transformer Accuracy': f"{trans_metrics['accuracy']*100:.2f}%" if trans_metrics else "N/A",
        'Transformer F1': f"{trans_metrics['f1']:.4f}" if trans_metrics else "N/A"
    })

summary_df = pd.DataFrame(summary_data)

print("\n" + "="*100)
print("FINAL RESULTS COMPARISON")
print("="*100)
print(summary_df.to_string(index=False))
print("="*100)

# Save summary
summary_df.to_csv(MODEL_DIR / 'results_summary.csv', index=False)
print(f"\n‚úì Results saved to {MODEL_DIR / 'results_summary.csv'}")

## Key Findings

This two-stage approach offers several advantages:

1. **Efficiency**: Extract embeddings once, train multiple classifiers
2. **Speed**: Small classifiers train in seconds/minutes vs hours
3. **Experimentation**: Easy to try different architectures
4. **Memory**: Lower memory requirements during training

**Next Steps:**
- Try different pooling strategies (attention-based, LSTM)
- Experiment with class weights for imbalanced data
- Fine-tune the vision model end-to-end
- Ensemble multiple classifiers

## Export Models for Deployment

Package trained classifiers into a zip file for easy deployment to the server.

In [None]:
import shutil
import zipfile

# Paths
EXPORT_DIR = Path("/kaggle/working/export")  # temporary staging directory
OUTPUT_ZIP = Path("/kaggle/working/deployment_models.zip")

def export_models_for_deployment():
    """
    Export trained classifiers for deployment.

    Stages every readable MLP checkpoint found in MODEL_DIR (one per entry in
    CATEGORIES) plus a README into EXPORT_DIR, zips the staging directory to
    OUTPUT_ZIP and removes the staging directory again.

    Returns:
        OUTPUT_ZIP on success, or None when MODEL_DIR does not exist.
    """
    
    if not MODEL_DIR.exists():
        print(f"Error: Models directory not found at {MODEL_DIR}")
        return
    
    # Create temporary export directory. Start from a clean slate so files
    # left behind by an interrupted export are not packaged by accident.
    if EXPORT_DIR.exists():
        shutil.rmtree(EXPORT_DIR)
    classifiers_dir = EXPORT_DIR / "classifiers"
    classifiers_dir.mkdir(parents=True, exist_ok=True)
    
    try:
        print("\n" + "="*80)
        print("EXPORTING MODELS FOR DEPLOYMENT")
        print("="*80)
        
        exported_count = 0
        
        for category in CATEGORIES:
            category_clean = category.strip()
            
            # Look for MLP classifier (best performing)
            checkpoint_path = MODEL_DIR / f"mlp_{category_clean}_best.pth"
            
            if not checkpoint_path.exists():
                print(f"\n‚úó Not found: {category_clean}")
                continue
            
            # Validate the checkpoint before staging it, so an unreadable
            # file is never shipped in the archive.
            try:
                checkpoint = torch.load(checkpoint_path, map_location='cpu', weights_only=False)
            except Exception as e:
                print(f"Error loading checkpoint: {e}")
                continue
            
            # Copy to export directory
            dest_path = classifiers_dir / f"mlp_{category_clean}_best.pth"
            shutil.copy2(checkpoint_path, dest_path)
            
            val_f1 = checkpoint.get('val_f1', 'N/A')
            epoch = checkpoint.get('epoch', 'N/A')
            # The conditional must live outside the replacement field: inside
            # it, everything after ':' is taken as the format spec and
            # formatting raises ValueError.
            val_f1_text = f"{val_f1:.4f}" if isinstance(val_f1, (int, float)) else val_f1
            
            print(f"\n‚úì Exported {category_clean}:")
            print(f"  - Epoch: {epoch}")
            print(f"  - Val F1: {val_f1_text}")
            print(f"  - Size: {dest_path.stat().st_size / 1024:.1f} KB")
            
            exported_count += 1
        
        print(f"\n{'='*80}")
        print(f"Exported {exported_count}/{len(CATEGORIES)} classifiers")
        print(f"{'='*80}")
        
        # Create README
        readme_path = EXPORT_DIR / "README.txt"
        with open(readme_path, 'w') as f:
            f.write("Trained Emotion Classifiers for Deployment\n")
            f.write("=" * 60 + "\n\n")
            f.write("Installation Instructions:\n")
            f.write("-" * 60 + "\n")
            f.write("1. Download this zip file from Kaggle\n")
            f.write("2. Extract the zip file\n")
            f.write("3. Copy 'classifiers/' folder to 'deployment/models/'\n")
            f.write("4. Start the backend server: python deployment/backend/main.py\n\n")
            f.write("Model Information:\n")
            f.write("-" * 60 + "\n")
            f.write(f"Exported: {exported_count} classifier models\n")
            f.write(f"Categories: {', '.join([c.strip() for c in CATEGORIES])}\n")
            f.write(f"Model type: MLP (Multi-Layer Perceptron)\n")
            f.write(f"Framework: PyTorch\n\n")
            f.write("Server Requirements:\n")
            f.write("-" * 60 + "\n")
            f.write("- GPU: RTX 4000 or equivalent (20GB VRAM recommended)\n")
            f.write("- Python: 3.8+\n")
            f.write("- PyTorch: 2.1.2+\n")
            f.write("- Transformers: 4.37.0+\n")
            f.write("- See deployment/backend/requirements.txt for full dependencies\n")
        
        # Create zip file (archive paths are relative to the staging directory)
        print(f"\nCreating zip archive: {OUTPUT_ZIP}")
        with zipfile.ZipFile(OUTPUT_ZIP, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for file_path in EXPORT_DIR.rglob('*'):
                if file_path.is_file():
                    arcname = file_path.relative_to(EXPORT_DIR)
                    zipf.write(file_path, arcname)
                    print(f"  ‚úì Added: {arcname}")
        
        archive_size_mb = OUTPUT_ZIP.stat().st_size / (1024**2)
        
        print(f"\n{'='*80}")
        print("EXPORT COMPLETE!")
        print(f"{'='*80}")
        print(f"‚úì Archive: {OUTPUT_ZIP}")
        print(f"‚úì Size: {archive_size_mb:.1f} MB")
        print(f"\nNext Steps:")
        print("1. Download 'deployment_models.zip' from Kaggle output")
        print("2. Extract to your local deployment folder")
        print("3. Follow deployment/README.md for server setup")
        print("="*80)
    finally:
        # Cleanup temporary directory, also when the export fails part-way
        shutil.rmtree(EXPORT_DIR, ignore_errors=True)
    
    return OUTPUT_ZIP

# Run export
export_zip = export_models_for_deployment()

---

## 📊 Facial Data Integration Summary

### Dataset Overview

**Facial Data Structure:**
- **5,532 total images** across 5 emotion categories
- **Format:** 256×256 RGB images as flattened pixel arrays (196,608 values per row)
- **No headers/labels:** Emotion determined by CSV filename

**Distribution:**
```
Boredom      (boring.csv)    : 1,931 images
Confusion    (confused.csv)  : 1,177 images  
Engagement   (happiness.csv) :   593 images
Neutral      (neutral.csv)   : 1,612 images
Surprise     (surprise.csv)  :   219 images
```

### How It Addresses DAiSEE Imbalance

**DAiSEE Problem:** Very few high-intensity samples for Boredom, Confusion, Frustration

**Facial Data Solution:**
- Adds **1,931 Boredom** images (assigned level 2)
- Adds **1,177 Confusion** images (assigned level 2)
- Adds **593 Engagement** images (assigned level 2)
- Provides diverse facial expressions to improve classifier robustness

**Combined Training:**
- DAiSEE embeddings: Video-level temporal features
- Facial embeddings: Single-frame static features
- Both use same Qwen2.5-VL embedding space
- Unified training improves generalization

### Workflow Steps

1. **Enable Facial Data:**
   ```python
   FACIAL_DATA_ENABLED = True
   FACIAL_DATA_PATH = "/kaggle/input/facial-data"  # Update path
   ```

2. **Extract Embeddings:** (if `SKIP_EXTRACTION=False`)
   - Loads pixel arrays from CSV
   - Reshapes to 256×256×3 images
   - Extracts embeddings with Qwen2.5-VL
   - Saves to `FACIAL_EMBEDDINGS_DIR`

3. **Train with Augmentation:**
   - `EmbeddingDataset` combines DAiSEE + Facial samples
   - Classifier sees both video and image embeddings
   - Balanced training across intensity levels

### Notes

- **No Frustration data** available in facial dataset
- All facial samples assigned **intensity level 2** (moderate)
- Could implement stratified sampling for different intensity levels if needed
- Facial embeddings stored separately from DAiSEE embeddings

---

## 🔧 Bug Fixes Applied

### Issue: "Dataset: 0 valid samples" Error

**Problem:** The SKIP_EXTRACTION logic was copying embedding files but not properly tracking the copies, resulting in:
- Embedding maps showing correct counts
- But files not being accessible or paths being incorrect
- Dataset initialization finding 0 valid samples

**Fixes Applied:**

1. **Better Copy Tracking** (Cell after "SKIPPING EXTRACTION"):
   - Added `copied_count` and `skipped_count` counters
   - Properly updates `new_map` only when files exist
   - Shows detailed output: total embeddings, newly copied, already existed

2. **Embedding Map Validation** (train_classifier function):
   - Checks if embedding map files exist before loading
   - Validates that mapped embedding files actually exist on disk
   - Shows warnings for missing files with examples

3. **Dataset Debugging** (EmbeddingDataset.__init__):
   - Detects when 0 valid samples are found
   - Shows diagnostic info: DataFrame size, embedding map size, sample ClipIDs
   - Helps identify ClipID mismatches or path issues

**Expected Output Now:**
```
TRAIN: Processing 4852 embeddings...
  ‚úì Total embeddings in map: 4852
  ‚úì Newly copied: 4852 (or 0 if already existed)
  ‚úì Already existed: 0 (or 4852)
  ‚úì Saved map to /kaggle/working/embeddings/train/embedding_map.pkl
```

**If Error Persists:**
- Check that EXISTING_EMBEDDINGS_DIR path is correct
- Verify embedding files exist: `/kaggle/input/qwen-daisee-embeddings/embeddings/train/*.npy`
- Ensure ClipIDs in CSV match those in embedding_map.pkl

In [None]:
# Run this cell to diagnose embedding issues
def _report_embedding_dir(root, verify_paths=False):
    """
    Print per-split .npy counts and embedding-map statistics for `root`.

    Args:
        root: Directory expected to contain train/validation/test sub-folders.
        verify_paths: When True, also report how many of the paths stored in
            each embedding map exist on disk.
    """
    for split in ['train', 'validation', 'test']:
        split_dir = root / split
        if not split_dir.exists():
            print(f"   {split:12s}: split directory missing")
            continue

        npy_files = list(split_dir.glob('*.npy'))
        map_file = split_dir / 'embedding_map.pkl'
        print(f"   {split:12s}: {len(npy_files):5d} .npy files, map exists: {map_file.exists()}")

        if not map_file.exists():
            continue

        with open(map_file, 'rb') as map_handle:
            split_map = pickle.load(map_handle)
        print(f"                Map has {len(split_map)} entries")

        if verify_paths:
            # Check if any files in map exist
            existing = sum(1 for p in split_map.values() if Path(p).exists())
            print(f"                {existing}/{len(split_map)} paths in map actually exist")


def run_diagnostic_check():
    """
    Print a health report of embedding directories, label frames and facial data.

    Everything runs inside this function so the diagnostic does not overwrite
    notebook-level variables, and it tolerates the broken states it is meant
    to diagnose (empty label frames, facial data that was never loaded).
    """
    print("="*80)
    print("DIAGNOSTIC CHECK")
    print("="*80)

    # Check existing embeddings directory
    print(f"\n1. Checking EXISTING_EMBEDDINGS_DIR: {EXISTING_EMBEDDINGS_DIR}")
    if EXISTING_EMBEDDINGS_DIR.exists():
        print(f"   ‚úì Directory exists")
        _report_embedding_dir(EXISTING_EMBEDDINGS_DIR, verify_paths=True)
    else:
        print(f"   ‚úó Directory NOT found")

    # Check working embeddings directory
    print(f"\n2. Checking EMBEDDINGS_DIR (working): {EMBEDDINGS_DIR}")
    if EMBEDDINGS_DIR.exists():
        print(f"   ‚úì Directory exists")
        _report_embedding_dir(EMBEDDINGS_DIR)
    else:
        print(f"   ‚úó Directory NOT found (will be created)")

    # Check label DataFrames
    print(f"\n3. Checking Label DataFrames:")
    print(f"   Train:      {len(train_df)} rows")
    print(f"   Validation: {len(val_df)} rows")
    print(f"   Test:       {len(test_df)} rows")
    if len(train_df) > 0:
        print(f"   Sample ClipID: {train_df.iloc[0]['ClipID']}")
    else:
        print(f"   Sample ClipID: N/A (train_df is empty)")

    # Check facial data if enabled
    if FACIAL_DATA_ENABLED:
        print(f"\n4. Checking Facial Data:")
        print(f"   FACIAL_DATA_PATH: {FACIAL_DATA_PATH}")
        facial_path = Path(FACIAL_DATA_PATH)
        if facial_path.exists():
            print(f"   ‚úì Directory exists")
            csv_files = list(facial_path.glob('*.csv'))
            print(f"   Found {len(csv_files)} CSV files")
            for csv_file in csv_files[:5]:
                print(f"      - {csv_file.name}")
        else:
            print(f"   ‚úó Directory NOT found")

        # Looked up via globals() so a skipped facial-loading cell is reported
        # instead of aborting the diagnostic with a NameError.
        loaded_dfs = globals().get('facial_data_dfs')
        loaded_maps = globals().get('facial_embedding_maps')

        if loaded_dfs is None:
            print(f"\n   facial_data_dfs is not defined (facial data was not loaded)")
        else:
            print(f"\n   Loaded facial_data_dfs: {len(loaded_dfs)} categories")
            for cat, facial_df in loaded_dfs.items():
                print(f"      {cat}: {len(facial_df)} images")

        if loaded_maps:
            print(f"\n   Loaded facial_embedding_maps: {len(loaded_maps)} categories")
            for cat, facial_map in loaded_maps.items():
                print(f"      {cat}: {len(facial_map)} embeddings")
    else:
        print(f"\n4. Facial Data: DISABLED")

    print("\n" + "="*80)
    print("DIAGNOSTIC COMPLETE")
    print("="*80)


run_diagnostic_check()

### 🔍 Diagnostic Cell (Run if you encounter errors)

---

## ⚙️ Extraction Configuration Guide

### Configuration Flags

**Main Workflow Control:**
- `SKIP_EXTRACTION` - Skip ALL extraction, use existing embeddings only
  - `True`: Training only mode (fastest, requires pre-extracted embeddings)
  - `False`: Enable extraction (choose what to extract below)

**Extraction Targets** (only used when `SKIP_EXTRACTION = False`):
- `EXTRACT_DAISEE` - Extract DAiSEE video embeddings
  - `True`: Extract embeddings from DAiSEE videos
  - `False`: Use existing DAiSEE embeddings
  
- `EXTRACT_FACIAL` - Extract facial data embeddings
  - `True`: Extract embeddings from facial data CSVs (requires `FACIAL_DATA_ENABLED = True`)
  - `False`: Use existing facial embeddings or skip facial data

### Common Scenarios

**1. Training Only (fastest - use existing embeddings):**
```python
SKIP_EXTRACTION = True
FACIAL_DATA_ENABLED = True  # If you want to use facial data
# EXTRACT_DAISEE and EXTRACT_FACIAL are ignored
```

**2. Extract DAiSEE Only:**
```python
SKIP_EXTRACTION = False
EXTRACT_DAISEE = True
EXTRACT_FACIAL = False
FACIAL_DATA_ENABLED = False  # Optional
```

**3. Extract Facial Data Only:**
```python
SKIP_EXTRACTION = False
EXTRACT_DAISEE = False      # Use existing DAiSEE embeddings
EXTRACT_FACIAL = True
FACIAL_DATA_ENABLED = True  # Required
```

**4. Extract Both (slowest - full extraction):**
```python
SKIP_EXTRACTION = False
EXTRACT_DAISEE = True
EXTRACT_FACIAL = True
FACIAL_DATA_ENABLED = True
```

**5. Extract DAiSEE, Use Existing Facial:**
```python
SKIP_EXTRACTION = False
EXTRACT_DAISEE = True
EXTRACT_FACIAL = False
FACIAL_DATA_ENABLED = True  # Will load existing facial embeddings
```

### Time Estimates

Approximate extraction times on T4 GPU:
- **DAiSEE (8,571 videos):** ~2-3 hours
- **Facial Data (5,532 images):** ~30-45 minutes
- **Training (all classifiers):** ~10-15 minutes

**Tip:** Extract once, then set `SKIP_EXTRACTION = True` for all subsequent training runs!