# 📷 Proctoring Feature Extraction

Extract features from webcam frames for training proctoring models.

**Based on**: AutoOEP/Proctor/feature_extractor.py

## What This Notebook Does

1. Process video frames (front + side cameras)
2. Extract features: face detection, gaze, head pose, prohibited objects
3. Save features to CSV for model training

## Prerequisites

- Dataset with labeled frames (cheating/not cheating)
- YOLO model weights for object detection
- MediaPipe face landmarker task file

In [None]:
import os
import re
import numpy as np
import pandas as pd
from datetime import datetime
from tqdm.notebook import tqdm

print("‚úÖ Basic imports loaded")

# Probe each optional dependency in turn. Only the import itself sits inside
# the ``try``; a missing package is reported together with its install hint
# and the notebook keeps going, leaving that name undefined.
try:
    import cv2
except ImportError:
    print("‚ùå OpenCV not installed: pip install opencv-python")
else:
    print("‚úÖ OpenCV available")

try:
    import mediapipe as mp
except ImportError:
    print("‚ùå MediaPipe not installed: pip install mediapipe")
else:
    print("‚úÖ MediaPipe available")

try:
    from ultralytics import YOLO
except ImportError:
    print("‚ùå YOLO not installed: pip install ultralytics")
else:
    print("‚úÖ Ultralytics (YOLO) available")

## 1. Configuration

In [None]:
# Paths - UPDATE THESE
# Everything is derived from the current working directory of the notebook.
BASE_PATH = os.path.dirname(os.getcwd())   # parent of the cwd (ml/ directory)
PROJECT_ROOT = os.path.dirname(BASE_PATH)  # grandparent of the cwd (ensureStudy/)

# Shared prefixes for the AutoOEP checkout that sits under the project root.
_AUTOOEP_DIR = os.path.join(PROJECT_ROOT, 'AutoOEP')
_AUTOOEP_MODELS_DIR = os.path.join(_AUTOOEP_DIR, 'Models')

# Labeled frame dataset (update to your dataset location).
DATASET_PATH = os.path.join(_AUTOOEP_DIR, 'Dataset_Parser', 'Dataset')

# Model files: YOLO weights and the MediaPipe face landmarker task.
YOLO_MODEL_PATH = os.path.join(_AUTOOEP_MODELS_DIR, 'OEP_YOLOv11n.pt')
MEDIAPIPE_TASK_PATH = os.path.join(_AUTOOEP_MODELS_DIR, 'face_landmarker.task')

# Target image for face verification; set to the student ID photo path.
TARGET_IMAGE_PATH = None

# Where extracted features are written; created up front if missing.
OUTPUT_DIR = os.path.join(BASE_PATH, 'data', 'proctoring')
os.makedirs(OUTPUT_DIR, exist_ok=True)

print(f"Dataset path: {DATASET_PATH}")
print(f"Output dir: {OUTPUT_DIR}")

## 2. Feature Columns

Features extracted from each frame pair:

In [None]:
# Column schema, assembled group by group in its final order.
FEATURE_COLUMNS = (
    # Timestamp & metadata
    ['timestamp']
    # Face verification: 0/1 match against the registered student, and the
    # number of faces detected
    + ['verification_result', 'num_faces']
    # Eye/gaze: iris position (center/left/right), ratio indicating gaze
    # direction, gaze direction (forward/left/right/up/down) and gaze zone
    # (white/yellow/red risk level)
    + ['iris_pos', 'iris_ratio', 'gaze_direction', 'gaze_zone']
    # Mouth: zone (GREEN/YELLOW/ORANGE/RED) and openness
    + ['mouth_zone', 'mouth_area']
    # Head pose: pitch, yaw, roll and distance from center
    + ['x_rotation', 'y_rotation', 'z_rotation', 'radial_distance']
    # Prohibited objects (1 if detected, 0 if not)
    + ['watch', 'headphone', 'closedbook', 'earpiece',
       'cell phone', 'openbook', 'chits', 'sheet']
    # Hand distance and face distance from camera
    + ['H-Distance', 'F-Distance']
    # Labels: Train/Test split, video/session ID, target label (0/1)
    + ['split', 'video', 'is_cheating']
)

print(f"Total features: {len(FEATURE_COLUMNS)}")

## 3. Helper Functions

In [None]:
def extract_timestamp(filename):
    """Return the timestamp token embedded in a frame filename, or None.

    A match requires an underscore followed by four digit groups (the first
    three joined by ':' or '-', the last one joined by '.' or '-') directly
    before a trailing ``.jpg``. Only the digit groups and their separators
    are returned; the underscore and the extension are not.
    """
    pattern = r'_(\d+[:\-]\d+[:\-]\d+[\.\-]\d+)\.jpg$'
    found = re.search(pattern, filename)
    if found is None:
        return None
    return found.group(1)


def get_all_timestamps(video_path):
    """Collect the distinct frame timestamps found under one video directory.

    Looks in ``front`` and ``side``, each with ``cheating_frames`` and
    ``not_cheating_frames`` sub-folders; folders that do not exist are
    skipped. Only ``.jpg`` files whose name yields a timestamp contribute.
    Returns a set of timestamp strings (empty when nothing matches).
    """
    frame_dirs = [
        os.path.join(video_path, camera, label_dir)
        for camera in ('front', 'side')
        for label_dir in ('cheating_frames', 'not_cheating_frames')
    ]

    found = set()
    for frame_dir in frame_dirs:
        if not os.path.exists(frame_dir):
            continue
        parsed = (
            extract_timestamp(name)
            for name in os.listdir(frame_dir)
            if name.endswith('.jpg')
        )
        # Names that do not carry a timestamp parse to None and are dropped.
        found.update(stamp for stamp in parsed if stamp)
    return found


def find_frame_paths(video_path, timestamp):
    """Locate the front (face) and side (hand) frames for one timestamp.

    Returns ``(face_path, face_label, hand_path, hand_label)``. A label is 1
    when the frame was found under ``cheating_frames`` and 0 when it was
    found under ``not_cheating_frames``; a camera with no matching frame
    yields ``None`` for both its path and its label.
    """
    def locate(camera):
        # The cheating folder is searched first, so it takes precedence when
        # both folders hold a frame for this timestamp.
        for label_dir, label in (('cheating_frames', 1),
                                 ('not_cheating_frames', 0)):
            frame_dir = os.path.join(video_path, camera, label_dir)
            if not os.path.exists(frame_dir):
                continue
            for name in os.listdir(frame_dir):
                if name.endswith('.jpg') and extract_timestamp(name) == timestamp:
                    return os.path.join(frame_dir, name), label
        return None, None

    face_path, face_label = locate('front')
    hand_path, hand_label = locate('side')
    return face_path, face_label, hand_path, hand_label

# Progress marker: shows that the helper-function cell ran to completion.
print("‚úÖ Helper functions defined")

## 4. Simplified Feature Extraction

**Note**: For full feature extraction, use the AutoOEP FeatureExtractor.
This simplified version demonstrates the workflow.

In [None]:
def extract_basic_features(face_frame, hand_frame):
    """
    Build the placeholder feature record for one front/side frame pair.

    Neither frame is inspected here: each call returns a new dict holding the
    same fixed default for every feature key. For production, use
    AutoOEP/Proctor/feature_extractor.py, which includes YOLO object
    detection, MediaPipe landmarks, etc.
    """
    prohibited_objects = ('watch', 'headphone', 'closedbook', 'earpiece',
                          'cell phone', 'openbook', 'chits', 'sheet')

    # Key order is kept stable because it becomes the column order downstream.
    record = dict(
        verification_result=0,
        num_faces=1,
        iris_pos=0,          # center
        iris_ratio=1.0,
        mouth_zone=0,        # GREEN
        mouth_area=0.0,
        x_rotation=0.0,
        y_rotation=0.0,
        z_rotation=0.0,
        radial_distance=0.0,
        gaze_direction=0,    # forward
        gaze_zone=0,         # white
    )

    # Prohibited objects (default: none detected)
    record.update(dict.fromkeys(prohibited_objects, 0))

    record.update({'H-Distance': 10000.0, 'F-Distance': 10000.0})
    return record

# Progress marker for this cell, followed by a reminder that the extractor
# defined here is the simplified one and where the full version lives.
print("‚úÖ Feature extractor defined")
print("\n‚ö†Ô∏è  Note: This is a simplified extractor.")
print("For full features, use: AutoOEP/Proctor/feature_extractor.py")

## 5. Process Dataset

In [None]:
def process_dataset(dataset_path, output_dir):
    """Extract features for every labeled frame pair and save them to CSV.

    Walks ``<dataset_path>/Train`` and ``<dataset_path>/Test`` (a missing
    split is reported and skipped). Every sub-directory of a split is treated
    as one video. For each timestamp found in a video, the front and side
    frames are located and loaded, features are extracted, and one row is
    recorded. A row is labeled cheating (1) when either frame came from a
    cheating folder, otherwise 0.

    Timestamps with a missing or unreadable front/side frame are skipped and
    counted; the count is printed when non-zero.

    Args:
        dataset_path: Root directory containing the ``Train``/``Test`` splits.
        output_dir: Directory that receives ``extracted_features.csv``; it is
            created if it does not exist yet.

    Returns:
        The DataFrame that was written, or ``None`` when no row could be
        extracted (nothing is written in that case).
    """
    all_results = []
    skipped_pairs = 0

    for split_name in ['Train', 'Test']:
        split_path = os.path.join(dataset_path, split_name)
        if not os.path.exists(split_path):
            print(f"‚ö†Ô∏è  {split_name} directory not found")
            continue

        print(f"\nProcessing {split_name}...")
        video_dirs = sorted(
            d for d in os.listdir(split_path)
            if os.path.isdir(os.path.join(split_path, d))
        )

        for video_name in tqdm(video_dirs, desc=split_name):
            video_path = os.path.join(split_path, video_name)

            # Sorted so that rows are emitted in a reproducible order; the
            # iteration order of a set of strings is not stable across runs.
            for timestamp in sorted(get_all_timestamps(video_path)):
                face_path, face_label, hand_path, hand_label = find_frame_paths(
                    video_path, timestamp
                )

                # A sample needs both camera views.
                if not face_path or not hand_path:
                    skipped_pairs += 1
                    continue

                # Load frames; cv2.imread returns None for unreadable files.
                face_frame = cv2.imread(face_path)
                hand_frame = cv2.imread(hand_path)
                if face_frame is None or hand_frame is None:
                    skipped_pairs += 1
                    continue

                features = extract_basic_features(face_frame, hand_frame)

                # Add metadata
                features['timestamp'] = timestamp
                features['split'] = split_name
                features['video'] = video_name
                features['is_cheating'] = (
                    1 if (face_label == 1 or hand_label == 1) else 0
                )

                all_results.append(features)

    if skipped_pairs:
        print(f"Skipped {skipped_pairs} timestamps without a readable front/side frame pair")

    if not all_results:
        print("‚ùå No features extracted!")
        return None

    # Save to CSV; make sure the target directory exists for callers that
    # pass a fresh location.
    os.makedirs(output_dir, exist_ok=True)
    df = pd.DataFrame(all_results)
    output_path = os.path.join(output_dir, 'extracted_features.csv')
    df.to_csv(output_path, index=False)

    print(f"\n‚úÖ Saved {len(df)} samples to {output_path}")
    print("\nClass distribution:")
    print(df['is_cheating'].value_counts())

    return df

In [None]:
# Run feature extraction
if not os.path.exists(DATASET_PATH):
    # Nothing to process: report the path that was tried and show the
    # directory layout the extraction expects.
    print("\n".join([
        f"‚ùå Dataset not found at: {DATASET_PATH}",
        "\nPlease update DATASET_PATH to your dataset location.",
        "\nExpected structure:",
        "  Dataset/",
        "  ‚îú‚îÄ‚îÄ Train/",
        "  ‚îÇ   ‚îî‚îÄ‚îÄ video_001/",
        "  ‚îÇ       ‚îú‚îÄ‚îÄ front/",
        "  ‚îÇ       ‚îÇ   ‚îú‚îÄ‚îÄ cheating_frames/",
        "  ‚îÇ       ‚îÇ   ‚îî‚îÄ‚îÄ not_cheating_frames/",
        "  ‚îÇ       ‚îî‚îÄ‚îÄ side/",
        "  ‚îî‚îÄ‚îÄ Test/",
    ]))
else:
    df = process_dataset(DATASET_PATH, OUTPUT_DIR)

## 6. Using AutoOEP Feature Extractor (Recommended)

For full feature extraction with YOLO and MediaPipe:

In [None]:
# Full feature extraction using AutoOEP (requires models)

USE_AUTOOEP = False  # Set to True if you have the models

if not USE_AUTOOEP:
    # Default path: do nothing except list what has to be in place first.
    print("\n".join([
        "Skipping AutoOEP extraction.",
        "Set USE_AUTOOEP = True after setting up:",
        "  - YOLO model: OEP_YOLOv11n.pt",
        "  - MediaPipe: face_landmarker.task",
        "  - Target image for face verification",
    ]))
else:
    import sys

    # Put the AutoOEP checkout first on the import path so that its
    # ``Proctor`` package is the one that gets imported below.
    sys.path.insert(0, os.path.join(PROJECT_ROOT, 'AutoOEP'))

    from Proctor.feature_extractor import process_dataset_and_save_csv

    process_dataset_and_save_csv(
        dataset_path=DATASET_PATH,
        target_frame_path=TARGET_IMAGE_PATH,
        output_dir=OUTPUT_DIR,
        face_landmarker_path=MEDIAPIPE_TASK_PATH,
        yolo_model_path=YOLO_MODEL_PATH,
    )