In [None]:
# IMPORTANT: SOME KAGGLE DATA SOURCES ARE PRIVATE
# RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES.
# The download cell below uses `kagglehub` without importing it again, so this
# cell has to run first on a fresh kernel.
import kagglehub
# Presumably prompts for Kaggle credentials so the private datasets can be read
# — confirm against the kagglehub documentation.
kagglehub.login()


In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

# Fetch the four datasets in a fixed order and bind each result to its own
# module-level name (same names and same download order as before).
(
    varshitpashikanti_labeled_ocat_path,
    varshitpashikanti_ocat_clips_path,
    varshitpashikanti_prelabes_path,
    varshitpashikanti_engineered_features_path,
) = [
    kagglehub.dataset_download(dataset_handle)
    for dataset_handle in (
        'varshitpashikanti/labeled-ocat',
        'varshitpashikanti/ocat-clips',
        'varshitpashikanti/prelabes',
        'varshitpashikanti/engineered-features',
    )
]

print('Data source import complete.')


In [None]:
import numpy as np
import cv2
import os
import glob
import pandas as pd
from tqdm import tqdm
import math

# --- Configuration ---
# !!! YOU MUST SET THESE !!!
# Frame size in pixels. Used to scale landmark x/y values and to build the
# pinhole camera matrix inside get_head_pose.
VIDEO_WIDTH = 1920  # e.g., 1920 (for 1080p)
VIDEO_HEIGHT = 1080 # e.g., 1080 (for 1080p)

# --- Directories ---
# NOTE(review): 'prelabeling-ocat' is not one of the datasets fetched with
# kagglehub earlier in this notebook — confirm it is attached to the session.
INPUT_DIR = '/kaggle/input/prelabeling-ocat/part_3'  # Folder containing your .npy files
OUTPUT_FILE = '/kaggle/working/draft_labels4.csv'       # The resulting CSV file

# --- Rule Thresholds ---
# Blinking
# A frame counts as "eyes closed" when the mean of both eyes' EAR is below this.
EAR_THRESHOLD = 0.21 # Threshold for a single frame to be 'closed'

# Gaze (0.0 = far left, 1.0 = far right, 0.5 = center)
# A frame counts as "off center" when the mean gaze ratio is not strictly
# between these two values.
GAZE_THRESHOLD_LOW = 0.35
GAZE_THRESHOLD_HIGH = 0.65

# --- MediaPipe Landmark Indices ---
# These are fixed indices from MediaPipe's 478-landmark model
# Order matters: get_ear treats positions 0 and 3 as the horizontal corners and
# (1, 5) / (2, 4) as the vertical pairs.
LEFT_EYE_LANDMARKS = [362, 385, 387, 263, 373, 380]
RIGHT_EYE_LANDMARKS = [33, 160, 158, 133, 153, 144]

# The four points of each iris are averaged to get an iris center.
LEFT_IRIS_LANDMARKS = [474, 475, 476, 477]
RIGHT_IRIS_LANDMARKS = [469, 470, 471, 472]

# For 3D Head Pose
HEAD_POSE_LANDMARKS = [
    33, 263, 1, 61, 291, 199 # R-eye, L-eye, Nose, R-mouth, L-mouth, Chin
]

# 3D Canonical Face Model Points (from MediaPipe docs)
# These correspond to the HEAD_POSE_LANDMARKS indices
# Row i here is paired with HEAD_POSE_LANDMARKS[i] by cv2.solvePnP, so the two
# lists must stay in the same order.
# NOTE(review): the coordinates have not been checked against the linked .obj.
# https://github.com/google/mediapipe/blob/master/mediapipe/modules/face_geometry/data/canonical_face_model.obj
CANONICAL_FACE_MODEL = np.array(
    [
        [ 0.033501,  0.068864, -0.052668], # 33  - Right Eye
        [-0.033501,  0.068864, -0.052668], # 263 - Left Eye
        [ 0.000000,  0.000000, -0.000000], # 1   - Nose Tip
        [ 0.046330, -0.045969, -0.032640], # 61  - Right Mouth Corner
        [-0.046330, -0.045969, -0.032640], # 291 - Left Mouth Corner
        [ 0.000000, -0.104473, -0.009363]  # 199 - Chin
    ], dtype=np.float32
) * 100 # Scale up for better solvePnP stability


# --- Helper Functions ---

def get_2d_points(landmarks_3d, width, height):
    """Scale the x/y columns of a landmark array to pixel units.

    Only the first two columns of ``landmarks_3d`` are used; column 0 is
    multiplied by ``width`` and column 1 by ``height``.
    """
    frame_size = [width, height]
    xy_columns = landmarks_3d[:, :2]
    return xy_columns * frame_size

def get_ear(eye_points):
    """Calculates the Eye Aspect Ratio (EAR) from 6 eye landmarks.

    Args:
        eye_points: indexable of six 2D points ordered p1..p6 as in the
            diagram below (index 0 and 3 are the horizontal corners).

    Returns:
        (|p2 - p6| + |p3 - p5|) / (2 * |p1 - p4|), or 0.0 when the eye has
        zero width or the points cannot be processed.
    """
    # eye_points shape is (6, 2)
    #      p2 -- p3
    # p1 /        \ p4
    #    \        /
    #      p6 -- p5

    try:
        # Vertical distances
        v1 = np.linalg.norm(eye_points[1] - eye_points[5])
        v2 = np.linalg.norm(eye_points[2] - eye_points[4])
        # Horizontal distance
        h = np.linalg.norm(eye_points[0] - eye_points[3])

        if h == 0:
            return 0.0

        return (v1 + v2) / (2.0 * h)
    except Exception:
        # Malformed input still falls back to 0.0, but unlike a bare `except:`
        # this no longer swallows KeyboardInterrupt / SystemExit.
        return 0.0

def get_gaze_ratio(eye_points, iris_center):
    """
    Calculates horizontal gaze ratio.
    < 0.35 = looking left
    > 0.65 = looking right
    ~ 0.5 = centered

    Args:
        eye_points: indexable of eye landmarks; only the x values of index 0
            and index 3 (the two eye corners) are used.
        iris_center: indexable whose element 0 is the iris x coordinate.

    Returns:
        The iris x position as a fraction of the corner-to-corner span,
        clipped to [0.0, 1.0]; 0.5 when the span is zero or the inputs
        cannot be processed.
    """
    try:
        # Horizontal extent of the eye, from corner 0 to corner 3
        eye_left_x = eye_points[0][0]
        eye_right_x = eye_points[3][0]
        eye_width = eye_right_x - eye_left_x

        if eye_width == 0:
            return 0.5 # Assume center if eye not detected

        gaze_ratio = (iris_center[0] - eye_left_x) / eye_width
        return np.clip(gaze_ratio, 0.0, 1.0)
    except Exception:
        # Default to center on error; KeyboardInterrupt / SystemExit are no
        # longer swallowed as they were with a bare `except:`.
        return 0.5

def get_head_pose(landmarks_3d, width, height):
    """
    Estimates a head rotation angle using cv2.solvePnP and returns it in degrees.

    The six HEAD_POSE_LANDMARKS are scaled to pixels and matched against
    CANONICAL_FACE_MODEL with a pinhole camera whose focal length equals the
    frame width and whose principal point is the frame center.

    Args:
        landmarks_3d: landmark array indexed by landmark id; columns 0 and 1
            are multiplied by width / height (assumed normalized x and y).
        width: frame width in pixels.
        height: frame height in pixels.

    Returns:
        The sign-flipped Z Euler angle of the solved rotation, in degrees, or
        0.0 when solvePnP reports failure or raises.
    """

    # Get 2D pixel coordinates for the 6 key points
    image_points = landmarks_3d[HEAD_POSE_LANDMARKS, :2] * [width, height]

    # Get the 3D model points
    model_points = CANONICAL_FACE_MODEL

    # Camera matrix (assuming simple pinhole camera)
    focal_length = width
    center = (width / 2, height / 2)
    camera_matrix = np.array(
        [[focal_length, 0, center[0]],
         [0, focal_length, center[1]],
         [0, 0, 1]], dtype="double"
    )

    # Distortion coefficients (assuming no distortion)
    dist_coeffs = np.zeros((4, 1))

    try:
        # Solve for rotation and translation
        success, rotation_vector, _translation_vector = cv2.solvePnP(
            model_points,
            image_points,
            camera_matrix,
            dist_coeffs,
            flags=cv2.SOLVEPNP_ITERATIVE
        )

        # solvePnP signals a failed fit through its first return value; do not
        # derive angles from an unreliable rotation vector.
        if not success:
            return 0.0

        # Convert rotation vector to rotation matrix
        rotation_matrix, _ = cv2.Rodrigues(rotation_vector)

        # Get the Z Euler angle
        # See: https://www.learnopencv.com/rotation-matrix-to-euler-angles/
        sy = math.sqrt(rotation_matrix[0, 0] * rotation_matrix[0, 0] + rotation_matrix[1, 0] * rotation_matrix[1, 0])
        singular = sy < 1e-6

        if not singular:
            z = math.atan2(rotation_matrix[1, 0], rotation_matrix[0, 0])
        else:
            # Gimbal-lock case: the Z angle is not observable, use 0
            z = 0

        # NOTE(review): z is the rotation about the Z axis of this Euler
        # decomposition. In OpenCV's camera frame that axis is usually the
        # optical axis, which would make this the in-plane head tilt rather
        # than left/right turning — confirm that this is the quantity the
        # yaw rules are meant to use.
        # The sign is flipped so positive = right, negative = left, like in the rules.
        yaw_degrees = -z * (180.0 / math.pi)
        return yaw_degrees

    except Exception:
        return 0.0 # Default to 0 if calculation fails

# --- Main Processing Loop ---

# Rule-based pre-labelling: for every clip file, compute per-frame blink, gaze
# and head-angle measurements, aggregate them per clip, assign a draft label
# and collect one CSV row per clip.

# Find all .npy files
npy_files = glob.glob(os.path.join(INPUT_DIR, '*.npy'))
print(f"Found {len(npy_files)} feature files. Processing...")

clip_results = []

for file_path in tqdm(npy_files, desc="Processing Clips"):
    try:
        clip_data = np.load(file_path) # Shape (num_frames, 1434)
        num_frames = clip_data.shape[0]

        if num_frames == 0:
            # print(f"Skipping empty file: {file_path}")
            continue

        blink_frames = 0
        gaze_off_center_frames = 0
        yaw_values = []
        valid_frames = 0

        for frame_idx in range(num_frames):
            frame_landmarks_flat = clip_data[frame_idx]

            # Check for empty frames (where no face was detected)
            # All-zero rows are skipped and do not count towards valid_frames.
            if np.all(frame_landmarks_flat == 0):
                continue

            valid_frames += 1

            # Reshape to (478, 3) to access x, y, z
            # A row of any other length raises here; the outer except then
            # reports the file and the clip is dropped from the results.
            landmarks_3d = frame_landmarks_flat.reshape((478, 3))

            # Convert to 2D pixel coordinates for EAR and Gaze
            landmarks_2d = get_2d_points(landmarks_3d, VIDEO_WIDTH, VIDEO_HEIGHT)

            # 1. Calculate Eye Blink
            left_ear = get_ear(landmarks_2d[LEFT_EYE_LANDMARKS])
            right_ear = get_ear(landmarks_2d[RIGHT_EYE_LANDMARKS])
            avg_ear = (left_ear + right_ear) / 2.0

            if avg_ear < EAR_THRESHOLD:
                blink_frames += 1

            # 2. Calculate Gaze
            left_iris_center = landmarks_2d[LEFT_IRIS_LANDMARKS].mean(axis=0)
            right_iris_center = landmarks_2d[RIGHT_IRIS_LANDMARKS].mean(axis=0)

            left_gaze = get_gaze_ratio(landmarks_2d[LEFT_EYE_LANDMARKS], left_iris_center)
            right_gaze = get_gaze_ratio(landmarks_2d[RIGHT_EYE_LANDMARKS], right_iris_center)
            avg_gaze = (left_gaze + right_gaze) / 2.0

            if not (GAZE_THRESHOLD_LOW < avg_gaze < GAZE_THRESHOLD_HIGH):
                gaze_off_center_frames += 1

            # 3. Calculate Head Yaw
            # We use the raw 3D landmarks for this
            yaw = get_head_pose(landmarks_3d, VIDEO_WIDTH, VIDEO_HEIGHT)
            yaw_values.append(yaw)

        if valid_frames == 0:
            # print(f"Skipping clip with no face detections: {file_path}")
            continue

        # --- Calculate Clip-Level Statistics ---
        # Percentages are relative to frames with a detected face, not to all frames.
        percent_eyes_closed = (blink_frames / valid_frames) * 100
        gaze_off_center_time = (gaze_off_center_frames / valid_frames) * 100
        # Signed mean: opposite-signed values within one clip cancel each other.
        avg_head_yaw = np.mean(yaw_values) if yaw_values else 0.0

        # --- Apply Rules for Draft Label ---
        draft_label = "PRE_Neutral" # Default

        # Rule 1: Distracted
        if (avg_head_yaw > 25 or avg_head_yaw < -25) or (percent_eyes_closed > 40):
            draft_label = "PRE_Distracted"

        # Rule 2: Attentive (only evaluated when Rule 1 did not match, because
        # of the elif; it never overrides Distracted)
        # Note: Added abs() to yaw as < 10° implies looking straight.
        elif (abs(avg_head_yaw) < 10) and (gaze_off_center_time < 20):
            draft_label = "PRE_Attentive"

        # --- Store Result ---
        clip_name = os.path.splitext(os.path.basename(file_path))[0]
        clip_results.append({
            "clip_name": clip_name,
            "draft_label": draft_label,
            "avg_head_yaw": round(avg_head_yaw, 2),
            "percent_eyes_closed": round(percent_eyes_closed, 2),
            "gaze_off_center_time": round(gaze_off_center_time, 2),
            "valid_frames": valid_frames,
            "total_frames": num_frames
        })

    except Exception as e:
        # Best effort: report the failing file and continue with the next clip.
        print(f"Failed to process {file_path}. Error: {e}")

# --- Save Final CSV ---
df = pd.DataFrame(clip_results)
df.to_csv(OUTPUT_FILE, index=False)

print(f"\nProcessing complete. Saved {len(df)} clips to {OUTPUT_FILE}")

In [None]:
import glob
import os

import pandas as pd

# CSVs that this notebook itself derives from the merge. They are written
# with relative names, so they presumably land in /kaggle/working (the usual
# Kaggle working directory) next to the draft files. Without this filter a
# re-run of the cell would merge its own earlier outputs and duplicate rows.
DERIVED_OUTPUTS = {"merged_output.csv", "grouped_output.csv", "updated_output.csv"}

# Sorted so the merged row order does not depend on directory listing order.
files = sorted(
    csv_path
    for csv_path in glob.glob("/kaggle/working/*.csv")
    if os.path.basename(csv_path) not in DERIVED_OUTPUTS
)

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with a message that says what is actually missing.
if not files:
    raise FileNotFoundError("No draft label CSV files found in /kaggle/working to merge.")

merged_df = pd.concat([pd.read_csv(f) for f in files], ignore_index=True)
merged_df.to_csv("merged_output.csv", index=False)


In [None]:
import pandas as pd

# Load the merged labels and order the rows by clip name
df = pd.read_csv("merged_output.csv")
df_sorted = df.sort_values(by=["clip_name"])

# Write the ordered table without the index column
df_sorted.to_csv(path_or_buf="grouped_output.csv", index=False)

print("‚úÖ Rows grouped (sorted) by 'clip_name' and saved as 'grouped_output.csv'")


In [None]:
import numpy as np
import cv2
import os
import glob
from tqdm import tqdm
import math

# --- Configuration ---
# !!! YOU MUST SET THESE !!!
# Frame size in pixels. Used to scale landmark x/y values and to build the
# camera matrix inside get_head_pose.
VIDEO_WIDTH = 1920  # e.g., 1920 (for 1080p)
VIDEO_HEIGHT = 1080 # e.g., 1080 (for 1080p)

# --- Directories ---
RAW_FEATURES_DIR = '/kaggle/input/prelabeling-ocat/part_4'   # Folder with large (90, 1434) .npy files
ENGINEERED_FEATURES_DIR = '/kaggle/working/engineered_features4' # Where to save small (90, 10) .npy files

# --- MediaPipe Landmark Indices ---
# We list only the indices we need to extract
# Order matters: get_ear and get_relative_iris_pos treat positions 0 and 3 as
# the eye corners.
LEFT_EYE_LANDMARKS = [362, 385, 387, 263, 373, 380]
RIGHT_EYE_LANDMARKS = [33, 160, 158, 133, 153, 144]

# The four points of each iris are averaged to get an iris center.
LEFT_IRIS_LANDMARKS = [474, 475, 476, 477]
RIGHT_IRIS_LANDMARKS = [469, 470, 471, 472]

# get_mar uses positions 0-1 as the horizontal pair and 2-3 as the vertical pair.
MOUTH_LANDMARKS = [61, 291, 13, 14] # R-Corner, L-Corner, Upper-Lip, Lower-Lip

HEAD_POSE_LANDMARKS = [
    33, 263, 1, 61, 291, 199 # R-eye, L-eye, Nose, R-mouth, L-mouth, Chin
]

# 3D Canonical Face Model Points
# Row i is paired with HEAD_POSE_LANDMARKS[i] by cv2.solvePnP, so both lists
# must stay in the same order.
CANONICAL_FACE_MODEL = np.array(
    [
        [ 0.033501,  0.068864, -0.052668], # 33
        [-0.033501,  0.068864, -0.052668], # 263
        [ 0.000000,  0.000000, -0.000000], # 1
        [ 0.046330, -0.045969, -0.032640], # 61
        [-0.046330, -0.045969, -0.032640], # 291
        [ 0.000000, -0.104473, -0.009363]  # 199
    ], dtype=np.float32
) * 100

# Width of the zero row written for frames without a detected face; must match
# the length of frame_features in the main loop.
NUM_ENGINEERED_FEATURES = 10 # Our 10 selected features

# --- Helper Functions ---

def get_2d_points(landmarks_3d, width, height):
    """Return the x/y columns of the landmarks scaled by width and height."""
    pixel_scale = [width, height]
    normalized_xy = landmarks_3d[:, :2]
    return normalized_xy * pixel_scale

def get_ear(eye_points):
    """Calculates the Eye Aspect Ratio (EAR) from 6 eye landmarks.

    Index 0 and 3 are the horizontal corners; (1, 5) and (2, 4) are the two
    vertical pairs. Returns 0.0 when the eye width is zero or the points
    cannot be processed.
    """
    try:
        v1 = np.linalg.norm(eye_points[1] - eye_points[5])
        v2 = np.linalg.norm(eye_points[2] - eye_points[4])
        h = np.linalg.norm(eye_points[0] - eye_points[3])
        if h == 0:
            return 0.0
        return (v1 + v2) / (2.0 * h)
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt / SystemExit.
        return 0.0

def get_mar(mouth_points):
    """Calculates Mouth Aspect Ratio (MAR) from 4 landmarks.

    Index 0 and 1 form the horizontal pair, index 2 and 3 the vertical pair.
    Returns vertical / horizontal distance, or 0.0 when the horizontal
    distance is zero or the points cannot be processed.
    """
    try:
        horizontal_dist = np.linalg.norm(mouth_points[0] - mouth_points[1])
        vertical_dist = np.linalg.norm(mouth_points[2] - mouth_points[3])
        if horizontal_dist == 0:
            return 0.0
        return vertical_dist / horizontal_dist
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt / SystemExit.
        return 0.0

def get_relative_iris_pos(eye_points, iris_center):
    """Calculates normalized iris position relative to the eye center.

    The eye center is the mean of all eye points. Both the x and the y offset
    of the iris are divided by the eye width (distance between index 0 and
    index 3). Returns (0.0, 0.0) when the width is zero or the inputs cannot
    be processed.
    """
    try:
        eye_center = eye_points.mean(axis=0)
        relative_pos = iris_center - eye_center
        eye_width = np.linalg.norm(eye_points[0] - eye_points[3])
        if eye_width == 0:
            return 0.0, 0.0
        return relative_pos[0] / eye_width, relative_pos[1] / eye_width
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt / SystemExit.
        return 0.0, 0.0

def get_head_pose(landmarks_3d, width, height):
    """Estimates head pose (yaw, pitch, roll) in degrees with cv2.solvePnP.

    The HEAD_POSE_LANDMARKS are scaled to pixels and matched against
    CANONICAL_FACE_MODEL using a pinhole camera (focal length = width,
    principal point = frame center, no distortion).

    Returns:
        (yaw_deg, pitch_deg, roll_deg), or (0.0, 0.0, 0.0) when solvePnP
        reports failure or raises.
    """
    image_points = landmarks_3d[HEAD_POSE_LANDMARKS, :2] * [width, height]
    model_points = CANONICAL_FACE_MODEL

    camera_matrix = np.array(
        [[width, 0, width / 2],
         [0, width, height / 2],
         [0, 0, 1]], dtype="double"
    )
    dist_coeffs = np.zeros((4, 1))

    try:
        success, rotation_vector, _t_vec = cv2.solvePnP(
            model_points, image_points, camera_matrix, dist_coeffs, flags=cv2.SOLVEPNP_ITERATIVE
        )

        # solvePnP signals a failed fit through its first return value; do not
        # derive angles from an unreliable rotation vector.
        if not success:
            return 0.0, 0.0, 0.0

        rotation_matrix, _ = cv2.Rodrigues(rotation_vector)

        sy = math.sqrt(rotation_matrix[0, 0] * rotation_matrix[0, 0] + rotation_matrix[1, 0] * rotation_matrix[1, 0])
        singular = sy < 1e-6

        # NOTE(review): the names below follow the X=roll / Y=pitch / Z=yaw
        # labelling of the Euler decomposition. In OpenCV's camera frame the
        # Z axis is usually the optical axis, so "yaw" may actually be the
        # in-plane head tilt — confirm before interpreting single features.
        if not singular:
            roll = math.atan2(rotation_matrix[2, 1], rotation_matrix[2, 2])
            pitch = math.atan2(-rotation_matrix[2, 0], sy)
            yaw = math.atan2(rotation_matrix[1, 0], rotation_matrix[0, 0])
        else:
            # Gimbal-lock case: the Z angle is not observable, use 0
            roll = math.atan2(-rotation_matrix[1, 2], rotation_matrix[1, 1])
            pitch = math.atan2(-rotation_matrix[2, 0], sy)
            yaw = 0

        # Convert to degrees and use intuitive signs
        yaw_deg = -yaw * (180.0 / math.pi)   # Positive = turns right
        pitch_deg = pitch * (180.0 / math.pi) # Positive = looks up
        roll_deg = roll * (180.0 / math.pi)   # Positive = rolls right
        return yaw_deg, pitch_deg, roll_deg

    except Exception:
        return 0.0, 0.0, 0.0 # Default to 0

# --- Main Processing Loop ---

# Convert every raw landmark clip into a (num_frames, 10) feature array and
# save it under the same base name in ENGINEERED_FEATURES_DIR.
os.makedirs(ENGINEERED_FEATURES_DIR, exist_ok=True)

raw_npy_files = glob.glob(os.path.join(RAW_FEATURES_DIR, '*.npy'))
print(f"Found {len(raw_npy_files)} raw feature files. Starting engineering...")

for file_path in tqdm(raw_npy_files, desc="Engineering features"):
    try:
        raw_clip_data = np.load(file_path) # Shape (num_frames, 1434)
        num_frames = raw_clip_data.shape[0]

        # Empty clips produce no output file.
        if num_frames == 0:
            continue

        engineered_feature_sequence = [] # List to hold the 10 features per frame

        for frame_idx in range(num_frames):
            frame_landmarks_flat = raw_clip_data[frame_idx]

            # If no face was detected, append zeros for all 10 features
            # (keeps the output aligned frame-for-frame with the input)
            if np.all(frame_landmarks_flat == 0):
                engineered_feature_sequence.append(np.zeros(NUM_ENGINEERED_FEATURES))
                continue

            # Reshape to (478, 3) to access x, y, z
            # A row of any other length raises here; the outer except then
            # reports the file and nothing is saved for this clip.
            landmarks_3d = frame_landmarks_flat.reshape((478, 3))

            # Convert to 2D pixel coordinates for 2D calculations
            landmarks_2d = get_2d_points(landmarks_3d, VIDEO_WIDTH, VIDEO_HEIGHT)

            # 1. & 2. Eye Aspect Ratios
            left_ear = get_ear(landmarks_2d[LEFT_EYE_LANDMARKS])
            right_ear = get_ear(landmarks_2d[RIGHT_EYE_LANDMARKS])

            # 3, 4, & 5. Head Pose
            yaw, pitch, roll = get_head_pose(landmarks_3d, VIDEO_WIDTH, VIDEO_HEIGHT)

            # 6. & 7. Left Iris Position
            left_iris_center = landmarks_2d[LEFT_IRIS_LANDMARKS].mean(axis=0)
            rel_left_iris_x, rel_left_iris_y = get_relative_iris_pos(
                landmarks_2d[LEFT_EYE_LANDMARKS], left_iris_center
            )

            # 8. & 9. Right Iris Position
            right_iris_center = landmarks_2d[RIGHT_IRIS_LANDMARKS].mean(axis=0)
            rel_right_iris_x, rel_right_iris_y = get_relative_iris_pos(
                landmarks_2d[RIGHT_EYE_LANDMARKS], right_iris_center
            )

            # 10. Mouth Aspect Ratio
            mar = get_mar(landmarks_2d[MOUTH_LANDMARKS])

            # Append all 10 features for this frame
            # Column order of the saved array is fixed by this list.
            frame_features = [
                left_ear, right_ear,
                yaw, pitch, roll,
                rel_left_iris_x, rel_left_iris_y,
                rel_right_iris_x, rel_right_iris_y,
                mar
            ]
            engineered_feature_sequence.append(frame_features)

        # Save the new, lightweight (num_frames, 10) array
        engineered_array = np.array(engineered_feature_sequence, dtype=np.float32)

        base_name = os.path.splitext(os.path.basename(file_path))[0]
        output_path = os.path.join(ENGINEERED_FEATURES_DIR, f"{base_name}.npy")
        np.save(output_path, engineered_array)

    except Exception as e:
        # Best effort: report the failing file and continue with the next clip.
        print(f"Failed to process {file_path}. Error: {e}")

print(f"\nFeature engineering complete. All lightweight .npy files saved to {ENGINEERED_FEATURES_DIR}")

In [None]:
import shutil

# Directory whose contents go into the archive
folder_path = '/kaggle/working/path/to/engineered_features_merged'

# Archive base name; the archive extension is not part of this value
output_zip = '/kaggle/working/engineered_features_merged'

# Build the zip, naming each argument explicitly
shutil.make_archive(base_name=output_zip, format='zip', root_dir=folder_path)

print("‚úÖ Folder zipped successfully!")


# **Extract 16 features** (10 face features and 6 hand features)

In [None]:
import numpy as np
import cv2
import os
import glob
from tqdm import tqdm
import math

# --- Configuration ---
# !!! YOU MUST SET THESE !!!
# Frame size in pixels. Used to scale face landmark x/y values and to build
# the camera matrix inside get_head_pose.
VIDEO_WIDTH = 1920  # e.g., 1920 (for 1080p)
VIDEO_HEIGHT = 1080 # e.g., 1080 (for 1080p)

# --- Directories ---
RAW_HOLISTIC_DIR = '/kaggle/input/prelabeling-ocat/part_1'   # Folder with (90, 1659) files
ENGINEERED_FEATURES_DIR = '/kaggle/working/engineered_features2' # Where to save (90, 16) files

# --- MediaPipe Landmark Indices (HOLISTIC MODEL) ---
# These indices are based on a file structure from mp.solutions.holistic
# We will assume a flat array structure like:
# [ 478*3 face, 33*3 pose, 21*3 left_hand, 21*3 right_hand ]
# Total = 1434 (face) + 99 (pose) + 63 (left) + 63 (right) = 1659 features
# NOTE(review): this layout is an assumption about how the .npy files were
# written — confirm against the extraction script.

# --- Face (Indices 0 - 1433) ---
FACE_START_IDX = 0
FACE_LANDMARKS_COUNT = 478
LEFT_EYE_LANDMARKS = [362, 385, 387, 263, 373, 380]
RIGHT_EYE_LANDMARKS = [33, 160, 158, 133, 153, 144]
LEFT_IRIS_LANDMARKS = [474, 475, 476, 477]
RIGHT_IRIS_LANDMARKS = [469, 470, 471, 472]
MOUTH_LANDMARKS = [61, 291, 13, 14]
HEAD_POSE_LANDMARKS = [33, 263, 1, 61, 291, 199]
NOSE_TIP_LANDMARK = 1

# --- Pose (Indices 1434 - 1532) ---
# POSE_START_IDX also serves as the end of the face slice in the main loop.
POSE_START_IDX = 1434
POSE_LANDMARKS_COUNT = 33
LEFT_WRIST_POSE = 15 # Index within pose landmarks
RIGHT_WRIST_POSE = 16 # Index within pose landmarks

# --- Left Hand (Indices 1533 - 1595) ---
# LEFT_HAND_START_IDX also serves as the end of the pose slice in the main loop.
LEFT_HAND_START_IDX = 1533
LEFT_HAND_LANDMARKS_COUNT = 21

# --- Right Hand (Indices 1596 - 1658) ---
RIGHT_HAND_START_IDX = 1596
RIGHT_HAND_LANDMARKS_COUNT = 21

# Width of the zero row written for frames without data; must match the length
# of frame_features in the main loop.
NUM_ENGINEERED_FEATURES = 16 # Our 10 face + 6 hand features

# 3D Canonical Face Model Points
# Row i is paired with HEAD_POSE_LANDMARKS[i] by cv2.solvePnP, so both lists
# must stay in the same order.
CANONICAL_FACE_MODEL = np.array(
    [
        [ 0.033501,  0.068864, -0.052668], # 33
        [-0.033501,  0.068864, -0.052668], # 263
        [ 0.000000,  0.000000, -0.000000], # 1
        [ 0.046330, -0.045969, -0.032640], # 61
        [-0.046330, -0.045969, -0.032640], # 291
        [ 0.000000, -0.104473, -0.009363]  # 199
    ], dtype=np.float32
) * 100

# --- Helper Functions (Same as before) ---

def get_2d_points(landmarks_3d, width, height):
    """Scale landmark x by width and y by height; any further columns are dropped."""
    xy_only = landmarks_3d[:, :2]
    dims = [width, height]
    return xy_only * dims

def get_ear(eye_points):
    """Eye Aspect Ratio from six eye points (index 0/3 = corners).

    Returns 0.0 when the eye width is zero or the points cannot be processed.
    """
    try:
        v1 = np.linalg.norm(eye_points[1] - eye_points[5])
        v2 = np.linalg.norm(eye_points[2] - eye_points[4])
        h = np.linalg.norm(eye_points[0] - eye_points[3])
        if h == 0:
            return 0.0
        return (v1 + v2) / (2.0 * h)
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt / SystemExit.
        return 0.0

def get_mar(mouth_points):
    """Mouth Aspect Ratio: |p2 - p3| / |p0 - p1| from four mouth points.

    Returns 0.0 when the horizontal distance is zero or the points cannot be
    processed.
    """
    try:
        horizontal_dist = np.linalg.norm(mouth_points[0] - mouth_points[1])
        vertical_dist = np.linalg.norm(mouth_points[2] - mouth_points[3])
        if horizontal_dist == 0:
            return 0.0
        return vertical_dist / horizontal_dist
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt / SystemExit.
        return 0.0

def get_relative_iris_pos(eye_points, iris_center):
    """Iris offset from the mean of the eye points, divided by the eye width.

    Both the x and y offsets are normalized by the distance between index 0
    and index 3. Returns (0.0, 0.0) when that distance is zero or the inputs
    cannot be processed.
    """
    try:
        eye_center = eye_points.mean(axis=0)
        relative_pos = iris_center - eye_center
        eye_width = np.linalg.norm(eye_points[0] - eye_points[3])
        if eye_width == 0:
            return 0.0, 0.0
        return relative_pos[0] / eye_width, relative_pos[1] / eye_width
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt / SystemExit.
        return 0.0, 0.0

def get_head_pose(landmarks_3d, width, height):
    """Estimate (yaw, pitch, roll) in degrees from face landmarks via cv2.solvePnP.

    The HEAD_POSE_LANDMARKS are scaled to pixels and matched against
    CANONICAL_FACE_MODEL using a pinhole camera (focal length = width,
    principal point = frame center, no distortion).

    Returns:
        (yaw_deg, pitch_deg, roll_deg), or (0.0, 0.0, 0.0) when solvePnP
        reports failure or raises.
    """
    image_points = landmarks_3d[HEAD_POSE_LANDMARKS, :2] * [width, height]
    model_points = CANONICAL_FACE_MODEL

    camera_matrix = np.array(
        [[width, 0, width / 2],
         [0, width, height / 2],
         [0, 0, 1]], dtype="double"
    )
    dist_coeffs = np.zeros((4, 1))

    try:
        success, rotation_vector, _t_vec = cv2.solvePnP(
            model_points, image_points, camera_matrix, dist_coeffs, flags=cv2.SOLVEPNP_ITERATIVE
        )
        # solvePnP signals a failed fit through its first return value; do not
        # derive angles from an unreliable rotation vector.
        if not success:
            return 0.0, 0.0, 0.0

        rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
        sy = math.sqrt(rotation_matrix[0, 0]**2 + rotation_matrix[1, 0]**2)
        singular = sy < 1e-6
        # NOTE(review): "yaw" is the Z Euler angle of the decomposition; in
        # OpenCV's camera frame that may correspond to in-plane head tilt
        # rather than left/right turning — confirm the axis naming.
        if not singular:
            roll = math.atan2(rotation_matrix[2, 1], rotation_matrix[2, 2])
            pitch = math.atan2(-rotation_matrix[2, 0], sy)
            yaw = math.atan2(rotation_matrix[1, 0], rotation_matrix[0, 0])
        else:
            # Gimbal-lock case: the Z angle is not observable, use 0
            roll = math.atan2(-rotation_matrix[1, 2], rotation_matrix[1, 1])
            pitch = math.atan2(-rotation_matrix[2, 0], sy)
            yaw = 0
        yaw_deg = -yaw * (180.0 / math.pi)
        pitch_deg = pitch * (180.0 / math.pi)
        roll_deg = roll * (180.0 / math.pi)
        return yaw_deg, pitch_deg, roll_deg
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt / SystemExit.
        return 0.0, 0.0, 0.0

def get_dist(p1, p2):
    """Return the Euclidean length of the difference between two points."""
    offset = p1 - p2
    return np.linalg.norm(offset)

# --- Main Processing Loop ---

# Convert every raw holistic clip into a (num_frames, 16) feature array
# (10 face features + 4 wrist coordinates + 2 wrist-to-nose distances) and save
# it under the same base name in ENGINEERED_FEATURES_DIR.
os.makedirs(ENGINEERED_FEATURES_DIR, exist_ok=True)

raw_npy_files = glob.glob(os.path.join(RAW_HOLISTIC_DIR, '*.npy'))
print(f"Found {len(raw_npy_files)} raw HOLISTIC feature files. Starting engineering...")

for file_path in tqdm(raw_npy_files, desc="Engineering features"):
    try:
        raw_clip_data = np.load(file_path) # Shape (num_frames, 1659)
        num_frames = raw_clip_data.shape[0]

        # Empty clips produce no output file.
        if num_frames == 0:
            continue

        engineered_feature_sequence = []

        for frame_idx in range(num_frames):
            frame_landmarks_flat = raw_clip_data[frame_idx]

            # If no data, append zeros
            if np.all(frame_landmarks_flat == 0):
                engineered_feature_sequence.append(np.zeros(NUM_ENGINEERED_FEATURES))
                continue

            # --- Extract Landmark Groups ---
            # Each slice ends where the next group starts.
            face_flat = frame_landmarks_flat[FACE_START_IDX : POSE_START_IDX]
            pose_flat = frame_landmarks_flat[POSE_START_IDX : LEFT_HAND_START_IDX]
            # left_hand_flat = frame_landmarks_flat[LEFT_HAND_START_IDX : RIGHT_HAND_START_IDX]
            # right_hand_flat = frame_landmarks_flat[RIGHT_HAND_START_IDX:]

            # --- Reshape ---
            # A row shorter than expected raises here; the outer except then
            # reports the file and nothing is saved for this clip.
            landmarks_3d_face = face_flat.reshape((FACE_LANDMARKS_COUNT, 3))
            landmarks_3d_pose = pose_flat.reshape((POSE_LANDMARKS_COUNT, 3))

            # Check for empty face
            # Frames with pose data but no face still get an all-zero row,
            # including the wrist columns.
            if np.all(landmarks_3d_face == 0):
                engineered_feature_sequence.append(np.zeros(NUM_ENGINEERED_FEATURES))
                continue

            landmarks_2d_face = get_2d_points(landmarks_3d_face, VIDEO_WIDTH, VIDEO_HEIGHT)

            # 1. & 2. Eye Aspect Ratios
            left_ear = get_ear(landmarks_2d_face[LEFT_EYE_LANDMARKS])
            right_ear = get_ear(landmarks_2d_face[RIGHT_EYE_LANDMARKS])

            # 3, 4, & 5. Head Pose
            yaw, pitch, roll = get_head_pose(landmarks_3d_face, VIDEO_WIDTH, VIDEO_HEIGHT)

            # 6. & 7. Left Iris Position
            left_iris_center = landmarks_2d_face[LEFT_IRIS_LANDMARKS].mean(axis=0)
            rel_left_iris_x, rel_left_iris_y = get_relative_iris_pos(
                landmarks_2d_face[LEFT_EYE_LANDMARKS], left_iris_center
            )

            # 8. & 9. Right Iris Position
            right_iris_center = landmarks_2d_face[RIGHT_IRIS_LANDMARKS].mean(axis=0)
            rel_right_iris_x, rel_right_iris_y = get_relative_iris_pos(
                landmarks_2d_face[RIGHT_EYE_LANDMARKS], right_iris_center
            )

            # 10. Mouth Aspect Ratio
            mar = get_mar(landmarks_2d_face[MOUTH_LANDMARKS])

            # --- NEW HAND/POSE FEATURES ---
            # 11. & 12. Left Wrist Position (x, y)
            # Using pose landmarks, which are more stable for wrists
            # The wrist values are stored as read from the file, without pixel scaling.
            l_wrist_pos = landmarks_3d_pose[LEFT_WRIST_POSE]

            # 13. & 14. Right Wrist Position (x, y)
            r_wrist_pos = landmarks_3d_pose[RIGHT_WRIST_POSE]

            # 15. & 16. Hand-to-Face Distance
            # Get 3D position of nose tip
            nose_pos = landmarks_3d_face[NOSE_TIP_LANDMARK]

            # Calculate 3D Euclidean distance
            # Use 0 as a placeholder if hands aren't detected (visibility/presence < 0.5)
            # Note: Pose landmarks also have a visibility score, which we are ignoring here
            # for simplicity, but a real implementation should check it.
            # The actual check below is "any wrist coordinate is non-zero".
            # NOTE(review): the distance combines a pose landmark with a face
            # landmark; confirm both groups use the same coordinate scale
            # (especially z) before relying on these two features.

            l_hand_dist = get_dist(l_wrist_pos, nose_pos) if np.any(l_wrist_pos != 0) else 0.0
            r_hand_dist = get_dist(r_wrist_pos, nose_pos) if np.any(r_wrist_pos != 0) else 0.0

            # Append all 16 features
            # Column order of the saved array is fixed by this list.
            frame_features = [
                left_ear, right_ear,
                yaw, pitch, roll,
                rel_left_iris_x, rel_left_iris_y,
                rel_right_iris_x, rel_right_iris_y,
                mar,
                l_wrist_pos[0], l_wrist_pos[1], # l_wrist_x, l_wrist_y
                r_wrist_pos[0], r_wrist_pos[1], # r_wrist_x, r_wrist_y
                l_hand_dist, r_hand_dist
            ]
            engineered_feature_sequence.append(frame_features)

        # Save the new, lightweight (num_frames, 16) array
        engineered_array = np.array(engineered_feature_sequence, dtype=np.float32)

        base_name = os.path.splitext(os.path.basename(file_path))[0]
        output_path = os.path.join(ENGINEERED_FEATURES_DIR, f"{base_name}.npy")
        np.save(output_path, engineered_array)

    except Exception as e:
        # Best effort: report the failing file and continue with the next clip.
        print(f"Failed to process {file_path}. Error: {e}")

print(f"\nFeature engineering complete. All lightweight .npy files saved to {ENGINEERED_FEATURES_DIR}")

In [None]:
import pandas as pd
import numpy as np

# --- Configuration ---
INPUT_CSV = '/kaggle/input/labeled-ocat/grouped_output.csv'  # The CSV file from Step 2
OUTPUT_FILE = '/kaggle/working/seed_batch_for_labeling.txt' # File listing the clips to label
NUM_CLIPS_PER_CATEGORY = 200

# --- Load the Data ---
try:
    df = pd.read_csv(INPUT_CSV)
except FileNotFoundError as err:
    # Raise instead of calling exit(): the cell stops with a readable error and
    # the notebook session is left intact for fixing the path and re-running.
    raise FileNotFoundError(
        f"Error: Could not find the file {INPUT_CSV}. "
        "Please make sure you have run the pre-labeling script first."
    ) from err

# --- 1. Find Confident 'PRE_Distracted' Clips ---

# Filter for the clips labeled as 'PRE_Distracted'
df_distracted = df[df['draft_label'] == 'PRE_Distracted'].copy()

# Create a "confidence score"
# This score measures *how far* the values are past the rule thresholds
# Rule: (abs(yaw) > 25) OR (eyes_closed > 40)
# We use abs() for yaw to handle both left and right turns
df_distracted['yaw_score'] = (df_distracted['avg_head_yaw'].abs() - 25).clip(lower=0)
df_distracted['blink_score'] = (df_distracted['percent_eyes_closed'] - 40).clip(lower=0)
# The final confidence is the max of these two scores
# (the two scores are in different units: degrees vs. percentage points)
df_distracted['confidence'] = df_distracted[['yaw_score', 'blink_score']].max(axis=1)

# Sort by the highest confidence and get the top N
top_distracted = df_distracted.sort_values(by='confidence', ascending=False).head(NUM_CLIPS_PER_CATEGORY)


# --- 2. Find Confident 'PRE_Attentive' Clips ---

# Filter for the clips labeled as 'PRE_Attentive'
df_attentive = df[df['draft_label'] == 'PRE_Attentive'].copy()

# Create a "confidence score"
# This score measures *how far* the values are *within* the rule thresholds
# Rule: (abs(yaw) < 10) AND (gaze_time < 20)
df_attentive['yaw_score'] = (10 - df_attentive['avg_head_yaw'].abs()).clip(lower=0)
df_attentive['gaze_score'] = (20 - df_attentive['gaze_off_center_time']).clip(lower=0)
# The final confidence is the *sum* of these scores (must be good at both)
df_attentive['confidence'] = df_attentive['yaw_score'] + df_attentive['gaze_score']

# Sort by the highest confidence and get the top N
top_attentive = df_attentive.sort_values(by='confidence', ascending=False).head(NUM_CLIPS_PER_CATEGORY)


# --- 3. Combine and Save the Seed Batch ---

# Combine the clip names from both dataframes (distracted first, then attentive)
seed_batch_clips = pd.concat([top_distracted['clip_name'], top_attentive['clip_name']])

# Save the list of clip names to a text file, one name per line
# This is the "work list" for your annotators
try:
    with open(OUTPUT_FILE, 'w') as f:
        for clip_name in seed_batch_clips:
            f.write(f"{clip_name}\n")

    print("Successfully selected seed batch:")
    print(f"  {len(top_distracted)} 'PRE_Distracted' clips")
    print(f"  {len(top_attentive)} 'PRE_Attentive' clips")
    print("  ---------------------------------")
    print(f"  Total: {len(seed_batch_clips)} clips")
    print(f"\nThis list has been saved to: {OUTPUT_FILE}")
    print("Your annotators should now correct the labels for these clips.")

except OSError as e:
    # Only file-system failures are expected here; anything else should surface.
    print(f"An error occurred while saving the file: {e}")

In [None]:
import filecmp
import os
import shutil

# List of source folders
folders = [
    "/kaggle/input/labeled-ocat/engineered_features_zip",
    "/kaggle/input/labeled-ocat/engineered_features_zip1",
    "/kaggle/input/labeled-ocat/engineered_features_zip2",
    "/kaggle/input/labeled-ocat/engineered_features_zip3",
    "/kaggle/input/labeled-ocat/engineered_features_zip4"
]

# Destination folder (relative to the current working directory)
destination = "path/to/engineered_features_merged"

# Create destination folder if it doesn't exist
os.makedirs(destination, exist_ok=True)

# Copy all files from each folder to destination
for folder in folders:
    for file_name in os.listdir(folder):
        src_path = os.path.join(folder, file_name)

        # shutil.copy2 cannot copy directories; only merge regular files
        if not os.path.isfile(src_path):
            continue

        name, ext = os.path.splitext(file_name)
        dst_path = os.path.join(destination, file_name)
        counter = 1
        already_merged = False

        # Walk the candidate names (file, file_1, file_2, ...) until a free one
        # is found. If a candidate already holds byte-identical content the file
        # was merged before, so it is skipped; this keeps a re-run of the cell
        # from duplicating every file under a numbered name.
        while os.path.exists(dst_path):
            if filecmp.cmp(src_path, dst_path, shallow=False):
                already_merged = True
                break
            # Avoid overwriting by renaming files whose content differs
            dst_path = os.path.join(destination, f"{name}_{counter}{ext}")
            counter += 1

        if not already_merged:
            shutil.copy2(src_path, dst_path)

print("‚úÖ All files merged into:", destination)


In [None]:
import pandas as pd

# Load the grouped draft labels
df = pd.read_csv("/kaggle/input/labeled-ocat/grouped_output.csv")

# "PRE_*" draft labels and the class names that replace them;
# any value not listed here is left as it is.
label_renames = {
    "PRE_Attentive": "Attentive",
    "PRE_Neutral": "Neutral",
    "PRE_Distracted": "Distracted"
}
df["draft_label"] = df["draft_label"].replace(label_renames)

# Write the relabeled table to the working directory (row index is not written)
df.to_csv("/kaggle/working/updated_output.csv", index=False)

print("‚úÖ Values in 'draft_label' column updated and saved as 'updated_output.csv'")


In [None]:
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.preprocessing.sequence import pad_sequences

# --- Configuration ---
LABELS_FILE = '/kaggle/input/prelabes/updated_output.csv'         # Your human-verified labels
ENGINEERED_FEATURES_DIR = '/kaggle/input/engineered-features' # The folder with (90, 10) .npy files

# Model Hyperparameters
NUM_EPOCHS = 100
BATCH_SIZE = 32
MODEL_SAVE_PATH = 'model_v0.1.h5'

# --- 1. Load Data ---
print("Loading data...")
try:
    df = pd.read_csv(LABELS_FILE)
except FileNotFoundError as err:
    # Raise instead of calling exit(): in a notebook exit() shuts the kernel down
    # and discards every variable, while an exception only stops this cell.
    raise FileNotFoundError(
        f"Could not find the labels file {LABELS_FILE}. "
        "Please make sure the corrected labels CSV from the labeling tool exists at that path."
    ) from err

# The set of valid class names.
# NOTE: the preprocessing step encodes labels with sklearn's LabelEncoder, which sorts
# its classes, so the integer codes used for training are alphabetical and do not
# follow the numbers in this dictionary.
label_mapping = {'Attentive': 0, 'Neutral': 1, 'Distracted': 2}
num_classes = len(label_mapping)
class_names = [name for name, _ in sorted(label_mapping.items(), key=lambda item: item[1])]

X = []
y = []

for clip_name, label in zip(df['clip_name'], df['draft_label']):
    # Feature files are stored as "<clip_name>.npy" inside ENGINEERED_FEATURES_DIR
    feature_file_path = os.path.join(ENGINEERED_FEATURES_DIR, f"{clip_name}.npy")

    if os.path.exists(feature_file_path):
        X.append(np.load(feature_file_path))
        y.append(label)
    else:
        print(f"Warning: Could not find feature file for clip: {clip_name}. Skipping.")

if not X:
    raise RuntimeError(
        "No feature files were loaded. Please check the ENGINEERED_FEATURES_DIR path."
    )

# Fail early, with the offending values, on labels the encoder would reject later
unknown_labels = sorted({str(label) for label in y if label not in label_mapping})
if unknown_labels:
    raise ValueError(
        f"Unexpected values in 'draft_label': {unknown_labels}. "
        f"Expected one of {class_names}."
    )

# Pad/truncate sequences to ensure they all have the same length
# This is necessary because video clips might have slightly different frame counts (e.g., 89, 90, 91)
# We will fix the length to 90 frames (for a 3-second, 30fps clip)
FIXED_SEQUENCE_LENGTH = 90
X = pad_sequences(
    X, maxlen=FIXED_SEQUENCE_LENGTH, dtype='float32', padding='post', truncating='post'
)

# pad_sequences already returns a numpy array; only the labels still need converting
y = np.array(y)

# --- 2. Preprocess Data ---
print("Preprocessing data...")
# Turn the string labels into integer codes.
# NOTE: LabelEncoder stores its classes sorted, so the codes are alphabetical
# (Attentive=0, Distracted=1, Neutral=2) regardless of the order of class_names.
# Fitting on class_names only guarantees that every class is known to the encoder.
encoder = LabelEncoder().fit(class_names)
y_encoded = encoder.transform(y)

# One-hot encode the integer codes
y_one_hot = to_categorical(y_encoded, num_classes=num_classes)

# Hold out 20% of the clips for validation, keeping class proportions (fixed seed)
split_options = dict(test_size=0.2, random_state=42, stratify=y_one_hot)
X_train, X_val, y_train, y_val = train_test_split(X, y_one_hot, **split_options)

# Per-sample input shape for the model: (num_timesteps, num_features), e.g., (90, 10)
num_timesteps, num_features = X_train.shape[1], X_train.shape[2]
input_shape = (num_timesteps, num_features)

print(f"Data shapes:")
print(f"  X_train: {X_train.shape}")
print(f"  y_train: {y_train.shape}")
print(f"  X_val:   {X_val.shape}")
print(f"  y_val:   {y_val.shape}")

# --- 3. Define the LSTM Model ---
print("Building the LSTM model...")
# Two stacked LSTM layers (the first returns the full sequence so the second can
# consume it), each followed by 50% dropout, then a small dense head.
layer_stack = [
    Input(shape=input_shape),
    LSTM(64, return_sequences=True),
    Dropout(0.5),
    LSTM(32),
    Dropout(0.5),
    Dense(16, activation='relu'),
    Dense(num_classes, activation='softmax'), # one probability per class
]
model = Sequential(layer_stack)

model.summary()

# --- 4. Compile and Train the Model ---
print("Compiling and training the model...")
# categorical_crossentropy matches the one-hot encoded labels
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Stop once val_loss has not improved for 15 epochs and restore the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
# Write the model to MODEL_SAVE_PATH whenever val_accuracy reaches a new best
model_checkpoint = ModelCheckpoint(MODEL_SAVE_PATH, monitor='val_accuracy', save_best_only=True)
training_callbacks = [early_stopping, model_checkpoint]

history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=NUM_EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=training_callbacks,
)

# --- 5. Final Evaluation ---
print("\nTraining complete.")
# The in-memory model holds the weights chosen by EarlyStopping (monitoring val_loss),
# while the file at MODEL_SAVE_PATH is the epoch with the best val_accuracy. These can
# be different epochs, so both figures are reported under accurate names.
loss, accuracy = model.evaluate(X_val, y_val, verbose=0)
best_val_accuracy = max(history.history['val_accuracy'])
print(f"Validation accuracy of the in-memory model: {accuracy*100:.2f}% (loss: {loss:.4f})")
print(f"Best validation accuracy: {best_val_accuracy*100:.2f}%")
print(f"Model saved to {MODEL_SAVE_PATH}")

In [None]:
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import regularizers

# --- Configuration ---
LABELS_FILE = '/kaggle/working/updated_output2.csv'       # Your human-verified labels
ENGINEERED_FEATURES_DIR = '/kaggle/input/engineered-features' # The folder with (90, 10) .npy files
FIXED_SEQUENCE_LENGTH = 90
NEXT_BATCH_SIZE = 300 # Number of clips to select for next round
NEXT_BATCH_FILE = '/kaggle/working/seed_labels2.csv'

# Model Hyperparameters
NUM_EPOCHS = 100
BATCH_SIZE = 32
MODEL_SAVE_PATH = '/kaggle/working/model_v0.1.h5' # Save to working dir

# The set of valid class names.
# NOTE: the preprocessing step encodes labels with sklearn's LabelEncoder, which sorts
# its classes, so the integer codes used for training are alphabetical and do not
# follow the numbers in this dictionary.
label_mapping = {'Attentive': 0, 'Neutral': 1, 'Distracted': 2}
num_classes = len(label_mapping)
class_names = [name for name, _ in sorted(label_mapping.items(), key=lambda item: item[1])]

# --- 1. Load Data ---
print("Loading data...")
try:
    df = pd.read_csv(LABELS_FILE)
except FileNotFoundError as err:
    # Raise instead of calling exit(): in a notebook exit() shuts the kernel down
    # and discards every variable, while an exception only stops this cell.
    raise FileNotFoundError(
        f"Could not find the labels file {LABELS_FILE}. "
        "Please make sure the corrected labels CSV from the labeling tool exists at that path."
    ) from err

X = []
y = []

for clip_name, label in zip(df['clip_name'], df['draft_label']):
    # Feature files are stored as "<clip_name>.npy" inside ENGINEERED_FEATURES_DIR
    feature_file_path = os.path.join(ENGINEERED_FEATURES_DIR, f"{clip_name}.npy")

    if os.path.exists(feature_file_path):
        X.append(np.load(feature_file_path))
        y.append(label)
    else:
        print(f"Warning: Could not find feature file for clip: {clip_name}. Skipping.")

if not X:
    raise RuntimeError(
        "No feature files were loaded. Please check the ENGINEERED_FEATURES_DIR path."
    )

# Fail early, with the offending values, on labels the encoder would reject later
unknown_labels = sorted({str(label) for label in y if label not in label_mapping})
if unknown_labels:
    raise ValueError(
        f"Unexpected values in 'draft_label': {unknown_labels}. "
        f"Expected one of {class_names}."
    )

# Pad or truncate every clip to FIXED_SEQUENCE_LENGTH frames so they stack into one array
X = pad_sequences(
    X, maxlen=FIXED_SEQUENCE_LENGTH, dtype='float32', padding='post', truncating='post'
)
y = np.array(y)

# --- 2. Preprocess Data ---
print("Preprocessing data...")
# Turn the string labels into integer codes.
# NOTE: LabelEncoder stores its classes sorted, so the codes are alphabetical
# (Attentive=0, Distracted=1, Neutral=2) regardless of the order of class_names.
# Fitting on class_names only guarantees that every class is known to the encoder.
encoder = LabelEncoder().fit(class_names)
y_encoded = encoder.transform(y)

# One-hot encode the integer codes
y_one_hot = to_categorical(y_encoded, num_classes=num_classes)

# Hold out 20% of the clips for validation, keeping class proportions (fixed seed)
split_options = dict(test_size=0.2, random_state=42, stratify=y_one_hot)
X_train, X_val, y_train, y_val = train_test_split(X, y_one_hot, **split_options)

# Per-sample input shape for the model: (num_timesteps, num_features)
num_timesteps, num_features = X_train.shape[1], X_train.shape[2]
input_shape = (num_timesteps, num_features)

print(f"Data shapes:")
print(f"  X_train: {X_train.shape}")
print(f"  y_train: {y_train.shape}")
print(f"  X_val:   {X_val.shape}")
print(f"  y_val:   {y_val.shape}")

# --- 3. Define the LSTM Model ---
print("Building the LSTM model...")
# Two stacked LSTM layers (the first returns the full sequence so the second can
# consume it), each followed by 50% dropout, then a small dense head.
layer_stack = [
    Input(shape=input_shape),
    LSTM(64, return_sequences=True),
    Dropout(0.5),
    LSTM(32),
    Dropout(0.5),
    Dense(16, activation='relu'),
    Dense(num_classes, activation='softmax'), # one probability per class
]
model = Sequential(layer_stack)

model.summary()

# --- 4. Compile and Train the Model ---
print("Compiling and training the model...")
# categorical_crossentropy matches the one-hot encoded labels
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Stop once val_loss has not improved for 15 epochs and restore the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
# Write the model to MODEL_SAVE_PATH whenever val_accuracy reaches a new best
model_checkpoint = ModelCheckpoint(MODEL_SAVE_PATH, monitor='val_accuracy', save_best_only=True)
training_callbacks = [early_stopping, model_checkpoint]

history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=NUM_EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=training_callbacks,
)

# --- 5. Final Evaluation ---
print("\nTraining complete.")
# The in-memory model holds the weights chosen by EarlyStopping (monitoring val_loss),
# while the file at MODEL_SAVE_PATH is the epoch with the best val_accuracy. These can
# be different epochs, so both figures are reported under accurate names.
loss, accuracy = model.evaluate(X_val, y_val, verbose=0)
best_val_accuracy = max(history.history['val_accuracy'])
print(f"Validation accuracy of the in-memory model: {accuracy*100:.2f}% (loss: {loss:.4f})")
print(f"Best validation accuracy: {best_val_accuracy*100:.2f}%")
print(f"Model saved to {MODEL_SAVE_PATH}")

# --- 6. Active Learning: Find Next Batch ---
print("\n--- Starting Active Learning ---")
print(f"Loading best model from {MODEL_SAVE_PATH} to find disagreements...")

# Load the checkpoint written during training.
# Raise instead of calling exit(): in a notebook exit() shuts the kernel down.
try:
    model = load_model(MODEL_SAVE_PATH)
except Exception as e:
    raise RuntimeError(f"Error loading model: {e}") from e

# Predictions are made on ALL clips listed in the labels file, not just one split,
# so the features are loaded again together with the clip names and draft labels.
X_all = []
y_draft_labels_all = []
all_clip_names = []

print(f"Loading all {len(df)} clips from {LABELS_FILE} for prediction...")
for clip_name, draft_label in zip(df['clip_name'], df['draft_label']):
    feature_file_path = os.path.join(ENGINEERED_FEATURES_DIR, f"{clip_name}.npy")

    if os.path.exists(feature_file_path):
        X_all.append(np.load(feature_file_path))
        y_draft_labels_all.append(draft_label)
        all_clip_names.append(clip_name)
    # Missing feature files were already reported while loading the training data

if not X_all:
    raise RuntimeError("No data loaded for prediction.")

# Pad all sequences identically to how training data was padded
X_all = pad_sequences(
    X_all, maxlen=FIXED_SEQUENCE_LENGTH, dtype='float32', padding='post', truncating='post'
)

# Make predictions on all data
print(f"Making predictions on {len(X_all)} clips...")
y_pred_probs = model.predict(X_all) # one row of class probabilities per clip
y_pred_class_indices = np.argmax(y_pred_probs, axis=1) # index of the most probable class
y_pred_confidence = np.max(y_pred_probs, axis=1) # probability of that class

# Encode the draft labels with the same encoder so they are comparable to the predictions
y_draft_encoded = encoder.transform(y_draft_labels_all)

# Uncertainty as Shannon entropy: H = -sum(p_i * log2(p_i))
# High entropy = high uncertainty (e.g., [0.33, 0.33, 0.33])
# Low entropy = high certainty (e.g., [0.98, 0.01, 0.01])
# The small epsilon (1e-9) avoids log(0)
uncertainty = -np.sum(y_pred_probs * np.log2(y_pred_probs + 1e-9), axis=1)

# Disagreement between the model and the draft label (Criterion 1)
is_disagreement = (y_pred_class_indices != y_draft_encoded)
# Score: 0 if agreement, prediction confidence if disagreement
disagreement_score = is_disagreement * y_pred_confidence

# Collect everything per clip
results_df = pd.DataFrame({
    'clip_name': all_clip_names,
    'draft_label': y_draft_labels_all,
    'predicted_label': encoder.inverse_transform(y_pred_class_indices),
    'confidence': y_pred_confidence,
    'uncertainty_entropy': uncertainty,
    'is_disagreement': is_disagreement,
    'disagreement_score': disagreement_score
})

# Composite ranking score: clips that are uncertain and/or confidently disagree rank highest
results_df['final_score'] = results_df['uncertainty_entropy'] + results_df['disagreement_score']

# Sort by the final score (highest first)
results_df = results_df.sort_values(by='final_score', ascending=False)

# Select the top N clips for the next batch (fewer if less than N clips were scored)
next_batch_df = results_df.head(NEXT_BATCH_SIZE)

# Save the batch for the labeling tool (clip_name and its original draft_label)
output_df = next_batch_df[['clip_name', 'draft_label']]
output_df.to_csv(NEXT_BATCH_FILE, index=False)

print(f"\nActive learning batch created!")
# Report the number of clips actually written, which can be below NEXT_BATCH_SIZE
print(f"Top {len(next_batch_df)} clips saved to: {NEXT_BATCH_FILE}")
print("\n--- Top 5 most valuable clips for relabeling ---")
print(results_df.head(5))

In [None]:
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
from base64 import b64encode
import os
import pandas as pd

# --- 1. Display Function ---
def display_video(path):
    """Build an inline HTML5 video player for the MP4 file at ``path``.

    The file is read fully and embedded as a base64 ``data:`` URL.

    Args:
        path: location of the video file.

    Returns:
        An ``HTML`` object holding the ``<video>`` element, or an ``HTML``
        object holding an error paragraph if anything goes wrong (no exception
        is raised).
    """
    try:
        # Context manager so the file handle is closed as soon as the bytes are read
        with open(path, "rb") as video_file:
            mp4 = video_file.read()
        data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
        return HTML(f"""
        <video width=400 controls>
              <source src="{data_url}" type="video/mp4">
        </video>
        """)
    except Exception as e:
        return HTML(f"<p>Error loading video: {e}</p>")

# --- 2. Load Clip List from seed_labels1.csv ---

# CSV listing the clips to label in this round
labels_csv_path = "/kaggle/working/seed_labels1.csv" # <-- Using the new file

clip_data = [] # (clip_name, draft_label) pairs
try:
    labels_df = pd.read_csv(labels_csv_path)

    # The CSV is expected to provide a 'clip_name' column (file name)
    # and a 'draft_label' column (label to be reviewed).
    clip_data = list(zip(labels_df['clip_name'], labels_df['draft_label']))

    print(f"Loaded {len(clip_data)} clips to label from {labels_csv_path}")

except FileNotFoundError:
    print(f"Error: Could not find file {labels_csv_path}.")
    print("Please make sure 'seed_labels1.csv' is in that location.")
    clip_data = [("dummy.mp4", "Unknown")] # placeholder entry so the code below still runs
except Exception as e:
    print(f"Error reading main labels file {labels_csv_path}: {e}")
    if not clip_data:
        clip_data = [("dummy.mp4", "Unknown")]


# Folders searched, in this order, for each clip file
clip_dirs = [
    "/kaggle/input/ocat-clips/ocat clips1/ocat clips1",
    "/kaggle/input/ocat-clips/ocat clips2/ocat clips2"
]

# Parallel lists that become the columns of the annotation DataFrame
video_paths = []
draft_labels = []
clip_names_list = []

for clip, label in clip_data:
    # Clip names may come without the extension; the files on disk are .mp4
    if not clip.endswith(".mp4"):
        clip += ".mp4"

    # First folder that contains the clip wins; None when no folder has it
    candidate_paths = (os.path.join(folder, clip) for folder in clip_dirs)
    path = next((candidate for candidate in candidate_paths if os.path.exists(candidate)), None)

    if path is not None:
        video_paths.append(path)
        draft_labels.append(label)
        clip_names_list.append(clip)
    elif clip != "dummy.mp4": # the placeholder clip is never reported as missing
        print(f"‚ö†Ô∏è Missing: {clip}")

# One row per clip that was found; final_label is filled in by the annotator
annotation_df = pd.DataFrame({
    "clip_path": video_paths,
    "clip_name": clip_names_list,
    "draft_label": draft_labels,
    "final_label": [None] * len(video_paths)
})

LABEL_CHOICES = ['Attentive', 'Neutral', 'Distracted']

# --- 3. Create Widgets ---
# Progress text, video area and draft-label heading (all updated by load_clip)
progress_label = widgets.Label(value="Clip 0 of 0")
video_output = widgets.Output()
draft_label_display = widgets.HTML(value="<h3>Draft Label: -</h3>")

# Dropdown holding the label the annotator settles on
label_dropdown = widgets.Dropdown(
    description='Correct Label:',
    options=LABEL_CHOICES,
    style={'description_width': 'initial'},
)

# Button that confirms the current choice
submit_button = widgets.Button(description='Confirm & Next', button_style='success', icon='check')

# --- 4. Logic for Navigation ---
class AnnotatorState:
    """Holds the position of the clip currently shown by the annotation tool."""
    # Zero-based row position into annotation_df; starts at the first clip
    index = 0

# State object read and advanced by on_submit_click
state = AnnotatorState()

def load_clip(idx):
    """Show the clip at row position ``idx`` of ``annotation_df`` in the UI.

    Updates the progress text, the draft-label heading, the dropdown default
    and the video area. When ``idx`` is past the last row, a completion
    message is shown and the dropdown and button are disabled.
    """
    # Past the end: switch the UI to its finished state and stop
    if idx >= len(annotation_df):
        with video_output:
            clear_output()
            print("Annotation complete! üéâ")
        draft_label_display.value = "<h3>All Done!</h3>"
        label_dropdown.disabled = True
        submit_button.disabled = True
        progress_label.value = "Finished."
        return

    row = annotation_df.iloc[idx]
    progress_label.value = f"Clip {idx + 1} of {len(annotation_df)}"

    current_draft_label = row['draft_label']
    draft_label_display.value = f"<h3>Draft Label: {current_draft_label}</h3>"

    # Preselect the draft label when it is a valid choice, otherwise the first choice
    label_dropdown.value = (
        current_draft_label if current_draft_label in LABEL_CHOICES else LABEL_CHOICES[0]
    )

    with video_output:
        clear_output(wait=True)
        if not os.path.exists(row['clip_path']):
            display(HTML(f"<p><b>Error:</b> File not found at {row['clip_path']}</p>"))
        else:
            display(display_video(row['clip_path']))

def on_submit_click(b):
    """Store the selected label for the current clip, then move to the next clip.

    Args:
        b: the button instance passed by the widget callback (unused).
    """
    if state.index < len(annotation_df):
        # Write by position, matching load_clip's iloc lookup. A label-based
        # .loc write would append a new row if the index were not 0..n-1.
        final_label_col = annotation_df.columns.get_loc('final_label')
        annotation_df.iloc[state.index, final_label_col] = label_dropdown.value
    state.index += 1
    load_clip(state.index)

# Run on_submit_click whenever the confirm button is pressed
submit_button.on_click(on_submit_click)

# --- 5. Display Tool ---
# Widgets stacked top to bottom in the order the annotator uses them
ui = widgets.VBox(children=[
    progress_label,
    video_output,
    draft_label_display,
    label_dropdown,
    submit_button,
])

print(f"Loaded {len(annotation_df)} valid videos to label from {labels_csv_path}")

if len(annotation_df) == 0:
    print("No videos found to label.")
else:
    # Show the clip at the current position, then render the tool
    load_clip(state.index)
    display(ui)

# You can access your results after labeling by checking the DataFrame:
# print(annotation_df)

In [None]:
# Display the annotation table (final_label is filled in by the labeling tool)
annotation_df

In [None]:
# Derive clip_name (file name without folder or ".mp4" suffix) from clip_path, then
# drop clip_path. The guard makes the cell safe to re-run: once clip_path has been
# dropped there is nothing left to do, instead of failing with a KeyError.
if 'clip_path' in annotation_df.columns:
    # Keep only the part after the last "/", whichever folder the clip came from
    clip_file_names = annotation_df['clip_path'].str.rsplit('/', n=1).str[-1]

    # Remove the '.mp4' extension only where it ends the name
    annotation_df['clip_name'] = clip_file_names.str.replace(r'\.mp4$', '', regex=True)

    annotation_df = annotation_df.drop('clip_path', axis=1)


In [None]:
# Display the annotation table again, after the clip_name cleanup in the previous cell
annotation_df

In [None]:
import pandas as pd
import os

# --- Configuration ---
original_csv_path = "/kaggle/working/updated_output1.csv"
output_csv_path = "/kaggle/working/updated_output2.csv"


def apply_label_updates(labels_df, annotations):
    """Overwrite ``draft_label`` in ``labels_df`` with the annotators' ``final_label``.

    Args:
        labels_df: DataFrame with 'clip_name' and 'draft_label' columns.
        annotations: DataFrame with 'clip_name' and 'final_label' columns;
            rows whose 'final_label' is missing are ignored.

    Returns:
        A tuple ``(updated_df, matched_rows, unmatched_clips)``: a copy of
        ``labels_df`` with the new labels applied, the number of rows that
        received a label, and the sorted clip names from ``annotations`` that
        do not occur in ``labels_df``.
    """
    confirmed = annotations[annotations['final_label'].notna()]

    # clip_name -> final_label; if a clip was labeled more than once the first entry is kept
    label_map = confirmed.drop_duplicates(subset='clip_name').set_index('clip_name')['final_label']

    # NaN wherever a clip was not relabeled; those rows keep their current draft_label
    new_labels = labels_df['clip_name'].map(label_map)

    updated_df = labels_df.copy()
    updated_df['draft_label'] = new_labels.fillna(labels_df['draft_label'])

    matched_rows = int(new_labels.notna().sum())
    unmatched_clips = sorted(set(label_map.index) - set(labels_df['clip_name']))
    return updated_df, matched_rows, unmatched_clips


# --- Main Logic ---
try:
    # 1. Load the original CSV file
    original_df = pd.read_csv(original_csv_path)

    # 2. Keep only the clips that actually received a final label
    updates_to_apply = annotation_df[annotation_df['final_label'].notna()].copy()

    if updates_to_apply.empty:
        print("No new labels found in 'annotation_df'. No updates will be made.")
    else:
        # 3. Apply the labels and save the result
        original_df, matched_rows, unmatched_clips = apply_label_updates(
            original_df, updates_to_apply
        )
        original_df.to_csv(output_csv_path, index=False)

        # Report what was really changed, not just how many labels were offered
        print(f"Successfully updated labels for {matched_rows} row(s) from {original_csv_path}.")
        if unmatched_clips:
            print(
                f"Warning: {len(unmatched_clips)} labeled clip name(s) were not found in "
                f"{original_csv_path} and were ignored, e.g. {unmatched_clips[:5]}"
            )
        print(f"New file saved to: {output_csv_path}")

except NameError:
    print("Error: The DataFrame 'annotation_df' was not found.")
    print("Please make sure you have run the annotation tool cell first.")
except FileNotFoundError:
    print(f"Error: The file {original_csv_path} was not found.")
except KeyError as e:
    print(f"Error: A required column is missing. Details: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

In [None]:
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
from base64 import b64encode
import os
import pandas as pd

# --- 1. Display Function ---
def display_video(path):
    """Build an inline HTML5 video player for the MP4 file at ``path``.

    The file is read fully and embedded as a base64 ``data:`` URL.

    Args:
        path: location of the video file.

    Returns:
        An ``HTML`` object holding the ``<video>`` element, or an ``HTML``
        object holding an error paragraph if anything goes wrong (no exception
        is raised).
    """
    try:
        # Context manager so the file handle is closed as soon as the bytes are read
        with open(path, "rb") as video_file:
            mp4 = video_file.read()
        data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
        return HTML(f"""
        <video width=400 controls>
              <source src="{data_url}" type="video/mp4">
        </video>
        """)
    except Exception as e:
        return HTML(f"<p>Error loading video: {e}</p>")

# --- 2. Load Clip List from Text File ---
clip_list_path = "/kaggle/input/prelabes/seed_batch_for_labeling.txt"

# Each non-empty line is either "clip_name.mp4,draft_label" or just "clip_name.mp4"
clip_data = [] # (clip_name, draft_label) pairs
try:
    with open(clip_list_path, "r") as f:
        for raw_line in f:
            entry = raw_line.strip()
            if not entry:
                continue
            fields = [field.strip() for field in entry.split(',')]
            if len(fields) == 2:
                clip_data.append((fields[0], fields[1]))
            elif len(fields) == 1:
                # No label on the line: fall back to "Unknown"
                clip_data.append((fields[0], "Unknown"))
            # Lines with more than two comma-separated fields are ignored
except Exception as e:
    print(f"Error reading {clip_list_path}: {e}")
    # Placeholder entry so the code below still runs when nothing could be read
    if not clip_data:
        clip_data = [("dummy.mp4", "Unknown")]


# Folders searched, in this order, for each clip file
clip_dirs = [
    "/kaggle/input/ocat-clips/ocat clips1/ocat clips1",
    "/kaggle/input/ocat-clips/ocat clips2/ocat clips2"
]

# Parallel lists that become the columns of the annotation DataFrame
video_paths = []
draft_labels = []
for clip, label in clip_data:
    # Clip names may come without the extension; the files on disk are .mp4
    if not clip.endswith(".mp4"):
        clip += ".mp4"

    # First folder that contains the clip wins; None when no folder has it
    candidate_paths = (os.path.join(folder, clip) for folder in clip_dirs)
    path = next((candidate for candidate in candidate_paths if os.path.exists(candidate)), None)

    if path is not None:
        video_paths.append(path)
        draft_labels.append(label)
    elif clip != "dummy.mp4": # the placeholder clip is never reported as missing
        print(f"‚ö†Ô∏è Missing: {clip}")

# One row per clip that was found; final_label is filled in by the annotator
annotation_df = pd.DataFrame({
    "clip_path": video_paths,
    "draft_label": draft_labels,
    "final_label": [None] * len(video_paths)
})

# NOTE(review): these choices differ in spelling and case from the
# 'Attentive' / 'Neutral' / 'Distracted' names used by the training cells - confirm
# which set is intended before feeding labels from this tool into training.
LABEL_CHOICES = ['attention', 'neutral', 'distracted']

# --- 3. Create Widgets ---
# Progress text, video area and draft-label heading (all updated by load_clip)
progress_label = widgets.Label(value="Clip 0 of 0")
video_output = widgets.Output()
draft_label_display = widgets.HTML(value="<h3>Draft Label: -</h3>")

# Dropdown holding the label the annotator settles on
label_dropdown = widgets.Dropdown(
    description='Correct Label:',
    options=LABEL_CHOICES,
    style={'description_width': 'initial'},
)

# Button that confirms the current choice
submit_button = widgets.Button(description='Confirm & Next', button_style='success', icon='check')

# --- 4. Logic for Navigation ---
class AnnotatorState:
    """Holds the position of the clip currently shown by the annotation tool."""
    # Zero-based row position into annotation_df; starts at the first clip
    index = 0

# State object read and advanced by on_submit_click
state = AnnotatorState()

def load_clip(idx):
    """Show the clip at row position ``idx`` of ``annotation_df`` in the UI.

    Updates the progress text, the draft-label heading, the dropdown default
    and the video area. When ``idx`` is past the last row, a completion
    message is shown and the dropdown and button are disabled.
    """
    # Past the end: switch the UI to its finished state and stop
    if idx >= len(annotation_df):
        with video_output:
            clear_output()
            print("Annotation complete! üéâ")
        draft_label_display.value = "<h3>All Done!</h3>"
        label_dropdown.disabled = True
        submit_button.disabled = True
        progress_label.value = "Finished."
        return

    row = annotation_df.iloc[idx]
    progress_label.value = f"Clip {idx + 1} of {len(annotation_df)}"

    current_draft_label = row['draft_label']
    draft_label_display.value = f"<h3>Draft Label: {current_draft_label}</h3>"

    # Preselect the draft label when it is a valid choice, otherwise the first choice
    label_dropdown.value = (
        current_draft_label if current_draft_label in LABEL_CHOICES else LABEL_CHOICES[0]
    )

    with video_output:
        clear_output(wait=True)
        if not os.path.exists(row['clip_path']):
            display(HTML(f"<p><b>Error:</b> File not found at {row['clip_path']}</p>"))
        else:
            display(display_video(row['clip_path']))

def on_submit_click(b):
    """Store the selected label for the current clip, then move to the next clip.

    Args:
        b: the button instance passed by the widget callback (unused).
    """
    if state.index < len(annotation_df):
        # Write by position, matching load_clip's iloc lookup. A label-based
        # .loc write would append a new row if the index were not 0..n-1.
        final_label_col = annotation_df.columns.get_loc('final_label')
        annotation_df.iloc[state.index, final_label_col] = label_dropdown.value
    state.index += 1
    load_clip(state.index)

# Run on_submit_click whenever the confirm button is pressed
submit_button.on_click(on_submit_click)

# --- 5. Display Tool ---
# Widgets stacked top to bottom in the order the annotator uses them
ui = widgets.VBox(children=[
    progress_label,
    video_output,
    draft_label_display,
    label_dropdown,
    submit_button,
])

print(f"Loaded {len(annotation_df)} valid videos.")
if len(annotation_df) == 0:
    print("No videos found to label.")
else:
    # Show the clip at the current position, then render the tool
    load_clip(state.index)
    display(ui)

# You can access your results after labeling by checking the DataFrame:
# print(annotation_df)

In [None]:
# Update package list and install the ffmpeg binary
!apt-get update && apt-get install -y ffmpeg

# Install augly with its video dependencies
!pip install augly[video]

In [None]:
from IPython.display import display, HTML
from base64 import b64encode
import os

# 1. Set the path to your video file
video_path = "/kaggle/input/ocat-clips/ocat clips1/ocat clips1/cropped_2_clip_015.mp4"

# 2. Check if the file exists
if os.path.exists(video_path):
    # 3. Read the video file in binary mode; the context manager closes the
    #    file handle as soon as the bytes have been read
    with open(video_path, "rb") as video_file:
        mp4 = video_file.read()

    # 4. Encode the video data to base64
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()

    # 5. Create the HTML string with the embedded video
    # The 'controls' attribute adds play/pause, volume, etc.
    html_code = f"""
    <video width=400 controls>
          <source src="{data_url}" type="video/mp4">
          Your browser does not support the video tag.
    </video>
    """

    # 6. Display the HTML
    display(HTML(html_code))

else:
    print(f"Error: Video file not found at {video_path}")