In [None]:
# IMPORTANT: Some Kaggle data sources used by this notebook are private.
# Run this cell first so the dataset download in the following cell is
# authorised to import them.
# NOTE(review): kagglehub.login() presumably prompts for credentials
# interactively, which would block an unattended "Run All" -- confirm.
import kagglehub
kagglehub.login()


In [None]:
# IMPORTANT: Run this cell in order to import your Kaggle data sources,
# then feel free to delete this cell.
# NOTE: This notebook environment differs from Kaggle's Python
# environment, so there may be missing libraries used by your
# notebook.

# Download the dataset and keep the path returned by kagglehub.
# NOTE(review): in the visible notebook this variable is never read again;
# the feature-extraction cell uses a hard-coded /kaggle/input/... directory.
varshitpashikanti_ocat_original_dataset_for_labeling_path = kagglehub.dataset_download('varshitpashikanti/ocat-original-dataset-for-labeling')

print('Data source import complete.')


In [None]:
!pip install -q mediapipe==0.10.15 google-api-core==2.19.0

In [None]:
import cv2
import mediapipe as mp
import numpy as np
import os
import glob
from tqdm import tqdm # For a helpful progress bar

# --- Configuration ---
INPUT_DIR = '/kaggle/input/ocat-original-dataset-for-labeling/ocat clips1/ocat clips1'     # Directory containing your 3-second clips
OUTPUT_DIR = '/kaggle/working/clip_features1' # Where to save the .npy feature files
NUM_LANDMARKS = 478                # Face Mesh landmark count with refine_landmarks=True
NUM_FEATURES = NUM_LANDMARKS * 3   # (x, y, z) per landmark -> 1434 values per frame
# ---------------------

# Ensure the output directory exists
os.makedirs(OUTPUT_DIR, exist_ok=True)

mp_face_mesh = mp.solutions.face_mesh


def extract_clip_features(clip_path):
    """Return the per-frame Face Mesh landmark vectors of one video clip.

    Each frame becomes one row laid out as
    [x1, y1, z1, x2, y2, z2, ..., x478, y478, z478]. Frames in which no face
    is detected become an all-zero row so the row count always equals the
    number of decoded frames.

    A fresh FaceMesh instance is created for every clip: with
    static_image_mode=False the solution tracks landmarks from frame to frame,
    so sharing one instance across clips would let the previous clip's face
    position influence the first frames of the next clip.

    Args:
        clip_path: Path of the video file to process.

    Returns:
        A float64 array of shape (num_frames, NUM_FEATURES). The array has
        zero rows when the clip cannot be opened or contains no frames.
    """
    frame_vectors = []
    cap = cv2.VideoCapture(clip_path)
    try:
        if cap.isOpened():
            with mp_face_mesh.FaceMesh(
                static_image_mode=False,     # We are processing a video stream
                max_num_faces=1,             # Assume one driver per clip
                refine_landmarks=True,       # ESSENTIAL: This provides iris and pupil landmarks
                min_detection_confidence=0.5
            ) as face_mesh:
                while True:
                    success, frame = cap.read()
                    if not success:
                        break  # End of clip

                    # MediaPipe expects RGB, OpenCV decodes to BGR
                    image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    results = face_mesh.process(image_rgb)

                    if results.multi_face_landmarks:
                        # Only one face is requested, so take the first result
                        face_landmarks = results.multi_face_landmarks[0].landmark
                        frame_vectors.append([
                            coordinate
                            for landmark in face_landmarks
                            for coordinate in (landmark.x, landmark.y, landmark.z)
                        ])
                    else:
                        # No face detected: zero row keeps the sequence length
                        frame_vectors.append(np.zeros(NUM_FEATURES))
    finally:
        # Release the decoder even if MediaPipe/OpenCV raised mid-clip
        cap.release()

    # reshape keeps the result 2-D even when no frame was decoded, and the
    # explicit dtype turns a row of unexpected length into an error instead of
    # silently producing an object array.
    return np.array(frame_vectors, dtype=np.float64).reshape(-1, NUM_FEATURES)


# Find all video files in the input directory (sorted for a reproducible order)
# Add other extensions if needed (e.g., '*.avi', '*.mov')
video_files = sorted(glob.glob(os.path.join(INPUT_DIR, '*.mp4')))

print(f"Found {len(video_files)} video clips. Starting feature extraction...")

# Process each video file
for clip_path in tqdm(video_files, desc="Processing Clips"):
    clip_features = extract_clip_features(clip_path)

    if clip_features.shape[0] == 0:
        # tqdm.write prints without corrupting the progress bar
        tqdm.write(f"Warning: no frames could be read from {clip_path}")

    # Save the entire sequence for this clip as a NumPy array
    # The shape will be (num_frames, 1434), e.g. (90, 1434) for a 3-sec, 30-FPS clip
    # The file is named after the clip's base name (e.g., "clip_001.npy")
    base_name = os.path.splitext(os.path.basename(clip_path))[0]
    output_path = os.path.join(OUTPUT_DIR, f"{base_name}.npy")

    np.save(output_path, clip_features)

print(f"\nFeature extraction complete. All .npy files saved to {OUTPUT_DIR}")

In [None]:
import glob
import os

import numpy as np

# Inspect one of the feature files written by the extraction step.
# The extraction cell saves "<clip base name>.npy" into this directory, so the
# first available file is picked instead of hard-coding a file name that may
# not exist in a fresh run.
FEATURES_DIR = '/kaggle/working/clip_features1'

feature_files = sorted(glob.glob(os.path.join(FEATURES_DIR, '*.npy')))
if not feature_files:
    raise FileNotFoundError(
        f"No .npy feature files found in {FEATURES_DIR}; run the feature extraction cell first."
    )

# Load your .npy file
sample_path = feature_files[0]
data = np.load(sample_path)

# Display shape and a small preview
print("File:", sample_path)
print("Shape:", data.shape)
print("Data (first 5 rows):")
print(data[:5])


In [None]:
import shutil

# Folder to zip (the directory the feature extraction cell writes to)
folder_path = '/kaggle/working/clip_features1'

# Output zip file path (without .zip extension; make_archive appends it)
output_zip = '/kaggle/working/clip_features_zip1'

# Create the zip archive; make_archive returns the path of the file it wrote
archive_path = shutil.make_archive(output_zip, 'zip', root_dir=folder_path)

# The original message contained a mis-encoded check-mark ("âœ…")
print("✅ Folder zipped successfully!")
print(f"Archive written to {archive_path}")
