In [3]:
import cv2
import os
import json

# Extract face crops from evenly spaced frames of one training video and save
# them as JPEGs under /kaggle/working/<label>/, where <label> is the video's
# "label" entry in metadata.json, lower-cased.
metadata_path = '/kaggle/input/deepfake-detection-challenge/train_sample_videos/metadata.json'

with open(metadata_path, 'r') as f:
    metadata = json.load(f)

video_filename = 'aagfhgtpmv.mp4'

if video_filename not in metadata:
    raise ValueError(f"No metadata found for video file {video_filename}")

video_metadata = metadata[video_filename]
label = video_metadata['label']  # "FAKE" or "REAL"

output_dir = f'/kaggle/working/{label.lower()}'
os.makedirs(output_dir, exist_ok=True)

video_file = f'/kaggle/input/deepfake-detection-challenge/train_sample_videos/{video_filename}'

# Path to the Haar cascade XML file
haar_cascade_path = '/kaggle/input/haarcascades/haarcascade_frontalface_default.xml'

# Load Haar cascade for face detection.
# CascadeClassifier does not raise on a missing/invalid XML file; it just stays
# empty and fails later inside detectMultiScale, so check it up front.
face_cascade = cv2.CascadeClassifier(haar_cascade_path)
if face_cascade.empty():
    raise IOError(f"Could not load Haar cascade from {haar_cascade_path}")

# Open the video file
video_capture = cv2.VideoCapture(video_file)
try:
    # VideoCapture also fails silently; without this check the loop below would
    # exit on the first read and the cell would still report completion.
    if not video_capture.isOpened():
        raise IOError(f"Could not open video file {video_file}")

    # Frame rate of the video (kept for later inspection; not used in this cell)
    fps = int(video_capture.get(cv2.CAP_PROP_FPS))
    frame_count = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))

    # Number of frames to capture
    num_frames = 20
    # Clamp the stride to at least 1: for clips shorter than num_frames the
    # integer division yields 0, which would sample frame 0 over and over.
    frame_interval = max(1, frame_count // num_frames)
    # Never request more samples than the video reports having.
    num_samples = min(num_frames, max(frame_count, 0))

    # Process frames and detect faces
    for i in range(num_samples):
        # Seek to the i-th sampled frame
        frame_pos = i * frame_interval
        video_capture.set(cv2.CAP_PROP_POS_FRAMES, frame_pos)

        # Read the frame; stop sampling at the first frame that cannot be decoded
        ret, frame = video_capture.read()
        if not ret:
            break

        # Convert frame to grayscale (Haar cascade works on grayscale images)
        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Detect faces in the frame
        faces = face_cascade.detectMultiScale(gray_frame, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

        # Crop each detected bounding box out of the colour frame and save it
        for j, (x, y, w, h) in enumerate(faces):
            face_image = frame[y:y+h, x:x+w]
            face_image_file = os.path.join(output_dir, f'frame_{i}_face_{j}.jpg')
            # imwrite signals failure through its return value, not an exception
            if cv2.imwrite(face_image_file, face_image):
                print(f'Saved {face_image_file}')
            else:
                print(f'Failed to write {face_image_file}')
finally:
    # Release the capture handle even if detection or writing raised
    video_capture.release()

print(f'Face extraction for {label.lower()} completed.')


Saved /kaggle/working/fake/frame_0_face_0.jpg
Saved /kaggle/working/fake/frame_1_face_0.jpg
Saved /kaggle/working/fake/frame_1_face_1.jpg
Saved /kaggle/working/fake/frame_2_face_0.jpg
Saved /kaggle/working/fake/frame_2_face_1.jpg
Saved /kaggle/working/fake/frame_3_face_0.jpg
Saved /kaggle/working/fake/frame_3_face_1.jpg
Saved /kaggle/working/fake/frame_3_face_2.jpg
Saved /kaggle/working/fake/frame_4_face_0.jpg
Saved /kaggle/working/fake/frame_4_face_1.jpg
Saved /kaggle/working/fake/frame_5_face_0.jpg
Saved /kaggle/working/fake/frame_5_face_1.jpg
Saved /kaggle/working/fake/frame_6_face_0.jpg
Saved /kaggle/working/fake/frame_6_face_1.jpg
Saved /kaggle/working/fake/frame_7_face_0.jpg
Saved /kaggle/working/fake/frame_7_face_1.jpg
Saved /kaggle/working/fake/frame_7_face_2.jpg
Saved /kaggle/working/fake/frame_8_face_0.jpg
Saved /kaggle/working/fake/frame_8_face_1.jpg
Saved /kaggle/working/fake/frame_8_face_2.jpg
Saved /kaggle/working/fake/frame_9_face_0.jpg
Saved /kaggle/working/fake/frame_9

In [4]:
import shutil

# Bundle the saved face crops into a single zip archive next to the folder.
# NOTE(review): the folder is hard-coded to 'fake'; the extraction cell writes to
# /kaggle/working/<label.lower()>, so a video labelled "REAL" lands in a different
# folder than the one zipped here -- confirm this is intended.
folder_path = '/kaggle/working/fake'

# make_archive appends the ".zip" extension to the base name itself and returns
# the path of the archive it created, so take zip_path from the call instead of
# maintaining a second hand-written copy of the path.
zip_path = shutil.make_archive(folder_path, 'zip', folder_path)

zip_path


'/kaggle/working/fake.zip'