In [None]:
import cv2
import os
import random
from tqdm import tqdm
import matplotlib.pyplot as plt

In [None]:
# Frontal-face Haar cascade located under cv2.data.haarcascades; built once at
# module level so the extraction function below can reuse the same detector.
_cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(_cascade_path)

def extract_random_frames_with_face(video_dir, output_dir, label, max_frames=10, target_size=(224, 224)):
    """Sample random frames from each video and save one face crop per frame.

    For every entry in ``video_dir`` up to ``max_frames`` distinct frame
    indices are drawn at random and visited in ascending order. Each frame
    that can be read is converted to grayscale and passed to the module-level
    ``face_cascade`` detector. When at least one face is reported, the first
    detection is cropped from the colour frame, resized to ``target_size`` and
    written to ``output_dir`` as ``{label}_{video_id}_frame_{NNNN}.jpg``,
    where ``NNNN`` counts the crops saved so far for that video.

    Entries that cannot be opened, or whose reported frame count is zero or
    negative, are skipped instead of aborting the whole run.

    Args:
        video_dir: Directory whose entries are each opened with
            ``cv2.VideoCapture``.
        output_dir: Directory that receives the JPEG crops; created if missing.
        label: Prefix used in output file names and in the progress-bar text.
        max_frames: Upper bound on the number of frames sampled per video.
        target_size: Size handed to ``cv2.resize`` (its ``dsize`` argument).

    Returns:
        None. Results are written to ``output_dir``.
    """
    os.makedirs(output_dir, exist_ok=True)

    for video_name in tqdm(os.listdir(video_dir), desc=f"Processing {label} videos"):
        video_path = os.path.join(video_dir, video_name)
        video_id = os.path.splitext(video_name)[0]

        cap = cv2.VideoCapture(video_path)
        try:
            # Non-video entries (sub-directories, stray files) fail to open.
            if not cap.isOpened():
                continue

            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            sample_size = min(max_frames, total_frames)
            # random.sample raises ValueError for a negative sample size, which
            # happens when the backend reports a negative frame count.
            if sample_size <= 0:
                continue

            frame_indices = sorted(random.sample(range(total_frames), sample_size))
            frame_count = 0

            for idx in frame_indices:
                cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
                ret, frame = cap.read()
                if not ret:
                    continue

                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
                if len(faces) == 0:
                    continue

                # Only the first reported detection is kept for each frame.
                x, y, w, h = faces[0]
                resized_face = cv2.resize(frame[y:y+h, x:x+w], target_size)

                frame_path = os.path.join(output_dir, f"{label}_{video_id}_frame_{frame_count:04d}.jpg")
                # Advance the counter only when the image was actually written,
                # so saved files stay consecutively numbered.
                if cv2.imwrite(frame_path, resized_face):
                    frame_count += 1
        finally:
            # Release the capture even if reading or detection raised.
            cap.release()

# Source video folder and destination face-crop folder for each class label.
real_videos_dir = "/teamspace/studios/this_studio/deepfake/dataset/videos/real"
real_frames_dir = "/teamspace/studios/this_studio/deepfake/dataset/images/real"

fake_videos_dir = "/teamspace/studios/this_studio/deepfake/dataset/videos/fake"
fake_frames_dir = "/teamspace/studios/this_studio/deepfake/dataset/images/fake"

# Extract face crops for the real videos first, then for the fake ones.
for _label, _videos_dir, _frames_dir in (
    ("real", real_videos_dir, real_frames_dir),
    ("fake", fake_videos_dir, fake_frames_dir),
):
    extract_random_frames_with_face(_videos_dir, _frames_dir, _label)


In [None]:
import cv2

def extract_frame_from_video(video_path, frame_index=0):
    """Read a single frame from a video file.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        frame_index: Frame position to seek to (``cv2.CAP_PROP_POS_FRAMES``)
            before reading.

    Returns:
        The frame returned by ``cap.read()``, or ``None`` when the read fails
        (in which case a message is printed).
    """
    cap = cv2.VideoCapture(video_path)
    try:
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
        ret, frame = cap.read()
    finally:
        # Release on every path. The release call used to sit after both
        # return statements, so it never ran and the capture handle leaked.
        cap.release()

    if not ret:
        print("Failed to extract frame.")
        return None
    return frame

def show_detected_face_in_video(video_path, frame_index=0):
    """Display one video frame with every detected face outlined.

    The frame is fetched with ``extract_frame_from_video``. If that returns
    ``None`` nothing is drawn or shown. Otherwise a frontal-face Haar cascade
    is run on the grayscale frame, a 2-pixel rectangle in colour
    ``(0, 255, 0)`` is drawn around each detection, and the frame is converted
    from BGR to RGB and shown with matplotlib with the axes hidden.

    Args:
        video_path: Path of the video to inspect.
        frame_index: Index of the frame to display.
    """
    image_bgr = extract_frame_from_video(video_path, frame_index)
    if image_bgr is None:
        return

    grayscale = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
    detector = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    detections = detector.detectMultiScale(grayscale, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    # Boxes are drawn directly onto the fetched frame.
    for left, top, width, height in detections:
        top_left = (left, top)
        bottom_right = (left + width, top + height)
        cv2.rectangle(image_bgr, top_left, bottom_right, (0, 255, 0), 2)

    # matplotlib expects RGB channel order, so convert before showing.
    plt.imshow(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.show()

# Visual spot check: draw the detected faces on frame 50 of one sample video.
video_path = '/teamspace/studios/this_studio/deepfake/dataset/videos/fake/03_11__exit_phone_room__P08VGHTA.mp4'  # Replace with your video path
show_detected_face_in_video(video_path=video_path, frame_index=50)
