<a href="https://colab.research.google.com/github/sanikasanikachaudhari071/BE_project/blob/main/preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')

!pip install mtcnn opencv-python-headless

Collecting mtcnn
  Downloading mtcnn-1.0.0-py3-none-any.whl.metadata (5.8 kB)
Collecting lz4>=4.3.3 (from mtcnn)
  Downloading lz4-4.4.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Downloading mtcnn-1.0.0-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m25.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lz4-4.4.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m49.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: lz4, mtcnn
Successfully installed lz4-4.4.4 mtcnn-1.0.0


In [None]:
import cv2
import os
from mtcnn.mtcnn import MTCNN
import glob


# Glob patterns selecting the source videos of each class.
REAL_VIDEO_PATH = "/content/drive/MyDrive/Celeb-DF/Celeb-real/*.mp4"
FAKE_VIDEO_PATH = "/content/drive/MyDrive/Celeb-DF/Celeb-synthesis/*.mp4"

# Output directories for the extracted face crops, one per class.
SAVE_DIR_REAL = "/content/drive/MyDrive/processed_faces/real"
SAVE_DIR_FAKE = "/content/drive/MyDrive/processed_faces/fake"

# Create both output directories up front; existing ones are left untouched.
for _save_dir in (SAVE_DIR_REAL, SAVE_DIR_FAKE):
    os.makedirs(_save_dir, exist_ok=True)

# One detector instance, reused for every frame processed below.
detector = MTCNN()


def _clip_box_to_frame(box, frame_width, frame_height):
    """Clip an ``(x, y, w, h)`` detection box to the frame bounds.

    Returns ``(x1, y1, x2, y2)`` slice bounds, or ``None`` when no part of the
    box lies inside the frame.
    """
    x, y, w, h = (int(v) for v in box)
    # x / y can be negative when a detected box extends past the image border.
    # Clamp to the frame instead of taking abs(): abs() would shift the crop
    # window away from the face rather than trimming the part outside the frame.
    x1, y1 = max(0, x), max(0, y)
    x2, y2 = min(frame_width, x + w), min(frame_height, y + h)
    if x2 <= x1 or y2 <= y1:
        return None
    return x1, y1, x2, y2


def process_and_save_faces(video_path_list, save_dir, frame_interval=10,
                           min_confidence=0.95, face_size=(224, 224)):
    """
    Detect faces in videos and save the resized crops as JPEG images.

    Each video is read frame by frame. Every ``frame_interval``-th frame
    (starting with frame 0) is passed to the module-level ``detector``; each
    detection whose confidence is above ``min_confidence`` is cropped from the
    frame, resized to ``face_size`` and written to
    ``<save_dir>/<video_name>_frame<N>_face<K>.jpg``, where ``K`` numbers the
    faces saved so far for that video. A one-line summary is printed per video.

    Args:
        video_path_list: Iterable of video file paths.
        save_dir: Directory the face images are written to (created if missing).
        frame_interval: Run detection on every N-th frame only, to save time.
            Must be >= 1.
        min_confidence: Detections with a confidence at or below this value
            are ignored.
        face_size: ``(width, height)`` of the saved face images.

    Raises:
        ValueError: If ``frame_interval`` is smaller than 1.
    """
    if frame_interval < 1:
        raise ValueError(f"frame_interval must be >= 1, got {frame_interval}")

    video_paths = list(video_path_list)
    os.makedirs(save_dir, exist_ok=True)

    for video_count, video_path in enumerate(video_paths, start=1):
        # splitext keeps names that contain extra dots intact.
        video_name = os.path.splitext(os.path.basename(video_path))[0]
        face_num = 0

        cap = cv2.VideoCapture(video_path)
        try:
            if not cap.isOpened():
                print(f"Could not open video {video_name}; skipping.")

            frame_num = 0
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    # End of stream (or a read error): stop reading this video.
                    break

                if frame_num % frame_interval == 0:
                    frame_height, frame_width = frame.shape[:2]
                    # Frames are converted from BGR to RGB for the detector.
                    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                    for det in detector.detect_faces(frame_rgb):
                        if det['confidence'] <= min_confidence:
                            continue

                        bounds = _clip_box_to_frame(
                            det['box'], frame_width, frame_height
                        )
                        if bounds is None:
                            # Box lies entirely outside the frame: nothing to crop.
                            continue
                        x1, y1, x2, y2 = bounds

                        # Crop from the original BGR frame, which is what
                        # cv2.imwrite expects.
                        face = frame[y1:y2, x1:x2]
                        save_path = os.path.join(
                            save_dir,
                            f"{video_name}_frame{frame_num}_face{face_num + 1}.jpg"
                        )

                        try:
                            resized_face = cv2.resize(
                                face,
                                face_size,
                                interpolation=cv2.INTER_AREA
                            )
                            saved = cv2.imwrite(save_path, resized_face)
                        except Exception as e:
                            # Best effort: one bad crop must not abort the run.
                            print(f"Error resizing face from {video_name}: {e}")
                            continue

                        # imwrite reports failure through its return value,
                        # so only count faces that actually reached the disk.
                        if saved:
                            face_num += 1
                        else:
                            print(f"Could not write face image {save_path}")

                frame_num += 1
        finally:
            # Release the capture even if detection raised part-way through.
            cap.release()

        print(f"Finished video {video_count}/{len(video_paths)}: {video_name}. Found {face_num} faces.")



# The matched paths are sorted because glob.glob returns them in arbitrary
# order; sorting keeps the processing order and the progress log stable
# between runs.
print("--- Processing REAL videos ---")
real_videos = sorted(glob.glob(REAL_VIDEO_PATH))
if not real_videos:
    # Make an empty match visible instead of silently doing nothing.
    print(f"WARNING: no videos matched {REAL_VIDEO_PATH}")
process_and_save_faces(real_videos, SAVE_DIR_REAL, frame_interval=10)

print("\n--- Processing FAKE videos ---")
fake_videos = sorted(glob.glob(FAKE_VIDEO_PATH))
if not fake_videos:
    # Make an empty match visible instead of silently doing nothing.
    print(f"WARNING: no videos matched {FAKE_VIDEO_PATH}")
process_and_save_faces(fake_videos, SAVE_DIR_FAKE, frame_interval=10)

print("\n--- PREPROCESSING COMPLETE ---")