# Preprocessing

In [2]:
pip install opencv-python

Collecting opencv-python
  Using cached opencv_python-4.9.0.80-cp37-abi3-win_amd64.whl.metadata (20 kB)
Using cached opencv_python-4.9.0.80-cp37-abi3-win_amd64.whl (38.6 MB)
Installing collected packages: opencv-python
Successfully installed opencv-python-4.9.0.80
Note: you may need to restart the kernel to use updated packages.


In [3]:
import cv2
import os

### Setting dataset and output directory

In [5]:
# Root folder of the downloaded DFDC chunk. Forward slashes are used because
# Windows accepts them as separators and they need no escaping in a string.
base_dir = "C:/Users/iampr/Downloads/DeepFake Face Recognition/Datasets/dfdc_train_part_00"

# Folder inside base_dir that holds the .mp4 files and metadata.json.
video_dataset = os.path.join(base_dir, "dfdc_train_part_0")

### Processing metadata to organize videos by label

In [6]:
import json
# metadata.json maps each video filename to a record that includes its 'label'.
metadata_filepath = os.path.join(video_dataset, "metadata.json")
# Read as UTF-8 explicitly; otherwise open() falls back to the platform's
# locale encoding, which is not UTF-8 on a default Windows setup.
with open(metadata_filepath, 'r', encoding='utf-8') as file:
    metadata = json.load(file)

In [7]:
# Output layout under base_dir:
#   real/real_faces  - face crops from videos labelled REAL
#   fake/fake_faces  - face crops from all other videos
real_dir, fake_dir = (os.path.join(base_dir, label) for label in ('real', 'fake'))

real_faces = os.path.join(real_dir, "real_faces")
fake_faces = os.path.join(fake_dir, "fake_faces")

# Create every folder up front; exist_ok makes re-running this cell harmless.
for directory in (real_dir, fake_dir, real_faces, fake_faces):
    os.makedirs(directory, exist_ok=True)

In [8]:
# Split the metadata keys (video filenames) by label: entries labelled "REAL"
# go to real_videos, every other entry goes to fake_videos.
real_videos = [name for name, info in metadata.items() if info['label'] == "REAL"]
fake_videos = [name for name, info in metadata.items() if info['label'] != "REAL"]

### Extracting faces from video frames

In [9]:
pip install mtcnn

Collecting mtcnn
  Using cached mtcnn-0.1.1-py3-none-any.whl.metadata (5.8 kB)
Using cached mtcnn-0.1.1-py3-none-any.whl (2.3 MB)
Installing collected packages: mtcnn
Successfully installed mtcnn-0.1.1
Note: you may need to restart the kernel to use updated packages.


In [10]:
from mtcnn import MTCNN
# Module-level MTCNN detector instance; detect_faces() reads this global and
# calls detector.detect_faces() on each frame it is given.
detector = MTCNN()

In [13]:
pip install Pillow

Collecting Pillow
  Downloading pillow-10.3.0-cp39-cp39-win_amd64.whl.metadata (9.4 kB)
Downloading pillow-10.3.0-cp39-cp39-win_amd64.whl (2.5 MB)
   ---------------------------------------- 0.0/2.5 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.5 MB 660.6 kB/s eta 0:00:04
   --- ------------------------------------ 0.2/2.5 MB 2.1 MB/s eta 0:00:02
   ----------------- ---------------------- 1.1/2.5 MB 8.7 MB/s eta 0:00:01
   ---------------------------------------  2.5/2.5 MB 16.1 MB/s eta 0:00:01
   ---------------------------------------  2.5/2.5 MB 16.1 MB/s eta 0:00:01
   ---------------------------------------- 2.5/2.5 MB 10.8 MB/s eta 0:00:00
Installing collected packages: Pillow
Successfully installed Pillow-10.3.0
Note: you may need to restart the kernel to use updated packages.


In [14]:
from PIL import Image

def detect_faces(frame, frame_basename, save_directory):
    """Detect faces in a single frame and save each face crop as a JPEG.

    Args:
        frame: Image array in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB`` before detection).
        frame_basename: Name or path identifying the frame. Only its basename
            is used when building the output filename.
        save_directory: Folder that receives the crops; created if missing.

    Detection ``i`` is written to ``<save_directory>/face_<i>_<basename>.jpeg``.
    Nothing is returned. Any error is reported with ``print`` instead of being
    raised, so a single bad frame does not abort a longer run.
    """
    try:
        # MTCNN is given RGB input, while the incoming frame is BGR.
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        detections = detector.detect_faces(image_rgb)

        os.makedirs(save_directory, exist_ok=True)

        # Loop-invariant, so compute it once instead of once per detection.
        basename = os.path.basename(frame_basename)

        for i, detection in enumerate(detections):
            x, y, width, height = detection['box']

            # Work out the far edges BEFORE clamping, so a box that starts
            # off-image is trimmed rather than shifted to the right/down.
            x_end, y_end = x + width, y + height
            x, y = max(0, x), max(0, y)
            if x_end <= x or y_end <= y:
                # Degenerate box (entirely outside the image or zero-sized).
                continue

            # Crop from the RGB image: Image.fromarray treats a 3-channel
            # array as RGB, so cropping the BGR frame would swap red and blue
            # in the saved file.
            face = image_rgb[y:y_end, x:x_end]
            if face.size == 0:
                # Box lies beyond the image bounds; skip it but keep going
                # with the remaining detections.
                continue

            save_path = os.path.join(save_directory, f"face_{i}_{basename}.jpeg")
            Image.fromarray(face).save(save_path)
    except Exception as e:
        # Best-effort: report and move on so the caller's loop can continue.
        print(f"Face detection failed for {frame_basename}: {e}")


In [16]:
def face_detection(video_path, output_path, max_frames=None):
    """Read frames from a video and run ``detect_faces`` on each of them.

    Args:
        video_path: Path of the video file to open.
        output_path: Folder handed to ``detect_faces`` for the face crops.
        max_frames: Maximum number of frames to process, counted from the
            start of the video. ``None`` processes every readable frame.

    Nothing is returned. Errors are reported with ``print`` instead of being
    raised, so a batch over many videos keeps running.
    """
    cap = None
    try:
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            print(f"An error occurred: could not open video {video_path}")
            return

        # Video name without its extension, used to label the frames.
        video_name = os.path.splitext(os.path.basename(video_path))[0]

        frame_number = 0
        # Read until the requested limit or until the video runs out, rather
        # than trusting the container's reported frame count.
        while max_frames is None or frame_number < max_frames:
            ret, frame = cap.read()
            if not ret:
                break

            # Give every frame its own name. Passing the bare video path made
            # all frames of a video share one output filename, so each frame
            # overwrote the crops saved from the previous one.
            frame_name = f"{video_name}_{frame_number:04d}"
            detect_faces(frame, frame_name, output_path)
            print("Image saved: ", frame_number)

            frame_number += 1
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        # Always release the capture handle, including on errors.
        if cap is not None:
            cap.release()


In [None]:
import tensorflow as tf

# Walk the dataset folder and send each labelled .mp4 through face_detection,
# writing crops to real_faces or fake_faces according to the metadata label.
with tf.device('/GPU:0'):  # Specify GPU device
    max_frames = 100  # frames processed per video, counted from the start
    video_count = 0

    # Sets give constant-time membership checks for every file in the folder.
    real_video_names = set(real_videos)
    fake_video_names = set(fake_videos)

    for video_file in os.listdir(video_dataset):
        if not video_file.endswith('.mp4'):  # Adjust the extension based on your dataset
            continue

        video_path = os.path.join(video_dataset, video_file)
        if video_file in real_video_names:
            face_detection(video_path, real_faces, max_frames)
        elif video_file in fake_video_names:
            face_detection(video_path, fake_faces, max_frames)
        else:
            # A file with no metadata entry has no known label; do not file
            # it under "fake" by default.
            print("Skipping video without metadata label:", video_file)
            continue

        video_count += 1
        print("Processing Video :", video_count)

Image saved:  0
Image saved:  1
Image saved:  2
Image saved:  3
Image saved:  4
Image saved:  5
Image saved:  6
Image saved:  7
Image saved:  8
Image saved:  9
Image saved:  10
Image saved:  11
Image saved:  12
Image saved:  13
Image saved:  14
Image saved:  15
Image saved:  16
Image saved:  17
Image saved:  18
Image saved:  19
Image saved:  20
Image saved:  21
Image saved:  22
Image saved:  23
Image saved:  24
Image saved:  25
Image saved:  26
Image saved:  27
Image saved:  28
Image saved:  29
Image saved:  30
Image saved:  31
Image saved:  32
Image saved:  33
Image saved:  34
Image saved:  35
Image saved:  36
Image saved:  37
Image saved:  38
Image saved:  39
Image saved:  40
Image saved:  41
Image saved:  42
Image saved:  43
Image saved:  44
Image saved:  45
Image saved:  46
Image saved:  47
Image saved:  48
Image saved:  49
Image saved:  50
Image saved:  51
Image saved:  52
