<a href="https://colab.research.google.com/github/ruchising/College-Event-Management-System-master/blob/main/DeepFakeDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
# Mount Google Drive at /content/drive; the dataset paths configured in the
# following cell all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [17]:
# Root folder of the dataset on the mounted Drive -- adjust this path to your layout.
base_data_path = '/content/drive/MyDrive/DeepFakeData/dataset'

# Sub-folders with the original and the manipulated clips. Note that the
# original-sequences folder name is spelled with a space, not an underscore.
original_sequences_path = '/'.join((base_data_path, 'DFD_original sequences'))
manipulated_sequences_path = '/'.join((base_data_path, 'DFD_manipulated_sequences'))


In [18]:
import os

# Announce the start of the video-file scan performed later in this cell.
print("Counting files...")

def count_video_files_recursively(directory_path):
    """Counts video files (mp4, avi, mov, webm) recursively in a directory.

    Args:
        directory_path: Directory to scan, including all of its sub-directories.

    Returns:
        A ``(count, video_files_list)`` tuple: the number of matching files and
        their full paths, sorted so the result is the same on every run.

    Raises:
        FileNotFoundError: If ``directory_path`` is not an existing directory.
    """
    # os.walk() ignores errors by default and simply yields nothing for a
    # missing directory, which would look like "0 videos found". Fail loudly
    # instead so a wrong path is noticed immediately.
    if not os.path.isdir(directory_path):
        raise FileNotFoundError(f"Video directory not found: {directory_path}")

    video_extensions = ('.mp4', '.avi', '.mov', '.webm')
    # Extension match is case-insensitive; sorting removes the dependency on
    # the filesystem's directory-listing order.
    video_files_list = sorted(
        os.path.join(root, file)
        for root, _, files in os.walk(directory_path)
        for file in files
        if file.lower().endswith(video_extensions)
    )
    return len(video_files_list), video_files_list

# os.walk() does not raise for a missing directory -- it just yields nothing --
# so verify the dataset folders explicitly rather than waiting for an exception
# from the counting step.
missing_dirs = [
    path
    for path in (original_sequences_path, manipulated_sequences_path)
    if not os.path.isdir(path)
]
if missing_dirs:
    print(f"Error: Make sure the paths are correct. '{base_data_path}' or its subfolders not found.")
    print("Please check the exact path to your 'DeepFakeData/dataset' folder in Google Drive.")
    # Raise instead of calling exit(): in a notebook exit() tears down the
    # kernel session, whereas an exception only stops this cell (and "Run all").
    raise FileNotFoundError(f"Dataset directories not found: {missing_dirs}")

total_original_videos_count, original_videos = count_video_files_recursively(original_sequences_path)
total_manipulated_videos_count, manipulated_videos = count_video_files_recursively(manipulated_sequences_path)

print(f"Total original video files found: {total_original_videos_count}")
print(f"Total manipulated video files found: {total_manipulated_videos_count}")

Counting files...
Total original video files found: 363
Total manipulated video files found: 312


In [22]:
# NOTE(review): the `face_recognition` import in the next cell is commented out
# and face detection there uses dlib directly -- confirm this install is still
# needed. If it is kept, consider pinning a version for reproducible runs.
!pip install face_recognition



In [24]:
import cv2
# import face_recognition # Or dlib, or MTCNN for face detection - Removed face_recognition
import dlib # Explicitly import dlib
import numpy as np
import random
from tqdm.notebook import tqdm # For progress bars
import os # Import os

# Tunables for sampling frames and normalising the face crops
FRAMES_PER_VIDEO = 5 # How many frames are sampled from each video
FACE_IMG_SIZE = (128, 128) # Size every cropped face is resized to

# dlib's default (CPU) frontal face detector, created once and reused for every frame
detector = dlib.get_frontal_face_detector()

# Destination folders for the cropped faces, one sub-folder per class
output_base_path = '/content/processed_deepfake_faces'
output_real_faces_path = output_base_path + '/real_faces'
output_fake_faces_path = output_base_path + '/fake_faces'

# Create both class folders up front; already-existing folders are fine
os.makedirs(output_fake_faces_path, exist_ok=True)
os.makedirs(output_real_faces_path, exist_ok=True)

def process_video(video_path, output_dir, label_type, frames_to_extract=FRAMES_PER_VIDEO, target_size=FACE_IMG_SIZE):
    """
    Extracts frames from a video, detects faces using dlib's CPU detector, crops them, and saves them.

    Args:
        video_path: Path of the video file to read.
        output_dir: Directory the cropped face JPEGs are written to.
        label_type: Label of the video (e.g. 'real' / 'fake'). Currently unused;
            kept so existing callers keep working.
        frames_to_extract: Number of evenly spaced frames to sample from the video.
        target_size: Size passed to ``cv2.resize`` for every face crop.

    Returns:
        The number of face images that were successfully written.
    """
    # Nothing to sample; this also avoids a division by zero when computing
    # the frame positions below.
    if frames_to_extract <= 0:
        return 0

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Error: Could not open video {video_path}")
            return 0

        # Some containers report a negative/unknown frame count; treat it like empty.
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames <= 0:
            print(f"Warning: Video {video_path} has 0 frames.")
            return 0

        # Evenly spaced frame positions, clamped to the last frame and
        # de-duplicated (short videos can map several samples to one frame).
        frame_indices = sorted({
            min(int(i * (total_frames / frames_to_extract)), total_frames - 1)
            for i in range(frames_to_extract)
        })

        # splitext keeps dots inside the file name, so "clip.v1.mp4" and
        # "clip.v2.mp4" no longer collapse to the same output prefix.
        video_name = os.path.splitext(os.path.basename(video_path))[0]

        processed_count = 0
        for frame_idx in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
            ret, frame = cap.read()
            if not ret or frame is None:
                continue

            frame_height, frame_width = frame.shape[:2]

            # Convert the image from BGR color (OpenCV default) to grayscale for dlib
            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Find all the face locations in the current frame using dlib's CPU detector
            faces = detector(gray_frame, 1) # The '1' means upsample the image 1 time

            for j, face in enumerate(faces):
                # dlib rectangles may extend past the image border. A negative
                # coordinate would be interpreted by NumPy slicing as "count
                # from the end" and yield a wrong or empty crop, so clamp the
                # box to the frame first.
                left = max(face.left(), 0)
                top = max(face.top(), 0)
                right = min(face.right(), frame_width)
                bottom = min(face.bottom(), frame_height)
                if right <= left or bottom <= top:
                    continue

                # Crop the face
                face_image = frame[top:bottom, left:right]
                if face_image.size == 0: # Check if crop resulted in empty image
                    continue

                # Resize face image to target size
                face_image_resized = cv2.resize(face_image, target_size)

                # Save the processed face image
                # Naming convention: video_name_frame_idx_face_idx.jpg
                output_filename = f"{video_name}_frame{frame_idx}_face{j}.jpg"
                output_filepath = os.path.join(output_dir, output_filename)
                # imwrite reports failure through its return value; only count
                # images that were actually written.
                if cv2.imwrite(output_filepath, face_image_resized):
                    processed_count += 1

        return processed_count
    finally:
        # Always free the capture handle, including on early returns and errors.
        cap.release()

print("\nStarting frame extraction and face processing...")

# Original (real) videos: collect the number of faces saved per video, then total them.
# 'original_videos' is the list produced by the recursive file scan.
real_face_counts = []
for video_file_path in tqdm(original_videos, desc="Processing Original Videos"):
    real_face_counts.append(process_video(video_file_path, output_real_faces_path, 'real'))
total_processed_real_faces = sum(real_face_counts)

# Manipulated (fake) videos: same procedure on the 'manipulated_videos' list.
fake_face_counts = []
for video_file_path in tqdm(manipulated_videos, desc="Processing Manipulated Videos"):
    fake_face_counts.append(process_video(video_file_path, output_fake_faces_path, 'fake'))
total_processed_fake_faces = sum(fake_face_counts)

print(f"\nFinished processing. Total real faces saved: {total_processed_real_faces}")
print(f"Total fake faces saved: {total_processed_fake_faces}")


Starting frame extraction and face processing...


Processing Original Videos:   0%|          | 0/363 [00:00<?, ?it/s]

Error: Could not open video /content/drive/MyDrive/DeepFakeData/dataset/DFD_original sequences/01__walking_and_outside_surprised.mp4
Error: Could not open video /content/drive/MyDrive/DeepFakeData/dataset/DFD_original sequences/01__meeting_serious.mp4
Error: Could not open video /content/drive/MyDrive/DeepFakeData/dataset/DFD_original sequences/01__talking_angry_couch.mp4
Error: Could not open video /content/drive/MyDrive/DeepFakeData/dataset/DFD_original sequences/01__outside_talking_pan_laughing.mp4
Error: Could not open video /content/drive/MyDrive/DeepFakeData/dataset/DFD_original sequences/01__walk_down_hall_angry.mp4
Error: Could not open video /content/drive/MyDrive/DeepFakeData/dataset/DFD_original sequences/01__walking_down_street_outside_angry.mp4
Error: Could not open video /content/drive/MyDrive/DeepFakeData/dataset/DFD_original sequences/01__walking_outside_cafe_disgusted.mp4
Error: Could not open video /content/drive/MyDrive/DeepFakeData/dataset/DFD_original sequences/01_

Processing Manipulated Videos:   0%|          | 0/312 [00:00<?, ?it/s]

Error: Could not open video /content/drive/MyDrive/DeepFakeData/dataset/DFD_manipulated_sequences/DFD_manipulated_sequences/03_13__walking_outside_cafe_disgusted__GBYWJW06.mp4
Error: Could not open video /content/drive/MyDrive/DeepFakeData/dataset/DFD_manipulated_sequences/DFD_manipulated_sequences/03_14__meeting_serious__L0GHYIFS.mp4
Error: Could not open video /content/drive/MyDrive/DeepFakeData/dataset/DFD_manipulated_sequences/DFD_manipulated_sequences/03_14__kitchen_pan__B1CG25H1.mp4
Error: Could not open video /content/drive/MyDrive/DeepFakeData/dataset/DFD_manipulated_sequences/DFD_manipulated_sequences/03_14__meeting_serious__KJ221YN0.mp4
Error: Could not open video /content/drive/MyDrive/DeepFakeData/dataset/DFD_manipulated_sequences/DFD_manipulated_sequences/03_14__outside_talking_pan_laughing__0O9TTCJT.mp4
Error: Could not open video /content/drive/MyDrive/DeepFakeData/dataset/DFD_manipulated_sequences/DFD_manipulated_sequences/03_14__secret_conversation__KJ221YN0.mp4
Error:

In [27]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Create an ImageDataGenerator for loading images
# We'll use this for both loading and potentially for data augmentation later
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2) # Normalize pixel values to [0, 1] and set validation split

# Point the generator to your processed data directories
# The `flow_from_directory` method infers labels from the subfolder names and
# assigns class indices in alphanumeric order of those names, so here
# 'fake_faces' is class 0 and 'real_faces' is class 1 (see `class_indices` below).
batch_size = 32 # You can adjust this based on your GPU memory

train_generator = datagen.flow_from_directory(
    output_base_path, # Use the correct path for loading
    target_size=FACE_IMG_SIZE,
    batch_size=batch_size,
    class_mode='binary', # For binary classification (real/fake)
    subset='training',   # Specify this is the training subset
    seed=42,             # For reproducibility
    interpolation='nearest' # Use nearest neighbor for resizing
)

# Validation generator
validation_generator = datagen.flow_from_directory(
    output_base_path, # Use the correct path for loading
    target_size=FACE_IMG_SIZE,
    batch_size=batch_size,
    class_mode='binary',
    subset='validation', # Specify this is the validation subset
    shuffle=False,       # Keep a fixed order so predictions line up with .classes / .filenames
    seed=42,
    interpolation='nearest'
)

# Note: For a true 'test' set, it's best practice to hold out a portion of your
# data *before* any processing and then process and evaluate on that unseen set.
# For simplicity in a beginner project, you might use the validation set as your
# de-facto test set, or create a separate `test_generator` by manually moving files
# or using a different `flow_from_directory` instance on a dedicated test folder.
# For now, we'll assume `validation_generator` serves as your evaluation set.
# NOTE(review): `validation_split` divides the image files, not the source
# videos, so face crops from one video can presumably end up in both subsets --
# consider splitting by video before trusting the validation metrics.

print(f"\nTraining data batches: {len(train_generator)}")
print(f"Validation data batches: {len(validation_generator)}")
print(f"Class indices: {train_generator.class_indices}") # Shows which label maps to which class (0 or 1)

Found 1515 images belonging to 2 classes.
Found 378 images belonging to 2 classes.

Training data batches: 48
Validation data batches: 12
Class indices: {'fake_faces': 0, 'real_faces': 1}


In [28]:
# Final status message once the training and validation generators have been built.
print("\nData preparation complete! You now have generators ready for model training.")



Data preparation complete! You now have generators ready for model training.
