# Downloading the dataset

We are using the Face Forensics++ (FF++) dataset

In [2]:
# import kagglehub

# # Download latest version
# path = kagglehub.dataset_download("sanikatiwarekar/deep-fake-detection-dfd-entire-original-dataset")

# print("Path to dataset files:", path)

  from .autonotebook import tqdm as notebook_tqdm


Downloading from https://www.kaggle.com/api/v1/datasets/download/sanikatiwarekar/deep-fake-detection-dfd-entire-original-dataset?dataset_version_number=1...


100%|██████████| 22.5G/22.5G [23:54<00:00, 16.8MB/s]  


Extracting files...
Path to dataset files: /Users/jordan/.cache/kagglehub/datasets/sanikatiwarekar/deep-fake-detection-dfd-entire-original-dataset/versions/1


No CSV file found in dataset directory: /Users/jordan/.cache/kagglehub/datasets/sanikatiwarekar/deep-fake-detection-dfd-entire-original-dataset/versions/1


In [5]:
import os
from dotenv import load_dotenv
#store the paths to the real and fake images in an env file

load_dotenv()

real_path = os.getenv('REAL_PATH')
fake_path = os.getenv('DEEPFAKE_PATH')

def count_mp4_files(folder):
    mp4_count = 0
    for root, dirs, files in os.walk(folder):
        mp4_count += sum(1 for f in files if f.endswith('.mp4'))
    return mp4_count

real_mp4_count = count_mp4_files(real_path)
fake_mp4_count = count_mp4_files(fake_path)

print(f'Real folder mp4 count: {real_mp4_count}')
print(f'Fake folder mp4 count: {fake_mp4_count}')

Real folder mp4 count: 363
Fake folder mp4 count: 3068


Use the EfficientNet model for feature extraction

In [8]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.models import Model
import numpy as np

def get_efficientnet_feature_extractor():
    """
    Returns an EfficientNetB0 model that outputs feature vectors instead of class predictions.
    """
    # Load EfficientNetB0 without the top classification layer
    base_model = EfficientNetB0(weights='imagenet', include_top=False, pooling='avg')
    # The output will be the global average pooled features
    feature_extractor = Model(inputs=base_model.input, outputs=base_model.output)
    return feature_extractor

def extract_face_features(face_images, feature_extractor):
    """
    Given a batch of face images, returns their EfficientNet feature vectors.
    face_images: numpy array of shape (batch_size, height, width, channels)
    feature_extractor: model returned by get_efficientnet_feature_extractor()
    """
    # EfficientNet expects images scaled to [0, 255] and size 224x224
    # You may need to preprocess your images accordingly
    features = feature_extractor.predict(face_images)
    return features

In [36]:
import cv2
import os
import numpy as np
from ultralytics import YOLO

def save_first_210_cropped_faces(video_path, output_dir, yolo_model_path='yolov8n.pt', conf=0.5, save_images=False):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error opening video file: {video_path}")
        return
    os.makedirs(output_dir, exist_ok=True)
    if save_images:
        os.makedirs(os.path.join(output_dir, 'images'), exist_ok=True)
    
    model = YOLO(yolo_model_path)
    frame_count = 0
    saved_count = 0
    while frame_count < 210:
        ret, frame = cap.read()
        if not ret:
            print(f"Could not read frame {frame_count}")
            break
        
        results = model.predict(source=frame, conf=conf, classes=0, verbose=False)
        
        # Find the largest face in this frame
        largest_face = None
        largest_area = 0
        
        for result in results:
            for box in result.boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                area = (x2 - x1) * (y2 - y1)
                
                if area > largest_area:
                    largest_area = area
                    largest_face = (x1, y1, x2, y2)
        
        # Save only the largest face if found
        if largest_face:
            x1, y1, x2, y2 = largest_face
            face = frame[y1:y2, x1:x2]
            face_resized = cv2.resize(face, (224, 224))
            face_scaled = face_resized.astype(np.float32) / 127.5 - 1.0
            
            # Save as numpy array for EfficientNet
            np.save(os.path.join(output_dir, f"face_{frame_count:04d}.npy"), face_scaled)
            
            # Optionally save as image for visualization
            if save_images:
                # Convert back to 0-255 range for image saving
                face_image = ((face_scaled + 1.0) * 127.5).astype(np.uint8)
                cv2.imwrite(os.path.join(output_dir, 'images', f"face_{frame_count:04d}.png"), face_image)
            
            saved_count += 1
        else:
            print(f"No face detected in frame {frame_count}")
        
        frame_count += 1
    cap.release()
    print(f"Saved {saved_count} cropped faces to {output_dir}")

In [None]:
import os

# Get the path to the fake videos folder from your .env
fake_path = os.getenv('DEEPFAKE_PATH')

# List all mp4 files in the fake folder
fake_videos = [f for f in os.listdir(fake_path) if f.endswith('.mp4')]

if fake_videos:
    first_video = fake_videos[0]
    video_path = os.path.join(fake_path, first_video)
    output_dir = 'example_output'  # Change this to your desired output folder

    print(f"Processing video: {video_path}")
    save_first_210_cropped_faces(video_path, output_dir, save_images=True)
else:
    print("No fake videos found in the folder.")

Processing video: /Users/jordan/.cache/kagglehub/datasets/sanikatiwarekar/deep-fake-detection-dfd-entire-original-dataset/versions/1/DFD_manipulated_sequences/DFD_manipulated_sequences/13_20__walking_down_indoor_hall_disgust__EV1V4ZQV.mp4
Saved 210 cropped faces to cropped_faces_output
Saved 210 cropped faces to cropped_faces_output
