In [1]:
import cv2
import os

In [2]:
input_root = "E:/deepfake/media/datasets"
output_root = "E:/deepfake/media/processeddata"

# Make sure output folders exist
os.makedirs(output_root + "/real", exist_ok=True)
os.makedirs(output_root + "/fake", exist_ok=True)

# Parameters
frame_interval = 30  # Capture every 30th frame
frame_size = (224, 224)  # Resize for CNN input

In [None]:
# Process each category
for label in ["real", "fake"]:
    input_folder = os.path.join(input_root, label)
    output_folder = os.path.join(output_root, label)

    for filename in os.listdir(input_folder):
        if filename.endswith(".mp4"):
            video_path = os.path.join(input_folder, filename)
            video_name = os.path.splitext(filename)[0]
            print(f"Processing {video_path}...")

            # Read video
            cap = cv2.VideoCapture(video_path)
            frame_num = 0
            saved_count = 0

            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # Save every Nth frame
                if frame_num % frame_interval == 0:
                    resized = cv2.resize(frame, frame_size)
                    save_path = os.path.join(output_folder, f"{video_name}_frame{frame_num}.jpg")
                    cv2.imwrite(save_path, resized)
                    saved_count += 1

                frame_num += 1

            cap.release()
            print(f"Saved {saved_count} frames from {filename} to {output_folder}")

Processing E:/deepfake/media/datasets\real\000.mp4...
Saved 5 frames from 000.mp4 to E:/deepfake/media/processeddata\real
Processing E:/deepfake/media/datasets\real\00000.mp4...
Saved 15 frames from 00000.mp4 to E:/deepfake/media/processeddata\real
Processing E:/deepfake/media/datasets\real\00001.mp4...
Saved 14 frames from 00001.mp4 to E:/deepfake/media/processeddata\real
Processing E:/deepfake/media/datasets\real\00002.mp4...
Saved 18 frames from 00002.mp4 to E:/deepfake/media/processeddata\real
Processing E:/deepfake/media/datasets\real\00003.mp4...
Saved 10 frames from 00003.mp4 to E:/deepfake/media/processeddata\real
Processing E:/deepfake/media/datasets\real\00004.mp4...
Saved 15 frames from 00004.mp4 to E:/deepfake/media/processeddata\real
Processing E:/deepfake/media/datasets\real\00005.mp4...
Saved 16 frames from 00005.mp4 to E:/deepfake/media/processeddata\real
Processing E:/deepfake/media/datasets\real\00006.mp4...
Saved 17 frames from 00006.mp4 to E:/deepfake/media/processe

In [6]:
import os
import pandas as pd

processed_root = "E:/deepfake/media/processeddata"
csv_path = os.path.join(processed_root, "train_labels.csv")

# Prepare the list to collect data
data = []

# Loop through real and fake folders
for label in ["real", "fake"]:
    folder_path = os.path.join(processed_root, label)
    for filename in os.listdir(folder_path):
        if filename.endswith(".jpg"):
            image_path = os.path.join(label, filename)  # relative path for training
            data.append({"filename": image_path, "label": label})

# Create DataFrame
df = pd.DataFrame(data)

# Shuffle the dataset
df = df.sample(frac=1).reset_index(drop=True)

# Save to CSV
df.to_csv(csv_path, index=False)

print(f"CSV file saved at: {csv_path}")
df.head()


CSV file saved at: E:/deepfake/media/processeddata\train_labels.csv


Unnamed: 0,filename,label
0,fake\id10_id13_0003_frame0.jpg,fake
1,fake\id44_id48_0002_frame30.jpg,fake
2,fake\id31_id37_0003_frame360.jpg,fake
3,fake\id34_id26_0008_frame0.jpg,fake
4,fake\id0_id2_0001_frame300.jpg,fake
