In [1]:
import os
import cv2
import numpy as np
import csv

# Function to extract up to `num_frames` evenly spaced frames from each video
def extract_frames(video_path, num_frames=15):
    """Sample frames at evenly spaced positions across a video.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        num_frames: Number of equally spaced positions to sample.

    Returns:
        List of frames, exactly as returned by ``cap.read()``, in order of
        appearance. The list is empty when the capture cannot be opened or
        reports no frames, and shorter than ``num_frames`` when the video has
        fewer frames than requested or when individual reads fail.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        if not cap.isOpened():
            return frames

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames <= 0:
            # Without this guard linspace(0, -1, ...) would yield a negative seek.
            return frames

        # Equally spaced indices; np.unique drops the repeats that appear when
        # the video has fewer frames than requested, so no frame is saved twice.
        frame_indices = np.unique(
            np.linspace(0, total_frames - 1, num_frames, dtype=int)
        )

        for i in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(i))
            ret, frame = cap.read()
            if ret:
                frames.append(frame)
    finally:
        # Release the capture even if seeking or decoding raises.
        cap.release()
    return frames

# Function to create dataset with frames and labels, skipping already processed videos
def create_dataset(folders, output_dir, num_frames=15, csv_file='video_frames.csv'):
    """Extract frames from every video in ``folders`` and log them to a CSV.

    Each video's frames are written to ``output_dir`` as
    ``<video-stem>_frame_<i>.jpg`` and one ``(filename, label)`` row per saved
    frame is appended to ``csv_file`` as soon as that video is finished, so an
    interrupted run can be resumed.

    Args:
        folders: Iterable of ``(folder_path, label)`` pairs.
        output_dir: Directory that receives the frame images.
        num_frames: Forwarded to ``extract_frames``.
        csv_file: Path of the progress/label CSV. Defaults to the same
            ``'video_frames.csv'`` the notebook assigns at module level.

    Returns:
        List of ``(frame_path, label)`` tuples for frames saved during this
        call (videos that were skipped contribute nothing).
    """
    header = ["frame_path", "label"]

    # Collect stems of videos already logged. Only a row that really is the
    # header is skipped: older CSVs produced without a header must not lose
    # their first data row, and an empty file must not raise StopIteration.
    processed_files = set()
    csv_has_content = os.path.exists(csv_file) and os.path.getsize(csv_file) > 0
    if csv_has_content:
        with open(csv_file, 'r', newline='') as f:
            for row in csv.reader(f):
                if not row or row == header:
                    continue
                processed_files.add(row[0].split('_frame_')[0])

    dataset = []
    for folder, label in folders:
        # Sorted so that repeated runs visit the videos in the same order.
        for video_file in sorted(os.listdir(folder)):
            video_path = os.path.join(folder, video_file)
            base_filename = os.path.splitext(video_file)[0]

            # Skip videos that are already processed.
            # NOTE(review): matching is by file stem only, so two folders that
            # contain the same stem would share frame filenames — confirm the
            # input folders never overlap.
            if base_filename in processed_files:
                print(f"Skipping {video_file}, already processed.")
                continue

            frames = extract_frames(video_path, num_frames)
            saved_names = []
            for i, frame in enumerate(frames):
                frame_filename = f"{base_filename}_frame_{i}.jpg"
                frame_path = os.path.join(output_dir, frame_filename)
                # Only record frames that actually reached the disk.
                if cv2.imwrite(frame_path, frame):
                    saved_names.append(frame_filename)
                    dataset.append((frame_path, label))

            if not saved_names:
                continue

            # Write frames to the CSV after each video is processed to ensure progress is saved
            with open(csv_file, 'a', newline='') as f:
                writer = csv.writer(f)
                if not csv_has_content:
                    # Brand-new (or empty) CSV: emit the header exactly once.
                    writer.writerow(header)
                    csv_has_content = True
                writer.writerows((name, label) for name in saved_names)

    return dataset

# Define your video folders and corresponding labels.
# Raw strings keep the Windows backslashes literal: sequences such as "\P" or
# "\D" in a normal string are invalid escapes that newer Python versions warn
# about. The resulting path values are unchanged.
folders = [
    (r'F:\Project\Dataset\Dataset\original_sequences\youtube\c40\videos', 'real'),
    (r'F:\Project\Dataset\Dataset\manipulated_sequences\Deepfakes\c40\videos', 'fake')
]

# Define output directory for frames (created if missing)
output_dir = 'frames_dataset'
os.makedirs(output_dir, exist_ok=True)

# CSV file path; must be assigned before create_dataset() is called
csv_file = 'video_frames.csv'

# Create the dataset
dataset = create_dataset(folders, output_dir)

print(f"Dataset updated and saved as {csv_file}.")


Skipping 000.mp4, already processed.
Skipping 001.mp4, already processed.
Skipping 002.mp4, already processed.
Skipping 003.mp4, already processed.
Skipping 004.mp4, already processed.
Skipping 005.mp4, already processed.
Skipping 006.mp4, already processed.
Skipping 007.mp4, already processed.
Skipping 008.mp4, already processed.
Skipping 009.mp4, already processed.
Skipping 010.mp4, already processed.
Skipping 011.mp4, already processed.
Skipping 012.mp4, already processed.
Skipping 013.mp4, already processed.
Skipping 014.mp4, already processed.
Skipping 015.mp4, already processed.
Skipping 016.mp4, already processed.
Skipping 017.mp4, already processed.
Skipping 018.mp4, already processed.
Skipping 019.mp4, already processed.
Skipping 020.mp4, already processed.
Skipping 021.mp4, already processed.
Skipping 022.mp4, already processed.
Skipping 023.mp4, already processed.
Skipping 024.mp4, already processed.
Skipping 025.mp4, already processed.
Skipping 026.mp4, already processed.
S

In [1]:
import cv2
import numpy as np
import os
import csv

# Function to extract key frames from each video
def extract_key_frames(video_path, num_frames=10):
    """Pick the frames that differ most from the frame right before them.

    The video is decoded twice: the first pass scores every frame (except the
    first) by the summed absolute difference to its predecessor, the second
    pass collects the ``num_frames`` highest-scoring frames.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        num_frames: Maximum number of key frames to return; values below 1
            yield an empty list.

    Returns:
        List of frames, exactly as returned by ``cap.read()``, in order of
        appearance. Empty when the capture cannot be opened or fewer than two
        frames could be read.
    """
    cap = cv2.VideoCapture(video_path)
    key_frames = []
    try:
        if not cap.isOpened():
            return key_frames

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Pass 1: calculate differences between consecutive frames
        diffs = []
        last_frame = None
        for i in range(total_frames):
            ret, frame = cap.read()
            if not ret:
                break
            if last_frame is not None:
                diff = cv2.absdiff(frame, last_frame)
                # Explicit 64-bit accumulator so the score does not depend on
                # the platform's default integer width.
                diffs.append((int(np.sum(diff, dtype=np.uint64)), i))
            last_frame = frame

        # Largest differences first; the sort is stable, so ties keep the
        # earlier frame ahead of the later one.
        diffs.sort(reverse=True, key=lambda x: x[0])
        # Clamp at 0: a negative count would otherwise slice diffs[:-k] and
        # select almost every frame.
        wanted = max(num_frames, 0)
        key_frame_indices = {index for _, index in diffs[:wanted]}
        if not key_frame_indices:
            return key_frames
        last_wanted = max(key_frame_indices)

        # Pass 2: re-read from the start and stop right after the last key
        # frame instead of decoding the remainder of the video.
        cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # Reset to start
        for i in range(last_wanted + 1):
            ret, frame = cap.read()
            if not ret:
                break
            if i in key_frame_indices:
                key_frames.append(frame)
    finally:
        # Release the capture even if decoding raises.
        cap.release()
    return key_frames

# Function to create dataset with key frames and labels, skipping already processed videos
def create_dataset(folders, output_dir, num_frames=10):
    """Extract key frames from every video in ``folders`` and log them to a CSV.

    Each video's key frames are written to ``output_dir`` as
    ``<video-stem>_key_frame_<i>.jpg``. One ``(filename, label)`` row per saved
    frame is appended to ``<output_dir>/dataset.csv`` as soon as that video is
    finished, so an interrupted run can be resumed.

    Args:
        folders: Iterable of ``(folder_path, label)`` pairs.
        output_dir: Directory that receives the frame images and the CSV.
        num_frames: Forwarded to ``extract_key_frames``.

    Returns:
        List of ``(frame_path, label)`` tuples for frames saved during this
        call (videos that were skipped contribute nothing).
    """
    header = ["frame_path", "label"]
    csv_file = os.path.join(output_dir, 'dataset.csv')

    # Collect stems of videos already logged. Header rows are recognised by
    # content rather than position, which also tolerates CSVs in which the
    # header was written more than once, and an empty file no longer raises.
    processed_files = set()
    csv_has_content = os.path.exists(csv_file) and os.path.getsize(csv_file) > 0
    if csv_has_content:
        with open(csv_file, 'r', newline='') as f:
            for row in csv.reader(f):
                if not row or row == header:
                    continue
                processed_files.add(row[0].split('_key_frame_')[0])

    dataset = []
    for folder, label in folders:
        # Sorted so that repeated runs visit the videos in the same order.
        for video_file in sorted(os.listdir(folder)):
            video_path = os.path.join(folder, video_file)
            base_filename = os.path.splitext(video_file)[0]

            # Skip videos that are already processed.
            # NOTE(review): matching is by file stem only, so two folders that
            # contain the same stem would share frame filenames — confirm the
            # input folders never overlap.
            if base_filename in processed_files:
                print(f"Skipping {video_file}, already processed.")
                continue

            key_frames = extract_key_frames(video_path, num_frames)
            saved_names = []
            for i, frame in enumerate(key_frames):
                frame_filename = f"{base_filename}_key_frame_{i}.jpg"
                frame_path = os.path.join(output_dir, frame_filename)
                # Only record frames that actually reached the disk.
                if cv2.imwrite(frame_path, frame):
                    saved_names.append(frame_filename)
                    dataset.append((frame_path, label))

            if not saved_names:
                continue

            # Write frames to the CSV after each video is processed
            with open(csv_file, 'a', newline='') as f:
                writer = csv.writer(f)
                if not csv_has_content:
                    # Write the header exactly once, when the CSV is new or
                    # empty, not once per video.
                    writer.writerow(header)
                    csv_has_content = True
                writer.writerows((name, label) for name in saved_names)

    return dataset

# Define your video folders and corresponding labels
folders = [
    ('F:/Project/Dataset/Dataset/original_sequences/youtube/c40/videos', 'real'),
    ('F:/Project/Dataset/Dataset/manipulated_sequences/Deepfakes/c40/videos', 'fake')
]

# Define output directory for frames (created if missing); create_dataset
# also writes its dataset.csv inside this directory
output_dir = 'frames_dataset2'
os.makedirs(output_dir, exist_ok=True)

# Create the dataset
dataset = create_dataset(folders, output_dir)

# Plain string: the message has no placeholders, so no f-prefix is needed
print("Dataset updated and saved.")


Skipping 000.mp4, already processed.
Skipping 001.mp4, already processed.
Skipping 002.mp4, already processed.
Skipping 003.mp4, already processed.
Skipping 004.mp4, already processed.
Skipping 005.mp4, already processed.
Skipping 006.mp4, already processed.
Skipping 007.mp4, already processed.
Skipping 008.mp4, already processed.
Skipping 009.mp4, already processed.
Skipping 010.mp4, already processed.
Skipping 011.mp4, already processed.
Skipping 012.mp4, already processed.
Skipping 013.mp4, already processed.
Skipping 014.mp4, already processed.
Skipping 015.mp4, already processed.
Skipping 016.mp4, already processed.
Skipping 017.mp4, already processed.
Skipping 018.mp4, already processed.
Skipping 019.mp4, already processed.
Skipping 020.mp4, already processed.
Skipping 021.mp4, already processed.
Skipping 022.mp4, already processed.
Skipping 023.mp4, already processed.
Skipping 024.mp4, already processed.
Skipping 025.mp4, already processed.
Skipping 026.mp4, already processed.
S