## Video Data Augmentation Script for Balancing Real and Fake Datasets

### Import necessary libraries

In [1]:
import os
import cv2
import random
import numpy as np
from tqdm import tqdm

### Paths and Dataset Information

In [None]:
# Locations of the real clips, the fake clips, and the balanced output set.
real_videos_path = 'path_to_real_videos_folder'
fake_videos_path = 'path_to_fake_videos_folder'  # Reference only, no processing
output_videos_path = 'path_to_output_folder'
os.makedirs(output_videos_path, exist_ok=True)

# Collect the '.mp4' file names in each folder. os.listdir returns entries in
# arbitrary order, so the lists are sorted to make the processing order (and
# therefore the numbering of augmented files) stable from run to run.
real_videos = sorted(f for f in os.listdir(real_videos_path) if f.endswith('.mp4'))
fake_videos = sorted(f for f in os.listdir(fake_videos_path) if f.endswith('.mp4'))

# Number of extra real clips required to match the fake count. Clamped at
# zero so that a dataset which already has at least as many real videos as
# fake ones reports 0 instead of a negative number.
augmentations_needed = max(0, len(fake_videos) - len(real_videos))

print(f"Number of real videos: {len(real_videos)}")
print(f"Number of fake videos: {len(fake_videos)}")
print(f"Augmentations needed: {augmentations_needed}")

### Define Augmentation Functions

In [None]:

def horizontal_flip(frame):
    """Mirror the frame left-to-right."""
    # A flip code of 1 asks OpenCV to flip around the vertical axis.
    mirror_code = 1
    return cv2.flip(frame, mirror_code)

def random_rotation(frame):
    """Rotate the frame about its centre by a random angle in [-15, 15] degrees.

    The output keeps the input's width and height and is not rescaled.
    """
    height, width = frame.shape[:2]
    centre = (width // 2, height // 2)
    angle = random.uniform(-15, 15)
    rotation = cv2.getRotationMatrix2D(centre, angle, 1.0)
    return cv2.warpAffine(frame, rotation, (width, height))

def random_brightness(frame):
    """Scale the frame's brightness by a random factor between 0.6 and 1.4.

    The frame is converted from BGR to HSV, the third (value) channel is
    multiplied by the factor and clipped to [0, 255], and the result is
    converted back to BGR.
    """
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    # Work in float so the multiplication cannot wrap around in uint8.
    hsv = hsv.astype(np.float64)
    gain = random.uniform(0.6, 1.4)
    scaled_value = hsv[:, :, 2] * gain
    hsv[:, :, 2] = np.clip(scaled_value, 0, 255)
    as_bytes = hsv.astype(np.uint8)
    return cv2.cvtColor(as_bytes, cv2.COLOR_HSV2BGR)

def random_contrast(frame):
    """Scale pixel values by a random factor between 0.5 and 1.5."""
    gain = random.uniform(0.5, 1.5)
    # beta=0: pure scaling, no constant offset added.
    return cv2.convertScaleAbs(frame, alpha=gain, beta=0)

def add_gaussian_noise(frame):
    """Add zero-mean Gaussian noise and return the result as uint8.

    The noise standard deviation is drawn uniformly from [10, 30] on each
    call; the noisy values are clipped to [0, 255] before the uint8 cast.
    """
    sigma = random.uniform(10, 30)
    noise = np.random.normal(0, sigma, frame.shape)
    noisy = frame + noise
    clipped = np.clip(noisy, 0, 255)
    return clipped.astype(np.uint8)

def gaussian_blur(frame):
    """Blur the frame with a Gaussian kernel of random size.

    Kernel width and height are drawn independently from {3, 5, 7}; sigma is
    passed as 0.
    """
    sizes = [3, 5, 7]
    kernel_width = random.choice(sizes)
    kernel_height = random.choice(sizes)
    return cv2.GaussianBlur(frame, (kernel_width, kernel_height), 0)

# Pool of augmentation callables; each takes a frame and returns a frame.
augmentations = [
    horizontal_flip,
    random_rotation,
    random_brightness,
    random_contrast,
    add_gaussian_noise,
    gaussian_blur,
]

### Process Video Function

In [None]:
def process_video(video_path, output_path, augment_list=None):
    """Re-encode a video, optionally applying augmentations to every frame.

    Args:
        video_path: Path of the video to read.
        output_path: Path of the video to write. It is encoded with the
            'mp4v' FourCC using the frame rate and frame size reported by
            the input.
        augment_list: Optional iterable of callables, each taking a frame and
            returning a frame. They are applied in order to every frame.
            With ``None`` (the default) or an empty list, frames are passed
            to the writer unmodified.

    Raises:
        OSError: If the input cannot be opened for reading or the output
            cannot be opened for writing.

    Note:
        Every callable is invoked once per frame, so an augmentation that
        draws its random parameters internally draws new ones for each frame.
    """
    # None rather than a mutable [] default, which would be shared by calls.
    augment_list = () if augment_list is None else augment_list

    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        # Fail loudly: an unopened capture would otherwise yield a silent,
        # empty output file.
        if not cap.isOpened():
            raise OSError(f"Could not open video for reading: {video_path}")

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)
        if not out.isOpened():
            raise OSError(f"Could not open video for writing: {output_path}")

        while True:
            ok, frame = cap.read()
            if not ok:
                break

            # Apply chosen augmentations
            for aug in augment_list:
                frame = aug(frame)

            out.write(frame)
    finally:
        # Release both handles even when an augmentation raises mid-video.
        cap.release()
        if out is not None:
            out.release()

### Apply Augmentations to Real Videos

In [None]:

def _write_augmented_copy(video_name, index):
    """Write one augmented copy of a real video into the output folder.

    Between one and three distinct augmentations are sampled from
    ``augmentations`` and applied. The output file is named
    ``aug_<index>_<video_name>``.
    """
    aug_choices = random.sample(augmentations, random.randint(1, 3))
    # real_videos holds bare file names, so the source folder must be joined
    # back on before the file can be opened.
    source_path = os.path.join(real_videos_path, video_name)
    target_path = os.path.join(output_videos_path, f"aug_{index}_{video_name}")
    process_video(source_path, target_path, aug_choices)


if not real_videos:
    # Fail with a clear message instead of a ZeroDivisionError below.
    raise ValueError(f"No .mp4 videos found in {real_videos_path!r}; nothing to augment")

# A negative deficit means real videos already outnumber fake ones, in which
# case no augmented copies are generated.
deficit = max(augmentations_needed, 0)
augmentations_per_video = deficit // len(real_videos)
augmented_count = 0

for video in tqdm(real_videos):
    # Pass the original through process_video with no augmentations so it
    # also lands in the output folder.
    process_video(os.path.join(real_videos_path, video),
                  os.path.join(output_videos_path, video))

    for _ in range(augmentations_per_video):
        _write_augmented_copy(video, augmented_count)
        augmented_count += 1

# Handle remaining augmentations if division isn't even
remaining = deficit - augmented_count
if remaining > 0:
    print(f"Distributing remaining {remaining} augmentations randomly")
    for _ in range(remaining):
        _write_augmented_copy(random.choice(real_videos), augmented_count)
        augmented_count += 1

print(f"Augmentation complete. Total augmented videos: {augmented_count}")
print(f"Original videos: {len(real_videos)}")
print(f"Total dataset size: {len(real_videos) + augmented_count}")