In [1]:

import os
import cv2
import numpy as np
from tqdm import tqdm


In [6]:
# Source directory that holds the raw videos to preprocess.
video_folder = r"C:\Users\Admin\Desktop\temp"

# Destination directory for the generated .npy arrays. It is created up front
# (including missing parent directories); an already existing directory is fine.
output_folder = r"E:\cs229_project\train\video\preprocessed_fake"
os.makedirs(name=output_folder, exist_ok=True)

In [7]:
# Per-channel ImageNet mean / standard deviation as float32 arrays.
# Singleton axes are inserted around the channel axis so both arrays have shape
# (1, 3, 1, 1) and broadcast against a channels-first (N, 3, H, W) batch.
imagenet_mean = np.asarray([0.485, 0.456, 0.406], dtype=np.float32)[None, :, None, None]
imagenet_std = np.asarray([0.229, 0.224, 0.225], dtype=np.float32)[None, :, None, None]

In [8]:
# Allowed clip lengths: every multiple of 100 from 100 up to and including 1900.
frame_ranges = [100 * multiple for multiple in range(1, 20)]

In [9]:
# Convert every video in `video_folder` into a fixed-length, ImageNet-normalized
# float16 array of shape (N, 3, 224, 224), saved as `<output_folder>/<name>.npy`.
#
# Clip length is decided from the number of frames actually decoded rather than
# from CAP_PROP_FRAME_COUNT: that property is read from container metadata and
# can disagree with what `cap.read()` delivers, which would yield wrongly sized
# padding (or a crash when no frame can be decoded at all).
#
#   * 0 decoded frames            -> skipped
#   * fewer than 100 frames       -> padded to 100 by repeating the last frame
#   * limit <= count < limit+100  -> truncated to `limit` (for limit in frame_ranges)
#   * anything else               -> skipped
#
# Note: outputs are keyed by base name only, so two videos that differ just by
# extension map to the same .npy and the second one is skipped as already done.
video_extensions = (".mp4", ".avi", ".mov", ".mkv", ".flv", ".wmv")
min_frames = 100  # clips shorter than this are padded up to this length
# Longest clip any bucket [limit, limit + 100) of `frame_ranges` can accept.
max_frames = max(frame_ranges, default=0) + 99


def _read_frames(video_path, frame_cap):
    """Decode at most `frame_cap` frames as 224x224 RGB images.

    Returns a list of per-frame arrays, or None if the video cannot be opened.
    The capture handle is released on every exit path.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return None
        frames = []
        while len(frames) < frame_cap:
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Convert to RGB
            frames.append(cv2.resize(frame, (224, 224)))  # Resize to 224x224
        return frames
    finally:
        cap.release()


# Match extensions case-insensitively (e.g. "CLIP.MP4") and sort so the
# processing order does not depend on the directory listing order.
video_files = sorted(
    name for name in os.listdir(video_folder)
    if name.lower().endswith(video_extensions)
)

for filename in tqdm(video_files):
    base_name, _ = os.path.splitext(filename)
    output_path = os.path.join(output_folder, f"{base_name}.npy")
    # Resume support: an existing output means this clip was already processed.
    if os.path.exists(output_path):
        continue

    # Read one frame past the limit so over-long videos are detected without
    # decoding and buffering the remainder of the file.
    frames = _read_frames(os.path.join(video_folder, filename), max_frames + 1)
    if frames is None:
        print(f"Skipping {filename}: Could not open video")
        continue

    frame_count = len(frames)
    if frame_count == 0:
        print(f"Skipping {filename}: No frames could be decoded")
        continue

    clip = np.stack(frames)  # (N, 224, 224, 3)
    del frames  # the per-frame list is no longer needed once stacked

    if frame_count < min_frames:
        # Pad short clips by repeating the final frame.
        padding = np.repeat(clip[-1:], min_frames - frame_count, axis=0)
        clip = np.concatenate([clip, padding], axis=0)
    else:
        for limit in frame_ranges:
            if limit <= frame_count < limit + 100:
                clip = clip[:limit]
                break
        else:
            print(f"Skipping {filename}: Frame count exceeds max range ({max_frames})")
            continue

    # Scale to [0, 1], move channels first, then standardize per channel.
    clip = clip.astype(np.float32) / 255.0
    clip = np.transpose(clip, (0, 3, 1, 2))  # (N, H, W, 3) -> (N, 3, H, W)
    clip = (clip - imagenet_mean) / imagenet_std

    # Write to a temporary file and rename it into place, so an interrupted run
    # never leaves a truncated .npy that the resume check would treat as done.
    tmp_path = output_path + ".tmp"
    with open(tmp_path, "wb") as fh:
        np.save(fh, clip.astype(np.float16))
    os.replace(tmp_path, output_path)

print("Processing complete.")

100%|██████████| 1/1 [00:00<00:00,  1.55it/s]

Processing complete.



