In [6]:
import cv2
import math
from pathlib import Path


In [10]:
import importlib
import os
import sys

# Add the base directory (parent of the notebook's working directory) to
# sys.path BEFORE importing config, so this cell also works on a fresh kernel.
# The membership check keeps re-runs from appending the same entry repeatedly.
base_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
if base_dir not in sys.path:
    sys.path.append(base_dir)

import config

# Reload so that edits made to config.py since the first import are picked up
# without restarting the kernel.
importlib.reload(config)

# Now you can import Config (bound to the freshly reloaded module's class)
from config import Config


In [4]:

import re

# Folder containing your videos
folder_path = Config.RAW_DATA_DIR

# A file that has already been indexed looks like VID_<idx>_<date>_<time>.mp4.
# Recognising those makes this cell safe to re-run: without the check a second
# run would turn VID_1_2025... into VID_1_1_2025... and so on.
indexed_name = re.compile(r"^VID_(\d+)_\d+_\d+\.mp4$")

# List all .mp4 files in the folder (sorted, so indices follow name order)
files = sorted(f for f in os.listdir(folder_path) if f.endswith(".mp4"))

# Split into already-indexed files and files that still need an index;
# numbering of new files continues after the highest index already in use.
index_matches = [indexed_name.match(f) for f in files]
next_idx = max((int(m.group(1)) for m in index_matches if m), default=0) + 1
pending = [f for f, m in zip(files, index_matches) if m is None]

# Rename files with new pattern: VID_<idx>_<timestamp>.mp4
for idx, file_name in enumerate(pending, start=next_idx):
    # Extract the timestamp part (everything after the first underscore)
    parts = file_name.split("_", 1)  # split only once
    if len(parts) > 1:
        timestamp_part = parts[1]  # e.g., '20250716_142817322.mp4'
        new_name = f"VID_{idx}_{timestamp_part}"
        old_path = os.path.join(folder_path, file_name)
        new_path = os.path.join(folder_path, new_name)
        # os.rename may silently overwrite an existing file on POSIX; never
        # clobber a video that is already there.
        if os.path.exists(new_path):
            print(f"Skipped: {file_name} (target {new_name} already exists)")
            continue
        os.rename(old_path, new_path)
        print(f"Renamed: {file_name} -> {new_name}")


Renamed: VID_20250716_142817322.mp4 -> VID_1_20250716_142817322.mp4
Renamed: VID_20250716_142907288.mp4 -> VID_2_20250716_142907288.mp4
Renamed: VID_20250716_144442263.mp4 -> VID_3_20250716_144442263.mp4
Renamed: VID_20250716_144849394.mp4 -> VID_4_20250716_144849394.mp4
Renamed: VID_20250716_145353358.mp4 -> VID_5_20250716_145353358.mp4
Renamed: VID_20250716_145603216.mp4 -> VID_6_20250716_145603216.mp4
Renamed: VID_20250716_150443286.mp4 -> VID_7_20250716_150443286.mp4
Renamed: VID_20250716_150648286.mp4 -> VID_8_20250716_150648286.mp4
Renamed: VID_20250716_151607511.mp4 -> VID_9_20250716_151607511.mp4
Renamed: VID_20250716_152405697.mp4 -> VID_10_20250716_152405697.mp4
Renamed: VID_20250716_154434474.mp4 -> VID_11_20250716_154434474.mp4
Renamed: VID_20250716_155450434.mp4 -> VID_12_20250716_155450434.mp4
Renamed: VID_20250716_155636780.mp4 -> VID_13_20250716_155636780.mp4
Renamed: VID_20250716_155720057.mp4 -> VID_14_20250716_155720057.mp4
Renamed: VID_20250716_161123163.mp4 -> VID_

In [5]:
# Config
input_dir = Config.RAW_DATA_DIR
output_dir = Config.CLEAN_VID_DIR
output_dir.mkdir(parents=True, exist_ok=True)

# The codec is the same for every video, so build the FourCC once.
fourcc = cv2.VideoWriter_fourcc(*Config.TARGET_CODEC)

# Process each video: re-encode it into output_dir, upscaling to
# Config.MIN_WIDTH (aspect ratio preserved) when the source is narrower.
for video_path in sorted(input_dir.glob("*")):
    if video_path.suffix.lower() not in Config.VIDEO_EXTS:
        continue

    cap = cv2.VideoCapture(str(video_path))
    out = None
    try:
        if not cap.isOpened():
            print(f"Cannot open {video_path.name}")
            continue

        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # A zero/negative size would divide by zero below and cannot be encoded.
        if width <= 0 or height <= 0:
            print(f"Skipping {video_path.name}: invalid frame size {width}x{height}")
            continue

        # Config.TARGET_FPS wins when set; otherwise keep the source rate.
        out_fps = Config.TARGET_FPS or fps
        if not out_fps or out_fps <= 0 or math.isnan(out_fps):
            print(f"Skipping {video_path.name}: invalid FPS {out_fps}")
            continue

        # Determine new resolution
        if width < Config.MIN_WIDTH:
            scale = Config.MIN_WIDTH / width
            new_w = Config.MIN_WIDTH
            new_h = int(height * scale)
            print(f"Upscaling {video_path.name} from {width}x{height} → {new_w}x{new_h}")
        else:
            new_w, new_h = width, height

        # Prepare output video path
        out_path = output_dir / video_path.name

        # Create VideoWriter; it fails silently (e.g. unsupported codec for the
        # container), so check it explicitly instead of writing into the void.
        out = cv2.VideoWriter(str(out_path), fourcc, out_fps, (new_w, new_h))
        if not out.isOpened():
            print(f"Cannot create writer for {out_path.name}")
            continue

        # Process frames
        needs_resize = (new_w, new_h) != (width, height)
        frame_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if needs_resize:
                frame = cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
            out.write(frame)
            frame_count += 1

        print(f"Cleaned {video_path.name} → {out_path.name} ({frame_count} frames)")
    finally:
        # Always free the decoder/encoder handles, even on `continue` or error.
        cap.release()
        if out is not None:
            out.release()


Cleaned VID_10_20250716_152405697.mp4 → VID_10_20250716_152405697.mp4 (3558 frames)
Cleaned VID_11_20250716_154434474.mp4 → VID_11_20250716_154434474.mp4 (671 frames)
Cleaned VID_12_20250716_155450434.mp4 → VID_12_20250716_155450434.mp4 (1913 frames)
Cleaned VID_13_20250716_155636780.mp4 → VID_13_20250716_155636780.mp4 (978 frames)
Cleaned VID_14_20250716_155720057.mp4 → VID_14_20250716_155720057.mp4 (9314 frames)
Cleaned VID_15_20250716_161123163.mp4 → VID_15_20250716_161123163.mp4 (6674 frames)
Cleaned VID_16_20250716_161717254.mp4 → VID_16_20250716_161717254.mp4 (2264 frames)
Cleaned VID_17_20250716_162210413.mp4 → VID_17_20250716_162210413.mp4 (15748 frames)
Cleaned VID_18_20250716_163812575.mp4 → VID_18_20250716_163812575.mp4 (618 frames)
Cleaned VID_19_20250716_164036159.mp4 → VID_19_20250716_164036159.mp4 (1491 frames)
Cleaned VID_1_20250716_142817322.mp4 → VID_1_20250716_142817322.mp4 (1038 frames)
Cleaned VID_20_20250716_164149897.mp4 → VID_20_20250716_164149897.mp4 (6768 fram

In [None]:
def extract_frames_3fps(video_path: Path, out_root: Path, frame_ext=".png", jpeg_quality=95):
    """Save the first, middle and last frame of every second of a video.

    Frames are grouped into one-second buckets by ``int(frame_idx / fps)`` and
    each bucket is handed to ``_save_first_mid_last``, which writes into
    ``out_root / video_path.stem`` (created if missing).

    Args:
        video_path: Video file to read.
        out_root: Root directory under which the per-video folder is created.
        frame_ext: Image extension including the dot (e.g. ``".png"``).
        jpeg_quality: Forwarded to ``_save_first_mid_last``.

    Returns:
        None. Prints a one-line summary, or an error line if the video cannot
        be opened.
    """
    cap = cv2.VideoCapture(str(video_path))
    try:
        if not cap.isOpened():
            print(f"[ERR ] Cannot open {video_path.name}")
            return

        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps or fps <= 0 or math.isnan(fps):
            fps = 25  # fallback when the container does not report a usable rate
        header_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # output folder per video
        video_folder = out_root / video_path.stem
        video_folder.mkdir(parents=True, exist_ok=True)

        current_sec = 0
        sec_frames = []  # all frames of the current second (middle needs the count)
        saved = 0
        frame_idx = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            sec = int(frame_idx / fps)

            # Crossed into a new second: flush the bucket that just ended.
            if sec != current_sec:
                if sec_frames:
                    saved += _save_first_mid_last(sec_frames, current_sec, video_folder, frame_ext, jpeg_quality)
                sec_frames = []
                current_sec = sec

            sec_frames.append(frame)
            frame_idx += 1

        # Flush the final (possibly partial) second.
        if sec_frames:
            saved += _save_first_mid_last(sec_frames, current_sec, video_folder, frame_ext, jpeg_quality)
    finally:
        # Release the decoder even if saving or mkdir raised.
        cap.release()

    # Prefer the container's frame count; fall back to the number of frames
    # actually decoded when the header value is missing or non-positive.
    total_frames = header_frames if header_frames > 0 else frame_idx
    duration_s = total_frames / fps
    print(f"[OK] {video_path.name}: saved {saved} frames ({duration_s:.1f}s).")


def _save_first_mid_last(frames, sec_idx, folder: Path, frame_ext, jpeg_quality):
    """Write the first, middle and last frame of one second to disk.

    Files are named ``fps_<sec_idx:05d>_{first,mid,last}<frame_ext>`` inside
    ``folder``. With fewer than three frames the same frame is written under
    more than one name (e.g. a single frame is saved three times).

    Args:
        frames: Frames belonging to one second, in order.
        sec_idx: Index of the second, used in the file name.
        folder: Existing directory to write into.
        frame_ext: Image extension including the dot.
        jpeg_quality: JPEG quality, applied only for ``.jpg``/``.jpeg``.

    Returns:
        Number of images that were actually written successfully.
    """
    n = len(frames)
    if n == 0:
        return 0

    picks = (
        ("first", frames[0]),
        ("mid", frames[n // 2]),
        ("last", frames[-1]),
    )

    # Quality flag only makes sense for JPEG output; other formats use defaults.
    if frame_ext.lower() in (".jpg", ".jpeg"):
        params = [cv2.IMWRITE_JPEG_QUALITY, jpeg_quality]
    else:
        params = []

    count = 0
    for tag, img in picks:
        out_path = folder / f"fps_{sec_idx:05d}_{tag}{frame_ext}"
        # cv2.imwrite reports failure through its return value rather than an
        # exception, so only count frames that were really saved.
        if cv2.imwrite(str(out_path), img, params):
            count += 1
        else:
            print(f"[WARN] Could not write {out_path}")
    return count


# Walk the cleaned-video directory in sorted order and extract frames from
# every file whose extension is a known video type; other files are ignored.
for video_file in sorted(Config.CLEAN_VID_DIR.glob("*")):
    if video_file.suffix.lower() in Config.VIDEO_EXTS:
        extract_frames_3fps(
            video_file,
            Config.FRAMES_DIR,
            Config.FRAME_EXT,
            Config.JPEG_QUALITY,
        )

print("[DONE] Extracted 3 frames per second for all videos.")


[OK] VID_10_20250716_152405697.mp4: saved 393 frames (130.8s).
[OK] VID_11_20250716_154434474.mp4: saved 69 frames (22.7s).
