In [None]:
import os
import cv2
import pandas as pd
import glob
from concurrent.futures import ThreadPoolExecutor, as_completed
from cfg import DOWNLOAD_DIR, SCENE_DIR, KEYFRAME_DIR
from tqdm import tqdm

In [23]:
# Make sure the keyframe output root exists before any extraction runs;
# exist_ok=True keeps this cell safe to re-run.
os.makedirs(name=KEYFRAME_DIR, exist_ok=True)

In [24]:
# Recursively find all videos under DOWNLOAD_DIR.
# `recursive=True` is required for '**' to match zero or more nested
# directories; without it glob treats '**' like a plain '*' and only finds
# files exactly one directory level below DOWNLOAD_DIR.
# sorted() gives a deterministic processing order across runs.
videos = sorted(glob.glob(os.path.join(DOWNLOAD_DIR, '**', '*.mp4'), recursive=True))
print(f"[INFO] Found {len(videos)} videos to process.")

[INFO] Found 169 videos to process.


In [None]:
def process_video(video_path):
    """Extract one keyframe per scene from a single video.

    The scene list is read from ``SCENE_DIR/<batch>/<base_name>.csv`` and must
    contain a ``median_frame`` column. For every distinct median frame a JPEG
    is written to
    ``KEYFRAME_DIR/<batch>/<base_name>/<base_name>_<median_frame>.jpg``.
    ``<batch>`` is the part of the video file name before the first ``_``.

    Only keyframes whose output file does not exist yet are extracted, so an
    interrupted run can be resumed without redoing finished work. If every
    expected file is already present the video is skipped without being opened.

    Args:
        video_path: Path to the video file to process.

    Returns:
        None. Progress and problems are reported via ``print``.

    Raises:
        Whatever ``pd.read_csv`` raises when the scene CSV is missing or
        unreadable, and ``KeyError`` if it has no ``median_frame`` column.
    """
    base_name = os.path.splitext(os.path.basename(video_path))[0]
    batchh = base_name.split("_")[0]
    scene_path = os.path.join(SCENE_DIR, batchh, f"{base_name}.csv")
    scene_list = pd.read_csv(scene_path)

    out_dir = os.path.join(KEYFRAME_DIR, batchh, base_name)
    os.makedirs(out_dir, exist_ok=True)

    # Several scenes may share a median frame and would map to the same file,
    # so work on the distinct frame numbers. Ascending order keeps the seeks
    # moving forward through the video.
    median_frames = sorted({int(value) for value in scene_list['median_frame']})
    targets = {
        frame_no: os.path.join(out_dir, f"{base_name}_{frame_no}.jpg")
        for frame_no in median_frames
    }

    # Compare against the exact expected files instead of counting "*.jpg"
    # matches: a count is wrong when frames are duplicated or stray files exist.
    missing = [frame_no for frame_no in median_frames
               if not os.path.exists(targets[frame_no])]
    if not missing:
        print(f"[SKIP] Keyframes for {video_path} extracted fully.")
        return

    # Extract keyframes
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # Report once instead of emitting one read warning per scene.
            print(f"[WARNING] Could not open {video_path}")
            return

        for median_frame in missing:
            cap.set(cv2.CAP_PROP_POS_FRAMES, median_frame)
            ret, frame = cap.read()
            if not ret:
                print(f"[WARNING] Could not read frame {median_frame} in {base_name}")
                continue
            # cv2.imwrite signals failure through its return value, not an exception.
            if not cv2.imwrite(targets[median_frame], frame):
                print(f"[WARNING] Could not write {targets[median_frame]}")
    finally:
        # Always free the decoder handle, even if a frame fails mid-loop.
        cap.release()
    print(f"[DONE] {video_path} processed.")


# Fan the videos out over a small thread pool and track completion with tqdm
max_workers = 4
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    futures = [executor.submit(process_video, path) for path in videos]
    progress = tqdm(as_completed(futures), total=len(futures), desc="Processing videos")
    for finished in progress:
        # .result() re-raises any exception raised inside the worker
        finished.result()

[DONE] /mnt/d/AI Challenge/Data/video/L21/L21_V005.mp4 processed.
[DONE] /mnt/d/AI Challenge/Data/video/L21/L21_V003.mp4 processed.
[DONE] /mnt/d/AI Challenge/Data/video/L21/L21_V002.mp4 processed.
[DONE] /mnt/d/AI Challenge/Data/video/L21/L21_V001.mp4 processed.
[DONE] /mnt/d/AI Challenge/Data/video/L21/L21_V007.mp4 processed.
[DONE] /mnt/d/AI Challenge/Data/video/L21/L21_V006.mp4 processed.
[DONE] /mnt/d/AI Challenge/Data/video/L21/L21_V009.mp4 processed.
[DONE] /mnt/d/AI Challenge/Data/video/L21/L21_V008.mp4 processed.
[DONE] /mnt/d/AI Challenge/Data/video/L21/L21_V010.mp4 processed.
[DONE] /mnt/d/AI Challenge/Data/video/L21/L21_V011.mp4 processed.
[DONE] /mnt/d/AI Challenge/Data/video/L21/L21_V012.mp4 processed.
[DONE] /mnt/d/AI Challenge/Data/video/L21/L21_V013.mp4 processed.
[DONE] /mnt/d/AI Challenge/Data/video/L21/L21_V014.mp4 processed.
[DONE] /mnt/d/AI Challenge/Data/video/L21/L21_V015.mp4 processed.
[DONE] /mnt/d/AI Challenge/Data/video/L21/L21_V016.mp4 processed.
[DONE] /mn

KeyboardInterrupt: 

[DONE] /mnt/d/AI Challenge/Data/video/L21/L21_V019.mp4 processed.
[DONE] /mnt/d/AI Challenge/Data/video/L21/L21_V021.mp4 processed.
