In [None]:
import cv2
import os
import sys
import glob
import re
from dotenv import load_dotenv
from utils import setup_logging
import logging

def save_frames(video_path, save_dir):
    """Extract roughly one frame per second from a video and save each as a JPEG.

    Every N-th frame is written, where N is the frame rate reported by the
    container rounded to the nearest integer (never less than 1). Files are
    named ``frame_0000.jpg``, ``frame_0001.jpg``, ... in sampling order, so the
    number in the file name is the index of the sample, not of the raw frame.

    NOTE: because N is an integer, videos with a non-integer frame rate
    (e.g. 30000/1001) are sampled at an interval slightly different from
    exactly one second.

    Args:
        video_path: Path of the video file to read.
        save_dir: Directory the JPEG files are written into. It is not
            created by this function.

    Returns:
        None. Problems (video cannot be opened, frame rate unavailable, a
        frame cannot be written) are reported through ``logging``.
    """
    video_capture = cv2.VideoCapture(video_path)
    if not video_capture.isOpened():
        logging.error(f"Could not open video {video_path}")
        return

    sample_index = 0        # index used in the output file name
    saved_frame_count = 0   # number of frames actually written to disk

    try:
        frame_rate = video_capture.get(cv2.CAP_PROP_FPS)
        # The reported rate can be 0 (or NaN) when the backend cannot determine
        # it; using it directly would make the modulo below divide by zero.
        # `not (x > 0)` is also true for NaN.
        if not (frame_rate > 0) or frame_rate == float("inf"):
            logging.error(f"Could not determine frame rate of {video_path} (got {frame_rate})")
            return

        # Rates below 0.5 fps round to 0; clamp so every frame is kept then.
        frames_to_skip = max(1, int(round(frame_rate)))

        frame_count = 0
        while True:
            ret, frame = video_capture.read()
            if not ret:
                break  # end of the video (or a read failure)

            if frame_count % frames_to_skip == 0:
                output_frame_path = os.path.join(save_dir, f"frame_{sample_index:04d}.jpg")
                # Quality 100 = least JPEG compression; adjust as needed.
                if cv2.imwrite(output_frame_path, frame, [int(cv2.IMWRITE_JPEG_QUALITY), 100]):
                    saved_frame_count += 1
                else:
                    logging.warning(f"Could not write frame to {output_frame_path}")
                # Advance even on a failed write so file indices stay aligned
                # with the sampling position in the video.
                sample_index += 1

            frame_count += 1
    finally:
        # Always free the capture, including when reading or writing raises.
        video_capture.release()

    logging.info(f"Saved {saved_frame_count} frames from {video_path} to {save_dir}")

def main(stimuli_dir, output_dir, season_id):
    """Extract frames for every selected episode file of one season.

    Looks for ``*.mkv`` files in ``<stimuli_dir>/<season_id>``, keeps only those
    whose name ends in ``a``, ``b``, ``c`` or ``d`` right before the ``.mkv``
    extension, prints the selected paths, and calls ``save_frames`` on each one
    with ``<output_dir>/frames/<file name without extension>`` as the target
    directory (created if missing).

    Args:
        stimuli_dir: Directory containing one sub-directory per season.
        output_dir: Directory under which the ``frames`` tree is created.
        season_id: Name of the season sub-directory to process.
    """
    # Matches paths ending with 'a.mkv', 'b.mkv', 'c.mkv' or 'd.mkv'.
    part_suffix = re.compile(r'.*([abcd])\.mkv$')

    season_glob = os.path.join(stimuli_dir, season_id, "*.mkv")
    selected = []
    for candidate in sorted(glob.glob(season_glob)):
        if part_suffix.match(candidate):
            selected.append(candidate)
    print(selected)

    frames_root = os.path.join(output_dir, "frames")
    for video_path in selected:
        # One output folder per video, named after the file without extension.
        stem, _ext = os.path.splitext(os.path.basename(video_path))
        target_dir = os.path.join(frames_root, stem)
        os.makedirs(target_dir, exist_ok=True)
        save_frames(video_path, target_dir)

if __name__ == "__main__":
    # Script entry point: expects exactly one command-line argument, the
    # season id, and reads BASE_DIR / SCRATCH_DIR from the environment
    # (optionally populated from a .env file by load_dotenv).
    if len(sys.argv) != 2:
        logging.error("Usage: python save_frames.py <season_id>")
        sys.exit(1)

    season_id = sys.argv[1]
    print(f"season_id: {season_id}")

    load_dotenv()
    base_dir = os.getenv("BASE_DIR")
    scratch_dir = os.getenv("SCRATCH_DIR")
    # os.getenv returns None for an unset variable, which would make the
    # os.path.join calls below fail with an unhelpful TypeError.
    if scratch_dir is None:
        logging.error("SCRATCH_DIR environment variable is not set")
        sys.exit(1)

    data_dir = os.path.join(scratch_dir, "data")
    stimuli_dir = os.path.join(scratch_dir, "data", "stimuli")
    output_dir = os.path.join(scratch_dir, "output")

    # NOTE(review): base_dir may be None here if BASE_DIR is unset; whether
    # setup_logging tolerates that is not visible from this file.
    log_file = setup_logging(base_dir=base_dir, task="save_frames", task_id=season_id)
    logging.info(f"saving frames started for season {season_id}")

    main(stimuli_dir, output_dir, season_id)

In [None]:
# (pliers) -bash-4.2$ ffprobe -v error -select_streams v:0 -show_entries stream=r_frame_rate DM.mp4
# [STREAM]
# r_frame_rate=30000/1001
# [/STREAM]
# (pliers) -bash-4.2$ ffprobe -v error -select_streams v:0 -show_entries stream=r_frame_rate TP.mp4
# [STREAM]
# r_frame_rate=24/1