In [None]:
import os
import cv2
import subprocess
import shlex
from copy import deepcopy
from tqdm import tqdm

In [None]:
def get_frame_types(video_fn):
    """Ask ffprobe for the picture type of every frame in a video.

    Args:
        video_fn: Path of the video file to inspect.

    Returns:
        A single-pass iterator of ``(frame_index, pict_type)`` tuples. Indices
        start at 0 and follow the order in which ffprobe printed the frames;
        ``pict_type`` is the string ffprobe reported (e.g. ``"I"`` or ``"P"``).

    Raises:
        subprocess.CalledProcessError: If ffprobe exits with a non-zero status.
    """
    # Build the argument vector directly instead of formatting one string and
    # re-tokenising it with shlex: a path containing spaces or quotes would
    # otherwise be split into several arguments (or fail to parse at all).
    args = [
        'ffprobe',
        '-v', 'error',
        '-show_entries', 'frame=pict_type',
        '-of', 'default=noprint_wrappers=1',
        str(video_fn),
    ]
    out = subprocess.check_output(args).decode()
    # Each output line looks like "pict_type=I"; keep only the type letter.
    frame_types = out.replace('pict_type=', '').split()
    return enumerate(frame_types)

In [None]:
def get_specified_frame(video_path, save_dir, frame_type="I"):
    """Save every frame of one picture type from a video as JPEG images.

    Images are written to
    ``<save_dir>/<video file name without extension>/<frame_type>/<frame index>.jpg``.
    If that directory already exists the video is treated as already processed:
    a warning is printed and nothing else happens.

    Args:
        video_path: Path of the video file to read.
        save_dir: Root directory for the extracted frames; it and any missing
            parent directories are created on demand.
        frame_type: Picture type to extract, compared against the values
            yielded by ``get_frame_types``. Defaults to "I".

    Returns:
        None.

    Raises:
        OSError: If OpenCV cannot open ``video_path``.
        subprocess.CalledProcessError: Propagated from ``get_frame_types`` when
            ffprobe fails on the file.
    """
    video_name, _ = os.path.splitext(os.path.basename(video_path))
    frame_dir = os.path.join(save_dir, video_name)
    frame_type_dir = os.path.join(frame_dir, frame_type)

    if os.path.exists(frame_type_dir):
        print(f"Warning: {frame_type_dir} has existed!")
        return

    # Probe BEFORE creating any directory: if ffprobe raises, no empty output
    # directory is left behind to make later runs skip this video.
    frame_numbers = [idx for idx, kind in get_frame_types(video_path) if kind == frame_type]

    if not frame_numbers:
        # Still create the directory so the video counts as processed.
        os.makedirs(frame_type_dir, exist_ok=True)
        print (f'No {frame_type}-frames in {video_name}')
        return

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise OSError(f"OpenCV could not open video: {video_path}")
        # makedirs handles absolute paths and missing parents alike.
        os.makedirs(frame_type_dir, exist_ok=True)
        for frame_no in frame_numbers:
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_no)
            ret, frame = cap.read()
            if not ret:
                # A failed read returns no image; writing it would raise.
                print(f"Warning: could not read frame {frame_no} of {video_name}, skipped")
                continue
            frame_path = os.path.join(frame_type_dir, f"{frame_no}.jpg")
            if not cv2.imwrite(frame_path, frame):
                print(f"Warning: failed to write {frame_path}")
    finally:
        # Release the capture even if reading or writing raised.
        cap.release()

In [None]:
def batch_extract_frames(video_dir, save_dir, frame_type="all"):
    """Extract frames of the requested picture type(s) from every file under a directory.

    Args:
        video_dir (str): Directory that is walked recursively; every file found
            is handed to ``get_specified_frame`` as a video.
        save_dir (str): Root directory under which the extracted frames are saved.
        frame_type (str or iterable of str, optional): ``"all"`` extracts both
            "I" and "P" frames; a single string extracts just that type; any
            other iterable is used as the list of types. Defaults to "all".

    Returns:
        None.
    """
    if frame_type == "all":
        # For this paper only I-frames and P-frames are needed.
        frame_types = ["I", "P"]
    elif isinstance(frame_type, str):
        frame_types = [frame_type]
    else:
        frame_types = list(frame_type)

    failed = []
    for root, _dirs, files in os.walk(video_dir):
        # Sorted so the processing order is the same on every run.
        for file in tqdm(sorted(files)):
            video_path = os.path.join(root, file)
            try:
                for wanted_type in frame_types:
                    get_specified_frame(video_path, save_dir, wanted_type)
            except subprocess.CalledProcessError as err:
                # ffprobe failed on this file; skip it rather than aborting
                # the whole batch.
                print(f"Warning: skipped {video_path}: ffprobe failed ({err})")
                failed.append(video_path)
    if failed:
        print(f"Warning: {len(failed)} file(s) could not be processed")

In [None]:
# Extract I- and P-frames from every video under ./data/videos/ into ./data/frames.
batch_extract_frames("./data/videos/", "./data/frames", 'all')
print("finish")