# Process raw video files

In [1]:
# | default_exp process_video

In [2]:
# | export
from pathlib import Path

import ffmpeg
import numpy as np
from tqdm import tqdm

from chirpminds.utils import parallel

## Read video files

In [13]:
# Source recordings, grouped by file extension.
raw_data_dir = Path("../../scratch/raw_data")
file_list_mts = list(raw_data_dir.glob("*.MTS"))
file_list_mp4 = list(raw_data_dir.glob("*.MP4"))

# Destination folder for the sampled frames (created if it is missing).
frames_dir = Path("../../scratch/frames")
frames_dir.mkdir(exist_ok=True)

In [16]:
# Preview the batch made of the last MTS file followed by every MP4 file.
[file_list_mts[-1], *file_list_mp4]

[PosixPath('../../scratch/raw_data/00000.MTS'),
 PosixPath('../../scratch/raw_data/20230114_F02A.MP4'),
 PosixPath('../../scratch/raw_data/MVI0012.MP4'),
 PosixPath('../../scratch/raw_data/20230215_F04A.MP4'),
 PosixPath('../../scratch/raw_data/202301211_F11A.MP4'),
 PosixPath('../../scratch/raw_data/20230203_F16A.MP4')]

In [5]:
# | export
def get_video_info(video_path: Path) -> dict:
    """Return the probe metadata of the first video stream in a file.

    Args:
        video_path: Path to the video file. It is resolved to an absolute
            path before being handed to ``ffmpeg.probe``.

    Returns:
        The first entry of the probe result's ``"streams"`` list whose
        ``"codec_type"`` is ``"video"``.

    Raises:
        ValueError: If the probe result contains no video stream.
    """
    probe = ffmpeg.probe(str(video_path.resolve()))
    video_stream = next(
        (s for s in probe["streams"] if s["codec_type"] == "video"), None
    )
    if video_stream is None:
        # A bare ``next`` would surface as an unexplained StopIteration;
        # report the actual problem and the offending file instead.
        raise ValueError(f"No video stream found in {video_path}")
    return video_stream

In [6]:
# `file_list` is not defined in the cells above (only `file_list_mts` and
# `file_list_mp4` are), so probe the first MP4 recording instead.
get_video_info(file_list_mp4[0])

{'index': 0,
 'codec_name': 'h264',
 'codec_long_name': 'H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10',
 'profile': 'High',
 'codec_type': 'video',
 'codec_tag_string': 'avc1',
 'codec_tag': '0x31637661',
 'width': 1920,
 'height': 1080,
 'coded_width': 1920,
 'coded_height': 1080,
 'closed_captions': 0,
 'film_grain': 0,
 'has_b_frames': 1,
 'pix_fmt': 'yuv420p',
 'level': 40,
 'color_range': 'tv',
 'color_space': 'bt709',
 'color_transfer': 'bt709',
 'color_primaries': 'bt709',
 'chroma_location': 'left',
 'field_order': 'progressive',
 'refs': 1,
 'is_avc': 'true',
 'nal_length_size': '4',
 'id': '0x1',
 'r_frame_rate': '30000/1001',
 'avg_frame_rate': '30000/1001',
 'time_base': '1/30000',
 'start_pts': 0,
 'start_time': '0.000000',
 'duration_ts': 61951890,
 'duration': '2065.063000',
 'bit_rate': '16188349',
 'bits_per_raw_sample': '8',
 'nb_frames': '61890',
 'extradata_size': 67,
 'disposition': {'default': 1,
  'dub': 0,
  'original': 0,
  'comment': 0,
  'lyrics': 0,
  'karaoke'

## Process video files

In [17]:
# | export
def extract_frame(
    start_time_list: list[str], file_path: Path, out_dir: Path, job_idx: int = 0
) -> None:
    """Save one JPEG frame from a video for each requested start time.

    Each frame is written to ``out_dir`` as ``<video stem>_<start_time>.jpg``.

    Args:
        start_time_list: Seek positions. Each value is passed unchanged as
            the ``ss`` input option and embedded in the output file name.
        file_path: Video file to read from.
        out_dir: Directory that receives the extracted frames.
        job_idx: Not used by this function.
            NOTE(review): presumably supplied by ``parallel`` - confirm.
    """
    # Resolve both paths once; they do not change between iterations.
    source = str(file_path.resolve())
    target_dir = out_dir.resolve()
    for start_time in start_time_list:
        target = target_dir / f"{file_path.stem}_{start_time}.jpg"
        # Seek on the input side, then stop after a single video frame.
        # NOTE(review): `run()` is called without `overwrite_output`, so a
        # re-run over existing frames may fail or prompt - confirm intent.
        ffmpeg.input(source, ss=start_time).output(str(target), vframes=1).run()

In [18]:
# | export


def extract_frames(
    video_path_list: list[Path], num_frames: int, out_dir: Path, job_idx: int = 0
) -> None:
    """Sample evenly spaced frames from each video and save them as JPEGs.

    For every video the duration reported by ``get_video_info`` is rounded to
    whole seconds and split into ``num_frames`` evenly spaced integer start
    times, which are then handed to ``extract_frame`` through ``parallel``.

    Args:
        video_path_list: Videos to process, in order.
        num_frames: Number of start times to sample per video. Fewer frames
            are produced when the video is shorter than ``num_frames``
            seconds, because duplicate integer timestamps are dropped.
        out_dir: Directory that receives the frames; created if missing.
        job_idx: Used as the ``position`` of the tqdm progress bar.
    """
    # ffmpeg does not create missing output directories.
    out_dir.mkdir(parents=True, exist_ok=True)
    for video in tqdm(video_path_list, position=job_idx):
        print(f"Processing {video}")
        video_info = get_video_info(video)
        duration = round(float(video_info["duration"]))
        # `endpoint=False` keeps every seek position strictly before the end
        # of the stream; seeking to the very end yields no frame to decode.
        # `np.unique` removes the repeated timestamps that integer truncation
        # produces for short videos, which would map to the same file name.
        sampled_start_times = np.unique(
            np.linspace(0, duration, num_frames, endpoint=False, dtype=np.int32)
        )
        parallel(sampled_start_times.tolist(), extract_frame, [video, out_dir])

In [None]:
# Every MTS recording except the last one: 160 sampled frames per video.
extract_frames(file_list_mts[:-1], num_frames=160, out_dir=frames_dir)

In [None]:
# The last MTS recording plus all MP4 recordings: 80 sampled frames per video.
extract_frames([file_list_mts[-1], *file_list_mp4], num_frames=80, out_dir=frames_dir)

## View sampled frames

In [21]:
# | hide
import nbdev  # noqa

# Run nbdev's export step for this notebook.
# NOTE(review): presumably this writes the `# | export` cells to the module
# named by the `default_exp` directive at the top - confirm against nbdev docs.
nbdev.nbdev_export()