# Process raw video files

In [None]:
# | default_exp process_video

In [1]:
# | export
from pathlib import Path

import ffmpeg
import numpy as np
from moviepy import VideoFileClip, concatenate_videoclips
from tqdm import tqdm

from chirpminds.utils import parallel

## Read video files

In [3]:
# Locations of the raw recordings and of the derived outputs.
raw_data_dir = Path("../../scratch/raw_data")
clips_dir = Path("../../scratch/clips")
frames_dir = Path("../../scratch/frames")

# Make sure both output folders exist (no error if they already do).
clips_dir.mkdir(exist_ok=True)
frames_dir.mkdir(exist_ok=True)

# Raw recordings grouped by file extension. The order is whatever glob yields;
# later cells pick recordings from these lists by index.
file_list_mts = list(raw_data_dir.glob("*.MTS"))
file_list_mp4 = list(raw_data_dir.glob("*.MP4"))

## View video

In [4]:
# Open the first three .MTS recordings. The indices follow the order of
# file_list_mts, so which recording ends up in which variable depends on it.
video_clip_1 = VideoFileClip(file_list_mts[0])
video_clip_2 = VideoFileClip(file_list_mts[1])
video_clip_3 = VideoFileClip(file_list_mts[2])

ffmpeg output:

Input #0, mpegts, from '../../scratch/raw_data/20231002_F12A.MTS':
  Duration: 00:17:13.22, start: 1.040000, bitrate: 16449 kb/s
  Program 1 
  Stream #0:0[0x1011]: Video: h264 (High) (HDMV / 0x564D4448), yuv420p(top first), 1920x1080 [SAR 1:1 DAR 16:9], 25 fps, 50 tbr, 90k tbn
  Stream #0:1[0x1100]: Audio: ac3 (AC-3 / 0x332D4341), 48000 Hz, stereo, fltp, 256 kb/s
  Stream #0:2[0x1200]: Subtitle: hdmv_pgs_subtitle (pgssub) ([144][0][0][0] / 0x0090), 1920x1080
At least one output file must be specified



{'video_found': True, 'audio_found': True, 'metadata': {}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [1920, 1080], 'bitrate': None, 'fps': 25.0, 'codec_name': 'h264', 'profile': '(High)'}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': False, 'fps': 48000, 'bitrate': 256}, {'input_number': 0, 'stream_number': 2, 'stream_type': 'subtitle', 'language': None, 'default': False}], 'input_number': 0}], 'duration': 1033.22, 'bitrate': 16449, 'start': 1.04, 'default_video_input_number': 0, 'default_video_stream_number': 0, 'video_codec_name': 'h264', 'video_profile': '(High)', 'video_size': [1920, 1080], 'video_bitrate': None, 'video_fps': 25.0, 'video_duration': 1033.22, 'video_n_frames': 25830, 'audio_bitrate': 256}
/home/ank/workspace/hub/leoank/chirpminds/main/.venv/lib/python3.11/site-packages/imageio_ffmpeg/binaries/ffmpeg-linux-x86_64-v7.0.2 -i ../../scr

ffmpeg output:

Input #0, mpegts, from '../../scratch/raw_data/20231004_F02A.MTS':
  Duration: 00:10:39.84, start: 1.020000, bitrate: 26580 kb/s
  Program 1 
  Stream #0:0[0x1011]: Video: h264 (High) (HDPR / 0x52504448), yuv420p(progressive), 1920x1080 [SAR 1:1 DAR 16:9], 50 fps, 50 tbr, 90k tbn
  Stream #0:1[0x1100]: Audio: ac3 (AC-3 / 0x332D4341), 48000 Hz, stereo, fltp, 256 kb/s
  Stream #0:2[0x1200]: Subtitle: hdmv_pgs_subtitle (pgssub) ([144][0][0][0] / 0x0090), 1920x1080
At least one output file must be specified

ffmpeg output:

Input #0, mpegts, from '../../scratch/raw_data/00000.MTS':
  Duration: 00:10:39.84, start: 1.020000, bitrate: 26580 kb/s
  Program 1 
  Stream #0:0[0x1011]: Video: h264 (High) (HDPR / 0x52504448), yuv420p(progressive), 1920x1080 [SAR 1:1 DAR 16:9], 50 fps, 50 tbr, 90k tbn
  Stream #0:1[0x1100]: Audio: ac3 (AC-3 / 0x332D4341), 48000 Hz, stereo, fltp, 256 kb/s
  Stream #0:2[0x1200]: Subtitle: hdmv_pgs_subtitle (pgssub) ([144][0][0][0] / 0x0090), 1920x1080


{'video_found': True, 'audio_found': True, 'metadata': {}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [1920, 1080], 'bitrate': None, 'fps': 50.0, 'codec_name': 'h264', 'profile': '(High)'}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': False, 'fps': 48000, 'bitrate': 256}, {'input_number': 0, 'stream_number': 2, 'stream_type': 'subtitle', 'language': None, 'default': False}], 'input_number': 0}], 'duration': 639.84, 'bitrate': 26580, 'start': 1.02, 'default_video_input_number': 0, 'default_video_stream_number': 0, 'video_codec_name': 'h264', 'video_profile': '(High)', 'video_size': [1920, 1080], 'video_bitrate': None, 'video_fps': 50.0, 'video_duration': 639.84, 'video_n_frames': 31992, 'audio_bitrate': 256}
/home/ank/workspace/hub/leoank/chirpminds/main/.venv/lib/python3.11/site-packages/imageio_ffmpeg/binaries/ffmpeg-linux-x86_64-v7.0.2 -i ../../scrat

In [None]:
# Preview the third recording inline, starting 600 s in. The preview is
# rendered at width 500 and 10 fps with a low bitrate to keep it lightweight.
video_clip_3.subclipped(600).display_in_notebook(
    width=500, maxduration=300, fps=10, rd_kwargs=dict(bitrate="50k")
)

## Make video clips

In [20]:
# Cut seconds 800-960 out of the first recording and save the clip, without
# audio, into clips_dir under the recording's file name with an .mp4 suffix.
video_clip_1_clipped = video_clip_1.subclipped(800, 960)
video_clip_1_clipped.write_videofile(
    str(clips_dir.joinpath(file_list_mts[0].resolve().with_suffix(".mp4").name)),
    audio=False,
    write_logfile=True,
)
# Close the subclip once the file has been written.
video_clip_1_clipped.close()

MoviePy - Building video ../../scratch/clips/20231002_F12A.mp4.
MoviePy - Writing video ../../scratch/clips/20231002_F12A.mp4



                                                                                                           

MoviePy - Done !
MoviePy - video ready ../../scratch/clips/20231002_F12A.mp4


In [25]:
# Cut the first 200 s out of the second recording and save the clip, without
# audio, into clips_dir under the recording's file name with an .mp4 suffix.
video_clip_2_clipped = video_clip_2.subclipped(0, 200)
video_clip_2_clipped.write_videofile(
    str(clips_dir.joinpath(file_list_mts[1].resolve().with_suffix(".mp4").name)),
    audio=False,
    write_logfile=True,
)
# Close the subclip once the file has been written.
video_clip_2_clipped.close()

Proc not detected
{'video_found': True, 'audio_found': True, 'metadata': {}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [1920, 1080], 'bitrate': None, 'fps': 50.0, 'codec_name': 'h264', 'profile': '(High)'}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': False, 'fps': 48000, 'bitrate': 256}, {'input_number': 0, 'stream_number': 2, 'stream_type': 'subtitle', 'language': None, 'default': False}], 'input_number': 0}], 'duration': 639.84, 'bitrate': 26580, 'start': 1.02, 'default_video_input_number': 0, 'default_video_stream_number': 0, 'video_codec_name': 'h264', 'video_profile': '(High)', 'video_size': [1920, 1080], 'video_bitrate': None, 'video_fps': 50.0, 'video_duration': 639.84, 'video_n_frames': 31992, 'audio_bitrate': 256}
/home/ank/workspace/hub/leoank/chirpminds/main/.venv/lib/python3.11/site-packages/imageio_ffmpeg/binaries/ffmpeg-linux-x86_64-v7.

                                                                                                           

MoviePy - Done !
MoviePy - video ready ../../scratch/clips/20231004_F02A.mp4


In [30]:
# Cut seconds 120-190 out of the third recording and save the clip, without
# audio, into clips_dir under the recording's file name with an .mp4 suffix.
# NOTE(review): despite the `_concat` suffix, nothing is concatenated here;
# this is a single subclip like the two cells above.
video_clip_3_concat = video_clip_3.subclipped(120, 190)
video_clip_3_concat.write_videofile(
    str(clips_dir.joinpath(file_list_mts[2].resolve().with_suffix(".mp4").name)),
    audio=False,
    write_logfile=True,
)
# Close the subclip once the file has been written.
video_clip_3_concat.close()

{'video_found': True, 'audio_found': True, 'metadata': {}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [1920, 1080], 'bitrate': None, 'fps': 50.0, 'codec_name': 'h264', 'profile': '(High)'}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': False, 'fps': 48000, 'bitrate': 256}, {'input_number': 0, 'stream_number': 2, 'stream_type': 'subtitle', 'language': None, 'default': False}], 'input_number': 0}], 'duration': 639.84, 'bitrate': 26580, 'start': 1.02, 'default_video_input_number': 0, 'default_video_stream_number': 0, 'video_codec_name': 'h264', 'video_profile': '(High)', 'video_size': [1920, 1080], 'video_bitrate': None, 'video_fps': 50.0, 'video_duration': 639.84, 'video_n_frames': 31992, 'audio_bitrate': 256}
/home/ank/workspace/hub/leoank/chirpminds/main/.venv/lib/python3.11/site-packages/imageio_ffmpeg/binaries/ffmpeg-linux-x86_64-v7.0.2 -ss 119.000000

                                                                                                           

MoviePy - Done !
MoviePy - video ready ../../scratch/clips/00000.mp4


In [10]:
# | export
def get_video_info(video_path: Path) -> dict:
    """Return the ffprobe metadata of the first video stream in a file.

    Args:
        video_path: Path to the media file to probe.

    Returns:
        The first entry of the probe result's ``streams`` list whose
        ``codec_type`` is ``"video"``.

    Raises:
        ValueError: If the file contains no video stream.
        ffmpeg.Error: Propagated from ``ffmpeg.probe`` when probing fails.
    """
    probe = ffmpeg.probe(str(video_path.resolve()))
    for stream in probe["streams"]:
        if stream["codec_type"] == "video":
            return stream
    # An explicit error instead of a bare next(): a leaked StopIteration is
    # hard to diagnose and can silently end an enclosing iteration.
    raise ValueError(f"No video stream found in {video_path}")

## Process video files

In [11]:
# | export
def extract_frame(
    start_time_list: list[str],
    file_path: Path,
    out_dir: Path,
    quiet: bool = False,
    job_idx: int = 0,
) -> None:
    """Save one JPEG frame of a video for each requested start time.

    Each frame is written to ``out_dir`` as ``<video stem>_<start time>.jpg``;
    an existing file with the same name is overwritten.

    Args:
        start_time_list: Seek positions, passed unchanged to ffmpeg's ``ss``
            input option and embedded in the output file name.
            ``extract_frames`` passes whole seconds as integers here.
        file_path: Video file to read from.
        out_dir: Folder that receives the JPEG files; created if missing.
        quiet: Forwarded to ffmpeg's ``run`` to silence its console output.
        job_idx: Not used in the body. Presumably kept so the signature
            matches what ``parallel`` expects of a worker - TODO confirm.
    """
    # Both paths are loop-invariant, so resolve them once up front.
    source = str(file_path.resolve())
    target_dir = out_dir.resolve()
    # ffmpeg does not create missing output folders; without this the run
    # would fail for a fresh out_dir.
    target_dir.mkdir(parents=True, exist_ok=True)

    for start_time in start_time_list:
        frame_path = target_dir / f"{file_path.stem}_{start_time}.jpg"
        (
            ffmpeg.input(source, ss=start_time)
            .output(str(frame_path), vframes=1)  # grab exactly one frame
            .run(overwrite_output=True, quiet=quiet)
        )

In [40]:
# | export


def extract_frames(
    video_path_list: list[Path],
    num_frames: int,
    out_dir: Path,
    quiet: bool = False,
    job_idx: int = 0,
) -> None:
    """Sample evenly spaced frames from each video and save them as JPEGs.

    For every video, up to ``num_frames`` whole-second timestamps are spread
    evenly over its duration and handed to ``parallel`` together with
    ``extract_frame``. Fewer frames are produced when the video is shorter
    than ``num_frames`` seconds, because timestamps are whole seconds.

    Args:
        video_path_list: Videos to process, one after the other.
        num_frames: Number of sampling points per video.
        out_dir: Folder that receives the extracted frames.
        quiet: Forwarded to ``extract_frame`` to silence ffmpeg output.
        job_idx: Used as the ``position`` of the tqdm progress bar.

    Raises:
        KeyError: If the video stream metadata has no ``duration`` entry.
    """
    for video in tqdm(video_path_list, position=job_idx):
        print(f"Processing {video}")
        video_info = get_video_info(video)
        duration = round(float(video_info["duration"]))
        # num_frames + 1 points delimit num_frames intervals; the final point
        # (the very end of the video) is dropped.
        sampled_start_times = np.linspace(
            0, duration, num_frames + 1, dtype=np.int32
        )[:-1]
        # Whole-second timestamps repeat when num_frames exceeds the duration.
        # Duplicates would extract the same frame again and make parallel
        # workers write the same output file, so keep each timestamp once.
        unique_start_times = np.unique(sampled_start_times)
        # presumably parallel splits the timestamps across workers that each
        # call extract_frame(chunk, video, out_dir, quiet) - TODO confirm
        parallel(unique_start_times.tolist(), extract_frame, [video, out_dir, quiet])

## Process clips

In [36]:
# Collect every rendered clip; the order is whatever glob yields.
clips = list(clips_dir.glob("*.mp4"))
clips

[PosixPath('../../scratch/clips/00000.mp4'),
 PosixPath('../../scratch/clips/20231004_F02A.mp4'),
 PosixPath('../../scratch/clips/20231002_F12A.mp4')]

In [37]:
# Inspect the video-stream metadata of one clip; index 2 refers to the
# position in `clips`, which follows glob order.
get_video_info(clips[2])

{'index': 0,
 'codec_name': 'h264',
 'codec_long_name': 'H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10',
 'profile': 'High',
 'codec_type': 'video',
 'codec_tag_string': 'avc1',
 'codec_tag': '0x31637661',
 'width': 1920,
 'height': 1080,
 'coded_width': 1920,
 'coded_height': 1080,
 'closed_captions': 0,
 'film_grain': 0,
 'has_b_frames': 2,
 'pix_fmt': 'yuv420p',
 'level': 40,
 'chroma_location': 'left',
 'field_order': 'progressive',
 'refs': 1,
 'is_avc': 'true',
 'nal_length_size': '4',
 'id': '0x1',
 'r_frame_rate': '25/1',
 'avg_frame_rate': '25/1',
 'time_base': '1/12800',
 'start_pts': 0,
 'start_time': '0.000000',
 'duration_ts': 2048000,
 'duration': '160.000000',
 'bit_rate': '2704228',
 'bits_per_raw_sample': '8',
 'nb_frames': '4000',
 'extradata_size': 47,
 'disposition': {'default': 1,
  'dub': 0,
  'original': 0,
  'comment': 0,
  'lyrics': 0,
  'karaoke': 0,
  'forced': 0,
  'hearing_impaired': 0,
  'visual_impaired': 0,
  'clean_effects': 0,
  'attached_pic': 0,
  'timed

In [39]:
# Set the number of parallel jobs to 2 before running this cell.
# With more workers, several threads try to write to the same file and the
# run crashes.
extract_frames(clips, 133, frames_dir, True)

  0%|                                                                                | 0/3 [00:00<?, ?it/s]

Processing ../../scratch/clips/00000.mp4


 33%|████████████████████████                                                | 1/3 [00:11<00:23, 11.56s/it]

Processing ../../scratch/clips/20231004_F02A.mp4


 67%|████████████████████████████████████████████████                        | 2/3 [00:23<00:11, 11.77s/it]

Processing ../../scratch/clips/20231002_F12A.mp4


100%|████████████████████████████████████████████████████████████████████████| 3/3 [00:35<00:00, 11.68s/it]


## Process full videos

In [41]:
# Sample 80 timestamps from each raw .MP4 recording and write the frames to
# frames_dir, with ffmpeg output silenced (quiet=True).
extract_frames(file_list_mp4, 80, frames_dir, True)

  0%|                                                                                | 0/5 [00:00<?, ?it/s]

Processing ../../scratch/raw_data/20230114_F02A.MP4


 20%|██████████████▍                                                         | 1/5 [00:01<00:05,  1.45s/it]

Processing ../../scratch/raw_data/MVI0012.MP4


 40%|████████████████████████████▊                                           | 2/5 [00:02<00:03,  1.13s/it]

Processing ../../scratch/raw_data/20230215_F04A.MP4


 60%|███████████████████████████████████████████▏                            | 3/5 [00:03<00:02,  1.02s/it]

Processing ../../scratch/raw_data/202301211_F11A.MP4


 80%|█████████████████████████████████████████████████████████▌              | 4/5 [00:04<00:00,  1.02it/s]

Processing ../../scratch/raw_data/20230203_F16A.MP4


100%|████████████████████████████████████████████████████████████████████████| 5/5 [00:05<00:00,  1.01s/it]


## View sampled frames

In [42]:
# | hide
import nbdev  # noqa

# Export the cells marked `# | export` to the Python module named by the
# `default_exp` directive at the top of this notebook.
nbdev.nbdev_export()