In [22]:
import abc
import collections.abc
from enum import Enum
from functools import partial
import logging
import os
from pathlib import Path
import random
import subprocess
import sys
from typing import Generator, List, Optional, Tuple, Type

from pydantic import BaseModel, FilePath, validator

from decord import VideoReader, cpu
from decord._ffi.ndarray import DECORDContext

from ffmpeg_tqdm_wrapper import analyze_stream, ffmpeg_to_tqdm

import numpy as np

from tqdm import tqdm

import PIL.Image

In [2]:
# Configure the root logger so that INFO records emitted by any logger of this notebook
# are printed to stdout, i.e. in the output of the cells.
root = logging.getLogger()
root.setLevel(logging.INFO)

formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Re-use the handler installed by a previous execution of this cell: adding a new handler
# on every re-run would print each log record several times.
handler = next((h for h in root.handlers if h.get_name() == "notebook-stdout"), None)
if handler is None:
    handler = logging.StreamHandler(sys.stdout)
    handler.set_name("notebook-stdout")
    root.addHandler(handler)
handler.setLevel(logging.INFO)
handler.setFormatter(formatter)

In [3]:
# Seed Python's global `random` generator so that the random frame selection performed
# further down is reproducible from one run to the next.
random.seed("16-896-375")  # my student number

In [4]:
def get_filename_without_extension(path: Path) -> str:
    """Return the final component of ``path`` without its last extension.

    Only the last suffix is removed: dots inside the name are preserved and a name
    without any extension is returned unchanged.

    Examples: ``video/SZTRA201a08.mov`` -> ``SZTRA201a08``, ``a.b.mov`` -> ``a.b``,
    ``README`` -> ``README``.

    :param path: path (or path-like string) of a file.
    :return: the file name stripped of its directories and of its last extension.
    """
    return Path(path).stem


In [5]:
class FrameSelectionStrategyEnum(str, Enum):
    """Identifiers of the frame selection strategies.

    The members are also plain strings (``str`` mix-in), so each one compares equal to
    its value.
    """

    naive = "naive"
    segment_random = "segment_random"


class VideoProcessParameters(BaseModel):
    """Validated parameters of the notebook.

    :ivar input: path of the video to process (pydantic ``FilePath``).
    :ivar output_folder: existing folder in which the produced files are written.
    :ivar frames_skip: interval, in frames, used by the frame selection strategies.
    """

    input: FilePath
    output_folder: Path
    frames_skip: int

    # `@validator` is applied on its own: pydantic turns the function into a class method
    # itself. Stacking an extra `@classmethod` on top of it wraps the object pydantic
    # marks as a validator and may prevent the validator from being registered.
    @validator('output_folder')
    def output_folder_must_exist_and_be_a_folder(cls, v: Path) -> Path:
        """Reject an output folder which does not exist or is not a directory.

        A ``ValueError`` is raised rather than using ``assert`` so that the check is not
        stripped when Python runs with ``-O``; pydantic reports it as a validation error.
        """
        if not v.is_dir():
            raise ValueError(f"output folder '{v}' does not exist or is not a folder")
        return v

    @property
    def file_wo_ext(self) -> str:
        """Name of the input file, without its directories and extension."""
        return get_filename_without_extension(self.input)

    def get_output_path_with_filename(self, filename: str) -> Path:
        """Return the path of ``filename`` inside the output folder."""
        return self.output_folder / filename

In [6]:
# This cell is tagged `parameters` to accept arguments from papermill cli
# NOTE: `input` shadows the Python builtin of the same name; the name is kept because it
# is the parameter name exposed by this cell.
input: Optional[str] = '../SZTR/video/SZTRA201a08.mov'  # input video file
i: Optional[str] = None  # input video file, can be used instead of parameter input (input has precedence)
output: Optional[str] = '../SZTR/video/'  # output folder
o: Optional[str] = None  # output folder, can be used instead of parameter output (output has precedence)
frames_skip: int = 12  # as the videos are 25 fps (get 2 frames per second by default)

# Switches guarding the optional cells below: image extraction and demo cells
_extract_frames_2_images: bool = False
_demo: bool = False

In [7]:
# Resolve the short aliases (`i`, `o`) and let pydantic validate the resulting values
parameters = VideoProcessParameters(
    input=input or i,
    output_folder=output or o,
    frames_skip=frames_skip,
)
parameters

VideoProcessParameters(input=PosixPath('../SZTR/video/SZTRA201a08.mov'), output_folder=PosixPath('../SZTR/video'), frames_skip=12)

In [8]:
# Adapted from CLIP4Clip: https://github.com/ArrowLuo/CLIP4Clip/blob/master/preprocess/compress_video.py
# but don't change the frame rate, as we wish to apply different frame selection strategies afterwards
# and use ffmpeg_to_tqdm helper
def scale_compress_video(input_video_path: Path, output_video_path: Path) -> int:
    """Scale a video down with ffmpeg and re-encode it with libx264.

    The scale filter sets the smaller side of the picture to 224 pixels and derives the
    other side from the aspect ratio, truncated to an even number. Only the video
    streams are kept (``-map 0:v``) and the frame rate is left untouched. An existing
    output file is overwritten (``-y``).

    :param input_video_path: video to read.
    :param output_video_path: file to write.
    :return: the ``returncode`` of the object returned by ``ffmpeg_to_tqdm``.
    """
    log = logging.getLogger(f'ffmpeg-{output_video_path}')
    log.debug("Using ffmpeg to scale and compress an original video.")

    stream_info = analyze_stream(log, str(input_video_path))

    scale_filter = "scale='if(gt(a,1),trunc(oh*a/2)*2,224)':'if(gt(a,1),224,trunc(ow*a/2)*2)'"
    ffmpeg_args = [
        'ffmpeg',
        '-y',  # overwrite the output file if it exists
        '-i', str(input_video_path),
        '-filter:v', scale_filter,
        '-map', '0:v',
        # change the codec for a better compression (list them with: `ffmpeg -codecs`)
        '-vcodec', 'libx264',
        str(output_video_path),
    ]

    # 'universal_newlines' keeps the carriage returns ('\r') of the ffmpeg output from
    # messing with the reading of the progress
    # https://github.com/chriskiehl/Gooey/issues/495#issuecomment-614991802
    process = subprocess.Popen(
        ffmpeg_args,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    )

    finished = ffmpeg_to_tqdm(
        log,
        process,
        duration=stream_info.get('duration'),
        tqdm_desc="FFMPEG scale down to 224 and encode libx264",
    )
    return finished.returncode

In [9]:
# The scaled-down copy is written in the output folder as "<input file name>_224.mp4"
scaled_down_video_path = parameters.get_output_path_with_filename(f"{parameters.file_wo_ext}_224.mp4")
scale_compress_video(parameters.input, scaled_down_video_path)

FFMPEG scale down to 224 and encode libx264: 268.0segment [00:09, 29.56segment/s]                      


In [10]:
class FrameSelectionStrategy(collections.abc.Iterator):
    """Base class of the strategies selecting which frames of a video are extracted.

    A strategy is an iterator over the selected frame indexes: ``iter(strategy)`` rewinds
    it and returns the strategy itself, ``len(strategy)`` is the number of selected
    frames. Subclasses provide ``frames_i_sequence`` and ``size``.

    :param interval: interval, in frames, used by the selection.
    :param frames_count: total number of frames of the video.
    """

    i: int  # index in the sequence of frames to select

    def __init__(self, interval: int, frames_count: int):
        self.interval = interval
        self.frames_count = frames_count
        # start at the beginning so that next() also works before any call to iter()
        self.i = 0

    @property
    @abc.abstractmethod
    def frames_i_sequence(self) -> List[int]:
        """Returns the indexes to extract from the sequence between [0; frames_count - 1] inclusive"""
        # NotImplemented is a constant, not an exception: raising it is a TypeError
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def size(self) -> int:
        """Returns the number of frames to extract (size of frames_i_sequence)"""
        raise NotImplementedError

    def __iter__(self) -> "FrameSelectionStrategy":
        """Rewind the iteration and return the strategy itself."""
        self.i = 0
        return self

    def __next__(self) -> int:
        """Return the next selected frame index, or raise ``StopIteration`` at the end."""
        if self.i >= self.size:
            raise StopIteration

        i = self.i
        self.i += 1
        return self.frames_i_sequence[i]

    def __len__(self) -> int:
        """Number of frames selected by the strategy."""
        return self.size


class FrameExtractor(collections.abc.Iterator):
    """Iterate over the frames of a video chosen by a frame selection strategy.

    Every iteration step gives a ``(frame, index)`` tuple: the frame as a numpy array and
    its index in the video.

    Example:

    ```python
    >>> for frame, idx in FrameExtractor(Path("../SZTR/video/SZTRA201a08_224.mp4"), RandomSequenceFrameSelectionStrategy, 12):
    >>>     print(PIL.Image.fromarray(frame))
    ```
    """

    strategy: FrameSelectionStrategy = None

    def __init__(self, video_path: Path, strategy_type: Type[FrameSelectionStrategy], interval: int, video_reader_context: DECORDContext = cpu(0)):
        """Open the video and build the strategy for its number of frames.

        :param video_path: video to read the frames from.
        :param strategy_type: callable building the strategy from ``(interval, frames_count)``.
        :param interval: interval, in frames, given to the strategy.
        :param video_reader_context: decord context given to the ``VideoReader``.
        """
        reader = VideoReader(str(video_path), video_reader_context)
        self.vr = reader
        self.strategy = strategy_type(interval, len(reader))

    @property
    def total_frames(self) -> int:
        """Number of frames reported by the video reader."""
        return len(self.vr)

    @property
    def frames_to_extract(self) -> int:
        """Number of frames selected by the strategy (starts an iteration if none was started yet)."""
        try:
            iterator = self.strategy_iterator
        except AttributeError:
            iter(self)
            iterator = self.strategy_iterator
        return len(iterator)

    def __iter__(self) -> "FrameExtractor":
        """Start a new iteration over the strategy and return the extractor itself."""
        self.strategy_iterator = iter(self.strategy)
        return self

    def __next__(self) -> Tuple[np.ndarray, int]:
        """Return the next selected frame and its index."""
        # StopIteration raised by the strategy ends this iteration as well
        frame_index = next(self.strategy_iterator)
        return self.vr[frame_index].asnumpy(), frame_index


class TrivialFrameSelectionStrategy(FrameSelectionStrategy):
    """Select a frame every N frames (N := interval) and will always include the last frame
    (frames_count - 1).

    A video without any frame (frames_count == 0) selects nothing.
    """

    def __init__(self, interval: int, frames_count: int):
        super().__init__(interval, frames_count)
        indexes = list(range(0, frames_count, interval))
        last_frame = frames_count - 1
        # Append the last frame unless the regular steps already end on it. Checking that
        # at least one index was selected avoids adding the bogus index -1 for an empty
        # video.
        if indexes and indexes[-1] != last_frame:
            indexes.append(last_frame)
        self._frame_i_sequence = indexes

    @property
    def frames_i_sequence(self) -> List[int]:
        """Indexes of the selected frames, in increasing order."""
        return self._frame_i_sequence

    @property
    def size(self) -> int:
        """Number of selected frames."""
        return len(self._frame_i_sequence)


class RandomSequenceFrameSelectionStrategy(TrivialFrameSelectionStrategy):
    """In every sequence of N frames, select randomly a frame.
    Example:
       [0     [12    [24    [36    [48    [60  ...
        +------+------+------+------+------+----
        |   7  |  22  |  29  |  38  |  53  |
        +------+------+------+------+------+----

    >>> [random.randrange(i, i + 12) for i in range(0, 60, 12)]

    The segments are delimited by the indexes of TrivialFrameSelectionStrategy, so one
    frame fewer than that strategy is selected. ``frames_i_sequence`` and ``size`` are
    inherited from it.
    """

    def __init__(self, interval: int, frames_count: int):
        super().__init__(interval, frames_count)
        boundaries = super().frames_i_sequence
        last_segment = len(boundaries) - 2
        # Segments are half-open, [start; end[, so that a boundary frame belongs to a
        # single segment and cannot be selected twice. The last segment also includes its
        # end, which is the last frame of the video and belongs to no other segment.
        self._frame_i_sequence = [
            random.randrange(start, end + 1 if n == last_segment else end)
            for n, (start, end) in enumerate(zip(boundaries[:-1], boundaries[1:]))
        ]


class UnionFrameSelectionStrategy(FrameSelectionStrategy):
    """Select the union of the frames selected by several strategies.

    The selected indexes are de-duplicated and sorted in increasing order.
    """

    def __init__(self, strategy_types: List[Type[FrameSelectionStrategy]], interval: int, frames_count: int):
        super().__init__(interval, frames_count)
        self.strategies = [build(interval, frames_count) for build in strategy_types]

        # e.g. sorted(set(TrivialFrameSelectionStrategy(12, 3373)) | set(RandomSequenceFrameSelectionStrategy(12, 3373)))
        selected = set()
        for strategy in self.strategies:
            selected.update(strategy)
        self._frame_i_sequence = sorted(selected)

    @property
    def frames_i_sequence(self) -> List[int]:
        """Sorted indexes of the frames selected by at least one strategy."""
        return self._frame_i_sequence

    @property
    def size(self) -> int:
        """Number of distinct selected frames."""
        return len(self._frame_i_sequence)

    def get_selection_strategy_by_frame(self) -> Generator[Tuple[str, int], None, None]:
        """Yield ``(strategy class name, frame index)`` for every frame of every strategy."""
        per_strategy = [(type(strategy).__name__, strategy.frames_i_sequence) for strategy in self.strategies]
        for name, indexes in per_strategy:
            yield from ((name, index) for index in indexes)


In [11]:
# Demo: last indexes selected by the trivial strategy, and how many frames are selected.
# The expression to display is the last top-level statement of the cell, because Jupyter
# does not display an expression nested in an `if` block.
frames = list(TrivialFrameSelectionStrategy(parameters.frames_skip, 3373)) if _demo else []
(frames[-10:], len(frames)) if _demo else None

In [12]:
# Demo: indexes selected by the random strategy. The conditional expression is the last
# top-level statement of the cell, because Jupyter does not display an expression nested
# in an `if` block.
list(RandomSequenceFrameSelectionStrategy(parameters.frames_skip, 3373)) if _demo else None

In [13]:
# The random strategy selects one frame per pair of consecutive indexes of the trivial
# strategy, hence exactly one frame fewer.
if _demo:
    assert len(TrivialFrameSelectionStrategy(parameters.frames_skip, 3373)) == len(RandomSequenceFrameSelectionStrategy(parameters.frames_skip, 3373)) + 1

In [14]:
# Pre-bind the list of strategy types, so that the result can be called with
# (interval, frames_count) like the other strategy classes, as FrameExtractor does.
UnionFrameSelectionStrategyCtor = partial(UnionFrameSelectionStrategy, [TrivialFrameSelectionStrategy, RandomSequenceFrameSelectionStrategy])
frame_selection = FrameExtractor(scaled_down_video_path, UnionFrameSelectionStrategyCtor, parameters.frames_skip)

In [15]:
# `total_frames` is the length of the video reader of the extractor
print(f"Number of frames in sequences: {frame_selection.total_frames}")

Number of frames in sequences: 3373


In [16]:
# First frame. The conditional expression is the last top-level statement of the cell,
# because Jupyter does not display an expression nested in an `if` block.
PIL.Image.fromarray(frame_selection.vr[0].asnumpy()) if _demo else None

In [17]:
# Frame with a person rolling on the floor (frame 12 * 25 = 300). The conditional
# expression is the last top-level statement of the cell, because Jupyter does not
# display an expression nested in an `if` block.
PIL.Image.fromarray(frame_selection.vr[12*25].asnumpy()) if _demo else None

In [18]:
def extract_frames_and_save_by_unit(frame_selection: FrameExtractor, output_folder: Path, filename_prefix: str) -> Generator[Path, None, None]:
    """Save every selected frame as a PNG image and yield the path of each written file.

    The files are named ``<filename_prefix>_<frame index>.png``; the index is zero-padded
    to the number of digits of the total number of frames. As this is a generator,
    nothing is written until it is iterated.

    :param frame_selection: extractor providing the ``(frame, index)`` tuples.
    :param output_folder: folder in which the images are written.
    :param filename_prefix: prefix of the image file names.
    """
    index_width = len(str(frame_selection.total_frames))
    progress = tqdm(
        frame_selection,
        total=frame_selection.frames_to_extract,
        desc="Extracting frames and saving images",
    )

    for frame, frame_index in progress:
        target = output_folder / f"{filename_prefix}_{frame_index:0{index_width}}.png"
        PIL.Image.fromarray(frame).save(target)
        yield target

# Write the images only when requested; otherwise keep an empty list so that
# `produced_image_paths` is defined in both cases.
if _extract_frames_2_images:
    # list() consumes the generator, which is what actually writes the images
    produced_image_paths = list(extract_frames_and_save_by_unit(frame_selection, parameters.output_folder, f"{parameters.file_wo_ext}_224"))
    print(list(frame_selection.strategy.get_selection_strategy_by_frame()))
else:
    produced_image_paths = []

# Compare File System Space Usage
A quick look at the file system shows that the series of images uses more disk space than the video, most likely because of the compression performed by the video encoder.

In [19]:
# Compare the disk space used by the extracted images with the size of the video
if _extract_frames_2_images:
    images_file_sizes = [(path, os.path.getsize(path)) for path in produced_image_paths]
    # The images were extracted from the scaled-down video: compare with that file.
    # (`parameters` has no `output_video_path` attribute.)
    video_size = os.path.getsize(scaled_down_video_path)

    print("# First 10 extracted images sizes in bytes")
    print(images_file_sizes[:10])
    print("...")

    print()
    print(f"# Video size: {video_size} bytes")

    total_images_files_size = sum(size for _path, size in images_file_sizes)
    print(total_images_files_size, "vs", video_size, f"({video_size - total_images_files_size} bytes)")

# Extract the frames but keep them in a video file using FFMPEG

In [20]:
# Following a Stack Overflow answer, several frames can be selected by adding ('+') one
# "eq" term per frame inside the select expression
def get_ffmpeg_select_argument(frames_sequence: List[int], setpts_filter: float) -> str:
    r"""Produce a string to be used with the FFMPEG command and its "select" argument.

    :param frames_sequence: linear or not list of frames to extract.
                            Example: 0, 12, 24, 36, 48, 60, ...
                            or:      6, 23, 27, 42, 50, 68, ...
    :param setpts_filter: from FFMPEG manual page:
                            "setpts" filter, which only sets timestamps and otherwise passes the frames unchanged
                          It is expected to be smaller than 1 in case fewer frames are selected and it is wished
                          to speed up the video.
                          Usually, it is expected to be 1/FRAMES_SKIP
                          Example: *0.25* (for a FRAMES_SKIP of 4)
    :return: string to append to ``select=`` in the FFMPEG video filter, like:
             'eq(n\,0)+eq(n\,12)+eq(n\,24)',setpts=0.25*PTS
             which is used as:
             ffmpeg -i SZTRA201a08_224.mp4 -vf select=<returned string> -an SZTRA201a08_224_12_random.mp4
    """
    # eq(n\,0)+eq(n\,12)+eq(n\,24)+eq(n\,36)+eq(n\,48)
    # The backslash is doubled in the literal: "\," is not a valid Python escape sequence.
    eq_chain = "+".join(f"eq(n\\,{n})" for n in frames_sequence)
    setpts = f"setpts={setpts_filter}*PTS"

    return f"'{eq_chain}',{setpts}"


# Demo: select argument of the trivial strategy. The conditional expression is the last
# top-level statement of the cell, because Jupyter does not display an expression nested
# in an `if` block.
get_ffmpeg_select_argument(
    list(TrivialFrameSelectionStrategy(parameters.frames_skip, frames_count=frame_selection.total_frames)),
    1 / parameters.frames_skip
) if _demo else None

In [21]:
def extract_frames_in_video(frames_sequence: List[int], setpts_filter: float, input_video_path: Path, output_video_path: Path) -> int:
    """Write a video made of a subset of the frames of another video, using ffmpeg.

    The frames are selected with the "select" video filter, the audio is dropped
    (``-an``) and an existing output file is overwritten (``-y``).

    :param frames_sequence: indexes of the frames to keep.
    :param setpts_filter: factor applied to the timestamps by the "setpts" filter.
    :param input_video_path: video to read.
    :param output_video_path: file to write.
    :return: the ``returncode`` of the object returned by ``ffmpeg_to_tqdm``.
    """
    log = logging.getLogger(f'ffmpeg-{output_video_path}')
    log.setLevel(logging.INFO)
    log.debug("Using ffmpeg to extract frames from a video.")

    stream_info = analyze_stream(log, str(input_video_path))

    select_filter = f"select={get_ffmpeg_select_argument(frames_sequence, setpts_filter)}"
    ffmpeg_args = [
        'ffmpeg',
        '-y',  # overwrite the output file if it exists
        '-i', str(input_video_path),
        '-vf', select_filter,
        '-an',
        str(output_video_path),
    ]

    # 'universal_newlines' keeps the carriage returns ('\r') of the ffmpeg output from
    # messing with the reading of the progress
    # https://github.com/chriskiehl/Gooey/issues/495#issuecomment-614991802
    process = subprocess.Popen(
        ffmpeg_args,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    )
    finished = ffmpeg_to_tqdm(
        log,
        process,
        duration=stream_info.get('duration'),
        tqdm_desc="FFMPEG select subset of frames",
    )
    return finished.returncode


# Produce one video per selection strategy. The interval is part of the file name, so it
# is taken from the parameters instead of being hard-coded: with the default
# `frames_skip` of 12 the names are unchanged ("..._224_trivial_12.mp4", ...).
for strategy_type, strategy_label in (
    (TrivialFrameSelectionStrategy, "trivial"),
    (RandomSequenceFrameSelectionStrategy, "random_sequence"),
):
    extract_frames_in_video(
        list(strategy_type(parameters.frames_skip, frames_count=frame_selection.total_frames)),
        1 / parameters.frames_skip,
        scaled_down_video_path,
        parameters.get_output_path_with_filename(
            f"{parameters.file_wo_ext}_224_{strategy_label}_{parameters.frames_skip}.mp4"
        ),
    )


FFMPEG execution: 145.0segment [00:00, 298.86segment/s]           
FFMPEG execution: 145.0segment [00:00, 347.84segment/s]           
