diff --git a/.github/workflows/reference_resources.yaml b/.github/workflows/reference_resources.yaml
index 7471134ae..f847720b7 100644
--- a/.github/workflows/reference_resources.yaml
+++ b/.github/workflows/reference_resources.yaml
@@ -4,7 +4,7 @@ on:
   workflow_dispatch:
   pull_request:
     paths:
-      - test/generate_reference_resources.sh
+      - test/generate_reference_resources.py
       - .github/workflows/reference_resources.yaml # self reference
   schedule:
     - cron: '0 0 * * 0' # on sunday
@@ -38,7 +38,7 @@ jobs:
       - name: Update pip
         run: python -m pip install --upgrade pip

-      - name: Instal generation dependencies
+      - name: Install generation dependencies
         run: |
           # Note that we're installing stable - this is for running a script where we're a normal PyTorch
           # user, not for building TorhCodec.
@@ -50,4 +50,4 @@

       - name: Run generation reference resources
         run: |
-          test/generate_reference_resources.sh
+          python test/generate_reference_resources.py
diff --git a/test/convert_image_to_tensor.py b/test/convert_image_to_tensor.py
deleted file mode 100644
index 9a03288c8..000000000
--- a/test/convert_image_to_tensor.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-import os
-import sys
-
-import numpy as np
-
-import torch
-from PIL import Image
-
-if __name__ == "__main__":
-    img_file = sys.argv[1]
-    # Get base filename without extension
-    base_filename = os.path.splitext(img_file)[0]
-    pil_image = Image.open(img_file)
-    img_tensor = torch.from_numpy(np.asarray(pil_image))
-    print(img_tensor.shape)
-    print(img_tensor.dtype)
-    # Save tensor to disk
-    torch.save(img_tensor, base_filename + ".pt", _use_new_zipfile_serialization=True)
diff --git a/test/generate_reference_resources.py b/test/generate_reference_resources.py
new file mode 100644
index 000000000..5ae062111
--- /dev/null
+++ b/test/generate_reference_resources.py
@@ -0,0 +1,123 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import subprocess
+from pathlib import Path
+
+import numpy as np
+
+import torch
+from PIL import Image
+
+# Run this script to update the resources used in unit tests. The resources are all derived
+# from source media already checked into the repo.
+
+
+def convert_image_to_tensor(image_path):
+    image_path = Path(image_path)
+    if not image_path.exists():
+        return
+    # Get base filename without extension
+    base_filename = image_path.with_suffix("")
+    pil_image = Image.open(image_path)
+    img_tensor = torch.from_numpy(np.asarray(pil_image))
+    # Save tensor to disk
+    torch.save(
+        img_tensor, str(base_filename) + ".pt", _use_new_zipfile_serialization=True
+    )
+    image_path.unlink()
+
+
+def get_frame_by_index(video_path, frame, output_path, stream):
+    cmd = [
+        "ffmpeg",
+        "-y",
+        "-i",
+        video_path,
+        "-map",
+        f"0:{stream}",
+        "-vf",
+        f"select=eq(n\\,{frame})",
+        "-vsync",
+        "vfr",
+        "-q:v",
+        "2",
+        output_path,
+    ]
+    subprocess.run(cmd, check=True)
+
+
+def get_frame_by_timestamp(video_path, timestamp, output_path):
+    cmd = [
+        "ffmpeg",
+        "-y",
+        "-ss",
+        str(timestamp),
+        "-i",
+        video_path,
+        "-frames:v",
+        "1",
+        output_path,
+    ]
+    subprocess.run(cmd, check=True)
+
+
+def main():
+    SCRIPT_DIR = Path(__file__).resolve().parent
+    TORCHCODEC_PATH = SCRIPT_DIR.parent
+    RESOURCES_DIR = TORCHCODEC_PATH / "test" / "resources"
+    VIDEO_PATH = RESOURCES_DIR / "nasa_13013.mp4"
+
+    # Last generated with ffmpeg version 4.3
+    #
+    # Note: The naming scheme used here must match the naming scheme used to load
+    # tensors in ./utils.py.
+    STREAMS = [0, 3]
+    FRAMES = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 15, 20, 25, 30, 35, 386, 387, 388, 389]
+    for stream in STREAMS:
+        for frame in FRAMES:
+            # Note that we are using 0-based index naming. Asking ffmpeg to number output
+            # frames would result in 1-based index naming. We enforce 0-based index naming
+            # so that the name of reference frames matches the index when accessing that
+            # frame in the Python decoder.
+            output_bmp = f"{VIDEO_PATH}.stream{stream}.frame{frame:06d}.bmp"
+            get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=stream)
+            convert_image_to_tensor(output_bmp)
+
+    # Extract individual frames at specific timestamps, including the last frame of the video.
+    seek_timestamp = [6.0, 6.1, 10.0, 12.979633]
+    timestamp_name = [f"{seek_timestamp:06f}" for seek_timestamp in seek_timestamp]
+    for timestamp, name in zip(seek_timestamp, timestamp_name):
+        output_bmp = f"{VIDEO_PATH}.time{name}.bmp"
+        get_frame_by_timestamp(VIDEO_PATH, timestamp, output_bmp)
+        convert_image_to_tensor(output_bmp)
+
+    # This video was generated by running the following:
+    # conda install -c conda-forge x265
+    # ./configure --enable-nonfree --enable-gpl --prefix=$(readlink -f ../bin) --enable-libx265 --enable-rpath --extra-ldflags=-Wl,-rpath=$CONDA_PREFIX/lib --enable-filter=drawtext --enable-libfontconfig --enable-libfreetype --enable-libharfbuzz
+    # ffmpeg -f lavfi -i color=size=128x128:duration=1:rate=10:color=blue -vf "drawtext=fontsize=30:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:text='Frame %{frame_num}'" -vcodec libx265 -pix_fmt yuv420p -g 2 -crf 10 h265_video.mp4 -y
+    # Note that this video only has 1 stream, at index 0.
+    VIDEO_PATH = RESOURCES_DIR / "h265_video.mp4"
+    FRAMES = [5]
+    for frame in FRAMES:
+        output_bmp = f"{VIDEO_PATH}.stream0.frame{frame:06d}.bmp"
+        get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=0)
+        convert_image_to_tensor(output_bmp)
+
+    # This video was generated by running the following:
+    # ffmpeg -f lavfi -i testsrc=duration=5:size=640x360:rate=25,format=yuv420p -c:v libaom-av1 -crf 30 -colorspace bt709 -color_primaries bt709 -color_trc bt709 av1_video.mkv
+    # Note that this video only has 1 stream, at index 0.
+    VIDEO_PATH = RESOURCES_DIR / "av1_video.mkv"
+    FRAMES = [10]
+
+    for frame in FRAMES:
+        output_bmp = f"{VIDEO_PATH}.stream0.frame{frame:06d}.bmp"
+        get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=0)
+        convert_image_to_tensor(output_bmp)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/test/generate_reference_resources.sh b/test/generate_reference_resources.sh
deleted file mode 100755
index fba098a75..000000000
--- a/test/generate_reference_resources.sh
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/bin/bash
-
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-# Run this script to update the resources used in unit tests. The resources are all derived
-# from source media already checked into the repo.
-
-# Fail loudly on errors.
-set -x
-set -e
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-TORCHCODEC_PATH=$SCRIPT_DIR/../
-RESOURCES_DIR=$TORCHCODEC_PATH/test/resources
-VIDEO_PATH=$RESOURCES_DIR/nasa_13013.mp4
-
-# Last generated with ffmpeg version 4.3
-#
-# Note: The naming scheme used here must match the naming scheme used to load
-# tensors in ./utils.py.
-STREAMS=(0 3)
-FRAMES=(0 1 2 3 4 5 6 7 8 9)
-FRAMES+=(15 20 25 30 35)
-FRAMES+=(386 387 388 389)
-for stream in "${STREAMS[@]}"; do
-    for frame in "${FRAMES[@]}"; do
-        # Note that we are using 0-based index naming. Asking ffmpeg to number output
-        # frames would result in 1-based index naming. We enforce 0-based index naming
-        # so that the name of reference frames matches the index when accessing that
-        # frame in the Python decoder.
-        frame_name=$(printf "%06d" "$frame")
-        ffmpeg -y -i "$VIDEO_PATH" -map 0:"$stream" -vf select="eq(n\,$frame)" -vsync vfr -q:v 2 "$VIDEO_PATH.stream$stream.frame$frame_name.bmp"
-    done
-done
-ffmpeg -y -ss 6.0 -i "$VIDEO_PATH" -frames:v 1 "$VIDEO_PATH.time6.000000.bmp"
-ffmpeg -y -ss 6.1 -i "$VIDEO_PATH" -frames:v 1 "$VIDEO_PATH.time6.100000.bmp"
-ffmpeg -y -ss 10.0 -i "$VIDEO_PATH" -frames:v 1 "$VIDEO_PATH.time10.000000.bmp"
-# This is the last frame of this video.
-ffmpeg -y -ss 12.979633 -i "$VIDEO_PATH" -frames:v 1 "$VIDEO_PATH.time12.979633.bmp"
-# Audio generation in the form of an mp3.
-ffmpeg -y -i "$VIDEO_PATH" -b:a 192K -vn "$VIDEO_PATH.audio.mp3"
-
-# This video was generated by running the following:
-# conda install -c conda-forge x265
-# ./configure --enable-nonfree --enable-gpl --prefix=$(readlink -f ../bin) --enable-libx265 --enable-rpath --extra-ldflags=-Wl,-rpath=$CONDA_PREFIX/lib --enable-filter=drawtext --enable-libfontconfig --enable-libfreetype --enable-libharfbuzz
-# ffmpeg -f lavfi -i color=size=128x128:duration=1:rate=10:color=blue -vf "drawtext=fontsize=30:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:text='Frame %{frame_num}'" -vcodec libx265 -pix_fmt yuv420p -g 2 -crf 10 h265_video.mp4 -y
-# Note that this video only has 1 stream, at index 0.
-VIDEO_PATH=$RESOURCES_DIR/h265_video.mp4
-FRAMES=(5)
-for frame in "${FRAMES[@]}"; do
-    frame_name=$(printf "%06d" "$frame")
-    ffmpeg -y -i "$VIDEO_PATH" -vf select="eq(n\,$frame)" -vsync vfr -q:v 2 "$VIDEO_PATH.stream0.frame$frame_name.bmp"
-done
-
-for bmp in "$RESOURCES_DIR"/*.bmp
-do
-    python3 "$TORCHCODEC_PATH/test/convert_image_to_tensor.py" "$bmp"
-    rm -f "$bmp"
-done
-
-# This video was generated by running the following:
-# ffmpeg -f lavfi -i testsrc=duration=5:size=640x360:rate=25,format=yuv420p -c:v libaom-av1 -crf 30 -colorspace bt709 -color_primaries bt709 -color_trc bt709 av1_video.mkv
-# Note that this video only has 1 stream, at index 0.
-VIDEO_PATH=$RESOURCES_DIR/av1_video.mkv
-FRAMES=(10)
-for frame in "${FRAMES[@]}"; do
-    frame_name=$(printf "%06d" "$frame")
-    ffmpeg -y -i "$VIDEO_PATH" -vf select="eq(n\,$frame)" -vsync vfr -q:v 2 "$VIDEO_PATH.stream0.frame$frame_name.bmp"
-done
-
-for bmp in "$RESOURCES_DIR"/*.bmp
-do
-    python3 "$TORCHCODEC_PATH/test/convert_image_to_tensor.py" "$bmp"
-    rm -f "$bmp"
-done
diff --git a/test/utils.py b/test/utils.py
index ed611cfda..d2f500533 100644
--- a/test/utils.py
+++ b/test/utils.py
@@ -517,6 +517,8 @@ def sample_format(self) -> str:
         return self.stream_infos[self.default_stream_index].sample_format


+# This file was generated with:
+# ffmpeg -y -i test/resources/nasa_13013.mp4 -b:a 192K -vn test/resources/nasa_13013.mp4.audio.mp3
 NASA_AUDIO_MP3 = TestAudio(
     filename="nasa_13013.mp4.audio.mp3",
     default_stream_index=0,
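For context on how the generated resources are consumed, here is a minimal sketch (not part of the diff, and not the repo's actual test code) of loading one of the .pt reference tensors that generate_reference_resources.py writes. The filename follows the script's naming scheme for stream 0, frame 5 of nasa_13013.mp4; the path and the shape comment are assumptions based on the script above.

# Minimal sketch, assuming the script has been run from the repo root and
# test/resources/ contains the regenerated reference tensors.
from pathlib import Path

import torch

resources = Path("test/resources")
# convert_image_to_tensor() strips the ".bmp" suffix and appends ".pt",
# so stream 0 / frame 5 of nasa_13013.mp4 ends up with this name.
reference = resources / "nasa_13013.mp4.stream0.frame000005.pt"
frame = torch.load(reference)  # height x width x channels uint8, as PIL decoded the BMP
print(frame.shape, frame.dtype)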