From bbcae10a5fbe3ce3daa5eb890b4a8b22eaf3147c Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Wed, 20 Aug 2025 09:42:26 -0400 Subject: [PATCH 01/10] move resources gen script to python --- test/generate_reference_resources.py | 69 ++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 test/generate_reference_resources.py diff --git a/test/generate_reference_resources.py b/test/generate_reference_resources.py new file mode 100644 index 000000000..08ba55f4b --- /dev/null +++ b/test/generate_reference_resources.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +import os +import subprocess +import sys + +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# Run this script to update the resources used in unit tests. The resources are all derived +# from source media already checked into the repo. + + +def main(): + SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) + TORCHCODEC_PATH = os.path.join(SCRIPT_DIR, "..") + RESOURCES_DIR = os.path.join(TORCHCODEC_PATH, "test", "resources") + VIDEO_PATH = os.path.join(RESOURCES_DIR, "nasa_13013.mp4") + + # Last generated with ffmpeg version 4.3 + # + # Note: The naming scheme used here must match the naming scheme used to load + # tensors in ./utils.py. + STREAMS = [0, 3] + FRAMES = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 15, 20, 25, 30, 35, 386, 387, 388, 389] + for stream in STREAMS: + for frame in FRAMES: + # Note that we are using 0-based index naming. Asking ffmpeg to number output + # frames would result in 1-based index naming. We enforce 0-based index naming + # so that the name of reference frames matches the index when accessing that + # frame in the Python decoder. 
+ frame_name = f"{frame:06d}" + cmd = f"""ffmpeg -y -i {VIDEO_PATH} -map 0:{stream} -vf select=eq(n\\,{frame}) -vsync vfr -q:v 2 {VIDEO_PATH}.stream{stream}.frame{frame_name}.bmp""" + subprocess.run(cmd.split(), check=True) + + # 12 is the last frame of this video. ?? + seek_ts = [6.0, 6.1, 10.0, 12.979633] + ts_name = [f"{seek_ts:06f}" for seek_ts in seek_ts] + print(ts_name) + for ts, name in zip(seek_ts, ts_name): + cmd = f"ffmpeg -y -ss {ts} -i {VIDEO_PATH} -frames:v 1 {VIDEO_PATH}.time{name}.bmp" + subprocess.run(cmd.split(), check=True) + + # This video was generated by running the following: + # ffmpeg -f lavfi -i testsrc=duration=5:size=640x360:rate=25,format=yuv420p -c:v libaom-av1 -crf 30 -colorspace bt709 -color_primaries bt709 -color_trc bt709 av1_video.mkv + # Note that this video only has 1 stream, at index 0. + VIDEO_PATH = os.path.join(RESOURCES_DIR, "h265_video.mp4") + FRAMES = [5] + for frame in FRAMES: + frame_name = f"{frame:06d}" + cmd = f"""ffmpeg -y -i {VIDEO_PATH} -vf select=eq(n\\,{frame}) -vsync vfr -q:v 2 {VIDEO_PATH}.stream0.frame{frame_name}.bmp""" + subprocess.run(cmd.split(), check=True) + + for bmp in [f for f in os.listdir(RESOURCES_DIR) if f.endswith(".bmp")]: + bmp_path = os.path.join(RESOURCES_DIR, bmp) + subprocess.run( + [ + sys.executable, + os.path.join(TORCHCODEC_PATH, "test", "convert_image_to_tensor.py"), + bmp_path, + ] + ) + os.remove(bmp_path) + + +if __name__ == "__main__": + main() From e11273d2972913e858191f15083e81bcac460b8f Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Wed, 20 Aug 2025 11:17:11 -0400 Subject: [PATCH 02/10] move mp3 generation to comment --- test/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/utils.py b/test/utils.py index ed611cfda..d2f500533 100644 --- a/test/utils.py +++ b/test/utils.py @@ -517,6 +517,8 @@ def sample_format(self) -> str: return self.stream_infos[self.default_stream_index].sample_format +# This file was generated with: +# ffmpeg -y -i 
test/resources/nasa_13013.mp4 -b:a 192K -vn test/resources/nasa_13013.mp4.audio.mp3 NASA_AUDIO_MP3 = TestAudio( filename="nasa_13013.mp4.audio.mp3", default_stream_index=0, From f2345051d61a485184875572737605c751ecdf57 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Wed, 20 Aug 2025 13:15:36 -0400 Subject: [PATCH 03/10] delete shell script --- test/generate_reference_resources.py | 2 +- test/generate_reference_resources.sh | 79 ---------------------------- 2 files changed, 1 insertion(+), 80 deletions(-) delete mode 100755 test/generate_reference_resources.sh diff --git a/test/generate_reference_resources.py b/test/generate_reference_resources.py index 08ba55f4b..470301064 100644 --- a/test/generate_reference_resources.py +++ b/test/generate_reference_resources.py @@ -35,7 +35,7 @@ def main(): cmd = f"""ffmpeg -y -i {VIDEO_PATH} -map 0:{stream} -vf select=eq(n\\,{frame}) -vsync vfr -q:v 2 {VIDEO_PATH}.stream{stream}.frame{frame_name}.bmp""" subprocess.run(cmd.split(), check=True) - # 12 is the last frame of this video. ?? + # Extract individual frames at specific timestamps, including the last frame of the video. seek_ts = [6.0, 6.1, 10.0, 12.979633] ts_name = [f"{seek_ts:06f}" for seek_ts in seek_ts] print(ts_name) diff --git a/test/generate_reference_resources.sh b/test/generate_reference_resources.sh deleted file mode 100755 index fba098a75..000000000 --- a/test/generate_reference_resources.sh +++ /dev/null @@ -1,79 +0,0 @@ -#!/bin/bash - -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# Run this script to update the resources used in unit tests. The resources are all derived -# from source media already checked into the repo. - -# Fail loudly on errors. 
-set -x -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -TORCHCODEC_PATH=$SCRIPT_DIR/../ -RESOURCES_DIR=$TORCHCODEC_PATH/test/resources -VIDEO_PATH=$RESOURCES_DIR/nasa_13013.mp4 - -# Last generated with ffmpeg version 4.3 -# -# Note: The naming scheme used here must match the naming scheme used to load -# tensors in ./utils.py. -STREAMS=(0 3) -FRAMES=(0 1 2 3 4 5 6 7 8 9) -FRAMES+=(15 20 25 30 35) -FRAMES+=(386 387 388 389) -for stream in "${STREAMS[@]}"; do - for frame in "${FRAMES[@]}"; do - # Note that we are using 0-based index naming. Asking ffmpeg to number output - # frames would result in 1-based index naming. We enforce 0-based index naming - # so that the name of reference frames matches the index when accessing that - # frame in the Python decoder. - frame_name=$(printf "%06d" "$frame") - ffmpeg -y -i "$VIDEO_PATH" -map 0:"$stream" -vf select="eq(n\,$frame)" -vsync vfr -q:v 2 "$VIDEO_PATH.stream$stream.frame$frame_name.bmp" - done -done -ffmpeg -y -ss 6.0 -i "$VIDEO_PATH" -frames:v 1 "$VIDEO_PATH.time6.000000.bmp" -ffmpeg -y -ss 6.1 -i "$VIDEO_PATH" -frames:v 1 "$VIDEO_PATH.time6.100000.bmp" -ffmpeg -y -ss 10.0 -i "$VIDEO_PATH" -frames:v 1 "$VIDEO_PATH.time10.000000.bmp" -# This is the last frame of this video. -ffmpeg -y -ss 12.979633 -i "$VIDEO_PATH" -frames:v 1 "$VIDEO_PATH.time12.979633.bmp" -# Audio generation in the form of an mp3. 
-ffmpeg -y -i "$VIDEO_PATH" -b:a 192K -vn "$VIDEO_PATH.audio.mp3" - -# This video was generated by running the following: -# conda install -c conda-forge x265 -# ./configure --enable-nonfree --enable-gpl --prefix=$(readlink -f ../bin) --enable-libx265 --enable-rpath --extra-ldflags=-Wl,-rpath=$CONDA_PREFIX/lib --enable-filter=drawtext --enable-libfontconfig --enable-libfreetype --enable-libharfbuzz -# ffmpeg -f lavfi -i color=size=128x128:duration=1:rate=10:color=blue -vf "drawtext=fontsize=30:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:text='Frame %{frame_num}'" -vcodec libx265 -pix_fmt yuv420p -g 2 -crf 10 h265_video.mp4 -y -# Note that this video only has 1 stream, at index 0. -VIDEO_PATH=$RESOURCES_DIR/h265_video.mp4 -FRAMES=(5) -for frame in "${FRAMES[@]}"; do - frame_name=$(printf "%06d" "$frame") - ffmpeg -y -i "$VIDEO_PATH" -vf select="eq(n\,$frame)" -vsync vfr -q:v 2 "$VIDEO_PATH.stream0.frame$frame_name.bmp" -done - -for bmp in "$RESOURCES_DIR"/*.bmp -do - python3 "$TORCHCODEC_PATH/test/convert_image_to_tensor.py" "$bmp" - rm -f "$bmp" -done - -# This video was generated by running the following: -# ffmpeg -f lavfi -i testsrc=duration=5:size=640x360:rate=25,format=yuv420p -c:v libaom-av1 -crf 30 -colorspace bt709 -color_primaries bt709 -color_trc bt709 av1_video.mkv -# Note that this video only has 1 stream, at index 0. 
-VIDEO_PATH=$RESOURCES_DIR/av1_video.mkv -FRAMES=(10) -for frame in "${FRAMES[@]}"; do - frame_name=$(printf "%06d" "$frame") - ffmpeg -y -i "$VIDEO_PATH" -vf select="eq(n\,$frame)" -vsync vfr -q:v 2 "$VIDEO_PATH.stream0.frame$frame_name.bmp" -done - -for bmp in "$RESOURCES_DIR"/*.bmp -do - python3 "$TORCHCODEC_PATH/test/convert_image_to_tensor.py" "$bmp" - rm -f "$bmp" -done From 80e19807b1b149e5360115166a51f88dd97af90f Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Wed, 20 Aug 2025 15:31:38 -0400 Subject: [PATCH 04/10] restore av1_video --- test/generate_reference_resources.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/test/generate_reference_resources.py b/test/generate_reference_resources.py index 470301064..a5aab2448 100644 --- a/test/generate_reference_resources.py +++ b/test/generate_reference_resources.py @@ -38,16 +38,28 @@ def main(): # Extract individual frames at specific timestamps, including the last frame of the video. seek_ts = [6.0, 6.1, 10.0, 12.979633] ts_name = [f"{seek_ts:06f}" for seek_ts in seek_ts] - print(ts_name) for ts, name in zip(seek_ts, ts_name): cmd = f"ffmpeg -y -ss {ts} -i {VIDEO_PATH} -frames:v 1 {VIDEO_PATH}.time{name}.bmp" subprocess.run(cmd.split(), check=True) # This video was generated by running the following: - # ffmpeg -f lavfi -i testsrc=duration=5:size=640x360:rate=25,format=yuv420p -c:v libaom-av1 -crf 30 -colorspace bt709 -color_primaries bt709 -color_trc bt709 av1_video.mkv + # conda install -c conda-forge x265 + # ./configure --enable-nonfree --enable-gpl --prefix=$(readlink -f ../bin) --enable-libx265 --enable-rpath --extra-ldflags=-Wl,-rpath=$CONDA_PREFIX/lib --enable-filter=drawtext --enable-libfontconfig --enable-libfreetype --enable-libharfbuzz + # ffmpeg -f lavfi -i color=size=128x128:duration=1:rate=10:color=blue -vf "drawtext=fontsize=30:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:text='Frame %{frame_num}'" -vcodec libx265 -pix_fmt yuv420p -g 2 -crf 10 
h265_video.mp4 -y # Note that this video only has 1 stream, at index 0. VIDEO_PATH = os.path.join(RESOURCES_DIR, "h265_video.mp4") FRAMES = [5] + for frame in FRAMES: + frame_name = f"{frame:06d}" + cmd = f"""ffmpeg -y -i {VIDEO_PATH} -vf select=eq(n\\,{frame}) -vsync vfr -q:v 2 {VIDEO_PATH}.stream0.frame{frame_name}.bmp""" + subprocess.run(cmd.split(), check=True) + + # This video was generated by running the following: + # ffmpeg -f lavfi -i testsrc=duration=5:size=640x360:rate=25,format=yuv420p -c:v libaom-av1 -crf 30 -colorspace bt709 -color_primaries bt709 -color_trc bt709 av1_video.mkv + # Note that this video only has 1 stream, at index 0. + VIDEO_PATH = os.path.join(RESOURCES_DIR, "av1_video.mkv") + FRAMES = [10] + for frame in FRAMES: frame_name = f"{frame:06d}" cmd = f"""ffmpeg -y -i {VIDEO_PATH} -vf select=eq(n\\,{frame}) -vsync vfr -q:v 2 {VIDEO_PATH}.stream0.frame{frame_name}.bmp""" From aef1360c2be0d94abde3fa28bb5f51fcdb675993 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Wed, 20 Aug 2025 15:51:26 -0400 Subject: [PATCH 05/10] update resource workflow to use py --- .github/workflows/reference_resources.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/reference_resources.yaml b/.github/workflows/reference_resources.yaml index 7471134ae..f847720b7 100644 --- a/.github/workflows/reference_resources.yaml +++ b/.github/workflows/reference_resources.yaml @@ -4,7 +4,7 @@ on: workflow_dispatch: pull_request: paths: - - test/generate_reference_resources.sh + - test/generate_reference_resources.py - .github/workflows/reference_resources.yaml # self reference schedule: - cron: '0 0 * * 0' # on sunday @@ -38,7 +38,7 @@ jobs: - name: Update pip run: python -m pip install --upgrade pip - - name: Instal generation dependencies + - name: Install generation dependencies run: | # Note that we're installing stable - this is for running a script where we're a normal PyTorch # user, not for building TorhCodec. 
@@ -50,4 +50,4 @@ jobs: - name: Run generation reference resources run: | - test/generate_reference_resources.sh + python test/generate_reference_resources.py From 8a22597fafb1339bb83d9c084f753a4e4ebe0089 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Thu, 21 Aug 2025 11:16:50 -0400 Subject: [PATCH 06/10] Use arg list, rename timestamp variables --- test/generate_reference_resources.py | 70 +++++++++++++++++++++++----- 1 file changed, 59 insertions(+), 11 deletions(-) diff --git a/test/generate_reference_resources.py b/test/generate_reference_resources.py index a5aab2448..3b6f51add 100644 --- a/test/generate_reference_resources.py +++ b/test/generate_reference_resources.py @@ -32,15 +32,39 @@ def main(): # so that the name of reference frames matches the index when accessing that # frame in the Python decoder. frame_name = f"{frame:06d}" - cmd = f"""ffmpeg -y -i {VIDEO_PATH} -map 0:{stream} -vf select=eq(n\\,{frame}) -vsync vfr -q:v 2 {VIDEO_PATH}.stream{stream}.frame{frame_name}.bmp""" - subprocess.run(cmd.split(), check=True) + cmd = [ + "ffmpeg", + "-y", + "-i", + VIDEO_PATH, + "-map", + f"0:{stream}", + "-vf", + f"select=eq(n\\,{frame})", + "-vsync", + "vfr", + "-q:v", + "2", + f"{VIDEO_PATH}.stream{stream}.frame{frame_name}.bmp", + ] + subprocess.run(cmd, check=True) # Extract individual frames at specific timestamps, including the last frame of the video. 
- seek_ts = [6.0, 6.1, 10.0, 12.979633] - ts_name = [f"{seek_ts:06f}" for seek_ts in seek_ts] - for ts, name in zip(seek_ts, ts_name): - cmd = f"ffmpeg -y -ss {ts} -i {VIDEO_PATH} -frames:v 1 {VIDEO_PATH}.time{name}.bmp" - subprocess.run(cmd.split(), check=True) + seek_timestamp = [6.0, 6.1, 10.0, 12.979633] + timestamp_name = [f"{seek_timestamp:06f}" for seek_timestamp in seek_timestamp] + for timestamp, name in zip(seek_timestamp, timestamp_name): + cmd = [ + "ffmpeg", + "-y", + "-ss", + str(timestamp), + "-i", + VIDEO_PATH, + "-frames:v", + "1", + f"{VIDEO_PATH}.time{name}.bmp", + ] + subprocess.run(cmd, check=True) # This video was generated by running the following: # conda install -c conda-forge x265 @@ -51,8 +75,20 @@ def main(): FRAMES = [5] for frame in FRAMES: frame_name = f"{frame:06d}" - cmd = f"""ffmpeg -y -i {VIDEO_PATH} -vf select=eq(n\\,{frame}) -vsync vfr -q:v 2 {VIDEO_PATH}.stream0.frame{frame_name}.bmp""" - subprocess.run(cmd.split(), check=True) + cmd = [ + "ffmpeg", + "-y", + "-i", + VIDEO_PATH, + "-vf", + f"select=eq(n\\,{frame})", + "-vsync", + "vfr", + "-q:v", + "2", + f"{VIDEO_PATH}.stream0.frame{frame_name}.bmp", + ] + subprocess.run(cmd, check=True) # This video was generated by running the following: # ffmpeg -f lavfi -i testsrc=duration=5:size=640x360:rate=25,format=yuv420p -c:v libaom-av1 -crf 30 -colorspace bt709 -color_primaries bt709 -color_trc bt709 av1_video.mkv @@ -62,8 +98,20 @@ def main(): for frame in FRAMES: frame_name = f"{frame:06d}" - cmd = f"""ffmpeg -y -i {VIDEO_PATH} -vf select=eq(n\\,{frame}) -vsync vfr -q:v 2 {VIDEO_PATH}.stream0.frame{frame_name}.bmp""" - subprocess.run(cmd.split(), check=True) + cmd = [ + "ffmpeg", + "-y", + "-i", + VIDEO_PATH, + "-vf", + f"select=eq(n\\,{frame})", + "-vsync", + "vfr", + "-q:v", + "2", + f"{VIDEO_PATH}.stream0.frame{frame_name}.bmp", + ] + subprocess.run(cmd, check=True) for bmp in [f for f in os.listdir(RESOURCES_DIR) if f.endswith(".bmp")]: bmp_path = os.path.join(RESOURCES_DIR, 
bmp) From 6c96b2e4a6c40ea02537f8927783182a16b5bb5d Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Fri, 22 Aug 2025 15:11:20 -0400 Subject: [PATCH 07/10] Delete convert_image_to_tensor script --- test/convert_image_to_tensor.py | 24 -------------- test/generate_reference_resources.py | 47 ++++++++++++++++++---------- 2 files changed, 31 insertions(+), 40 deletions(-) delete mode 100644 test/convert_image_to_tensor.py diff --git a/test/convert_image_to_tensor.py b/test/convert_image_to_tensor.py deleted file mode 100644 index 9a03288c8..000000000 --- a/test/convert_image_to_tensor.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import os -import sys - -import numpy as np - -import torch -from PIL import Image - -if __name__ == "__main__": - img_file = sys.argv[1] - # Get base filename without extension - base_filename = os.path.splitext(img_file)[0] - pil_image = Image.open(img_file) - img_tensor = torch.from_numpy(np.asarray(pil_image)) - print(img_tensor.shape) - print(img_tensor.dtype) - # Save tensor to disk - torch.save(img_tensor, base_filename + ".pt", _use_new_zipfile_serialization=True) diff --git a/test/generate_reference_resources.py b/test/generate_reference_resources.py index 3b6f51add..8fae6c2f2 100644 --- a/test/generate_reference_resources.py +++ b/test/generate_reference_resources.py @@ -1,7 +1,11 @@ #!/usr/bin/env python3 import os import subprocess -import sys + +import numpy as np + +import torch +from PIL import Image # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. @@ -13,9 +17,23 @@ # from source media already checked into the repo. 
+def convert_image_to_tensor(image_path): + if not os.path.exists(image_path): + return + # Get base filename without extension + base_filename = os.path.splitext(image_path)[0] + pil_image = Image.open(image_path) + img_tensor = torch.from_numpy(np.asarray(pil_image)) + print(img_tensor.shape) + print(img_tensor.dtype) + # Save tensor to disk + torch.save(img_tensor, base_filename + ".pt", _use_new_zipfile_serialization=True) + os.remove(image_path) + + def main(): SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) - TORCHCODEC_PATH = os.path.join(SCRIPT_DIR, "..") + TORCHCODEC_PATH = os.path.dirname(SCRIPT_DIR) RESOURCES_DIR = os.path.join(TORCHCODEC_PATH, "test", "resources") VIDEO_PATH = os.path.join(RESOURCES_DIR, "nasa_13013.mp4") @@ -31,6 +49,7 @@ def main(): # frames would result in 1-based index naming. We enforce 0-based index naming # so that the name of reference frames matches the index when accessing that # frame in the Python decoder. + output_bmp = f"{VIDEO_PATH}.stream{stream}.frame{frame:06d}.bmp" frame_name = f"{frame:06d}" cmd = [ "ffmpeg", @@ -45,14 +64,16 @@ def main(): "vfr", "-q:v", "2", - f"{VIDEO_PATH}.stream{stream}.frame{frame_name}.bmp", + output_bmp, ] subprocess.run(cmd, check=True) + convert_image_to_tensor(output_bmp) # Extract individual frames at specific timestamps, including the last frame of the video. 
seek_timestamp = [6.0, 6.1, 10.0, 12.979633] timestamp_name = [f"{seek_timestamp:06f}" for seek_timestamp in seek_timestamp] for timestamp, name in zip(seek_timestamp, timestamp_name): + output_bmp = f"{VIDEO_PATH}.time{name}.bmp" cmd = [ "ffmpeg", "-y", @@ -65,6 +86,7 @@ def main(): f"{VIDEO_PATH}.time{name}.bmp", ] subprocess.run(cmd, check=True) + convert_image_to_tensor(output_bmp) # This video was generated by running the following: # conda install -c conda-forge x265 @@ -75,6 +97,7 @@ def main(): FRAMES = [5] for frame in FRAMES: frame_name = f"{frame:06d}" + output_bmp = f"{VIDEO_PATH}.stream0.frame{frame_name}.bmp" cmd = [ "ffmpeg", "-y", @@ -86,9 +109,10 @@ def main(): "vfr", "-q:v", "2", - f"{VIDEO_PATH}.stream0.frame{frame_name}.bmp", + output_bmp, ] subprocess.run(cmd, check=True) + convert_image_to_tensor(output_bmp) # This video was generated by running the following: # ffmpeg -f lavfi -i testsrc=duration=5:size=640x360:rate=25,format=yuv420p -c:v libaom-av1 -crf 30 -colorspace bt709 -color_primaries bt709 -color_trc bt709 av1_video.mkv @@ -98,6 +122,7 @@ def main(): for frame in FRAMES: frame_name = f"{frame:06d}" + output_bmp = f"{VIDEO_PATH}.stream0.frame{frame_name}.bmp" cmd = [ "ffmpeg", "-y", @@ -109,20 +134,10 @@ def main(): "vfr", "-q:v", "2", - f"{VIDEO_PATH}.stream0.frame{frame_name}.bmp", + output_bmp, ] subprocess.run(cmd, check=True) - - for bmp in [f for f in os.listdir(RESOURCES_DIR) if f.endswith(".bmp")]: - bmp_path = os.path.join(RESOURCES_DIR, bmp) - subprocess.run( - [ - sys.executable, - os.path.join(TORCHCODEC_PATH, "test", "convert_image_to_tensor.py"), - bmp_path, - ] - ) - os.remove(bmp_path) + convert_image_to_tensor(output_bmp) if __name__ == "__main__": From 9148c525f4b587572875f97a040b87a034ac6152 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Tue, 26 Aug 2025 16:01:55 -0400 Subject: [PATCH 08/10] reflect comments --- test/generate_reference_resources.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 
deletions(-) diff --git a/test/generate_reference_resources.py b/test/generate_reference_resources.py index 8fae6c2f2..df6beaf54 100644 --- a/test/generate_reference_resources.py +++ b/test/generate_reference_resources.py @@ -1,4 +1,9 @@ -#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + import os import subprocess @@ -7,12 +12,6 @@ import torch from PIL import Image -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - # Run this script to update the resources used in unit tests. The resources are all derived # from source media already checked into the repo. @@ -24,8 +23,6 @@ def convert_image_to_tensor(image_path): base_filename = os.path.splitext(image_path)[0] pil_image = Image.open(image_path) img_tensor = torch.from_numpy(np.asarray(pil_image)) - print(img_tensor.shape) - print(img_tensor.dtype) # Save tensor to disk torch.save(img_tensor, base_filename + ".pt", _use_new_zipfile_serialization=True) os.remove(image_path) From 390da5aad5ee4db3542cb7caceaf41d7bd33ddd7 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Tue, 26 Aug 2025 16:28:36 -0400 Subject: [PATCH 09/10] Factor out shared ffmpeg functions --- test/generate_reference_resources.py | 101 +++++++++++---------------- 1 file changed, 40 insertions(+), 61 deletions(-) diff --git a/test/generate_reference_resources.py b/test/generate_reference_resources.py index df6beaf54..18c6299d3 100644 --- a/test/generate_reference_resources.py +++ b/test/generate_reference_resources.py @@ -28,6 +28,40 @@ def convert_image_to_tensor(image_path): os.remove(image_path) +def get_frame_by_index(video_path, frame, output_path, stream): + cmd = [ + "ffmpeg", + "-y", + "-i", + 
video_path, + "-map", + f"0:{stream}", + "-vf", + f"select=eq(n\\,{frame})", + "-vsync", + "vfr", + "-q:v", + "2", + output_path, + ] + subprocess.run(cmd, check=True) + + +def get_frame_by_timestamp(video_path, timestamp, output_path): + cmd = [ + "ffmpeg", + "-y", + "-ss", + str(timestamp), + "-i", + video_path, + "-frames:v", + "1", + output_path, + ] + subprocess.run(cmd, check=True) + + def main(): SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) TORCHCODEC_PATH = os.path.dirname(SCRIPT_DIR) @@ -47,23 +81,7 @@ def main(): # so that the name of reference frames matches the index when accessing that # frame in the Python decoder. output_bmp = f"{VIDEO_PATH}.stream{stream}.frame{frame:06d}.bmp" - frame_name = f"{frame:06d}" - cmd = [ - "ffmpeg", - "-y", - "-i", - VIDEO_PATH, - "-map", - f"0:{stream}", - "-vf", - f"select=eq(n\\,{frame})", - "-vsync", - "vfr", - "-q:v", - "2", - output_bmp, - ] - subprocess.run(cmd, check=True) + get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=stream) convert_image_to_tensor(output_bmp) # Extract individual frames at specific timestamps, including the last frame of the video. 
@@ -71,18 +89,7 @@ def main(): timestamp_name = [f"{seek_timestamp:06f}" for seek_timestamp in seek_timestamp] for timestamp, name in zip(seek_timestamp, timestamp_name): output_bmp = f"{VIDEO_PATH}.time{name}.bmp" - cmd = [ - "ffmpeg", - "-y", - "-ss", - str(timestamp), - "-i", - VIDEO_PATH, - "-frames:v", - "1", - f"{VIDEO_PATH}.time{name}.bmp", - ] - subprocess.run(cmd, check=True) + get_frame_by_timestamp(VIDEO_PATH, timestamp, output_bmp) convert_image_to_tensor(output_bmp) # This video was generated by running the following: @@ -93,22 +100,8 @@ def main(): VIDEO_PATH = os.path.join(RESOURCES_DIR, "h265_video.mp4") FRAMES = [5] for frame in FRAMES: - frame_name = f"{frame:06d}" - output_bmp = f"{VIDEO_PATH}.stream0.frame{frame_name}.bmp" - cmd = [ - "ffmpeg", - "-y", - "-i", - VIDEO_PATH, - "-vf", - f"select=eq(n\\,{frame})", - "-vsync", - "vfr", - "-q:v", - "2", - output_bmp, - ] - subprocess.run(cmd, check=True) + output_bmp = f"{VIDEO_PATH}.stream0.frame{frame:06d}.bmp" + get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=0) convert_image_to_tensor(output_bmp) # This video was generated by running the following: @@ -118,22 +111,8 @@ def main(): FRAMES = [10] for frame in FRAMES: - frame_name = f"{frame:06d}" - output_bmp = f"{VIDEO_PATH}.stream0.frame{frame_name}.bmp" - cmd = [ - "ffmpeg", - "-y", - "-i", - VIDEO_PATH, - "-vf", - f"select=eq(n\\,{frame})", - "-vsync", - "vfr", - "-q:v", - "2", - output_bmp, - ] - subprocess.run(cmd, check=True) + output_bmp = f"{VIDEO_PATH}.stream0.frame{frame:06d}.bmp" + get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=0) convert_image_to_tensor(output_bmp) From de74ae764dee8784b0c26211960595d8f601b923 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Tue, 26 Aug 2025 16:52:18 -0400 Subject: [PATCH 10/10] Update os to Path --- test/generate_reference_resources.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/test/generate_reference_resources.py 
b/test/generate_reference_resources.py index 18c6299d3..5ae062111 100644 --- a/test/generate_reference_resources.py +++ b/test/generate_reference_resources.py @@ -4,8 +4,8 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -import os import subprocess +from pathlib import Path import numpy as np @@ -17,15 +17,18 @@ def convert_image_to_tensor(image_path): - if not os.path.exists(image_path): + image_path = Path(image_path) + if not image_path.exists(): return # Get base filename without extension - base_filename = os.path.splitext(image_path)[0] + base_filename = image_path.with_suffix("") pil_image = Image.open(image_path) img_tensor = torch.from_numpy(np.asarray(pil_image)) # Save tensor to disk - torch.save(img_tensor, base_filename + ".pt", _use_new_zipfile_serialization=True) - os.remove(image_path) + torch.save( + img_tensor, str(base_filename) + ".pt", _use_new_zipfile_serialization=True + ) + image_path.unlink() def get_frame_by_index(video_path, frame, output_path, stream): @@ -63,10 +66,10 @@ def get_frame_by_timestamp(video_path, timestamp, output_path): def main(): - SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) - TORCHCODEC_PATH = os.path.dirname(SCRIPT_DIR) - RESOURCES_DIR = os.path.join(TORCHCODEC_PATH, "test", "resources") - VIDEO_PATH = os.path.join(RESOURCES_DIR, "nasa_13013.mp4") + SCRIPT_DIR = Path(__file__).resolve().parent + TORCHCODEC_PATH = SCRIPT_DIR.parent + RESOURCES_DIR = TORCHCODEC_PATH / "test" / "resources" + VIDEO_PATH = RESOURCES_DIR / "nasa_13013.mp4" # Last generated with ffmpeg version 4.3 # @@ -97,7 +100,7 @@ def main(): # ./configure --enable-nonfree --enable-gpl --prefix=$(readlink -f ../bin) --enable-libx265 --enable-rpath --extra-ldflags=-Wl,-rpath=$CONDA_PREFIX/lib --enable-filter=drawtext --enable-libfontconfig --enable-libfreetype --enable-libharfbuzz # ffmpeg -f lavfi -i color=size=128x128:duration=1:rate=10:color=blue -vf 
"drawtext=fontsize=30:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:text='Frame %{frame_num}'" -vcodec libx265 -pix_fmt yuv420p -g 2 -crf 10 h265_video.mp4 -y # Note that this video only has 1 stream, at index 0. - VIDEO_PATH = os.path.join(RESOURCES_DIR, "h265_video.mp4") + VIDEO_PATH = RESOURCES_DIR / "h265_video.mp4" FRAMES = [5] for frame in FRAMES: output_bmp = f"{VIDEO_PATH}.stream0.frame{frame:06d}.bmp" @@ -107,7 +110,7 @@ def main(): # This video was generated by running the following: # ffmpeg -f lavfi -i testsrc=duration=5:size=640x360:rate=25,format=yuv420p -c:v libaom-av1 -crf 30 -colorspace bt709 -color_primaries bt709 -color_trc bt709 av1_video.mkv # Note that this video only has 1 stream, at index 0. - VIDEO_PATH = os.path.join(RESOURCES_DIR, "av1_video.mkv") + VIDEO_PATH = RESOURCES_DIR / "av1_video.mkv" FRAMES = [10] for frame in FRAMES: