From bbcae10a5fbe3ce3daa5eb890b4a8b22eaf3147c Mon Sep 17 00:00:00 2001
From: Daniel Flores <danielflores3@fb.com>
Date: Wed, 20 Aug 2025 09:42:26 -0400
Subject: [PATCH 01/10] move resources gen script to python

---
 test/generate_reference_resources.py | 69 ++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 test/generate_reference_resources.py

diff --git a/test/generate_reference_resources.py b/test/generate_reference_resources.py
new file mode 100644
index 000000000..08ba55f4b
--- /dev/null
+++ b/test/generate_reference_resources.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+import os
+import subprocess
+import sys
+
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Run this script to update the resources used in unit tests. The resources are all derived
+# from source media already checked into the repo.
+
+
+def main():
+    SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+    TORCHCODEC_PATH = os.path.join(SCRIPT_DIR, "..")
+    RESOURCES_DIR = os.path.join(TORCHCODEC_PATH, "test", "resources")
+    VIDEO_PATH = os.path.join(RESOURCES_DIR, "nasa_13013.mp4")
+
+    # Last generated with ffmpeg version 4.3
+    #
+    # Note: The naming scheme used here must match the naming scheme used to load
+    # tensors in ./utils.py.
+    STREAMS = [0, 3]
+    FRAMES = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 15, 20, 25, 30, 35, 386, 387, 388, 389]
+    for stream in STREAMS:
+        for frame in FRAMES:
+            # Note that we are using 0-based index naming. Asking ffmpeg to number output
+            # frames would result in 1-based index naming. We enforce 0-based index naming
+            # so that the name of reference frames matches the index when accessing that
+            # frame in the Python decoder.
+            frame_name = f"{frame:06d}"
+            cmd = f"""ffmpeg -y -i {VIDEO_PATH} -map 0:{stream} -vf select=eq(n\\,{frame}) -vsync vfr -q:v 2 {VIDEO_PATH}.stream{stream}.frame{frame_name}.bmp"""
+            subprocess.run(cmd.split(), check=True)
+
+    # 12 is the last frame of this video. ??
+    seek_ts = [6.0, 6.1, 10.0, 12.979633]
+    ts_name = [f"{seek_ts:06f}" for seek_ts in seek_ts]
+    print(ts_name)
+    for ts, name in zip(seek_ts, ts_name):
+        cmd = f"ffmpeg -y -ss {ts} -i {VIDEO_PATH} -frames:v 1 {VIDEO_PATH}.time{name}.bmp"
+        subprocess.run(cmd.split(), check=True)
+
+    # This video was generated by running the following:
+    # ffmpeg -f lavfi -i testsrc=duration=5:size=640x360:rate=25,format=yuv420p -c:v libaom-av1 -crf 30 -colorspace bt709 -color_primaries bt709 -color_trc bt709 av1_video.mkv
+    # Note that this video only has 1 stream, at index 0.
+    VIDEO_PATH = os.path.join(RESOURCES_DIR, "h265_video.mp4")
+    FRAMES = [5]
+    for frame in FRAMES:
+        frame_name = f"{frame:06d}"
+        cmd = f"""ffmpeg -y -i {VIDEO_PATH} -vf select=eq(n\\,{frame}) -vsync vfr -q:v 2 {VIDEO_PATH}.stream0.frame{frame_name}.bmp"""
+        subprocess.run(cmd.split(), check=True)
+
+    for bmp in [f for f in os.listdir(RESOURCES_DIR) if f.endswith(".bmp")]:
+        bmp_path = os.path.join(RESOURCES_DIR, bmp)
+        subprocess.run(
+            [
+                sys.executable,
+                os.path.join(TORCHCODEC_PATH, "test", "convert_image_to_tensor.py"),
+                bmp_path,
+            ]
+        )
+        os.remove(bmp_path)
+
+
+if __name__ == "__main__":
+    main()

From e11273d2972913e858191f15083e81bcac460b8f Mon Sep 17 00:00:00 2001
From: Daniel Flores <danielflores3@fb.com>
Date: Wed, 20 Aug 2025 11:17:11 -0400
Subject: [PATCH 02/10] move mp3 generation to comment

---
 test/utils.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/test/utils.py b/test/utils.py
index ed611cfda..d2f500533 100644
--- a/test/utils.py
+++ b/test/utils.py
@@ -517,6 +517,8 @@ def sample_format(self) -> str:
         return self.stream_infos[self.default_stream_index].sample_format
 
 
+# This file was generated with:
+# ffmpeg -y -i test/resources/nasa_13013.mp4 -b:a 192K -vn test/resources/nasa_13013.mp4.audio.mp3
 NASA_AUDIO_MP3 = TestAudio(
     filename="nasa_13013.mp4.audio.mp3",
     default_stream_index=0,

From f2345051d61a485184875572737605c751ecdf57 Mon Sep 17 00:00:00 2001
From: Daniel Flores <danielflores3@fb.com>
Date: Wed, 20 Aug 2025 13:15:36 -0400
Subject: [PATCH 03/10] delete shell script

---
 test/generate_reference_resources.py |  2 +-
 test/generate_reference_resources.sh | 79 ----------------------------
 2 files changed, 1 insertion(+), 80 deletions(-)
 delete mode 100755 test/generate_reference_resources.sh

diff --git a/test/generate_reference_resources.py b/test/generate_reference_resources.py
index 08ba55f4b..470301064 100644
--- a/test/generate_reference_resources.py
+++ b/test/generate_reference_resources.py
@@ -35,7 +35,7 @@ def main():
             cmd = f"""ffmpeg -y -i {VIDEO_PATH} -map 0:{stream} -vf select=eq(n\\,{frame}) -vsync vfr -q:v 2 {VIDEO_PATH}.stream{stream}.frame{frame_name}.bmp"""
             subprocess.run(cmd.split(), check=True)
 
-    # 12 is the last frame of this video. ??
+    # Extract individual frames at specific timestamps, including the last frame of the video.
     seek_ts = [6.0, 6.1, 10.0, 12.979633]
     ts_name = [f"{seek_ts:06f}" for seek_ts in seek_ts]
     print(ts_name)
diff --git a/test/generate_reference_resources.sh b/test/generate_reference_resources.sh
deleted file mode 100755
index fba098a75..000000000
--- a/test/generate_reference_resources.sh
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/bin/bash
-
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-# Run this script to update the resources used in unit tests. The resources are all derived
-# from source media already checked into the repo.
-
-# Fail loudly on errors.
-set -x
-set -e
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-TORCHCODEC_PATH=$SCRIPT_DIR/../
-RESOURCES_DIR=$TORCHCODEC_PATH/test/resources
-VIDEO_PATH=$RESOURCES_DIR/nasa_13013.mp4
-
-# Last generated with ffmpeg version 4.3
-#
-# Note: The naming scheme used here must match the naming scheme used to load
-# tensors in ./utils.py.
-STREAMS=(0 3)
-FRAMES=(0 1 2 3 4 5 6 7 8 9)
-FRAMES+=(15 20 25 30 35)
-FRAMES+=(386 387 388 389)
-for stream in "${STREAMS[@]}"; do
-  for frame in "${FRAMES[@]}"; do
-    # Note that we are using 0-based index naming. Asking ffmpeg to number output
-    # frames would result in 1-based index naming. We enforce 0-based index naming
-    # so that the name of reference frames matches the index when accessing that
-    # frame in the Python decoder.
-    frame_name=$(printf "%06d" "$frame")
-    ffmpeg -y -i "$VIDEO_PATH" -map 0:"$stream" -vf select="eq(n\,$frame)" -vsync vfr -q:v 2 "$VIDEO_PATH.stream$stream.frame$frame_name.bmp"
-  done
-done
-ffmpeg -y -ss 6.0 -i "$VIDEO_PATH" -frames:v 1 "$VIDEO_PATH.time6.000000.bmp"
-ffmpeg -y -ss 6.1 -i "$VIDEO_PATH" -frames:v 1 "$VIDEO_PATH.time6.100000.bmp"
-ffmpeg -y -ss 10.0 -i "$VIDEO_PATH" -frames:v 1 "$VIDEO_PATH.time10.000000.bmp"
-# This is the last frame of this video.
-ffmpeg -y -ss 12.979633 -i "$VIDEO_PATH" -frames:v 1 "$VIDEO_PATH.time12.979633.bmp"
-# Audio generation in the form of an mp3.
-ffmpeg -y -i "$VIDEO_PATH" -b:a 192K -vn "$VIDEO_PATH.audio.mp3"
-
-# This video was generated by running the following:
-# conda install -c conda-forge x265
-# ./configure --enable-nonfree --enable-gpl --prefix=$(readlink -f ../bin) --enable-libx265  --enable-rpath --extra-ldflags=-Wl,-rpath=$CONDA_PREFIX/lib --enable-filter=drawtext --enable-libfontconfig --enable-libfreetype --enable-libharfbuzz
-# ffmpeg -f lavfi -i color=size=128x128:duration=1:rate=10:color=blue -vf "drawtext=fontsize=30:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:text='Frame %{frame_num}'" -vcodec libx265 -pix_fmt yuv420p -g 2 -crf 10 h265_video.mp4 -y
-# Note that this video only has 1 stream, at index 0.
-VIDEO_PATH=$RESOURCES_DIR/h265_video.mp4
-FRAMES=(5)
-for frame in "${FRAMES[@]}"; do
-  frame_name=$(printf "%06d" "$frame")
-  ffmpeg -y -i "$VIDEO_PATH" -vf select="eq(n\,$frame)" -vsync vfr -q:v 2 "$VIDEO_PATH.stream0.frame$frame_name.bmp"
-done
-
-for bmp in "$RESOURCES_DIR"/*.bmp
-do
-  python3 "$TORCHCODEC_PATH/test/convert_image_to_tensor.py" "$bmp"
-  rm -f "$bmp"
-done
-
-# This video was generated by running the following:
-# ffmpeg -f lavfi -i testsrc=duration=5:size=640x360:rate=25,format=yuv420p -c:v libaom-av1 -crf 30 -colorspace bt709 -color_primaries bt709 -color_trc bt709 av1_video.mkv
-# Note that this video only has 1 stream, at index 0.
-VIDEO_PATH=$RESOURCES_DIR/av1_video.mkv
-FRAMES=(10)
-for frame in "${FRAMES[@]}"; do
-  frame_name=$(printf "%06d" "$frame")
-  ffmpeg -y -i "$VIDEO_PATH" -vf select="eq(n\,$frame)" -vsync vfr -q:v 2 "$VIDEO_PATH.stream0.frame$frame_name.bmp"
-done
-
-for bmp in "$RESOURCES_DIR"/*.bmp
-do
-  python3 "$TORCHCODEC_PATH/test/convert_image_to_tensor.py" "$bmp"
-  rm -f "$bmp"
-done

From 80e19807b1b149e5360115166a51f88dd97af90f Mon Sep 17 00:00:00 2001
From: Daniel Flores <danielflores3@fb.com>
Date: Wed, 20 Aug 2025 15:31:38 -0400
Subject: [PATCH 04/10] restore av1_video

---
 test/generate_reference_resources.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/test/generate_reference_resources.py b/test/generate_reference_resources.py
index 470301064..a5aab2448 100644
--- a/test/generate_reference_resources.py
+++ b/test/generate_reference_resources.py
@@ -38,16 +38,28 @@ def main():
     # Extract individual frames at specific timestamps, including the last frame of the video.
     seek_ts = [6.0, 6.1, 10.0, 12.979633]
     ts_name = [f"{seek_ts:06f}" for seek_ts in seek_ts]
-    print(ts_name)
     for ts, name in zip(seek_ts, ts_name):
         cmd = f"ffmpeg -y -ss {ts} -i {VIDEO_PATH} -frames:v 1 {VIDEO_PATH}.time{name}.bmp"
         subprocess.run(cmd.split(), check=True)
 
     # This video was generated by running the following:
-    # ffmpeg -f lavfi -i testsrc=duration=5:size=640x360:rate=25,format=yuv420p -c:v libaom-av1 -crf 30 -colorspace bt709 -color_primaries bt709 -color_trc bt709 av1_video.mkv
+    # conda install -c conda-forge x265
+    # ./configure --enable-nonfree --enable-gpl --prefix=$(readlink -f ../bin) --enable-libx265  --enable-rpath --extra-ldflags=-Wl,-rpath=$CONDA_PREFIX/lib --enable-filter=drawtext --enable-libfontconfig --enable-libfreetype --enable-libharfbuzz
+    # ffmpeg -f lavfi -i color=size=128x128:duration=1:rate=10:color=blue -vf "drawtext=fontsize=30:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:text='Frame %{frame_num}'" -vcodec libx265 -pix_fmt yuv420p -g 2 -crf 10 h265_video.mp4 -y
     # Note that this video only has 1 stream, at index 0.
     VIDEO_PATH = os.path.join(RESOURCES_DIR, "h265_video.mp4")
     FRAMES = [5]
+    for frame in FRAMES:
+        frame_name = f"{frame:06d}"
+        cmd = f"""ffmpeg -y -i {VIDEO_PATH} -vf select=eq(n\\,{frame}) -vsync vfr -q:v 2 {VIDEO_PATH}.stream0.frame{frame_name}.bmp"""
+        subprocess.run(cmd.split(), check=True)
+
+    # This video was generated by running the following:
+    # ffmpeg -f lavfi -i testsrc=duration=5:size=640x360:rate=25,format=yuv420p -c:v libaom-av1 -crf 30 -colorspace bt709 -color_primaries bt709 -color_trc bt709 av1_video.mkv
+    # Note that this video only has 1 stream, at index 0.
+    VIDEO_PATH = os.path.join(RESOURCES_DIR, "av1_video.mkv")
+    FRAMES = [10]
+
     for frame in FRAMES:
         frame_name = f"{frame:06d}"
         cmd = f"""ffmpeg -y -i {VIDEO_PATH} -vf select=eq(n\\,{frame}) -vsync vfr -q:v 2 {VIDEO_PATH}.stream0.frame{frame_name}.bmp"""

From aef1360c2be0d94abde3fa28bb5f51fcdb675993 Mon Sep 17 00:00:00 2001
From: Daniel Flores <danielflores3@fb.com>
Date: Wed, 20 Aug 2025 15:51:26 -0400
Subject: [PATCH 05/10] update resource workflow to use py

---
 .github/workflows/reference_resources.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/reference_resources.yaml b/.github/workflows/reference_resources.yaml
index 7471134ae..f847720b7 100644
--- a/.github/workflows/reference_resources.yaml
+++ b/.github/workflows/reference_resources.yaml
@@ -4,7 +4,7 @@ on:
   workflow_dispatch:
   pull_request:
     paths:
-      - test/generate_reference_resources.sh
+      - test/generate_reference_resources.py
       - .github/workflows/reference_resources.yaml # self reference
   schedule:
     - cron: '0 0 * * 0'  # on sunday
@@ -38,7 +38,7 @@ jobs:
       - name: Update pip
         run: python -m pip install --upgrade pip
 
-      - name: Instal generation dependencies
+      - name: Install generation dependencies
         run: |
           # Note that we're installing stable - this is for running a script where we're a normal PyTorch
           # user, not for building TorhCodec.
@@ -50,4 +50,4 @@ jobs:
 
       - name: Run generation reference resources
         run: |
-          test/generate_reference_resources.sh
+          python test/generate_reference_resources.py

From 8a22597fafb1339bb83d9c084f753a4e4ebe0089 Mon Sep 17 00:00:00 2001
From: Daniel Flores <danielflores3@fb.com>
Date: Thu, 21 Aug 2025 11:16:50 -0400
Subject: [PATCH 06/10] Use arg list, rename timestamp variables

---
 test/generate_reference_resources.py | 70 +++++++++++++++++++++++-----
 1 file changed, 59 insertions(+), 11 deletions(-)

diff --git a/test/generate_reference_resources.py b/test/generate_reference_resources.py
index a5aab2448..3b6f51add 100644
--- a/test/generate_reference_resources.py
+++ b/test/generate_reference_resources.py
@@ -32,15 +32,39 @@ def main():
             # so that the name of reference frames matches the index when accessing that
             # frame in the Python decoder.
             frame_name = f"{frame:06d}"
-            cmd = f"""ffmpeg -y -i {VIDEO_PATH} -map 0:{stream} -vf select=eq(n\\,{frame}) -vsync vfr -q:v 2 {VIDEO_PATH}.stream{stream}.frame{frame_name}.bmp"""
-            subprocess.run(cmd.split(), check=True)
+            cmd = [
+                "ffmpeg",
+                "-y",
+                "-i",
+                VIDEO_PATH,
+                "-map",
+                f"0:{stream}",
+                "-vf",
+                f"select=eq(n\\,{frame})",
+                "-vsync",
+                "vfr",
+                "-q:v",
+                "2",
+                f"{VIDEO_PATH}.stream{stream}.frame{frame_name}.bmp",
+            ]
+            subprocess.run(cmd, check=True)
 
     # Extract individual frames at specific timestamps, including the last frame of the video.
-    seek_ts = [6.0, 6.1, 10.0, 12.979633]
-    ts_name = [f"{seek_ts:06f}" for seek_ts in seek_ts]
-    for ts, name in zip(seek_ts, ts_name):
-        cmd = f"ffmpeg -y -ss {ts} -i {VIDEO_PATH} -frames:v 1 {VIDEO_PATH}.time{name}.bmp"
-        subprocess.run(cmd.split(), check=True)
+    seek_timestamp = [6.0, 6.1, 10.0, 12.979633]
+    timestamp_name = [f"{seek_timestamp:06f}" for seek_timestamp in seek_timestamp]
+    for timestamp, name in zip(seek_timestamp, timestamp_name):
+        cmd = [
+            "ffmpeg",
+            "-y",
+            "-ss",
+            str(timestamp),
+            "-i",
+            VIDEO_PATH,
+            "-frames:v",
+            "1",
+            f"{VIDEO_PATH}.time{name}.bmp",
+        ]
+        subprocess.run(cmd, check=True)
 
     # This video was generated by running the following:
     # conda install -c conda-forge x265
@@ -51,8 +75,20 @@ def main():
     FRAMES = [5]
     for frame in FRAMES:
         frame_name = f"{frame:06d}"
-        cmd = f"""ffmpeg -y -i {VIDEO_PATH} -vf select=eq(n\\,{frame}) -vsync vfr -q:v 2 {VIDEO_PATH}.stream0.frame{frame_name}.bmp"""
-        subprocess.run(cmd.split(), check=True)
+        cmd = [
+            "ffmpeg",
+            "-y",
+            "-i",
+            VIDEO_PATH,
+            "-vf",
+            f"select=eq(n\\,{frame})",
+            "-vsync",
+            "vfr",
+            "-q:v",
+            "2",
+            f"{VIDEO_PATH}.stream0.frame{frame_name}.bmp",
+        ]
+        subprocess.run(cmd, check=True)
 
     # This video was generated by running the following:
     # ffmpeg -f lavfi -i testsrc=duration=5:size=640x360:rate=25,format=yuv420p -c:v libaom-av1 -crf 30 -colorspace bt709 -color_primaries bt709 -color_trc bt709 av1_video.mkv
@@ -62,8 +98,20 @@ def main():
 
     for frame in FRAMES:
         frame_name = f"{frame:06d}"
-        cmd = f"""ffmpeg -y -i {VIDEO_PATH} -vf select=eq(n\\,{frame}) -vsync vfr -q:v 2 {VIDEO_PATH}.stream0.frame{frame_name}.bmp"""
-        subprocess.run(cmd.split(), check=True)
+        cmd = [
+            "ffmpeg",
+            "-y",
+            "-i",
+            VIDEO_PATH,
+            "-vf",
+            f"select=eq(n\\,{frame})",
+            "-vsync",
+            "vfr",
+            "-q:v",
+            "2",
+            f"{VIDEO_PATH}.stream0.frame{frame_name}.bmp",
+        ]
+        subprocess.run(cmd, check=True)
 
     for bmp in [f for f in os.listdir(RESOURCES_DIR) if f.endswith(".bmp")]:
         bmp_path = os.path.join(RESOURCES_DIR, bmp)

From 6c96b2e4a6c40ea02537f8927783182a16b5bb5d Mon Sep 17 00:00:00 2001
From: Daniel Flores <danielflores3@fb.com>
Date: Fri, 22 Aug 2025 15:11:20 -0400
Subject: [PATCH 07/10] Delete convert_image_to_tensor script

---
 test/convert_image_to_tensor.py      | 24 --------------
 test/generate_reference_resources.py | 47 ++++++++++++++++++----------
 2 files changed, 31 insertions(+), 40 deletions(-)
 delete mode 100644 test/convert_image_to_tensor.py

diff --git a/test/convert_image_to_tensor.py b/test/convert_image_to_tensor.py
deleted file mode 100644
index 9a03288c8..000000000
--- a/test/convert_image_to_tensor.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-import os
-import sys
-
-import numpy as np
-
-import torch
-from PIL import Image
-
-if __name__ == "__main__":
-    img_file = sys.argv[1]
-    # Get base filename without extension
-    base_filename = os.path.splitext(img_file)[0]
-    pil_image = Image.open(img_file)
-    img_tensor = torch.from_numpy(np.asarray(pil_image))
-    print(img_tensor.shape)
-    print(img_tensor.dtype)
-    # Save tensor to disk
-    torch.save(img_tensor, base_filename + ".pt", _use_new_zipfile_serialization=True)
diff --git a/test/generate_reference_resources.py b/test/generate_reference_resources.py
index 3b6f51add..8fae6c2f2 100644
--- a/test/generate_reference_resources.py
+++ b/test/generate_reference_resources.py
@@ -1,7 +1,11 @@
 #!/usr/bin/env python3
 import os
 import subprocess
-import sys
+
+import numpy as np
+
+import torch
+from PIL import Image
 
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
@@ -13,9 +17,23 @@
 # from source media already checked into the repo.
 
 
+def convert_image_to_tensor(image_path):
+    if not os.path.exists(image_path):
+        return
+    # Get base filename without extension
+    base_filename = os.path.splitext(image_path)[0]
+    pil_image = Image.open(image_path)
+    img_tensor = torch.from_numpy(np.asarray(pil_image))
+    print(img_tensor.shape)
+    print(img_tensor.dtype)
+    # Save tensor to disk
+    torch.save(img_tensor, base_filename + ".pt", _use_new_zipfile_serialization=True)
+    os.remove(image_path)
+
+
 def main():
     SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
-    TORCHCODEC_PATH = os.path.join(SCRIPT_DIR, "..")
+    TORCHCODEC_PATH = os.path.dirname(SCRIPT_DIR)
     RESOURCES_DIR = os.path.join(TORCHCODEC_PATH, "test", "resources")
     VIDEO_PATH = os.path.join(RESOURCES_DIR, "nasa_13013.mp4")
 
@@ -31,6 +49,7 @@ def main():
             # frames would result in 1-based index naming. We enforce 0-based index naming
             # so that the name of reference frames matches the index when accessing that
             # frame in the Python decoder.
+            output_bmp = f"{VIDEO_PATH}.stream{stream}.frame{frame:06d}.bmp"
             frame_name = f"{frame:06d}"
             cmd = [
                 "ffmpeg",
@@ -45,14 +64,16 @@ def main():
                 "vfr",
                 "-q:v",
                 "2",
-                f"{VIDEO_PATH}.stream{stream}.frame{frame_name}.bmp",
+                output_bmp,
             ]
             subprocess.run(cmd, check=True)
+            convert_image_to_tensor(output_bmp)
 
     # Extract individual frames at specific timestamps, including the last frame of the video.
     seek_timestamp = [6.0, 6.1, 10.0, 12.979633]
     timestamp_name = [f"{seek_timestamp:06f}" for seek_timestamp in seek_timestamp]
     for timestamp, name in zip(seek_timestamp, timestamp_name):
+        output_bmp = f"{VIDEO_PATH}.time{name}.bmp"
         cmd = [
             "ffmpeg",
             "-y",
@@ -65,6 +86,7 @@ def main():
             f"{VIDEO_PATH}.time{name}.bmp",
         ]
         subprocess.run(cmd, check=True)
+        convert_image_to_tensor(output_bmp)
 
     # This video was generated by running the following:
     # conda install -c conda-forge x265
@@ -75,6 +97,7 @@ def main():
     FRAMES = [5]
     for frame in FRAMES:
         frame_name = f"{frame:06d}"
+        output_bmp = f"{VIDEO_PATH}.stream0.frame{frame_name}.bmp"
         cmd = [
             "ffmpeg",
             "-y",
@@ -86,9 +109,10 @@ def main():
             "vfr",
             "-q:v",
             "2",
-            f"{VIDEO_PATH}.stream0.frame{frame_name}.bmp",
+            output_bmp,
         ]
         subprocess.run(cmd, check=True)
+        convert_image_to_tensor(output_bmp)
 
     # This video was generated by running the following:
     # ffmpeg -f lavfi -i testsrc=duration=5:size=640x360:rate=25,format=yuv420p -c:v libaom-av1 -crf 30 -colorspace bt709 -color_primaries bt709 -color_trc bt709 av1_video.mkv
@@ -98,6 +122,7 @@ def main():
 
     for frame in FRAMES:
         frame_name = f"{frame:06d}"
+        output_bmp = f"{VIDEO_PATH}.stream0.frame{frame_name}.bmp"
         cmd = [
             "ffmpeg",
             "-y",
@@ -109,20 +134,10 @@ def main():
             "vfr",
             "-q:v",
             "2",
-            f"{VIDEO_PATH}.stream0.frame{frame_name}.bmp",
+            output_bmp,
         ]
         subprocess.run(cmd, check=True)
-
-    for bmp in [f for f in os.listdir(RESOURCES_DIR) if f.endswith(".bmp")]:
-        bmp_path = os.path.join(RESOURCES_DIR, bmp)
-        subprocess.run(
-            [
-                sys.executable,
-                os.path.join(TORCHCODEC_PATH, "test", "convert_image_to_tensor.py"),
-                bmp_path,
-            ]
-        )
-        os.remove(bmp_path)
+        convert_image_to_tensor(output_bmp)
 
 
 if __name__ == "__main__":

From 9148c525f4b587572875f97a040b87a034ac6152 Mon Sep 17 00:00:00 2001
From: Daniel Flores <danielflores3@fb.com>
Date: Tue, 26 Aug 2025 16:01:55 -0400
Subject: [PATCH 08/10] reflect comments

---
 test/generate_reference_resources.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/test/generate_reference_resources.py b/test/generate_reference_resources.py
index 8fae6c2f2..df6beaf54 100644
--- a/test/generate_reference_resources.py
+++ b/test/generate_reference_resources.py
@@ -1,4 +1,9 @@
-#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
 import os
 import subprocess
 
@@ -7,12 +12,6 @@
 import torch
 from PIL import Image
 
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
 # Run this script to update the resources used in unit tests. The resources are all derived
 # from source media already checked into the repo.
 
@@ -24,8 +23,6 @@ def convert_image_to_tensor(image_path):
     base_filename = os.path.splitext(image_path)[0]
     pil_image = Image.open(image_path)
     img_tensor = torch.from_numpy(np.asarray(pil_image))
-    print(img_tensor.shape)
-    print(img_tensor.dtype)
     # Save tensor to disk
     torch.save(img_tensor, base_filename + ".pt", _use_new_zipfile_serialization=True)
     os.remove(image_path)

From 390da5aad5ee4db3542cb7caceaf41d7bd33ddd7 Mon Sep 17 00:00:00 2001
From: Daniel Flores <danielflores3@fb.com>
Date: Tue, 26 Aug 2025 16:28:36 -0400
Subject: [PATCH 09/10] Factor out shared ffmpeg functions

---
 test/generate_reference_resources.py | 101 +++++++++++----------------
 1 file changed, 40 insertions(+), 61 deletions(-)

diff --git a/test/generate_reference_resources.py b/test/generate_reference_resources.py
index df6beaf54..18c6299d3 100644
--- a/test/generate_reference_resources.py
+++ b/test/generate_reference_resources.py
@@ -28,6 +28,40 @@ def convert_image_to_tensor(image_path):
     os.remove(image_path)
 
 
+def get_frame_by_index(video_path, frame, output_path, stream):
+    cmd = [
+        "ffmpeg",
+        "-y",
+        "-i",
+        video_path,
+        "-map",
+        f"0:{stream}",
+        "-vf",
+        f"select=eq(n\\,{frame})",
+        "-vsync",
+        "vfr",
+        "-q:v",
+        "2",
+        output_path,
+    ]
+    subprocess.run(cmd, check=True)
+
+
+def get_frame_by_timestamp(video_path, timestamp, output_path):
+    cmd = [
+        "ffmpeg",
+        "-y",
+        "-ss",
+        str(timestamp),
+        "-i",
+        video_path,
+        "-frames:v",
+        "1",
+        output_path,
+    ]
+    subprocess.run(cmd, check=True)
+
+
 def main():
     SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
     TORCHCODEC_PATH = os.path.dirname(SCRIPT_DIR)
@@ -47,23 +81,7 @@ def main():
             # so that the name of reference frames matches the index when accessing that
             # frame in the Python decoder.
             output_bmp = f"{VIDEO_PATH}.stream{stream}.frame{frame:06d}.bmp"
-            frame_name = f"{frame:06d}"
-            cmd = [
-                "ffmpeg",
-                "-y",
-                "-i",
-                VIDEO_PATH,
-                "-map",
-                f"0:{stream}",
-                "-vf",
-                f"select=eq(n\\,{frame})",
-                "-vsync",
-                "vfr",
-                "-q:v",
-                "2",
-                output_bmp,
-            ]
-            subprocess.run(cmd, check=True)
+            get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=stream)
             convert_image_to_tensor(output_bmp)
 
     # Extract individual frames at specific timestamps, including the last frame of the video.
@@ -71,18 +89,7 @@ def main():
     timestamp_name = [f"{seek_timestamp:06f}" for seek_timestamp in seek_timestamp]
     for timestamp, name in zip(seek_timestamp, timestamp_name):
         output_bmp = f"{VIDEO_PATH}.time{name}.bmp"
-        cmd = [
-            "ffmpeg",
-            "-y",
-            "-ss",
-            str(timestamp),
-            "-i",
-            VIDEO_PATH,
-            "-frames:v",
-            "1",
-            f"{VIDEO_PATH}.time{name}.bmp",
-        ]
-        subprocess.run(cmd, check=True)
+        get_frame_by_timestamp(VIDEO_PATH, timestamp, output_bmp)
         convert_image_to_tensor(output_bmp)
 
     # This video was generated by running the following:
@@ -93,22 +100,8 @@ def main():
     VIDEO_PATH = os.path.join(RESOURCES_DIR, "h265_video.mp4")
     FRAMES = [5]
     for frame in FRAMES:
-        frame_name = f"{frame:06d}"
-        output_bmp = f"{VIDEO_PATH}.stream0.frame{frame_name}.bmp"
-        cmd = [
-            "ffmpeg",
-            "-y",
-            "-i",
-            VIDEO_PATH,
-            "-vf",
-            f"select=eq(n\\,{frame})",
-            "-vsync",
-            "vfr",
-            "-q:v",
-            "2",
-            output_bmp,
-        ]
-        subprocess.run(cmd, check=True)
+        output_bmp = f"{VIDEO_PATH}.stream0.frame{frame:06d}.bmp"
+        get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=0)
         convert_image_to_tensor(output_bmp)
 
     # This video was generated by running the following:
@@ -118,22 +111,8 @@ def main():
     FRAMES = [10]
 
     for frame in FRAMES:
-        frame_name = f"{frame:06d}"
-        output_bmp = f"{VIDEO_PATH}.stream0.frame{frame_name}.bmp"
-        cmd = [
-            "ffmpeg",
-            "-y",
-            "-i",
-            VIDEO_PATH,
-            "-vf",
-            f"select=eq(n\\,{frame})",
-            "-vsync",
-            "vfr",
-            "-q:v",
-            "2",
-            output_bmp,
-        ]
-        subprocess.run(cmd, check=True)
+        output_bmp = f"{VIDEO_PATH}.stream0.frame{frame:06d}.bmp"
+        get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=0)
         convert_image_to_tensor(output_bmp)
 
 

From de74ae764dee8784b0c26211960595d8f601b923 Mon Sep 17 00:00:00 2001
From: Daniel Flores <danielflores3@fb.com>
Date: Tue, 26 Aug 2025 16:52:18 -0400
Subject: [PATCH 10/10] Update os to Path

---
 test/generate_reference_resources.py | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/test/generate_reference_resources.py b/test/generate_reference_resources.py
index 18c6299d3..5ae062111 100644
--- a/test/generate_reference_resources.py
+++ b/test/generate_reference_resources.py
@@ -4,8 +4,8 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-import os
 import subprocess
+from pathlib import Path
 
 import numpy as np
 
@@ -17,15 +17,18 @@
 
 
 def convert_image_to_tensor(image_path):
-    if not os.path.exists(image_path):
+    image_path = Path(image_path)
+    if not image_path.exists():
         return
     # Get base filename without extension
-    base_filename = os.path.splitext(image_path)[0]
+    base_filename = image_path.with_suffix("")
     pil_image = Image.open(image_path)
     img_tensor = torch.from_numpy(np.asarray(pil_image))
     # Save tensor to disk
-    torch.save(img_tensor, base_filename + ".pt", _use_new_zipfile_serialization=True)
-    os.remove(image_path)
+    torch.save(
+        img_tensor, str(base_filename) + ".pt", _use_new_zipfile_serialization=True
+    )
+    image_path.unlink()
 
 
 def get_frame_by_index(video_path, frame, output_path, stream):
@@ -63,10 +66,10 @@ def get_frame_by_timestamp(video_path, timestamp, output_path):
 
 
 def main():
-    SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
-    TORCHCODEC_PATH = os.path.dirname(SCRIPT_DIR)
-    RESOURCES_DIR = os.path.join(TORCHCODEC_PATH, "test", "resources")
-    VIDEO_PATH = os.path.join(RESOURCES_DIR, "nasa_13013.mp4")
+    SCRIPT_DIR = Path(__file__).resolve().parent
+    TORCHCODEC_PATH = SCRIPT_DIR.parent
+    RESOURCES_DIR = TORCHCODEC_PATH / "test" / "resources"
+    VIDEO_PATH = RESOURCES_DIR / "nasa_13013.mp4"
 
     # Last generated with ffmpeg version 4.3
     #
@@ -97,7 +100,7 @@ def main():
     # ./configure --enable-nonfree --enable-gpl --prefix=$(readlink -f ../bin) --enable-libx265  --enable-rpath --extra-ldflags=-Wl,-rpath=$CONDA_PREFIX/lib --enable-filter=drawtext --enable-libfontconfig --enable-libfreetype --enable-libharfbuzz
     # ffmpeg -f lavfi -i color=size=128x128:duration=1:rate=10:color=blue -vf "drawtext=fontsize=30:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:text='Frame %{frame_num}'" -vcodec libx265 -pix_fmt yuv420p -g 2 -crf 10 h265_video.mp4 -y
     # Note that this video only has 1 stream, at index 0.
-    VIDEO_PATH = os.path.join(RESOURCES_DIR, "h265_video.mp4")
+    VIDEO_PATH = RESOURCES_DIR / "h265_video.mp4"
     FRAMES = [5]
     for frame in FRAMES:
         output_bmp = f"{VIDEO_PATH}.stream0.frame{frame:06d}.bmp"
@@ -107,7 +110,7 @@ def main():
     # This video was generated by running the following:
     # ffmpeg -f lavfi -i testsrc=duration=5:size=640x360:rate=25,format=yuv420p -c:v libaom-av1 -crf 30 -colorspace bt709 -color_primaries bt709 -color_trc bt709 av1_video.mkv
     # Note that this video only has 1 stream, at index 0.
-    VIDEO_PATH = os.path.join(RESOURCES_DIR, "av1_video.mkv")
+    VIDEO_PATH = RESOURCES_DIR / "av1_video.mkv"
     FRAMES = [10]
 
     for frame in FRAMES: