diff --git a/.github/workflows/reference_resources.yaml b/.github/workflows/reference_resources.yaml
index 7471134ae..f847720b7 100644
--- a/.github/workflows/reference_resources.yaml
+++ b/.github/workflows/reference_resources.yaml
@@ -4,7 +4,7 @@ on:
   workflow_dispatch:
   pull_request:
     paths:
-      - test/generate_reference_resources.sh
+      - test/generate_reference_resources.py
       - .github/workflows/reference_resources.yaml # self reference
   schedule:
     - cron: '0 0 * * 0' # on sunday
@@ -38,7 +38,7 @@ jobs:
       - name: Update pip
         run: python -m pip install --upgrade pip

-      - name: Instal generation dependencies
+      - name: Install generation dependencies
         run: |
           # Note that we're installing stable - this is for running a script where we're a normal PyTorch
           # user, not for building TorhCodec.
@@ -50,4 +50,4 @@

       - name: Run generation reference resources
         run: |
-          test/generate_reference_resources.sh
+          python test/generate_reference_resources.py
diff --git a/test/convert_image_to_tensor.py b/test/convert_image_to_tensor.py
deleted file mode 100644
index 9a03288c8..000000000
--- a/test/convert_image_to_tensor.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-import os
-import sys
-
-import numpy as np
-
-import torch
-from PIL import Image
-
-if __name__ == "__main__":
-    img_file = sys.argv[1]
-    # Get base filename without extension
-    base_filename = os.path.splitext(img_file)[0]
-    pil_image = Image.open(img_file)
-    img_tensor = torch.from_numpy(np.asarray(pil_image))
-    print(img_tensor.shape)
-    print(img_tensor.dtype)
-    # Save tensor to disk
-    torch.save(img_tensor, base_filename + ".pt", _use_new_zipfile_serialization=True)
diff --git a/test/generate_reference_resources.py b/test/generate_reference_resources.py
new file mode 100644
index 000000000..5ae062111
--- /dev/null
+++ b/test/generate_reference_resources.py
@@ -0,0 +1,123 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import subprocess
+from pathlib import Path
+
+import numpy as np
+
+import torch
+from PIL import Image
+
+# Run this script to update the resources used in unit tests. The resources are all derived
+# from source media already checked into the repo.
+
+
+def convert_image_to_tensor(image_path):
+    image_path = Path(image_path)
+    if not image_path.exists():
+        return
+    # Get base filename without extension
+    base_filename = image_path.with_suffix("")
+    pil_image = Image.open(image_path)
+    img_tensor = torch.from_numpy(np.asarray(pil_image))
+    # Save tensor to disk
+    torch.save(
+        img_tensor, str(base_filename) + ".pt", _use_new_zipfile_serialization=True
+    )
+    image_path.unlink()
+
+
+def get_frame_by_index(video_path, frame, output_path, stream):
+    cmd = [
+        "ffmpeg",
+        "-y",
+        "-i",
+        video_path,
+        "-map",
+        f"0:{stream}",
+        "-vf",
+        f"select=eq(n\\,{frame})",
+        "-vsync",
+        "vfr",
+        "-q:v",
+        "2",
+        output_path,
+    ]
+    subprocess.run(cmd, check=True)
+
+
+def get_frame_by_timestamp(video_path, timestamp, output_path):
+    cmd = [
+        "ffmpeg",
+        "-y",
+        "-ss",
+        str(timestamp),
+        "-i",
+        video_path,
+        "-frames:v",
+        "1",
+        output_path,
+    ]
+    subprocess.run(cmd, check=True)
+
+
+def main():
+    SCRIPT_DIR = Path(__file__).resolve().parent
+    TORCHCODEC_PATH = SCRIPT_DIR.parent
+    RESOURCES_DIR = TORCHCODEC_PATH / "test" / "resources"
+    VIDEO_PATH = RESOURCES_DIR / "nasa_13013.mp4"
+
+    # Last generated with ffmpeg version 4.3
+    #
+    # Note: The naming scheme used here must match the naming scheme used to load
+    # tensors in ./utils.py.
+    STREAMS = [0, 3]
+    FRAMES = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 15, 20, 25, 30, 35, 386, 387, 388, 389]
+    for stream in STREAMS:
+        for frame in FRAMES:
+            # Note that we are using 0-based index naming. Asking ffmpeg to number output
+            # frames would result in 1-based index naming. We enforce 0-based index naming
+            # so that the name of reference frames matches the index when accessing that
+            # frame in the Python decoder.
+            output_bmp = f"{VIDEO_PATH}.stream{stream}.frame{frame:06d}.bmp"
+            get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=stream)
+            convert_image_to_tensor(output_bmp)
+
+    # Extract individual frames at specific timestamps, including the last frame of the video.
+    seek_timestamp = [6.0, 6.1, 10.0, 12.979633]
+    timestamp_name = [f"{seek_timestamp:06f}" for seek_timestamp in seek_timestamp]
+    for timestamp, name in zip(seek_timestamp, timestamp_name):
+        output_bmp = f"{VIDEO_PATH}.time{name}.bmp"
+        get_frame_by_timestamp(VIDEO_PATH, timestamp, output_bmp)
+        convert_image_to_tensor(output_bmp)
+
+    # This video was generated by running the following:
+    # conda install -c conda-forge x265
+    # ./configure --enable-nonfree --enable-gpl --prefix=$(readlink -f ../bin) --enable-libx265 --enable-rpath --extra-ldflags=-Wl,-rpath=$CONDA_PREFIX/lib --enable-filter=drawtext --enable-libfontconfig --enable-libfreetype --enable-libharfbuzz
+    # ffmpeg -f lavfi -i color=size=128x128:duration=1:rate=10:color=blue -vf "drawtext=fontsize=30:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:text='Frame %{frame_num}'" -vcodec libx265 -pix_fmt yuv420p -g 2 -crf 10 h265_video.mp4 -y
+    # Note that this video only has 1 stream, at index 0.
+    VIDEO_PATH = RESOURCES_DIR / "h265_video.mp4"
+    FRAMES = [5]
+    for frame in FRAMES:
+        output_bmp = f"{VIDEO_PATH}.stream0.frame{frame:06d}.bmp"
+        get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=0)
+        convert_image_to_tensor(output_bmp)
+
+    # This video was generated by running the following:
+    # ffmpeg -f lavfi -i testsrc=duration=5:size=640x360:rate=25,format=yuv420p -c:v libaom-av1 -crf 30 -colorspace bt709 -color_primaries bt709 -color_trc bt709 av1_video.mkv
+    # Note that this video only has 1 stream, at index 0.
+    VIDEO_PATH = RESOURCES_DIR / "av1_video.mkv"
+    FRAMES = [10]
+
+    for frame in FRAMES:
+        output_bmp = f"{VIDEO_PATH}.stream0.frame{frame:06d}.bmp"
+        get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=0)
+        convert_image_to_tensor(output_bmp)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/test/generate_reference_resources.sh b/test/generate_reference_resources.sh
deleted file mode 100755
index fba098a75..000000000
--- a/test/generate_reference_resources.sh
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/bin/bash
-
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-# Run this script to update the resources used in unit tests. The resources are all derived
-# from source media already checked into the repo.
-
-# Fail loudly on errors.
-set -x
-set -e
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-TORCHCODEC_PATH=$SCRIPT_DIR/../
-RESOURCES_DIR=$TORCHCODEC_PATH/test/resources
-VIDEO_PATH=$RESOURCES_DIR/nasa_13013.mp4
-
-# Last generated with ffmpeg version 4.3
-#
-# Note: The naming scheme used here must match the naming scheme used to load
-# tensors in ./utils.py.
-STREAMS=(0 3)
-FRAMES=(0 1 2 3 4 5 6 7 8 9)
-FRAMES+=(15 20 25 30 35)
-FRAMES+=(386 387 388 389)
-for stream in "${STREAMS[@]}"; do
-    for frame in "${FRAMES[@]}"; do
-        # Note that we are using 0-based index naming. Asking ffmpeg to number output
-        # frames would result in 1-based index naming. We enforce 0-based index naming
-        # so that the name of reference frames matches the index when accessing that
-        # frame in the Python decoder.
-        frame_name=$(printf "%06d" "$frame")
-        ffmpeg -y -i "$VIDEO_PATH" -map 0:"$stream" -vf select="eq(n\,$frame)" -vsync vfr -q:v 2 "$VIDEO_PATH.stream$stream.frame$frame_name.bmp"
-    done
-done
-ffmpeg -y -ss 6.0 -i "$VIDEO_PATH" -frames:v 1 "$VIDEO_PATH.time6.000000.bmp"
-ffmpeg -y -ss 6.1 -i "$VIDEO_PATH" -frames:v 1 "$VIDEO_PATH.time6.100000.bmp"
-ffmpeg -y -ss 10.0 -i "$VIDEO_PATH" -frames:v 1 "$VIDEO_PATH.time10.000000.bmp"
-# This is the last frame of this video.
-ffmpeg -y -ss 12.979633 -i "$VIDEO_PATH" -frames:v 1 "$VIDEO_PATH.time12.979633.bmp"
-# Audio generation in the form of an mp3.
-ffmpeg -y -i "$VIDEO_PATH" -b:a 192K -vn "$VIDEO_PATH.audio.mp3"
-
-# This video was generated by running the following:
-# conda install -c conda-forge x265
-# ./configure --enable-nonfree --enable-gpl --prefix=$(readlink -f ../bin) --enable-libx265 --enable-rpath --extra-ldflags=-Wl,-rpath=$CONDA_PREFIX/lib --enable-filter=drawtext --enable-libfontconfig --enable-libfreetype --enable-libharfbuzz
-# ffmpeg -f lavfi -i color=size=128x128:duration=1:rate=10:color=blue -vf "drawtext=fontsize=30:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:text='Frame %{frame_num}'" -vcodec libx265 -pix_fmt yuv420p -g 2 -crf 10 h265_video.mp4 -y
-# Note that this video only has 1 stream, at index 0.
-VIDEO_PATH=$RESOURCES_DIR/h265_video.mp4
-FRAMES=(5)
-for frame in "${FRAMES[@]}"; do
-    frame_name=$(printf "%06d" "$frame")
-    ffmpeg -y -i "$VIDEO_PATH" -vf select="eq(n\,$frame)" -vsync vfr -q:v 2 "$VIDEO_PATH.stream0.frame$frame_name.bmp"
-done
-
-for bmp in "$RESOURCES_DIR"/*.bmp
-do
-    python3 "$TORCHCODEC_PATH/test/convert_image_to_tensor.py" "$bmp"
-    rm -f "$bmp"
-done
-
-# This video was generated by running the following:
-# ffmpeg -f lavfi -i testsrc=duration=5:size=640x360:rate=25,format=yuv420p -c:v libaom-av1 -crf 30 -colorspace bt709 -color_primaries bt709 -color_trc bt709 av1_video.mkv
-# Note that this video only has 1 stream, at index 0.
-VIDEO_PATH=$RESOURCES_DIR/av1_video.mkv
-FRAMES=(10)
-for frame in "${FRAMES[@]}"; do
-    frame_name=$(printf "%06d" "$frame")
-    ffmpeg -y -i "$VIDEO_PATH" -vf select="eq(n\,$frame)" -vsync vfr -q:v 2 "$VIDEO_PATH.stream0.frame$frame_name.bmp"
-done
-
-for bmp in "$RESOURCES_DIR"/*.bmp
-do
-    python3 "$TORCHCODEC_PATH/test/convert_image_to_tensor.py" "$bmp"
-    rm -f "$bmp"
-done
diff --git a/test/utils.py b/test/utils.py
index ed611cfda..d2f500533 100644
--- a/test/utils.py
+++ b/test/utils.py
@@ -517,6 +517,8 @@ def sample_format(self) -> str:
         return self.stream_infos[self.default_stream_index].sample_format


+# This file was generated with:
+# ffmpeg -y -i test/resources/nasa_13013.mp4 -b:a 192K -vn test/resources/nasa_13013.mp4.audio.mp3
 NASA_AUDIO_MP3 = TestAudio(
     filename="nasa_13013.mp4.audio.mp3",
     default_stream_index=0,
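For context on how the generated resources are consumed, here is a minimal sketch (not part of the diff, and not the repo's actual test code) of loading one of the .pt reference tensors that generate_reference_resources.py writes. The filename follows the script's naming scheme for stream 0, frame 5 of nasa_13013.mp4; the path and the shape comment are assumptions based on the script above.

# Minimal sketch, assuming the script has been run from the repo root and
# test/resources/ contains the regenerated reference tensors.
from pathlib import Path

import torch

resources = Path("test/resources")
# convert_image_to_tensor() strips the ".bmp" suffix and appends ".pt",
# so stream 0 / frame 5 of nasa_13013.mp4 ends up with this name.
reference = resources / "nasa_13013.mp4.stream0.frame000005.pt"
frame = torch.load(reference)  # height x width x channels uint8, as PIL decoded the BMP
print(frame.shape, frame.dtype)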