Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions test/decoders/test_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import io
import os
from functools import partial

Expand Down Expand Up @@ -922,6 +923,76 @@ def get_all_frames(asset, sample_rate=None, stop_seconds=None):

torch.testing.assert_close(frames_downsampled_to_8000, frames_8000_native)

@pytest.mark.parametrize("buffering", (0, 1024))
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_file_like_decoding(self, buffering, device):
# Test to ensure that seeks and reads are actually going through the
# methods on the IO object.
#
# Note that we do not check the number of reads in this test past the
# initialization step. That is because the number of reads that FFmpeg
# issues is dependent on the size of the internal buffer, the amount of
# data per frame and the size of the video file. We can't control
# the size of the buffer from the Python layer and we don't know the
# amount of data per frame. We also can't know the amount of data per
# frame from first principles, because it is data-dependent.
Comment on lines +937 to +938
Copy link
Contributor

@NicolasHug NicolasHug Apr 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could store the number of samples of each frames in the .json files that we check in. Not sure that would be enough if the number of reads still depends on the internal FFmpeg buffers, as you mentioned.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not even sure that would be enough to calculate the number of bytes decoded for each frame; I think we'd be going down the path of tracking the encoded data itself, which I don't think we should do. And that still wouldn't get us the determinism we need, as there's the internal buffers.

class FileOpCounter(io.RawIOBase):
    """Wrap a raw IO object and count calls to read() and seek().

    Delegates all actual I/O to the wrapped file object; only the call
    counters are added. Used to verify that the decoder's file-like
    path really routes its I/O through the Python object's methods.
    """

    def __init__(self, file: io.RawIOBase):
        self._file = file
        self.num_seeks = 0  # number of seek() calls observed
        self.num_reads = 0  # number of read() calls observed

    def read(self, size: int) -> bytes:
        self.num_reads += 1
        return self._file.read(size)

    def seek(self, offset: int, whence: int) -> int:
        # io.IOBase.seek returns the new absolute stream position as an
        # int (the original annotation incorrectly said bytes).
        self.num_seeks += 1
        return self._file.seek(offset, whence)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just noting this so we don't forget, we should test for proper errors if the user provides a malformed object that e.g. doesn't support read() or seek() correctly. E.g. if read() doesn't exist, right now we only error:

    def create_from_file_like(
        file_like: Union[io.RawIOBase, io.BytesIO], seek_mode: Optional[str] = None
    ) -> torch.Tensor:
        assert _pybind_ops is not None
>       return _convert_to_tensor(_pybind_ops.create_from_file_like(file_like, seek_mode))
E       NotImplementedError

which isn't super informative. I guess the check can be done on the public Pythons side rather than on the ops?

Also, we should try to hit some of the error checks, e.g. catching in the tests that we properly raise https://github.com/pytorch/torchcodec/blob/f416dcf8cb709d679cd739d2a56a761d8658468f/src/torchcodec/decoders/_core/AVIOFileLikeContext.cpp#L53-L59 when doing something bad like

return self._file.read(size + 10)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Excellent point. I'll put up another PR with such a test.


# Wrap the opened file so that every read()/seek() issued by the
# decoder is counted.
file_counter = FileOpCounter(
open(NASA_VIDEO.path, mode="rb", buffering=buffering)
)
decoder = create_from_file_like(file_counter, "approximate")
add_video_stream(decoder, device=device)

frame0, *_ = get_next_frame(decoder)
reference_frame0 = NASA_VIDEO.get_frame_data_by_index(0)
assert_frames_equal(frame0, reference_frame0.to(device))

# We don't assert the actual number of reads and seeks because that is
# dependent on both the size of the internal buffers on the C++ side and
# how much is read during initialization. Note that we still decode
# several frames at startup to improve metadata accuracy.
assert file_counter.num_seeks > 0
assert file_counter.num_reads > 0

initialization_seeks = file_counter.num_seeks

# NOTE(review): 12.979633 is presumably the pts of the last frame
# (index 289) — confirm against NASA_VIDEO's stream metadata.
seek_to_pts(decoder, 12.979633)

frame_last, *_ = get_next_frame(decoder)
reference_frame_last = NASA_VIDEO.get_frame_data_by_index(289)
assert_frames_equal(frame_last, reference_frame_last.to(device))

# A forward seek to the end of the stream must have gone through the
# file-like object's seek() at least once more.
assert file_counter.num_seeks > initialization_seeks

last_frame_seeks = file_counter.num_seeks

# We're smart enough to avoid seeks within key frames and our test
# files have very few keyframes. However, we can force a seek by
# requesting a backwards seek.
seek_to_pts(decoder, 6.0)

frame_time6, *_ = get_next_frame(decoder)
reference_frame_time6 = NASA_VIDEO.get_frame_data_by_index(
INDEX_OF_FRAME_AT_6_SECONDS
)
assert_frames_equal(frame_time6, reference_frame_time6.to(device))

assert file_counter.num_seeks > last_frame_seeks

# Allow running this test file directly (python test_ops.py) instead of
# through a pytest invocation.
if __name__ == "__main__":
pytest.main()
Loading