From 85d4fc15ab7bc4c3c78680415faf600714aecefd Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 20 Mar 2025 17:21:08 +0000 Subject: [PATCH 1/2] Debug --- test/decoders/test_decoders.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/decoders/test_decoders.py b/test/decoders/test_decoders.py index 6885b51a7..509df666b 100644 --- a/test/decoders/test_decoders.py +++ b/test/decoders/test_decoders.py @@ -1157,6 +1157,12 @@ def test_sample_rate_conversion(self, start_seconds, stop_seconds): rtol=rtol, ) + def test_upsample(self): + asset = NASA_AUDIO_MP3 + assert asset.sample_rate == 8000 + decoder = AudioDecoder(asset.path, sample_rate=44100) + decoder.get_samples_played_in_range(start_seconds=0) + def test_s16_ffmpeg4_bug(self): # s16 fails on FFmpeg4 but can be decoded on other versions. # Debugging logs show that we're hitting: From cfb190b6567e0544ae463f8620755734b5be85f8 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 20 Mar 2025 18:59:43 +0000 Subject: [PATCH 2/2] Fix sample rate conversion bug with multi-channel data --- src/torchcodec/decoders/_core/VideoDecoder.cpp | 15 ++++++++++----- test/decoders/test_decoders.py | 6 ++++-- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp index f451ee580..3b0e5b019 100644 --- a/src/torchcodec/decoders/_core/VideoDecoder.cpp +++ b/src/torchcodec/decoders/_core/VideoDecoder.cpp @@ -1522,17 +1522,22 @@ std::optional<torch::Tensor> VideoDecoder::maybeFlushSwrBuffers() { return std::nullopt; } - torch::Tensor lastSamples = torch::empty( - {getNumChannels(streamInfo.codecContext), numRemainingSamples}, - torch::kFloat32); - uint8_t* lastSamplesData = static_cast<uint8_t*>(lastSamples.data_ptr()); + auto numChannels = getNumChannels(streamInfo.codecContext); + torch::Tensor lastSamples = + torch::empty({numChannels, numRemainingSamples}, torch::kFloat32); + + std::vector<uint8_t*> outputBuffers(numChannels); + for (auto i = 0; i < 
numChannels; i++) { + outputBuffers[i] = static_cast<uint8_t*>(lastSamples[i].data_ptr()); + } auto actualNumRemainingSamples = swr_convert( streamInfo.swrContext.get(), - &lastSamplesData, + outputBuffers.data(), numRemainingSamples, nullptr, 0); + return lastSamples.narrow( /*dim=*/1, /*start=*/0, /*length=*/actualNumRemainingSamples); } diff --git a/test/decoders/test_decoders.py b/test/decoders/test_decoders.py index 509df666b..e63e4cd1d 100644 --- a/test/decoders/test_decoders.py +++ b/test/decoders/test_decoders.py @@ -1157,10 +1157,12 @@ def test_sample_rate_conversion(self, start_seconds, stop_seconds): rtol=rtol, ) - def test_upsample(self): + def test_sample_rate_conversion_stereo(self): + # Non-regression test for https://github.com/pytorch/torchcodec/pull/584 asset = NASA_AUDIO_MP3 assert asset.sample_rate == 8000 - decoder = AudioDecoder(asset.path, sample_rate=44100) + assert asset.num_channels == 2 + decoder = AudioDecoder(asset.path, sample_rate=44_100) decoder.get_samples_played_in_range(start_seconds=0) def test_s16_ffmpeg4_bug(self):