From d83491dd4f58891daa0ed778ceafdc52f92022b6 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Wed, 23 Jul 2025 00:12:24 -0400 Subject: [PATCH 1/2] consistently use ptsToSeconds --- src/torchcodec/_core/SingleStreamDecoder.cpp | 23 ++++++++++---------- test/test_ops.py | 4 ++-- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/torchcodec/_core/SingleStreamDecoder.cpp b/src/torchcodec/_core/SingleStreamDecoder.cpp index 8174fd7d6..1e9289a7a 100644 --- a/src/torchcodec/_core/SingleStreamDecoder.cpp +++ b/src/torchcodec/_core/SingleStreamDecoder.cpp @@ -18,7 +18,7 @@ namespace facebook::torchcodec { namespace { double ptsToSeconds(int64_t pts, const AVRational& timeBase) { - return static_cast<double>(pts) * timeBase.num / timeBase.den; + return static_cast<double>(pts) * av_q2d(timeBase); } int64_t secondsToClosestPts(double seconds, const AVRational& timeBase) { @@ -129,11 +129,11 @@ void SingleStreamDecoder::initializeDecoder() { if (avStream->duration > 0 && avStream->time_base.den > 0) { streamMetadata.durationSecondsFromHeader = - av_q2d(avStream->time_base) * avStream->duration; + ptsToSeconds(avStream->duration, avStream->time_base); } if (avStream->start_time != AV_NOPTS_VALUE) { streamMetadata.beginStreamSecondsFromHeader = - av_q2d(avStream->time_base) * avStream->start_time; + ptsToSeconds(avStream->start_time, avStream->time_base); } if (avStream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) { @@ -304,13 +304,12 @@ void SingleStreamDecoder::scanFileAndUpdateMetadataAndIndex() { streamInfos_[streamIndex].allFrames.size(); if (streamMetadata.beginStreamPtsFromContent.has_value()) { - streamMetadata.beginStreamPtsSecondsFromContent = - *streamMetadata.beginStreamPtsFromContent * - av_q2d(avStream->time_base); + streamMetadata.beginStreamPtsSecondsFromContent = ptsToSeconds( + *streamMetadata.beginStreamPtsFromContent, avStream->time_base); } if (streamMetadata.endStreamPtsFromContent.has_value()) { - streamMetadata.endStreamPtsSecondsFromContent = - 
*streamMetadata.endStreamPtsFromContent * av_q2d(avStream->time_base); + streamMetadata.endStreamPtsSecondsFromContent = ptsToSeconds( + *streamMetadata.endStreamPtsFromContent, avStream->time_base); } } @@ -344,11 +343,11 @@ void SingleStreamDecoder::readCustomFrameMappingsUpdateMetadataAndIndex( all_frames[-1].item() + duration[-1].item(); auto avStream = formatContext_->streams[streamIndex]; - streamMetadata.beginStreamPtsSecondsFromContent = - *streamMetadata.beginStreamPtsFromContent * av_q2d(avStream->time_base); + streamMetadata.beginStreamPtsSecondsFromContent = ptsToSeconds( + *streamMetadata.beginStreamPtsFromContent, avStream->time_base); - streamMetadata.endStreamPtsSecondsFromContent = - *streamMetadata.endStreamPtsFromContent * av_q2d(avStream->time_base); + streamMetadata.endStreamPtsSecondsFromContent = ptsToSeconds( + *streamMetadata.endStreamPtsFromContent, avStream->time_base); streamMetadata.numFramesFromContent = all_frames.size(0); for (int64_t i = 0; i < all_frames.size(0); ++i) { diff --git a/test/test_ops.py b/test/test_ops.py index 2b0e7801a..792658f0e 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -751,7 +751,7 @@ def test_next(self, asset): frame, asset.get_frame_data_by_index(frame_index) ) frame_info = asset.get_frame_info(frame_index) - assert pts_seconds == frame_info.pts_seconds + assert pts_seconds == pytest.approx(frame_info.pts_seconds) assert duration_seconds == frame_info.duration_seconds frame_index += 1 @@ -955,7 +955,7 @@ def test_pts(self, asset): frames, asset.get_frame_data_by_index(frame_index) ) - assert pts_seconds == start_seconds + assert pts_seconds == pytest.approx(start_seconds) def test_sample_rate_conversion(self): def get_all_frames(asset, sample_rate=None, stop_seconds=None): From fffd4999ce59f81fd1c411612409da5832eaf45a Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Fri, 25 Jul 2025 13:34:16 -0400 Subject: [PATCH 2/2] use EXPECT_DOUBLE_EQ for ptsSeconds --- test/VideoDecoderTest.cpp | 16 
++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/VideoDecoderTest.cpp b/test/VideoDecoderTest.cpp index 0c21f0d46..88ffc94cd 100644 --- a/test/VideoDecoderTest.cpp +++ b/test/VideoDecoderTest.cpp @@ -267,9 +267,9 @@ TEST_P(SingleStreamDecoderTest, SeeksCloseToEof) { ourDecoder->addVideoStream(-1); ourDecoder->setCursorPtsInSeconds(388388. / 30'000); auto output = ourDecoder->getNextFrame(); - EXPECT_EQ(output.ptsSeconds, 388'388. / 30'000); + EXPECT_DOUBLE_EQ(output.ptsSeconds, 388'388. / 30'000); output = ourDecoder->getNextFrame(); - EXPECT_EQ(output.ptsSeconds, 389'389. / 30'000); + EXPECT_DOUBLE_EQ(output.ptsSeconds, 389'389. / 30'000); EXPECT_THROW(ourDecoder->getNextFrame(), std::exception); } @@ -300,7 +300,7 @@ TEST_P(SingleStreamDecoderTest, GetsFramePlayedAtTimestamp) { // Sanity check: make sure duration is strictly positive. EXPECT_GT(kPtsPlusDurationOfLastFrame, kPtsOfLastFrameInVideoStream); output = ourDecoder->getFramePlayedAt(kPtsPlusDurationOfLastFrame - 1e-6); - EXPECT_EQ(output.ptsSeconds, kPtsOfLastFrameInVideoStream); + EXPECT_DOUBLE_EQ(output.ptsSeconds, kPtsOfLastFrameInVideoStream); } TEST_P(SingleStreamDecoderTest, SeeksToFrameWithSpecificPts) { @@ -311,7 +311,7 @@ TEST_P(SingleStreamDecoderTest, SeeksToFrameWithSpecificPts) { ourDecoder->setCursorPtsInSeconds(6.0); auto output = ourDecoder->getNextFrame(); torch::Tensor tensor6FromOurDecoder = output.data; - EXPECT_EQ(output.ptsSeconds, 180'180. / 30'000); + EXPECT_DOUBLE_EQ(output.ptsSeconds, 180'180. / 30'000); torch::Tensor tensor6FromFFMPEG = readTensorFromDisk("nasa_13013.mp4.time6.000000.pt"); EXPECT_TRUE(torch::equal(tensor6FromOurDecoder, tensor6FromFFMPEG)); @@ -327,7 +327,7 @@ TEST_P(SingleStreamDecoderTest, SeeksToFrameWithSpecificPts) { ourDecoder->setCursorPtsInSeconds(6.1); output = ourDecoder->getNextFrame(); torch::Tensor tensor61FromOurDecoder = output.data; - EXPECT_EQ(output.ptsSeconds, 183'183. 
/ 30'000); + EXPECT_DOUBLE_EQ(output.ptsSeconds, 183'183. / 30'000); torch::Tensor tensor61FromFFMPEG = readTensorFromDisk("nasa_13013.mp4.time6.100000.pt"); EXPECT_TRUE(torch::equal(tensor61FromOurDecoder, tensor61FromFFMPEG)); @@ -347,7 +347,7 @@ TEST_P(SingleStreamDecoderTest, SeeksToFrameWithSpecificPts) { ourDecoder->setCursorPtsInSeconds(10.0); output = ourDecoder->getNextFrame(); torch::Tensor tensor10FromOurDecoder = output.data; - EXPECT_EQ(output.ptsSeconds, 300'300. / 30'000); + EXPECT_DOUBLE_EQ(output.ptsSeconds, 300'300. / 30'000); torch::Tensor tensor10FromFFMPEG = readTensorFromDisk("nasa_13013.mp4.time10.000000.pt"); EXPECT_TRUE(torch::equal(tensor10FromOurDecoder, tensor10FromFFMPEG)); @@ -364,7 +364,7 @@ TEST_P(SingleStreamDecoderTest, SeeksToFrameWithSpecificPts) { ourDecoder->setCursorPtsInSeconds(6.0); output = ourDecoder->getNextFrame(); tensor6FromOurDecoder = output.data; - EXPECT_EQ(output.ptsSeconds, 180'180. / 30'000); + EXPECT_DOUBLE_EQ(output.ptsSeconds, 180'180. / 30'000); EXPECT_TRUE(torch::equal(tensor6FromOurDecoder, tensor6FromFFMPEG)); EXPECT_EQ(ourDecoder->getDecodeStats().numSeeksAttempted, 1); // We cannot skip a seek here because timestamp=6 has a different keyframe @@ -379,7 +379,7 @@ TEST_P(SingleStreamDecoderTest, SeeksToFrameWithSpecificPts) { ourDecoder->setCursorPtsInSeconds(kPtsOfLastFrameInVideoStream); output = ourDecoder->getNextFrame(); torch::Tensor tensor7FromOurDecoder = output.data; - EXPECT_EQ(output.ptsSeconds, 389'389. / 30'000); + EXPECT_DOUBLE_EQ(output.ptsSeconds, 389'389. / 30'000); torch::Tensor tensor7FromFFMPEG = readTensorFromDisk("nasa_13013.mp4.time12.979633.pt"); EXPECT_TRUE(torch::equal(tensor7FromOurDecoder, tensor7FromFFMPEG));