Skip to content

Commit

Permalink
MUI Decoder: Support uniform sampling in decoder itself (so decoding …
Browse files Browse the repository at this point in the history
…only what is needed)

Summary: TSIA (title says it all)

Differential Revision: D56020516

fbshipit-source-id: 6d5537886d648dfbd94724c86b1fb3235b370cbd
  • Loading branch information
Yury Gitman authored and facebook-github-bot committed Apr 11, 2024
1 parent 4eb67d1 commit ec4e27f
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 0 deletions.
12 changes: 12 additions & 0 deletions torchvision/csrc/io/decoder/decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,7 @@ int Decoder::getFrame(size_t workingTimeInMs) {
continue;
}


size_t numConsecutiveNoBytes = 0;
// it can be only partial decoding of the package bytes
do {
Expand Down Expand Up @@ -590,6 +591,17 @@ int Decoder::getFrame(size_t workingTimeInMs) {
result = 0;

av_packet_unref(avPacket);

// Uniform sampling (active only when params_.uniformSampling > 0): after each
// decoded frame, seek forward so that roughly uniformSampling frames spread
// over params_.expectedDuration are decoded instead of the whole stream.
// NOTE(review): kFramesDecoded_ is not reset anywhere in the visible code;
// confirm it is cleared when the decoder is re-initialised for a new input.
if (params_.uniformSampling > 0) {
  ++kFramesDecoded_;

  if (kFramesDecoded_ < params_.uniformSampling) {
    // The counter is at least 1 here, so this branch implies
    // uniformSampling >= 2 and the divisor below cannot be zero.
    // stepTs is the distance between two consecutive sample points,
    // expressed in AV_TIME_BASE units.
    const int64_t stepTs = static_cast<int64_t>(
        (params_.expectedDuration * AV_TIME_BASE) /
        (params_.uniformSampling - 1));

    // Seek to the next sample point, accepting any position in the window
    // (previous sample point, next sample point]. If the seek fails, skip
    // that sample point and try the following one.
    while (kFramesDecoded_ < params_.uniformSampling &&
           avformat_seek_file(
               inputCtx_,
               -1,
               stepTs * (kFramesDecoded_ - 1) + 1,
               stepTs * kFramesDecoded_,
               stepTs * kFramesDecoded_,
               0) < 0) {
      ++kFramesDecoded_;
    }
  }

  // Stop as soon as every sample point has been consumed, whether by a
  // decoded frame or by a failed seek. Using `>=` (not `==`) and checking
  // after the seek loop guarantees termination even when the trailing seeks
  // fail; otherwise the counter would step past the limit and the rest of
  // the stream would be decoded sequentially.
  if (kFramesDecoded_ >= params_.uniformSampling) {
    result = ENODATA;
    flushStreams();
    break;
  }
}
}

av_packet_free(&avPacket);
Expand Down
1 change: 1 addition & 0 deletions torchvision/csrc/io/decoder/decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,5 +89,6 @@ class Decoder : public MediaDecoder {
AVIOContext* avioCtx_{nullptr};
std::unordered_map<ssize_t, std::unique_ptr<Stream>> streams_;
std::bitset<64> inRange_;
// Running counter used by the uniform-sampling logic in Decoder::getFrame():
// it is incremented after a packet has been processed and again for every
// sample point whose seek fails, and it is compared against
// params_.uniformSampling to decide when to stop.
// NOTE(review): the "k" prefix usually marks a constant, but this member is
// mutable state — consider renaming (e.g. framesDecoded_).
int kFramesDecoded_{0};
};
} // namespace ffmpeg
6 changes: 6 additions & 0 deletions torchvision/csrc/io/decoder/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,12 @@ struct DecoderParameters {
// it is dispersed into the stream, but will increase latency. Must be an
// integer no less than 32. It is 5000000 by default.
int64_t probeSize{5000000};

// Expected duration of the video to be decoded, mainly used with uniform
// sampling. Decoder::getFrame() multiplies this value by AV_TIME_BASE and
// divides by (uniformSampling - 1) to obtain the seek step between samples,
// so it is presumably expressed in seconds — TODO confirm with callers.
// Defaults to 0.
float expectedDuration{0.0f};

// Sample N key-frames from the video roughly uniformly across the timeline.
// Decoder::getFrame() stops decoding (result = ENODATA) once its decoded-frame
// counter reaches this value, and seeks forward by
// expectedDuration * AV_TIME_BASE / (N - 1) between samples.
// NOTE(review): the default of 0 appears to leave sampling disabled, since the
// counter never matches it and no seek is attempted — confirm this is the
// intended "off" value.
int uniformSampling{0};
};

struct DecoderHeader {
Expand Down

0 comments on commit ec4e27f

Please sign in to comment.