Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 10 additions & 11 deletions src/torchcodec/_core/FFMPEGCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,9 @@ void setChannelLayout(
}

SwrContext* createSwrContext(
AVSampleFormat sourceSampleFormat,
AVSampleFormat srcSampleFormat,
AVSampleFormat desiredSampleFormat,
int sourceSampleRate,
int srcSampleRate,
int desiredSampleRate,
const UniqueAVFrame& srcAVFrame,
int desiredNumChannels) {
Expand All @@ -178,8 +178,8 @@ SwrContext* createSwrContext(
desiredSampleFormat,
desiredSampleRate,
&srcAVFrame->ch_layout,
sourceSampleFormat,
sourceSampleRate,
srcSampleFormat,
srcSampleRate,
0,
nullptr);

Expand All @@ -196,8 +196,8 @@ SwrContext* createSwrContext(
desiredSampleFormat,
desiredSampleRate,
srcAVFrame->channel_layout,
sourceSampleFormat,
sourceSampleRate,
srcSampleFormat,
srcSampleRate,
0,
nullptr);
#endif
Expand Down Expand Up @@ -228,8 +228,8 @@ UniqueAVFrame convertAudioAVFrameSamples(
convertedAVFrame->format = static_cast<int>(desiredSampleFormat);

convertedAVFrame->sample_rate = desiredSampleRate;
int sourceSampleRate = srcAVFrame->sample_rate;
if (sourceSampleRate != desiredSampleRate) {
int srcSampleRate = srcAVFrame->sample_rate;
if (srcSampleRate != desiredSampleRate) {
// Note that this is an upper bound on the number of output samples.
// `swr_convert()` will likely not fill convertedAVFrame with that many
// samples if sample rate conversion is needed. It will buffer the last few
Expand All @@ -239,10 +239,9 @@ UniqueAVFrame convertAudioAVFrameSamples(
// output samples, but empirically `av_rescale_rnd()` seems to provide a
// tighter bound.
convertedAVFrame->nb_samples = av_rescale_rnd(
swr_get_delay(swrContext.get(), sourceSampleRate) +
srcAVFrame->nb_samples,
swr_get_delay(swrContext.get(), srcSampleRate) + srcAVFrame->nb_samples,
desiredSampleRate,
sourceSampleRate,
srcSampleRate,
AV_ROUND_UP);
} else {
convertedAVFrame->nb_samples = srcAVFrame->nb_samples;
Expand Down
4 changes: 2 additions & 2 deletions src/torchcodec/_core/FFMPEGCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,9 @@ void setChannelLayout(
int desiredNumChannels);

SwrContext* createSwrContext(
AVSampleFormat sourceSampleFormat,
AVSampleFormat srcSampleFormat,
AVSampleFormat desiredSampleFormat,
int sourceSampleRate,
int srcSampleRate,
int desiredSampleRate,
const UniqueAVFrame& srcAVFrame,
int desiredNumChannels);
Expand Down
24 changes: 12 additions & 12 deletions src/torchcodec/_core/SingleStreamDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1184,40 +1184,40 @@ FrameOutput SingleStreamDecoder::convertAVFrameToFrameOutput(
void SingleStreamDecoder::convertAudioAVFrameToFrameOutputOnCPU(
UniqueAVFrame& srcAVFrame,
FrameOutput& frameOutput) {
AVSampleFormat sourceSampleFormat =
AVSampleFormat srcSampleFormat =
static_cast<AVSampleFormat>(srcAVFrame->format);
AVSampleFormat desiredSampleFormat = AV_SAMPLE_FMT_FLTP;

StreamInfo& streamInfo = streamInfos_[activeStreamIndex_];
int sourceSampleRate = srcAVFrame->sample_rate;
int srcSampleRate = srcAVFrame->sample_rate;
int desiredSampleRate =
streamInfo.audioStreamOptions.sampleRate.value_or(sourceSampleRate);
streamInfo.audioStreamOptions.sampleRate.value_or(srcSampleRate);

int sourceNumChannels = getNumChannels(streamInfo.codecContext);
int srcNumChannels = getNumChannels(streamInfo.codecContext);
TORCH_CHECK(
sourceNumChannels == getNumChannels(srcAVFrame),
srcNumChannels == getNumChannels(srcAVFrame),
"The frame has ",
getNumChannels(srcAVFrame),
" channels, expected ",
sourceNumChannels,
srcNumChannels,
". If you are hitting this, it may be because you are using "
"a buggy FFmpeg version. FFmpeg4 is known to fail here in some "
"valid scenarios. Try to upgrade FFmpeg?");
int desiredNumChannels =
streamInfo.audioStreamOptions.numChannels.value_or(sourceNumChannels);
streamInfo.audioStreamOptions.numChannels.value_or(srcNumChannels);

bool mustConvert =
(sourceSampleFormat != desiredSampleFormat ||
sourceSampleRate != desiredSampleRate ||
sourceNumChannels != desiredNumChannels);
(srcSampleFormat != desiredSampleFormat ||
srcSampleRate != desiredSampleRate ||
srcNumChannels != desiredNumChannels);

UniqueAVFrame convertedAVFrame;
if (mustConvert) {
if (!streamInfo.swrContext) {
streamInfo.swrContext.reset(createSwrContext(
sourceSampleFormat,
srcSampleFormat,
desiredSampleFormat,
sourceSampleRate,
srcSampleRate,
desiredSampleRate,
srcAVFrame,
desiredNumChannels));
Expand Down
Loading