Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Soundtouch offset compensation updated #11154

Merged
merged 8 commits into from
Aug 9, 2023
Merged
3 changes: 2 additions & 1 deletion src/engine/bufferscalers/enginebufferscale.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ EngineBufferScale::EngineBufferScale()
m_dBaseRate(1.0),
m_bSpeedAffectsPitch(false),
m_dTempoRatio(1.0),
m_dPitchRatio(1.0) {
m_dPitchRatio(1.0),
m_effectiveRate(1.0) {
DEBUG_ASSERT(!m_outputSignal.isValid());
}

Expand Down
2 changes: 2 additions & 0 deletions src/engine/bufferscalers/enginebufferscale.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,6 @@ class EngineBufferScale : public QObject {
bool m_bSpeedAffectsPitch;
double m_dTempoRatio;
double m_dPitchRatio;
// Due to the scaler latency, tempo and pitch changes are not immediately effective.
double m_effectiveRate;
};
28 changes: 11 additions & 17 deletions src/engine/bufferscalers/enginebufferscalerubberband.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,7 @@ double EngineBufferScaleRubberBand::scaleBuffer(
return 0.0;
}

SINT total_received_frames = 0;

double readFramesProcessed = 0;
SINT remaining_frames = getOutputSignal().samples2frames(iOutputBufferSize);
CSAMPLE* read = pOutputBuffer;
bool last_read_failed = false;
Expand All @@ -177,7 +176,7 @@ double EngineBufferScaleRubberBand::scaleBuffer(
SINT received_frames = retrieveAndDeinterleave(
read, remaining_frames);
remaining_frames -= received_frames;
total_received_frames += received_frames;
readFramesProcessed += m_effectiveRate * received_frames;
read += getOutputSignal().frames2samples(received_frames);

if (break_out_after_retrieve_and_reset_rubberband) {
Expand All @@ -203,12 +202,14 @@ double EngineBufferScaleRubberBand::scaleBuffer(
//qDebug() << "iLenFramesRequired" << iLenFramesRequired;

if (remaining_frames > 0 && iLenFramesRequired > 0) {
// The requested setting becomes effective after all previous frames have been processed
m_effectiveRate = m_dBaseRate * m_dTempoRatio;
SINT iAvailSamples = m_pReadAheadManager->getNextSamples(
// The value doesn't matter here. All that matters is we
// are going forward or backward.
(m_bBackwards ? -1.0 : 1.0) * m_dBaseRate * m_dTempoRatio,
m_buffer_back,
getOutputSignal().frames2samples(iLenFramesRequired));
// The value doesn't matter here. All that matters is we
// are going forward or backward.
(m_bBackwards ? -1.0 : 1.0) * m_dBaseRate * m_dTempoRatio,
m_buffer_back,
getOutputSignal().frames2samples(iLenFramesRequired));
SINT iAvailFrames = getOutputSignal().samples2frames(iAvailSamples);

if (iAvailFrames > 0) {
Expand All @@ -233,15 +234,8 @@ double EngineBufferScaleRubberBand::scaleBuffer(
counter.increment();
}

// framesRead is interpreted as the total number of virtual sample frames
// readFramesProcessed is interpreted as the total number of frames
// consumed to produce the scaled buffer. Due to this, we do not take into
// account directionality or starting point.
// NOTE(rryan): Why no m_dPitchAdjust here? Pitch does not change the time
// ratio. m_dSpeedAdjust is the ratio of unstretched time to stretched
// time. So, if we used total_received_frames in stretched time, then
// multiplying that by the ratio of unstretched time to stretched time
// will get us the unstretched sample frames read.
double framesRead = m_dBaseRate * m_dTempoRatio * total_received_frames;

return framesRead;
return readFramesProcessed;
}
74 changes: 37 additions & 37 deletions src/engine/bufferscalers/enginebufferscalest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,27 +17,30 @@ using namespace soundtouch;
namespace {

// Due to filtering and oversampling, SoundTouch is some samples behind.
daschuer marked this conversation as resolved.
Show resolved Hide resolved
// The value below was experimental identified using a saw signal and SoundTouch 1.8
// at a speed of 1.0
// 0.918 (upscaling 44.1 kHz to 48 kHz) will produce an additional offset of 3 Frames
// 0.459 (upscaling 44.1 kHz to 96 kHz) will produce an additional offset of 18 Frames
// (Rubberband does not suffer this issue)
const SINT kSeekOffsetFrames = 519;
// The value below was experimental identified using a saw signal and SoundTouch 2.1.1
constexpr SINT kSeekOffsetFramesV20101 = 429;

// From V 2.3.0 Soundtouch has no initial offset at unity, but it is to early with
// lowered pitch and to early with raised pitch ~+-2000 farmes = 50 ms.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand the comment. also typos

Suggested change
// From V 2.3.0 Soundtouch has no initial offset at unity, but it is to early with
// lowered pitch and to early with raised pitch ~+-2000 farmes = 50 ms.
// From V 2.3.0 Soundtouch has no initial offset at unity, but it is too early with
// lowered pitch and too early with raised pitch ~+-2000 frames = 50 ms.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you elaborate on this, does the 2000ms just mean latency? how are you estimating time from that?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have added the description of my test.
Does it help?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I appreciate the comment, but it didn't really help. I have a couple questions:

  1. what exactly do you mean with offset in this comment? Latency expressed as a number of frames/samples?
  2. how can that offset be earlier (lowered/raised pitch) than no offset (at unity (which I assume means no change in pitch)).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

1 In my test with extrem pitches, the saw tooth was shifted by up to 2000 frames compared to the unity signal. This is relative to the pitch, but I have not investigated how exactly.

2 It can be earlier, because of the settling samples. Soundtouch needs some samples to be settled before you can consume the output even at unity. Mixxx controls the output and recent Soundtouch compensates the input correct only at unity.

// TODO() Compensate that. This is probably cause by the delayed adoption of pitch changes due
// to the SoundTouch chunk size.

constexpr SINT kBackBufferSize = 1024;

} // namespace

EngineBufferScaleST::EngineBufferScaleST(ReadAheadManager *pReadAheadManager)
: m_pReadAheadManager(pReadAheadManager),
m_pSoundTouch(std::make_unique<soundtouch::SoundTouch>()),
m_bBackwards(false) {
EngineBufferScaleST::EngineBufferScaleST(ReadAheadManager* pReadAheadManager)
: m_pReadAheadManager(pReadAheadManager),
m_pSoundTouch(std::make_unique<soundtouch::SoundTouch>()),
m_bufferBack(kBackBufferSize),
m_bBackwards(false) {
m_pSoundTouch->setChannels(getOutputSignal().getChannelCount());
m_pSoundTouch->setRate(m_dBaseRate);
m_pSoundTouch->setPitch(1.0);
m_pSoundTouch->setSetting(SETTING_USE_QUICKSEEK, 1);
// Initialize the internal buffers to prevent re-allocations
// in the real-time thread.
onSampleRateChanged();

}

EngineBufferScaleST::~EngineBufferScaleST() {
Expand Down Expand Up @@ -90,31 +93,32 @@ void EngineBufferScaleST::setScaleParameters(double base_rate,
}

void EngineBufferScaleST::onSampleRateChanged() {
buffer_back.clear();
m_bufferBack.clear();
if (!getOutputSignal().isValid()) {
return;
}
m_pSoundTouch->setSampleRate(getOutputSignal().getSampleRate());
const auto bufferSize = getOutputSignal().frames2samples(kSeekOffsetFrames);
if (bufferSize > buffer_back.size()) {
// grow buffer
buffer_back = mixxx::SampleBuffer(bufferSize);
}
Swiftb0y marked this conversation as resolved.
Show resolved Hide resolved

// Setting the tempo to a very low value will force SoundTouch
// to preallocate buffers large enough to (almost certainly)
// avoid memory reallocations during playback.
m_pSoundTouch->setTempo(0.1);
m_pSoundTouch->putSamples(buffer_back.data(), kSeekOffsetFrames);
m_pSoundTouch->clear();
clear();
m_pSoundTouch->setTempo(m_dTempoRatio);
clear();
Swiftb0y marked this conversation as resolved.
Show resolved Hide resolved
}

void EngineBufferScaleST::clear() {
m_pSoundTouch->clear();

// compensate seek offset for a rate of 1.0
SampleUtil::clear(buffer_back.data(), buffer_back.size());
m_pSoundTouch->putSamples(buffer_back.data(), kSeekOffsetFrames);
if (SoundTouch::getVersionId() < 20302) {
DEBUG_ASSERT(SoundTouch::getVersionId() >= 20101);
// from SoundTouch 2.3.0 the initial offset is corrected internally
m_effectiveRate = m_dBaseRate * m_dTempoRatio;
SampleUtil::clear(m_bufferBack.data(), m_bufferBack.size());
m_pSoundTouch->putSamples(m_bufferBack.data(), kSeekOffsetFramesV20101);
}
}

double EngineBufferScaleST::scaleBuffer(
Expand All @@ -127,8 +131,7 @@ double EngineBufferScaleST::scaleBuffer(
return 0.0;
}

SINT total_received_frames = 0;

double readFramesProcessed = 0;
SINT remaining_frames = getOutputSignal().samples2frames(iOutputBufferSize);
CSAMPLE* read = pOutputBuffer;
bool last_read_failed = false;
Expand All @@ -137,21 +140,23 @@ double EngineBufferScaleST::scaleBuffer(
read, remaining_frames);
DEBUG_ASSERT(remaining_frames >= received_frames);
remaining_frames -= received_frames;
total_received_frames += received_frames;
readFramesProcessed += m_effectiveRate * received_frames;
read += getOutputSignal().frames2samples(received_frames);

if (remaining_frames > 0) {
// The requested setting becomes effective after all previous frames have been processed
m_effectiveRate = m_dBaseRate * m_dTempoRatio;
SINT iAvailSamples = m_pReadAheadManager->getNextSamples(
// The value doesn't matter here. All that matters is we
// are going forward or backward.
(m_bBackwards ? -1.0 : 1.0) * m_dBaseRate * m_dTempoRatio,
buffer_back.data(),
buffer_back.size());
// The value doesn't matter here. All that matters is we
// are going forward or backward.
(m_bBackwards ? -1.0 : 1.0) * m_effectiveRate,
m_bufferBack.data(),
m_bufferBack.size());
SINT iAvailFrames = getOutputSignal().samples2frames(iAvailSamples);

if (iAvailFrames > 0) {
last_read_failed = false;
m_pSoundTouch->putSamples(buffer_back.data(), iAvailFrames);
m_pSoundTouch->putSamples(m_bufferBack.data(), iAvailFrames);
} else {
if (last_read_failed) {
m_pSoundTouch->flush();
Expand All @@ -162,13 +167,8 @@ double EngineBufferScaleST::scaleBuffer(
}
}

// framesRead is interpreted as the total number of virtual sample frames
// readFramesProcessed is interpreted as the total number of frames
// consumed to produce the scaled buffer. Due to this, we do not take into
// account directionality or starting point.
// NOTE(rryan): Why no m_dPitchAdjust here? SoundTouch implements pitch
// shifting as a tempo shift of (1/m_dPitchAdjust) and a rate shift of
// (*m_dPitchAdjust) so these two cancel out.
double framesRead = m_dBaseRate * m_dTempoRatio * total_received_frames;

return framesRead;
return readFramesProcessed;
}
2 changes: 1 addition & 1 deletion src/engine/bufferscalers/enginebufferscalest.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class EngineBufferScaleST : public EngineBufferScale {
std::unique_ptr<soundtouch::SoundTouch> m_pSoundTouch;

// Temporary buffer for reading from the RAMAN.
mixxx::SampleBuffer buffer_back;
mixxx::SampleBuffer m_bufferBack;

// Holds the playback direction.
bool m_bBackwards;
Expand Down