mixxxdj · Swiftb0y · Aug 9, 2023 · Dec 23, 2022 · Dec 23, 2022 · Dec 23, 2022
diff --git a/src/engine/bufferscalers/enginebufferscale.cpp b/src/engine/bufferscalers/enginebufferscale.cpp
@@ -12,7 +12,8 @@ EngineBufferScale::EngineBufferScale()
           m_dBaseRate(1.0),
           m_bSpeedAffectsPitch(false),
           m_dTempoRatio(1.0),
-          m_dPitchRatio(1.0) {
+          m_dPitchRatio(1.0),
+          m_effectiveRate(1.0) {
     DEBUG_ASSERT(!m_outputSignal.isValid());
 }
 

diff --git a/src/engine/bufferscalers/enginebufferscale.h b/src/engine/bufferscalers/enginebufferscale.h
@@ -73,4 +73,6 @@ class EngineBufferScale : public QObject {
     bool m_bSpeedAffectsPitch;
     double m_dTempoRatio;
     double m_dPitchRatio;
+    // Due to the scaler latency, tempo and pitch changes are not immediately effective.
+    double m_effectiveRate;
 };
diff --git a/src/engine/bufferscalers/enginebufferscalerubberband.cpp b/src/engine/bufferscalers/enginebufferscalerubberband.cpp
@@ -163,8 +163,7 @@ double EngineBufferScaleRubberBand::scaleBuffer(
         return 0.0;
     }
 
-    SINT total_received_frames = 0;
-
+    double readFramesProcessed = 0;
     SINT remaining_frames = getOutputSignal().samples2frames(iOutputBufferSize);
     CSAMPLE* read = pOutputBuffer;
     bool last_read_failed = false;
@@ -177,7 +176,7 @@ double EngineBufferScaleRubberBand::scaleBuffer(
         SINT received_frames = retrieveAndDeinterleave(
                 read, remaining_frames);
         remaining_frames -= received_frames;
-        total_received_frames += received_frames;
+        readFramesProcessed += m_effectiveRate * received_frames;
         read += getOutputSignal().frames2samples(received_frames);
 
         if (break_out_after_retrieve_and_reset_rubberband) {
@@ -203,12 +202,14 @@ double EngineBufferScaleRubberBand::scaleBuffer(
         //qDebug() << "iLenFramesRequired" << iLenFramesRequired;
 
         if (remaining_frames > 0 && iLenFramesRequired > 0) {
+            // The requested setting becomes effective after all previous frames have been processed
+            m_effectiveRate = m_dBaseRate * m_dTempoRatio;
             SINT iAvailSamples = m_pReadAheadManager->getNextSamples(
-                        // The value doesn't matter here. All that matters is we
-                        // are going forward or backward.
-                        (m_bBackwards ? -1.0 : 1.0) * m_dBaseRate * m_dTempoRatio,
-                        m_buffer_back,
-                        getOutputSignal().frames2samples(iLenFramesRequired));
+                    // The value doesn't matter here. All that matters is we
+                    // are going forward or backward.
+                    (m_bBackwards ? -1.0 : 1.0) * m_dBaseRate * m_dTempoRatio,
+                    m_buffer_back,
+                    getOutputSignal().frames2samples(iLenFramesRequired));
             SINT iAvailFrames = getOutputSignal().samples2frames(iAvailSamples);
 
             if (iAvailFrames > 0) {
@@ -233,15 +234,8 @@ double EngineBufferScaleRubberBand::scaleBuffer(
         counter.increment();
     }
 
-    // framesRead is interpreted as the total number of virtual sample frames
+    // readFramesProcessed is interpreted as the total number of frames
     // consumed to produce the scaled buffer. Due to this, we do not take into
     // account directionality or starting point.
-    // NOTE(rryan): Why no m_dPitchAdjust here? Pitch does not change the time
-    // ratio. m_dSpeedAdjust is the ratio of unstretched time to stretched
-    // time. So, if we used total_received_frames in stretched time, then
-    // multiplying that by the ratio of unstretched time to stretched time
-    // will get us the unstretched sample frames read.
-    double framesRead = m_dBaseRate * m_dTempoRatio * total_received_frames;
-
-    return framesRead;
+    return readFramesProcessed;
 }
diff --git a/src/engine/bufferscalers/enginebufferscalest.cpp b/src/engine/bufferscalers/enginebufferscalest.cpp
@@ -17,27 +17,30 @@ using namespace soundtouch;
 namespace {
 
 // Due to filtering and oversampling, SoundTouch is some samples behind.
-// The value below was experimental identified using a saw signal and SoundTouch 1.8
-// at a speed of 1.0
-// 0.918 (upscaling 44.1 kHz to 48 kHz) will produce an additional offset of 3 Frames
-// 0.459 (upscaling 44.1 kHz to 96 kHz) will produce an additional offset of 18 Frames
-// (Rubberband does not suffer this issue)
-const SINT kSeekOffsetFrames = 519;
+// The value below was experimental identified using a saw signal and SoundTouch 2.1.1
+constexpr SINT kSeekOffsetFramesV20101 = 429;
+
+// From V 2.3.0 Soundtouch has no initial offset at unity, but it is to early with
+// lowered pitch and to early with raised pitch ~+-2000 farmes = 50 ms.
-// From V 2.3.0 Soundtouch has no initial offset at unity, but it is to early with
-// lowered pitch and to early with raised pitch ~+-2000 farmes = 50 ms.
+// From V 2.3.0 Soundtouch has no initial offset at unity, but it is too early with
+// lowered pitch and too early with raised pitch ~+-2000 frames = 50 ms.
-// From V 2.3.0 Soundtouch has no initial offset at unity, but it is to early with
-// lowered pitch and to early with raised pitch ~+-2000 farmes = 50 ms.
+// From V 2.3.0 Soundtouch has no initial offset at unity, but it is too early with
+// lowered pitch and too early with raised pitch ~+-2000 frames = 50 ms.
+// TODO() Compensate that. This is probably cause by the delayed adoption of pitch changes due
+// to the SoundTouch chunk size.
+
+constexpr SINT kBackBufferSize = 1024;
 
 }  // namespace
 
-EngineBufferScaleST::EngineBufferScaleST(ReadAheadManager *pReadAheadManager)
-    : m_pReadAheadManager(pReadAheadManager),
-      m_pSoundTouch(std::make_unique<soundtouch::SoundTouch>()),
-      m_bBackwards(false) {
+EngineBufferScaleST::EngineBufferScaleST(ReadAheadManager* pReadAheadManager)
+        : m_pReadAheadManager(pReadAheadManager),
+          m_pSoundTouch(std::make_unique<soundtouch::SoundTouch>()),
+          m_bufferBack(kBackBufferSize),
+          m_bBackwards(false) {
     m_pSoundTouch->setChannels(getOutputSignal().getChannelCount());
     m_pSoundTouch->setRate(m_dBaseRate);
     m_pSoundTouch->setPitch(1.0);
     m_pSoundTouch->setSetting(SETTING_USE_QUICKSEEK, 1);
     // Initialize the internal buffers to prevent re-allocations
     // in the real-time thread.
     onSampleRateChanged();
-
 }
 
 EngineBufferScaleST::~EngineBufferScaleST() {
@@ -90,31 +93,32 @@ void EngineBufferScaleST::setScaleParameters(double base_rate,
 }
 
 void EngineBufferScaleST::onSampleRateChanged() {
-    buffer_back.clear();
+    m_bufferBack.clear();
     if (!getOutputSignal().isValid()) {
         return;
     }
     m_pSoundTouch->setSampleRate(getOutputSignal().getSampleRate());
-    const auto bufferSize = getOutputSignal().frames2samples(kSeekOffsetFrames);
-    if (bufferSize > buffer_back.size()) {
-        // grow buffer
-        buffer_back = mixxx::SampleBuffer(bufferSize);
-    }
+
     // Setting the tempo to a very low value will force SoundTouch
     // to preallocate buffers large enough to (almost certainly)
     // avoid memory reallocations during playback.
     m_pSoundTouch->setTempo(0.1);
-    m_pSoundTouch->putSamples(buffer_back.data(), kSeekOffsetFrames);
-    m_pSoundTouch->clear();
+    clear();
     m_pSoundTouch->setTempo(m_dTempoRatio);
+    clear();
 }
 
 void EngineBufferScaleST::clear() {
     m_pSoundTouch->clear();
 
     // compensate seek offset for a rate of 1.0
-    SampleUtil::clear(buffer_back.data(), buffer_back.size());
-    m_pSoundTouch->putSamples(buffer_back.data(), kSeekOffsetFrames);
+    if (SoundTouch::getVersionId() < 20302) {
+        DEBUG_ASSERT(SoundTouch::getVersionId() >= 20101);
+        // from SoundTouch 2.3.0 the initial offset is corrected internally
+        m_effectiveRate = m_dBaseRate * m_dTempoRatio;
+        SampleUtil::clear(m_bufferBack.data(), m_bufferBack.size());
+        m_pSoundTouch->putSamples(m_bufferBack.data(), kSeekOffsetFramesV20101);
+    }
 }
 
 double EngineBufferScaleST::scaleBuffer(
@@ -127,8 +131,7 @@ double EngineBufferScaleST::scaleBuffer(
         return 0.0;
     }
 
-    SINT total_received_frames = 0;
-
+    double readFramesProcessed = 0;
     SINT remaining_frames = getOutputSignal().samples2frames(iOutputBufferSize);
     CSAMPLE* read = pOutputBuffer;
     bool last_read_failed = false;
@@ -137,21 +140,23 @@ double EngineBufferScaleST::scaleBuffer(
                 read, remaining_frames);
         DEBUG_ASSERT(remaining_frames >= received_frames);
         remaining_frames -= received_frames;
-        total_received_frames += received_frames;
+        readFramesProcessed += m_effectiveRate * received_frames;
         read += getOutputSignal().frames2samples(received_frames);
 
         if (remaining_frames > 0) {
+            // The requested setting becomes effective after all previous frames have been processed
+            m_effectiveRate = m_dBaseRate * m_dTempoRatio;
             SINT iAvailSamples = m_pReadAheadManager->getNextSamples(
-                        // The value doesn't matter here. All that matters is we
-                        // are going forward or backward.
-                        (m_bBackwards ? -1.0 : 1.0) * m_dBaseRate * m_dTempoRatio,
-                        buffer_back.data(),
-                        buffer_back.size());
+                    // The value doesn't matter here. All that matters is we
+                    // are going forward or backward.
+                    (m_bBackwards ? -1.0 : 1.0) * m_effectiveRate,
+                    m_bufferBack.data(),
+                    m_bufferBack.size());
             SINT iAvailFrames = getOutputSignal().samples2frames(iAvailSamples);
 
             if (iAvailFrames > 0) {
                 last_read_failed = false;
-                m_pSoundTouch->putSamples(buffer_back.data(), iAvailFrames);
+                m_pSoundTouch->putSamples(m_bufferBack.data(), iAvailFrames);
             } else {
                 if (last_read_failed) {
                     m_pSoundTouch->flush();
@@ -162,13 +167,8 @@ double EngineBufferScaleST::scaleBuffer(
         }
     }
 
-    // framesRead is interpreted as the total number of virtual sample frames
+    // readFramesProcessed is interpreted as the total number of frames
     // consumed to produce the scaled buffer. Due to this, we do not take into
     // account directionality or starting point.
-    // NOTE(rryan): Why no m_dPitchAdjust here? SoundTouch implements pitch
-    // shifting as a tempo shift of (1/m_dPitchAdjust) and a rate shift of
-    // (*m_dPitchAdjust) so these two cancel out.
-    double framesRead = m_dBaseRate * m_dTempoRatio * total_received_frames;
-
-    return framesRead;
+    return readFramesProcessed;
 }
diff --git a/src/engine/bufferscalers/enginebufferscalest.h b/src/engine/bufferscalers/enginebufferscalest.h
@@ -40,7 +40,7 @@ class EngineBufferScaleST : public EngineBufferScale {
     std::unique_ptr<soundtouch::SoundTouch> m_pSoundTouch;
 
     // Temporary buffer for reading from the RAMAN.
-    mixxx::SampleBuffer buffer_back;
+    mixxx::SampleBuffer m_bufferBack;
 
     // Holds the playback direction.
     bool m_bBackwards;