-
Notifications
You must be signed in to change notification settings - Fork 57
Fix AudioStream catch-up clicks: add cooldown + crossfade #260
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
6b3f700
affd10a
0f74ba1
c4fdbec
bb2d3d5
36d6487
2ba5e67
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,7 @@ public sealed class AudioStream : IDisposable | |
| private readonly AudioProbe _probe; | ||
| private RingBuffer _buffer; | ||
| private short[] _tempBuffer; | ||
| private short[] _crossfadeScratch; | ||
| private uint _numChannels; | ||
| private uint _sampleRate; | ||
| private AudioResampler _resampler = new AudioResampler(); | ||
|
|
@@ -29,9 +30,15 @@ public sealed class AudioStream : IDisposable | |
| private const float BufferSizeSeconds = 0.2f; // 200ms ring buffer for all platforms | ||
| private const float PrimingThresholdSeconds = 0.03f; // Wait for 30ms of data before playing | ||
|
|
||
| // Drift correction: skip samples when buffer fills up due to clock drift | ||
| private const float HighWaterMarkPercent = 0.50f; // Target 50% fill level after correction | ||
| private const float SkipPerCallbackPercent = 0.05f; // Skip 5% of callback samples per call | ||
| // Drift correction: skip samples when the buffer fills up due to clock drift. | ||
| // HWM at 50% (100ms of 200ms) so normal network jitter does not trip catch-up. | ||
| // Cooldown prevents back-to-back skips, which sound like a gravelly click train; | ||
| // one occasional skip is inaudible thanks to the crossfade in OnAudioRead. | ||
| private const float HighWaterMarkPercent = 0.50f; | ||
| private const float SkipPerCallbackPercent = 0.05f; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we'd better re-visit the logic, does skipping 5% of audio noticable ? if it is noticeable, we'd better re-consider the approach. Like will it work better if we just skip initial 50ms of audio in the buffer so that we will have a continuous audio stream later ? or should we increase the buffer size ? |
||
| private const int SkipCooldownCallbacks = 10; | ||
| private const int CrossfadeFrames = 128; // ~2.7ms @ 48kHz | ||
| private int _skipCooldown = 0; | ||
|
|
||
| /// <summary> | ||
| /// Creates a new audio stream from a remote audio track, attaching it to the | ||
|
|
@@ -87,11 +94,13 @@ private void OnAudioRead(float[] data, int channels, int sampleRate) | |
| _buffer?.Dispose(); | ||
| _buffer = new RingBuffer(bufferSize * sizeof(short)); | ||
| _tempBuffer = new short[data.Length]; | ||
| _crossfadeScratch = new short[CrossfadeFrames * channels]; | ||
| _numChannels = (uint)channels; | ||
| _sampleRate = (uint)sampleRate; | ||
|
|
||
| // Buffer was recreated, need to re-prime | ||
| _isPrimed = false; | ||
| _skipCooldown = 0; | ||
| } | ||
|
|
||
| static float S16ToFloat(short v) | ||
|
|
@@ -127,7 +136,7 @@ static float S16ToFloat(short v) | |
| if (valuesAvailableToRead < data.Length) | ||
| { | ||
| _isPrimed = false; | ||
| Utils.Debug($"AudioStream underrun detected, re-priming (got {valuesAvailableToRead} samples)"); | ||
| Utils.Debug($"AudioStream underrun detected, re-priming (got {valuesAvailableToRead} samples but want to read {data.Length})"); | ||
|
|
||
| // Output silence immediately instead of playing partial/choppy samples. | ||
| // On next frames, the !_isPrimed check above will ensure we wait for 30ms | ||
|
|
@@ -147,21 +156,50 @@ static float S16ToFloat(short v) | |
| // If the buffer is empty or doesn't have enough data, bytesRead will be less than requested. | ||
| int samplesRead = bytesRead / sizeof(short); | ||
|
|
||
| // Drift correction: if buffer is filling up (producer faster than consumer), | ||
| // skip a small number of samples to prevent overflow and keep latency bounded. | ||
| // Drift correction: if the buffer is filling up (producer faster than | ||
| // consumer), discard a small run of samples and crossfade the output tail | ||
| // into the post-skip window so the seam is inaudible. Cooldown spaces | ||
| // skips out so we never produce back-to-back artifacts. | ||
| if (_skipCooldown > 0) | ||
| _skipCooldown--; | ||
|
|
||
| int highWaterBytes = (int)(_buffer.Capacity * HighWaterMarkPercent); | ||
| int remainingBytes = _buffer.AvailableRead(); | ||
| if (remainingBytes > highWaterBytes) | ||
| if (_skipCooldown == 0 && _buffer.AvailableRead() > highWaterBytes) | ||
| { | ||
| int skipBytes = (int)(data.Length * sizeof(short) * SkipPerCallbackPercent); | ||
| int frameSize = channels * sizeof(short); | ||
| int skipBytes = (int)(data.Length * sizeof(short) * SkipPerCallbackPercent); | ||
| skipBytes -= skipBytes % frameSize; // align to frame boundary | ||
|
|
||
| if (skipBytes > 0) | ||
| int crossfadeShorts = CrossfadeFrames * channels; | ||
| int crossfadeBytes = crossfadeShorts * sizeof(short); | ||
|
|
||
| // Only skip if we still have enough data left to fill the crossfade | ||
| // window; never trade a catch-up artifact for an underrun artifact. | ||
| if (skipBytes > 0 && _buffer.AvailableRead() >= skipBytes + crossfadeBytes) | ||
| { | ||
| _buffer.SkipRead(skipBytes); | ||
|
|
||
| var postBytes = MemoryMarshal.Cast<short, byte>(_crossfadeScratch.AsSpan().Slice(0, crossfadeShorts)); | ||
| _buffer.Read(postBytes); | ||
|
|
||
| // Linearly crossfade the last crossfadeFrames frames of _tempBuffer | ||
| // with the post-skip samples: the step discontinuity becomes a | ||
| // short linear ramp that is continuous with the next callback. | ||
| int tailStart = samplesRead - crossfadeShorts; | ||
| if (tailStart >= 0) | ||
| { | ||
| for (int i = 0; i < crossfadeShorts; i++) | ||
| { | ||
| int frameIdx = i / channels; | ||
| float t = (frameIdx + 1) / (float)CrossfadeFrames; | ||
| short pre = _tempBuffer[tailStart + i]; | ||
| short post = _crossfadeScratch[i]; | ||
| _tempBuffer[tailStart + i] = (short)(pre * (1f - t) + post * t); | ||
| } | ||
| _skipCooldown = SkipCooldownCallbacks; | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Clear the entire output buffer to silence, then fill with the samples | ||
| // we successfully read from the ring buffer. | ||
|
|
@@ -174,7 +212,7 @@ static float S16ToFloat(short v) | |
| } | ||
|
|
||
| // Called when application goes to background or returns to foreground | ||
| private void OnApplicationPause(bool pause) | ||
| internal void OnApplicationPause(bool pause) | ||
| { | ||
| if (_disposed) | ||
| return; | ||
|
|
@@ -266,6 +304,7 @@ private void Dispose(bool disposing) | |
| _buffer?.Dispose(); | ||
| _buffer = null; | ||
| _tempBuffer = null; | ||
| _crossfadeScratch = null; | ||
| _resampler?.Dispose(); | ||
| _resampler = null; | ||
| Handle.Dispose(); | ||
|
|
@@ -278,5 +317,17 @@ private void Dispose(bool disposing) | |
| { | ||
| Dispose(false); | ||
| } | ||
|
|
||
| // For testing and debugging | ||
| internal float GetBufferFill() | ||
| { | ||
| lock(_lock) | ||
| { | ||
| if (_buffer == null) | ||
| return 0; | ||
| return _buffer.AvailableReadInPercent(); | ||
| } | ||
|
|
||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,164 @@ | ||
| using System.Collections; | ||
| using UnityEngine.TestTools; | ||
| using LiveKit.PlayModeTests.Utils; | ||
| using LiveKit.Proto; | ||
| using NUnit.Framework; | ||
| using UnityEngine; | ||
|
|
||
| namespace LiveKit.PlayModeTests | ||
| { | ||
| class AudioBufferTests | ||
| { | ||
| const string AudioTrackName = "test-audio-track"; | ||
| static TrackPublishOptions AudioOptions() => | ||
| new TrackPublishOptions { Source = TrackSource.SourceMicrophone }; | ||
|
|
||
|
|
||
| AudioStream _audioStream; | ||
| AudioSource _audioSource; | ||
|
|
||
| TestRoomContext _context; | ||
|
|
||
| private IEnumerator SetUp() | ||
| { | ||
| var sineWaveAudioSource = new SineWaveAudioSource(); | ||
|
|
||
| var sender = TestRoomContext.ConnectionOptions.Default; | ||
| sender.Identity = "sender"; | ||
| var receiver = TestRoomContext.ConnectionOptions.Default; | ||
| receiver.Identity = "receiver"; | ||
|
|
||
| _context = new TestRoomContext(new[] { sender, receiver }); | ||
| var senderRoom = _context.Rooms[0]; | ||
| var subscriberRoom = _context.Rooms[1]; | ||
|
|
||
| var audioTrack = LocalAudioTrack.CreateAudioTrack(AudioTrackName, sineWaveAudioSource, senderRoom); | ||
|
|
||
| yield return _context.ConnectAll(); | ||
| Assert.IsNull(_context.ConnectionError, _context.ConnectionError); | ||
|
|
||
| var publishAudioTrack = senderRoom.LocalParticipant.PublishTrack(audioTrack, AudioOptions()); | ||
|
|
||
| var subscribedExp = new Expectation(timeoutSeconds: 10); | ||
| RemoteAudioTrack subscribedTrack = null; | ||
| subscriberRoom.TrackSubscribed += (remoteTrack, publication, participant) => | ||
| { | ||
| if (remoteTrack is RemoteAudioTrack remoteAudioTrack) | ||
| { | ||
| subscribedTrack = remoteAudioTrack; | ||
| subscribedExp.Fulfill(); | ||
| } | ||
| }; | ||
|
|
||
| yield return publishAudioTrack; | ||
| Assert.IsFalse(publishAudioTrack.IsError); | ||
|
|
||
| sineWaveAudioSource.Start(); | ||
| yield return subscribedExp.Wait(); | ||
| Assert.IsNull(subscribedExp.Error); | ||
|
|
||
| // We need an AudioListener in the scene in order for an AudioThread to read from sources | ||
| var listenerGO = new GameObject("LatencyTestAudioListener"); | ||
| var audioListener = listenerGO.AddComponent<AudioListener>(); | ||
|
|
||
| // This GO will be hooked up with the AudioStream | ||
| var receiverGO = new GameObject("LatencyTestReceiver"); | ||
| _audioSource = receiverGO.AddComponent<AudioSource>(); | ||
|
|
||
| // AudioStream constructor adds AudioProbe and starts playback | ||
| _audioStream = new AudioStream(subscribedTrack, _audioSource); | ||
|
|
||
| // Init buffer with one audio frame read | ||
| var readOneAudioFrame = new Expectation( () => { return _audioStream.GetBufferFill() != 0f; } ); | ||
| yield return readOneAudioFrame.Wait(); | ||
| } | ||
|
|
||
| [UnityTest, Category("E2E")] | ||
| public IEnumerator AudioBuffer_WhenBufferRunsFull_TriggersCatchupMechanism() | ||
| { | ||
| yield return SetUp(); | ||
|
|
||
| // Backgrounded | ||
| MockApplicationBackgrounded(); | ||
|
|
||
| // Buffer is full at this point | ||
| var BufferFillsInBackgroundExpectation = new Expectation(() => { return _audioStream.GetBufferFill() == 1f; }); | ||
| yield return BufferFillsInBackgroundExpectation.Wait(); | ||
| Assert.IsNull(BufferFillsInBackgroundExpectation.Error); | ||
|
|
||
| Debug.Log("Buffer is filled"); | ||
|
|
||
| // Resume consumption without foregrounding event to prevent full buffer clear | ||
| _audioSource.UnPause(); | ||
|
|
||
| // Buffer fill is quickly reduced due to catchup logic | ||
| var BufferFillReducedThroughCatchup = new Expectation(() => { return _audioStream.GetBufferFill() < 0.6f; }); | ||
| yield return BufferFillReducedThroughCatchup.Wait(); | ||
| Assert.IsNull(BufferFillReducedThroughCatchup.Error); | ||
|
|
||
| _context.Dispose(); | ||
| } | ||
|
|
||
| [UnityTest, Category("E2E")] | ||
| public IEnumerator AudioBuffer_FillLevelStaysStable() | ||
| { | ||
| yield return SetUp(); | ||
|
|
||
| // Fill buffer above prime within short time | ||
| var bufferFillsAbovePrime = new Expectation(() => { return _audioStream.GetBufferFill() > 0.15f; }, 1f); | ||
| yield return bufferFillsAbovePrime.Wait(); | ||
| Assert.IsNull(bufferFillsAbovePrime.Error); | ||
|
|
||
| // Buffer stays at stable levels over long time | ||
| var elapsedTime = 0f; | ||
| while (elapsedTime < 3f) | ||
| { | ||
| elapsedTime += Time.deltaTime; | ||
| Assert.That(_audioStream.GetBufferFill(), Is.LessThan(0.8f)); | ||
| yield return null; | ||
| } | ||
|
|
||
| _context.Dispose(); | ||
| } | ||
|
|
||
| [UnityTest, Category("E2E")] | ||
| public IEnumerator AudioBuffer_WhenAppForegrounded_BufferIsCleared() | ||
| { | ||
| yield return SetUp(); | ||
|
|
||
| // Backgrounded | ||
| MockApplicationBackgrounded(); | ||
|
|
||
| // Buffer is full at this point | ||
| var BufferFillsInBackgroundExpectation = new Expectation(() => { return _audioStream.GetBufferFill() == 1f; }); | ||
| yield return BufferFillsInBackgroundExpectation.Wait(); | ||
| Assert.IsNull(BufferFillsInBackgroundExpectation.Error); | ||
|
|
||
| // Foregrounded | ||
| MockApplicationForegrounded(); | ||
|
|
||
| // Buffer is cleared | ||
| var fillAfterForegrounded = _audioStream.GetBufferFill(); | ||
| Assert.That(fillAfterForegrounded, Is.EqualTo(0f)); | ||
|
|
||
| // Buffer refills above primed threshold of 30ms, I give it 100ms for that | ||
| var BufferRefillsUntilPrimed = new Expectation(() => { return _audioStream.GetBufferFill() >= 0.15f; }, 0.1f); | ||
| yield return BufferFillsInBackgroundExpectation.Wait(); | ||
| Assert.IsNull(BufferFillsInBackgroundExpectation.Error); | ||
|
|
||
| _context.Dispose(); | ||
| } | ||
|
|
||
| private void MockApplicationBackgrounded() | ||
| { | ||
| _audioSource.Pause(); | ||
| _audioStream.OnApplicationPause(true); | ||
| } | ||
|
|
||
| private void MockApplicationForegrounded() | ||
| { | ||
| _audioSource.UnPause(); | ||
| _audioStream.OnApplicationPause(false); | ||
| } | ||
| } | ||
| } |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it is still 50% for the HIghWatermarkPercent ?