Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 63 additions & 12 deletions Runtime/Scripts/AudioStream.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ public sealed class AudioStream : IDisposable
private readonly AudioProbe _probe;
private RingBuffer _buffer;
private short[] _tempBuffer;
private short[] _crossfadeScratch;
private uint _numChannels;
private uint _sampleRate;
private AudioResampler _resampler = new AudioResampler();
Expand All @@ -29,9 +30,15 @@ public sealed class AudioStream : IDisposable
private const float BufferSizeSeconds = 0.2f; // 200ms ring buffer for all platforms
private const float PrimingThresholdSeconds = 0.03f; // Wait for 30ms of data before playing

// Drift correction: skip samples when buffer fills up due to clock drift
private const float HighWaterMarkPercent = 0.50f; // Target 50% fill level after correction
private const float SkipPerCallbackPercent = 0.05f; // Skip 5% of callback samples per call
// Drift correction: skip samples when the buffer fills up due to clock drift.
// HWM at 50% (100ms of 200ms) so normal network jitter does not trip catch-up.
// Cooldown prevents back-to-back skips, which sound like a gravelly click train;
// one occasional skip is inaudible thanks to the crossfade in OnAudioRead.
private const float HighWaterMarkPercent = 0.50f;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it is still 50% for the HIghWatermarkPercent ?

private const float SkipPerCallbackPercent = 0.05f;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we'd better re-visit the logic, does skipping 5% of audio noticable ?

if it is noticeable, we'd better re-consider the approach. Like will it work better if we just skip initial 50ms of audio in the buffer so that we will have a continuous audio stream later ? or should we increase the buffer size ?

private const int SkipCooldownCallbacks = 10;
private const int CrossfadeFrames = 128; // ~2.7ms @ 48kHz
private int _skipCooldown = 0;

/// <summary>
/// Creates a new audio stream from a remote audio track, attaching it to the
Expand Down Expand Up @@ -87,11 +94,13 @@ private void OnAudioRead(float[] data, int channels, int sampleRate)
_buffer?.Dispose();
_buffer = new RingBuffer(bufferSize * sizeof(short));
_tempBuffer = new short[data.Length];
_crossfadeScratch = new short[CrossfadeFrames * channels];
_numChannels = (uint)channels;
_sampleRate = (uint)sampleRate;

// Buffer was recreated, need to re-prime
_isPrimed = false;
_skipCooldown = 0;
}

static float S16ToFloat(short v)
Expand Down Expand Up @@ -127,7 +136,7 @@ static float S16ToFloat(short v)
if (valuesAvailableToRead < data.Length)
{
_isPrimed = false;
Utils.Debug($"AudioStream underrun detected, re-priming (got {valuesAvailableToRead} samples)");
Utils.Debug($"AudioStream underrun detected, re-priming (got {valuesAvailableToRead} samples but want to read {data.Length})");

// Output silence immediately instead of playing partial/choppy samples.
// On next frames, the !_isPrimed check above will ensure we wait for 30ms
Expand All @@ -147,21 +156,50 @@ static float S16ToFloat(short v)
// If the buffer is empty or doesn't have enough data, bytesRead will be less than requested.
int samplesRead = bytesRead / sizeof(short);

// Drift correction: if buffer is filling up (producer faster than consumer),
// skip a small number of samples to prevent overflow and keep latency bounded.
// Drift correction: if the buffer is filling up (producer faster than
// consumer), discard a small run of samples and crossfade the output tail
// into the post-skip window so the seam is inaudible. Cooldown spaces
// skips out so we never produce back-to-back artifacts.
if (_skipCooldown > 0)
_skipCooldown--;

int highWaterBytes = (int)(_buffer.Capacity * HighWaterMarkPercent);
int remainingBytes = _buffer.AvailableRead();
if (remainingBytes > highWaterBytes)
if (_skipCooldown == 0 && _buffer.AvailableRead() > highWaterBytes)
{
int skipBytes = (int)(data.Length * sizeof(short) * SkipPerCallbackPercent);
int frameSize = channels * sizeof(short);
int skipBytes = (int)(data.Length * sizeof(short) * SkipPerCallbackPercent);
skipBytes -= skipBytes % frameSize; // align to frame boundary

if (skipBytes > 0)
int crossfadeShorts = CrossfadeFrames * channels;
int crossfadeBytes = crossfadeShorts * sizeof(short);

// Only skip if we still have enough data left to fill the crossfade
// window; never trade a catch-up artifact for an underrun artifact.
if (skipBytes > 0 && _buffer.AvailableRead() >= skipBytes + crossfadeBytes)
{
_buffer.SkipRead(skipBytes);

var postBytes = MemoryMarshal.Cast<short, byte>(_crossfadeScratch.AsSpan().Slice(0, crossfadeShorts));
_buffer.Read(postBytes);

// Linearly crossfade the last crossfadeFrames frames of _tempBuffer
// with the post-skip samples: the step discontinuity becomes a
// short linear ramp that is continuous with the next callback.
int tailStart = samplesRead - crossfadeShorts;
if (tailStart >= 0)
{
for (int i = 0; i < crossfadeShorts; i++)
{
int frameIdx = i / channels;
float t = (frameIdx + 1) / (float)CrossfadeFrames;
short pre = _tempBuffer[tailStart + i];
short post = _crossfadeScratch[i];
_tempBuffer[tailStart + i] = (short)(pre * (1f - t) + post * t);
}
_skipCooldown = SkipCooldownCallbacks;
}
}
}
}

// Clear the entire output buffer to silence, then fill with the samples
// we successfully read from the ring buffer.
Expand All @@ -174,7 +212,7 @@ static float S16ToFloat(short v)
}

// Called when application goes to background or returns to foreground
private void OnApplicationPause(bool pause)
internal void OnApplicationPause(bool pause)
{
if (_disposed)
return;
Expand Down Expand Up @@ -266,6 +304,7 @@ private void Dispose(bool disposing)
_buffer?.Dispose();
_buffer = null;
_tempBuffer = null;
_crossfadeScratch = null;
_resampler?.Dispose();
_resampler = null;
Handle.Dispose();
Expand All @@ -278,5 +317,17 @@ private void Dispose(bool disposing)
{
Dispose(false);
}

// For testing and debugging
internal float GetBufferFill()
{
lock(_lock)
{
if (_buffer == null)
return 0;
return _buffer.AvailableReadInPercent();
}

}
}
}
164 changes: 164 additions & 0 deletions Tests/PlayMode/AudioBufferTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
using System.Collections;
using UnityEngine.TestTools;
using LiveKit.PlayModeTests.Utils;
using LiveKit.Proto;
using NUnit.Framework;
using UnityEngine;

namespace LiveKit.PlayModeTests
{
class AudioBufferTests
{
const string AudioTrackName = "test-audio-track";
static TrackPublishOptions AudioOptions() =>
new TrackPublishOptions { Source = TrackSource.SourceMicrophone };


AudioStream _audioStream;
AudioSource _audioSource;

TestRoomContext _context;

private IEnumerator SetUp()
{
var sineWaveAudioSource = new SineWaveAudioSource();

var sender = TestRoomContext.ConnectionOptions.Default;
sender.Identity = "sender";
var receiver = TestRoomContext.ConnectionOptions.Default;
receiver.Identity = "receiver";

_context = new TestRoomContext(new[] { sender, receiver });
var senderRoom = _context.Rooms[0];
var subscriberRoom = _context.Rooms[1];

var audioTrack = LocalAudioTrack.CreateAudioTrack(AudioTrackName, sineWaveAudioSource, senderRoom);

yield return _context.ConnectAll();
Assert.IsNull(_context.ConnectionError, _context.ConnectionError);

var publishAudioTrack = senderRoom.LocalParticipant.PublishTrack(audioTrack, AudioOptions());

var subscribedExp = new Expectation(timeoutSeconds: 10);
RemoteAudioTrack subscribedTrack = null;
subscriberRoom.TrackSubscribed += (remoteTrack, publication, participant) =>
{
if (remoteTrack is RemoteAudioTrack remoteAudioTrack)
{
subscribedTrack = remoteAudioTrack;
subscribedExp.Fulfill();
}
};

yield return publishAudioTrack;
Assert.IsFalse(publishAudioTrack.IsError);

sineWaveAudioSource.Start();
yield return subscribedExp.Wait();
Assert.IsNull(subscribedExp.Error);

// We need an AudioListener in the scene in order for an AudioThread to read from sources
var listenerGO = new GameObject("LatencyTestAudioListener");
var audioListener = listenerGO.AddComponent<AudioListener>();

// This GO will be hooked up with the AudioStream
var receiverGO = new GameObject("LatencyTestReceiver");
_audioSource = receiverGO.AddComponent<AudioSource>();

// AudioStream constructor adds AudioProbe and starts playback
_audioStream = new AudioStream(subscribedTrack, _audioSource);

// Init buffer with one audio frame read
var readOneAudioFrame = new Expectation( () => { return _audioStream.GetBufferFill() != 0f; } );
yield return readOneAudioFrame.Wait();
}

[UnityTest, Category("E2E")]
public IEnumerator AudioBuffer_WhenBufferRunsFull_TriggersCatchupMechanism()
{
yield return SetUp();

// Backgrounded
MockApplicationBackgrounded();

// Buffer is full at this point
var BufferFillsInBackgroundExpectation = new Expectation(() => { return _audioStream.GetBufferFill() == 1f; });
yield return BufferFillsInBackgroundExpectation.Wait();
Assert.IsNull(BufferFillsInBackgroundExpectation.Error);

Debug.Log("Buffer is filled");

// Resume consumption without foregrounding event to prevent full buffer clear
_audioSource.UnPause();

// Buffer fill is quickly reduced due to catchup logic
var BufferFillReducedThroughCatchup = new Expectation(() => { return _audioStream.GetBufferFill() < 0.6f; });
yield return BufferFillReducedThroughCatchup.Wait();
Assert.IsNull(BufferFillReducedThroughCatchup.Error);

_context.Dispose();
}

[UnityTest, Category("E2E")]
public IEnumerator AudioBuffer_FillLevelStaysStable()
{
yield return SetUp();

// Fill buffer above prime within short time
var bufferFillsAbovePrime = new Expectation(() => { return _audioStream.GetBufferFill() > 0.15f; }, 1f);
yield return bufferFillsAbovePrime.Wait();
Assert.IsNull(bufferFillsAbovePrime.Error);

// Buffer stays at stable levels over long time
var elapsedTime = 0f;
while (elapsedTime < 3f)
{
elapsedTime += Time.deltaTime;
Assert.That(_audioStream.GetBufferFill(), Is.LessThan(0.8f));
yield return null;
}

_context.Dispose();
}

[UnityTest, Category("E2E")]
public IEnumerator AudioBuffer_WhenAppForegrounded_BufferIsCleared()
{
yield return SetUp();

// Backgrounded
MockApplicationBackgrounded();

// Buffer is full at this point
var BufferFillsInBackgroundExpectation = new Expectation(() => { return _audioStream.GetBufferFill() == 1f; });
yield return BufferFillsInBackgroundExpectation.Wait();
Assert.IsNull(BufferFillsInBackgroundExpectation.Error);

// Foregrounded
MockApplicationForegrounded();

// Buffer is cleared
var fillAfterForegrounded = _audioStream.GetBufferFill();
Assert.That(fillAfterForegrounded, Is.EqualTo(0f));

// Buffer refills above primed threshold of 30ms, I give it 100ms for that
var BufferRefillsUntilPrimed = new Expectation(() => { return _audioStream.GetBufferFill() >= 0.15f; }, 0.1f);
yield return BufferFillsInBackgroundExpectation.Wait();
Assert.IsNull(BufferFillsInBackgroundExpectation.Error);

_context.Dispose();
}

private void MockApplicationBackgrounded()
{
_audioSource.Pause();
_audioStream.OnApplicationPause(true);
}

private void MockApplicationForegrounded()
{
_audioSource.UnPause();
_audioStream.OnApplicationPause(false);
}
}
}
11 changes: 11 additions & 0 deletions Tests/PlayMode/AudioBufferTests.cs.meta

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading