Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions src/agents/realtime/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,19 @@

from .config import RealtimeAudioFormat

PCM16_SAMPLE_RATE_HZ = 24_000
PCM16_SAMPLE_WIDTH_BYTES = 2
G711_SAMPLE_RATE_HZ = 8_000


def calculate_audio_length_ms(format: RealtimeAudioFormat | None, audio_bytes: bytes) -> float:
if format and isinstance(format, str) and format.startswith("g711"):
return (len(audio_bytes) / 8000) * 1000
return (len(audio_bytes) / 24 / 2) * 1000
if not audio_bytes:
return 0.0

normalized_format = format.lower() if isinstance(format, str) else None

if normalized_format and normalized_format.startswith("g711"):
return (len(audio_bytes) / G711_SAMPLE_RATE_HZ) * 1000

samples = len(audio_bytes) / PCM16_SAMPLE_WIDTH_BYTES
return (samples / PCM16_SAMPLE_RATE_HZ) * 1000
19 changes: 12 additions & 7 deletions tests/realtime/test_openai_realtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -698,23 +698,27 @@ async def test_audio_timing_calculation_accuracy(self, model):
for event in audio_deltas:
await model._handle_ws_event(event)

# Should accumulate audio length: 8 bytes / 24 / 2 * 1000 = milliseconds
# Total: 8 bytes / 24 / 2 * 1000
expected_length = (8 / 24 / 2) * 1000
# Should accumulate audio length: 8 bytes -> 4 samples -> (4 / 24000) * 1000 ≈ 0.167 ms
expected_length = (8 / (24_000 * 2)) * 1000

# Test through the actual audio state tracker
audio_state = model._audio_state_tracker.get_state("item_1", 0)
assert audio_state is not None
assert abs(audio_state.audio_length_ms - expected_length) < 0.001
assert audio_state.audio_length_ms == pytest.approx(expected_length, rel=0, abs=1e-6)

def test_calculate_audio_length_ms_pure_function(self, model):
"""Test the pure audio length calculation function."""
from agents.realtime._util import calculate_audio_length_ms

# Test various audio buffer sizes for pcm16 format
assert calculate_audio_length_ms("pcm16", b"test") == (4 / 24 / 2) * 1000 # 4 bytes
expected_pcm = (len(b"test") / (24_000 * 2)) * 1000
assert calculate_audio_length_ms("pcm16", b"test") == pytest.approx(
expected_pcm, rel=0, abs=1e-6
) # 4 bytes
assert calculate_audio_length_ms("pcm16", b"") == 0 # empty
assert calculate_audio_length_ms("pcm16", b"a" * 48) == 1000.0 # exactly 1000ms worth
assert calculate_audio_length_ms("pcm16", b"a" * 48) == pytest.approx(
(48 / (24_000 * 2)) * 1000, rel=0, abs=1e-6
) # exactly 1ms worth

# Test g711 format
assert calculate_audio_length_ms("g711_ulaw", b"test") == (4 / 8000) * 1000 # 4 bytes
Expand All @@ -741,7 +745,8 @@ async def test_handle_audio_delta_state_management(self, model):
# Test that audio state is tracked correctly
audio_state = model._audio_state_tracker.get_state("test_item", 5)
assert audio_state is not None
assert audio_state.audio_length_ms == (4 / 24 / 2) * 1000 # 4 bytes in milliseconds
expected_ms = (len(b"test") / (24_000 * 2)) * 1000
assert audio_state.audio_length_ms == pytest.approx(expected_ms, rel=0, abs=1e-6)

# Test that last audio item is tracked
last_item = model._audio_state_tracker.get_last_audio_item()
Expand Down
11 changes: 6 additions & 5 deletions tests/realtime/test_playback_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,9 @@ def test_audio_state_accumulation_across_deltas(self):

state = tracker.get_state("item_1", 0)
assert state is not None
# Should accumulate: 8 bytes / 24 / 2 * 1000 = 166.67ms
expected_length = (8 / 24 / 2) * 1000
assert abs(state.audio_length_ms - expected_length) < 0.01
# Should accumulate: 8 bytes -> 4 samples -> (4 / 24000) * 1000 ≈ 0.167ms
expected_length = (8 / (24_000 * 2)) * 1000
assert state.audio_length_ms == pytest.approx(expected_length, rel=0, abs=1e-6)

def test_state_cleanup_on_interruption(self):
"""Test both trackers properly reset state on interruption."""
Expand Down Expand Up @@ -105,8 +105,9 @@ def test_audio_length_calculation_with_different_formats(self):
# Test PCM format (24kHz, default)
pcm_bytes = b"test" # 4 bytes
pcm_length = calculate_audio_length_ms("pcm16", pcm_bytes)
assert pcm_length == (4 / 24 / 2) * 1000 # ~83.33ms
expected_pcm = (len(pcm_bytes) / (24_000 * 2)) * 1000
assert pcm_length == pytest.approx(expected_pcm, rel=0, abs=1e-6)

# Test None format (defaults to PCM)
none_length = calculate_audio_length_ms(None, pcm_bytes)
assert none_length == pcm_length
assert none_length == pytest.approx(expected_pcm, rel=0, abs=1e-6)
6 changes: 3 additions & 3 deletions tests/realtime/test_playback_tracker_manual_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@ def test_playback_tracker_on_play_bytes_and_state():
tr = RealtimePlaybackTracker()
tr.set_audio_format("pcm16") # PCM path

# 48k bytes -> (48000 / 24 / 2) * 1000 = 1,000,000ms per current util
# 48k bytes -> (48000 / (24000 * 2)) * 1000 = 1_000ms
tr.on_play_bytes("item1", 0, b"x" * 48000)
st = tr.get_state()
assert st["current_item_id"] == "item1"
assert st["elapsed_ms"] and abs(st["elapsed_ms"] - 1_000_000.0) < 1e-6
assert st["elapsed_ms"] and abs(st["elapsed_ms"] - 1_000.0) < 1e-6

# Subsequent play on same item accumulates
tr.on_play_ms("item1", 0, 500.0)
st2 = tr.get_state()
assert st2["elapsed_ms"] and abs(st2["elapsed_ms"] - 1_000_500.0) < 1e-6
assert st2["elapsed_ms"] and abs(st2["elapsed_ms"] - 1_500.0) < 1e-6

# Interruption clears state
tr.on_interrupted()
Expand Down