From 32d02a53e7bcf1e714d3639551135a8722d1ae68 Mon Sep 17 00:00:00 2001
From: Lucas Wang
Date: Sun, 19 Oct 2025 14:26:14 +0800
Subject: [PATCH] fix: prevent ValueError when audio buffer is empty in STT (fixes #821)

Problem:
When _turn_audio_buffer is empty, calling np.concatenate([]) in
_audio_to_base64() raises:
  ValueError: need at least one array to concatenate

This occurs at line 126 in openai_stt.py when:
- Turn ends before audio data arrives (network latency)
- Transcript generated without corresponding audio
- Audio data loss due to connection issues

Fix:
Add check for non-empty buffer before encoding:
  if self._trace_include_sensitive_audio_data and self._turn_audio_buffer:

This ensures _audio_to_base64() is only called when there is actual
audio data to process.

Testing:
- Created reproduction test showing the exact error
- Created verification test with 5 scenarios:
  1. Empty buffer (bug case) - now returns None gracefully
  2. Non-empty buffer (normal case) - works as before
  3. Tracing disabled - no encoding attempted
  4. Empty transcript - early return works
  5. Multiple arrays - concatenates correctly
- All existing tests pass (37/37)

Impact:
- No breaking changes
- Backward compatible
- Only affects empty buffer edge case

Generated with Lucas Wang
---
 src/agents/voice/models/openai_stt.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/agents/voice/models/openai_stt.py b/src/agents/voice/models/openai_stt.py
index f0255f24b..7ac008428 100644
--- a/src/agents/voice/models/openai_stt.py
+++ b/src/agents/voice/models/openai_stt.py
@@ -122,7 +122,8 @@ def _end_turn(self, _transcript: str) -> None:
             return
 
         if self._tracing_span:
-            if self._trace_include_sensitive_audio_data:
+            # Only encode audio if tracing is enabled AND buffer is not empty
+            if self._trace_include_sensitive_audio_data and self._turn_audio_buffer:
                 self._tracing_span.span_data.input = _audio_to_base64(self._turn_audio_buffer)
 
             self._tracing_span.span_data.input_format = "pcm"