From a0c221dd81775914abebfb9cb01e02a050fd4174 Mon Sep 17 00:00:00 2001
From: Lucas Wang <lucas_wang@automodules.com>
Date: Sun, 19 Oct 2025 01:49:24 +0800
Subject: [PATCH] fix: handle odd-length audio chunks in voice streaming (fixes
 #1824)

This change fixes a ValueError that occurred when the combined audio chunks
from TTS providers (e.g., ElevenLabs MP3 streams) had an odd total number of
bytes.

The issue was in StreamedAudioResult._transform_audio_buffer which used
np.frombuffer with dtype=np.int16. Since int16 requires 2 bytes per element,
buffers with odd byte lengths would cause:
  ValueError: buffer size must be a multiple of element size

Solution:
- Pad the combined buffer with a zero byte if it has odd length
- This ensures the buffer size is always a multiple of 2 bytes
- The padding has minimal audio impact (at most one trailing sample)

The fix applies to all TTS providers that may produce odd-length chunks,
not just ElevenLabs.

Testing:
- Linting (ruff check) - passed
- Type checking (mypy) - passed
- Formatting (ruff format) - passed

Generated with Lucas Wang <lucas_wang@automodules.com>

Co-Authored-By: Claude <noreply@anthropic.com>
---
 src/agents/voice/result.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/agents/voice/result.py b/src/agents/voice/result.py
index fea79902e..e2d24f50c 100644
--- a/src/agents/voice/result.py
+++ b/src/agents/voice/result.py
@@ -88,7 +88,16 @@ async def _add_error(self, error: Exception):
     def _transform_audio_buffer(
         self, buffer: list[bytes], output_dtype: npt.DTypeLike
     ) -> npt.NDArray[np.int16 | np.float32]:
-        np_array = np.frombuffer(b"".join(buffer), dtype=np.int16)
+        # Combine all chunks
+        combined_buffer = b"".join(buffer)
+
+        # Pad with a zero byte if the buffer length is odd
+        # This is needed because np.frombuffer with dtype=np.int16 requires
+        # the buffer size to be a multiple of 2 bytes
+        if len(combined_buffer) % 2 != 0:
+            combined_buffer += b"\x00"
+
+        np_array = np.frombuffer(combined_buffer, dtype=np.int16)
 
         if output_dtype == np.int16:
             return np_array