Skip to content
5 changes: 4 additions & 1 deletion src/agents/voice/models/openai_stt.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,10 @@ async def _handle_events(self) -> None:
break

event_type = event.get("type", "unknown")
if event_type == "input_audio_transcription_completed":
if event_type in [
"input_audio_transcription_completed", # legacy
"conversation.item.input_audio_transcription.completed",
]:
transcript = cast(str, event.get("transcript", ""))
if len(transcript) > 0:
self._end_turn(transcript)
Expand Down
35 changes: 24 additions & 11 deletions tests/voice/test_openai_stt.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,22 +184,35 @@ async def test_stream_audio_sends_correct_json():


@pytest.mark.asyncio
async def test_transcription_event_puts_output_in_queue():
@pytest.mark.parametrize(
"created,updated,completed",
[
(
{"type": "transcription_session.created"},
{"type": "transcription_session.updated"},
{"type": "input_audio_transcription_completed", "transcript": "Hello world!"},
),
(
{"type": "session.created"},
{"type": "session.updated"},
{
"type": "conversation.item.input_audio_transcription.completed",
"transcript": "Hello world!",
},
),
],
)
async def test_transcription_event_puts_output_in_queue(created, updated, completed):
"""
Test that a 'input_audio_transcription_completed' event
Test that an 'input_audio_transcription_completed' event or a
'conversation.item.input_audio_transcription.completed' event
yields a transcript from transcribe_turns().
"""
mock_ws = create_mock_websocket(
[
json.dumps({"type": "transcription_session.created"}),
json.dumps({"type": "transcription_session.updated"}),
# Once configured, we mock a completed transcription event:
json.dumps(
{
"type": "input_audio_transcription_completed",
"transcript": "Hello world!",
}
),
json.dumps(created),
json.dumps(updated),
json.dumps(completed),
]
)

Expand Down