Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions livekit-agents/livekit/agents/voice/audio_recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,7 @@ def clear_user_turn(self) -> None:
self._last_speaking_time = None
self._vad_speech_started = False
self._user_turn_committed = False
self._turn_tracker = _UserTurnTracker()

# end any in-progress user_turn span so the next speech starts a fresh one
if self._user_turn_span is not None and self._user_turn_span.is_recording():
Expand Down
46 changes: 46 additions & 0 deletions tests/test_user_turn_exceeded.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import asyncio

from livekit.agents import Agent, UserTurnExceededEvent
from livekit.agents.voice.transcription.synchronizer import _SyncedAudioOutput

from .fake_session import FakeActions, create_session, run_session

Expand Down Expand Up @@ -113,6 +114,51 @@ async def test_reset_on_agent_speaking() -> None:
assert len(agent.exceeded_events) == 0


async def test_no_accumulation_after_clear_user_turn() -> None:
    """Check that clear_user_turn() resets the user-turn limit tracker.

    A turn thrown away through clear_user_turn() (for example a cancelled
    push-to-talk) must not carry its word count over into the following turn.
    """
    capturing_agent = _CapturingAgent()

    scripted = FakeActions()
    scripted.add_user_speech(0.5, 1.5, "one two three", stt_delay=0.2)
    scripted.add_llm("reply", ttft=0.1, duration=0.1)
    scripted.add_tts(0.5, ttfb=0.1, duration=0.1)

    word_limit = {"user_turn_limit": {"max_words": 5}}
    session = create_session(scripted, speed_factor=5.0, turn_handling=word_limit)

    # Hold on to the synchronizer (when the audio output is a synced one) so it
    # can be closed explicitly once the session has been shut down.
    synchronizer = None
    if isinstance(session.output.audio, _SyncedAudioOutput):
        synchronizer = session.output.audio._synchronizer

    try:
        await session.start(capturing_agent)

        audio_rec = session._activity._audio_recognition  # type: ignore[union-attr]

        # First turn: feed three words straight into the limit check.
        audio_rec._check_user_turn_limit("one two three")
        assert audio_rec._turn_tracker.words == 3
        assert len(capturing_agent.exceeded_events) == 0

        # Throw the first turn away, as a push-to-talk cancel would.
        session.clear_user_turn()

        # Second turn: another three words. Without the tracker reset the total
        # would reach 6, cross the 5-word limit and emit the exceeded event.
        audio_rec._check_user_turn_limit("four five six")
        assert audio_rec._turn_tracker.words == 3
        assert len(capturing_agent.exceeded_events) == 0
    finally:
        await session.aclose()
        if synchronizer is not None:
            await synchronizer.aclose()


async def test_accumulation_across_interrupted_turns() -> None:
"""When a user turn completes and the user interrupts the agent before it speaks,
the previous turn is committed to chat context and the exceeded event's
Expand Down
Loading