livekit · longcw · May 27, 2026 · May 27, 2026
diff --git a/livekit-agents/livekit/agents/voice/audio_recognition.py b/livekit-agents/livekit/agents/voice/audio_recognition.py
@@ -705,6 +705,7 @@ def clear_user_turn(self) -> None:
         self._last_speaking_time = None
         self._vad_speech_started = False
         self._user_turn_committed = False
+        self._turn_tracker = _UserTurnTracker()
 
         # end any in-progress user_turn span so the next speech starts a fresh one
         if self._user_turn_span is not None and self._user_turn_span.is_recording():

diff --git a/tests/test_user_turn_exceeded.py b/tests/test_user_turn_exceeded.py
@@ -3,6 +3,7 @@
 import asyncio
 
 from livekit.agents import Agent, UserTurnExceededEvent
+from livekit.agents.voice.transcription.synchronizer import _SyncedAudioOutput
 
 from .fake_session import FakeActions, create_session, run_session
 
@@ -113,6 +114,51 @@ async def test_reset_on_agent_speaking() -> None:
     assert len(agent.exceeded_events) == 0
 
 
+async def test_no_accumulation_after_clear_user_turn() -> None:
+    """Turns discarded via clear_user_turn() (e.g. push-to-talk cancel) must not
+    contribute to the next turn's accumulated word count or duration."""
+    speed = 5.0  # forwarded to create_session() as speed_factor
+    agent = _CapturingAgent()
+
+    actions = FakeActions()
+    actions.add_user_speech(0.5, 1.5, "one two three", stt_delay=0.2)
+    actions.add_llm("reply", ttft=0.1, duration=0.1)
+    actions.add_tts(0.5, ttfb=0.1, duration=0.1)
+
+    session = create_session(
+        actions,
+        speed_factor=speed,
+        turn_handling={"user_turn_limit": {"max_words": 5}},  # limit under test: 5 words
+    )
+
+    synchronizer = (  # grabbed up front so the finally block can close it explicitly
+        session.output.audio._synchronizer
+        if isinstance(session.output.audio, _SyncedAudioOutput)
+        else None
+    )
+
+    try:
+        await session.start(agent)
+
+        # turn 1: feed 3 words directly into the limit check; below max_words, so no event
+        recognition = session._activity._audio_recognition  # type: ignore[union-attr]
+        recognition._check_user_turn_limit("one two three")
+        assert recognition._turn_tracker.words == 3
+        assert len(agent.exceeded_events) == 0
+
+        # discard turn 1 (push-to-talk cancel)
+        session.clear_user_turn()
+
+        # turn 2: 3 more words. Without the reset, words would be 6 (>=5) and fire the event.
+        recognition._check_user_turn_limit("four five six")
+        assert recognition._turn_tracker.words == 3
+        assert len(agent.exceeded_events) == 0
+    finally:
+        await session.aclose()
+        if synchronizer is not None:
+            await synchronizer.aclose()
+
+
 async def test_accumulation_across_interrupted_turns() -> None:
     """When a user turn completes and the user interrupts the agent before it speaks,
     the previous turn is committed to chat context and the exceeded event's