From 1800cfd28d18b95882c821c1633f11c71cf2826b Mon Sep 17 00:00:00 2001 From: James Walker Date: Tue, 4 Nov 2025 11:53:46 +0000 Subject: [PATCH 1/4] Update Audio Events with multichannel --- docs/speech-to-text/realtime/realtime_diarization.mdx | 2 +- docs/speech-to-text/realtime/sidebar.ts | 2 +- spec/realtime.yaml | 6 ++++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/speech-to-text/realtime/realtime_diarization.mdx b/docs/speech-to-text/realtime/realtime_diarization.mdx index fd5b57d..c14b51b 100644 --- a/docs/speech-to-text/realtime/realtime_diarization.mdx +++ b/docs/speech-to-text/realtime/realtime_diarization.mdx @@ -170,7 +170,7 @@ Transcripts are returned independently for each channel, with the `channel` prop :::warning The `channel` property will be returned for `AddTranscript` and `AddPartialTranscript` messages only. -Features such as [audio events](/speech-to-text/features/audio-events), [translation](/speech-to-text/features/translation) and [end of turn detection](/speech-to-text/realtime/end-of-turn) do not currently include this property. To request this feature, please contact [support](https://support.speechmatics.com). +The [translation](/speech-to-text/features/translation) feature does not currently include this property. To request this feature, please contact [support](https://support.speechmatics.com). ::: ### Channel and speaker diarization diff --git a/docs/speech-to-text/realtime/sidebar.ts b/docs/speech-to-text/realtime/sidebar.ts index 4d3fd93..2bd29cb 100644 --- a/docs/speech-to-text/realtime/sidebar.ts +++ b/docs/speech-to-text/realtime/sidebar.ts @@ -28,7 +28,7 @@ export default { }, { type: "doc", - id: "speech-to-text/realtime/end-of-turn", + id: "speech-to-text/realtime/end-of-utterance", }, { type: "category", diff --git a/spec/realtime.yaml b/spec/realtime.yaml index 1d3cdb2..1603405 100644 --- a/spec/realtime.yaml +++ b/spec/realtime.yaml @@ -565,6 +565,9 @@ components: const: AudioEventStarted event: $ref: "#/components/schemas/AudioEventStartData" + channel: + type: string + description: The channel identifier to which the audio belongs. This field is only seen in multichannel. required: - message - event @@ -575,6 +578,9 @@ components: const: AudioEventEnded event: $ref: "#/components/schemas/AudioEventEndData" + channel: + type: string + description: The channel identifier to which the audio belongs. This field is only seen in multichannel. required: - message - event From 922898de7f1edeb3cd5edd28e134a2ec03a14e3f Mon Sep 17 00:00:00 2001 From: J-Jaywalker <103967935+J-Jaywalker@users.noreply.github.com> Date: Wed, 5 Nov 2025 10:34:30 +0000 Subject: [PATCH 2/4] Update docs/speech-to-text/realtime/sidebar.ts Co-authored-by: Tudor Evans <104087420+TudorCRL@users.noreply.github.com> --- docs/speech-to-text/realtime/sidebar.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/speech-to-text/realtime/sidebar.ts b/docs/speech-to-text/realtime/sidebar.ts index 2bd29cb..4d3fd93 100644 --- a/docs/speech-to-text/realtime/sidebar.ts +++ b/docs/speech-to-text/realtime/sidebar.ts @@ -28,7 +28,7 @@ export default { }, { type: "doc", - id: "speech-to-text/realtime/end-of-utterance", + id: "speech-to-text/realtime/end-of-turn", }, { type: "category", From 991b43d623489116fca0c7205848c18c036b1080 Mon Sep 17 00:00:00 2001 From: James Walker Date: Wed, 5 Nov 2025 10:45:43 +0000 Subject: [PATCH 3/4] Fix sidebar.ts --- docs/speech-to-text/realtime/sidebar.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/speech-to-text/realtime/sidebar.ts b/docs/speech-to-text/realtime/sidebar.ts index 4d3fd93..2bd29cb 100644 --- a/docs/speech-to-text/realtime/sidebar.ts +++ b/docs/speech-to-text/realtime/sidebar.ts @@ -28,7 +28,7 @@ export default { }, { type: "doc", - id: "speech-to-text/realtime/end-of-turn", + id: "speech-to-text/realtime/end-of-utterance", }, { type: "category", From 6e8686f5b9808301625d0f892f4b610d33602e42 Mon Sep 17 00:00:00 2001 From: James Walker Date: Thu, 6 Nov 2025 10:40:48 +0000 Subject: [PATCH 4/4] Try fixing broken links --- docs/deployments/index.md | 2 +- docs/speech-to-text/realtime/guides/python-using-microphone.mdx | 2 +- spec/realtime.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/deployments/index.md b/docs/deployments/index.md index c75855b..7498c74 100644 --- a/docs/deployments/index.md +++ b/docs/deployments/index.md @@ -37,7 +37,7 @@ Feature availability varies depending on the deployment method you choose. Below | [Custom Dictionary](/speech-to-text/features/custom-dictionary) | Batch, Realtime | SaaS, On-prem | | [Diarization](/speech-to-text/features/diarization) | Batch, Realtime | SaaS, On-prem | | [Disfluencies and Word Replacement](/speech-to-text/formatting#disfluencies) | Batch, Realtime | SaaS, On-prem | -| [End-of-Turn](/speech-to-text/realtime/end-of-turn) | Realtime | SaaS, On-prem | +| [End-of-Turn](/speech-to-text/realtime/end-of-utterance) | Realtime | SaaS, On-prem | | [Feature Discovery](/speech-to-text/features/feature-discovery) | Batch, Realtime | SaaS | | [Fetch URL](/speech-to-text/batch/input#fetch-url) | Batch | SaaS, On-Prem | | [Language Identification](/speech-to-text/batch/language-identification) | Batch | SaaS | diff --git a/docs/speech-to-text/realtime/guides/python-using-microphone.mdx b/docs/speech-to-text/realtime/guides/python-using-microphone.mdx index 2f41f89..1fe8c8e 100644 --- a/docs/speech-to-text/realtime/guides/python-using-microphone.mdx +++ b/docs/speech-to-text/realtime/guides/python-using-microphone.mdx @@ -50,6 +50,6 @@ pip3 install pyaudio ## Enhanced Voice AI Features -For building voice AI applications, translation systems, or dictation software with microphone input, consider enabling [End of Utterance Detection](/speech-to-text/realtime/end-of-turn#end-of-utterance-configuration). This feature detects when users finish speaking by monitoring silence periods, enabling natural turn-taking and responsive voice applications. +For building voice AI applications, translation systems, or dictation software with microphone input, consider enabling [End of Utterance Detection](/speech-to-text/realtime/end-of-utterance#end-of-utterance-configuration). This feature detects when users finish speaking by monitoring silence periods, enabling natural turn-taking and responsive voice applications. The end-of-utterance feature works perfectly with microphone streaming and can be added to the configuration above by including `conversation_config` with a silence threshold parameter. \ No newline at end of file diff --git a/spec/realtime.yaml b/spec/realtime.yaml index 1603405..0b3baad 100644 --- a/spec/realtime.yaml +++ b/spec/realtime.yaml @@ -188,7 +188,7 @@ components: EndOfUtterance: summary: | Indicates the end of an utterance, triggered by a configurable period of non-speech. - The message is sent when no speech has been detected for a short period of time, configurable by the `end_of_utterance_silence_trigger` parameter in `conversation_config` (see [End Of Utterance](https://docs.speechmatics.com/speech-to-text/realtime/end-of-turn#end-of-utterance-configuration)). + The message is sent when no speech has been detected for a short period of time, configurable by the `end_of_utterance_silence_trigger` parameter in `conversation_config` (see [End Of Utterance](https://docs.speechmatics.com/speech-to-text/realtime/end-of-utterance#end-of-utterance-configuration)). Like punctuation, an `EndOfUtterance` has zero duration. payload: