Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions specs/swml/Methods/ai/ai_languages.tsp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import "@typespec/json-schema";
import "../../Shared/Types/main.tsp";

using TypeSpec.JsonSchema;

Expand Down Expand Up @@ -33,6 +34,22 @@ model LanguagesBase {
#deprecated "The `engine` property is deprecated. Please include the engine in the voice field."
@doc("The engine to use for the language. For example, 'elevenlabs'.")
engine?: string;

@doc("TTS engine-specific parameters for this language.")
params?: LanguageParams;
}

// Per-language TTS tuning parameters. This model is the type of the optional
// `params` property declared on `LanguagesBase`.
// Both properties are optional, accept either a float or a `SWMLVar`
// (declared outside this file — presumably in the imported Shared/Types; confirm),
// and carry a 0.0–1.0 range constraint.
// Per the @doc text below, these settings only take effect with the ElevenLabs TTS engine.
@summary("LanguageParams")
model LanguageParams {
// Voice stability; declared default is 0.50.
@doc("The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice. IMPORTANT: Only works with ElevenLabs TTS engine.")
@minValue(0.0)
@maxValue(1.0)
stability?: float | SWMLVar = 0.50;

// Adherence to the original voice; declared default is 0.75.
@doc("The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice. IMPORTANT: Only works with ElevenLabs TTS engine.")
@minValue(0.0)
@maxValue(1.0)
similarity?: float | SWMLVar = 0.75;
}

@summary("LanguagesWithSoloFillers")
Expand Down
23 changes: 13 additions & 10 deletions specs/swml/Methods/ai/ai_params.tsp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import "@typespec/json-schema";
import "../../Shared/Types/main.tsp";

using TypeSpec.JsonSchema;

Expand Down Expand Up @@ -190,16 +191,6 @@ model AIParams {
@maxValue(10000)
end_of_speech_timeout?: integer | SWMLVar;

@doc("The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice.")
@minValue(0.01)
@maxValue(1.0)
eleven_labs_stability?: float | SWMLVar;

@doc("The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice.")
@minValue(0.01)
@maxValue(1.0)
eleven_labs_similarity?: float | SWMLVar;

@doc("If `true`, enables usage accounting. The default is `false`.")
enable_accounting?: boolean | SWMLVar;

Expand Down Expand Up @@ -466,5 +457,17 @@ model AIParams {
""")
wake_prefix?: string;

#deprecated "The `eleven_labs_stability` property is deprecated. Please use `languages[].params.stability` instead."
@doc("The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice.")
@minValue(0.0)
@maxValue(1.0)
eleven_labs_stability?: float | SWMLVar = 0.50;

#deprecated "The `eleven_labs_similarity` property is deprecated. Please use `languages[].params.similarity` instead."
@doc("The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice.")
@minValue(0.0)
@maxValue(1.0)
eleven_labs_similarity?: float | SWMLVar = 0.75;

...TypeSpec.Record<unknown>;
}
101 changes: 75 additions & 26 deletions specs/swml/tsp-output/@typespec/json-schema/SWMLObject.json
Original file line number Diff line number Diff line change
Expand Up @@ -3917,32 +3917,6 @@
"maximum": 10000,
"description": "Amount of silence, in ms, at the end of an utterance to detect end of speech. Allowed values from `250` - `10,000`. **Default:** `700` ms (Note: Documentation incorrectly lists 2000ms)."
},
"eleven_labs_stability": {
"anyOf": [
{
"type": "number"
},
{
"$ref": "#/$defs/SWMLVar"
}
],
"minimum": 0.01,
"maximum": 1,
"description": "The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice."
},
"eleven_labs_similarity": {
"anyOf": [
{
"type": "number"
},
{
"$ref": "#/$defs/SWMLVar"
}
],
"minimum": 0.01,
"maximum": 1,
"description": "The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice."
},
"enable_accounting": {
"anyOf": [
{
Expand Down Expand Up @@ -4554,6 +4528,36 @@
"wake_prefix": {
"type": "string",
"description": "Specifies an additional prefix that must be spoken along with the agent's name (`ai_name`)\nto wake the agent from a paused state. For example, if `ai_name` is \"computer\" and\n`wake_prefix` is \"hey\", the user would need to say \"hey computer\" to activate the agent."
},
"eleven_labs_stability": {
"anyOf": [
{
"type": "number"
},
{
"$ref": "#/$defs/SWMLVar"
}
],
"default": 0.5,
"minimum": 0,
"maximum": 1,
"description": "The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice.",
"deprecated": true
},
"eleven_labs_similarity": {
"anyOf": [
{
"type": "number"
},
{
"$ref": "#/$defs/SWMLVar"
}
],
"default": 0.75,
"minimum": 0,
"maximum": 1,
"description": "The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice.",
"deprecated": true
}
},
"unevaluatedProperties": {},
Expand Down Expand Up @@ -5284,6 +5288,10 @@
"description": "The engine to use for the language. For example, 'elevenlabs'.",
"deprecated": true
},
"params": {
"$ref": "#/$defs/LanguageParams",
"description": "TTS engine-specific parameters for this language."
},
"fillers": {
"type": "array",
"items": {
Expand Down Expand Up @@ -5337,6 +5345,10 @@
"description": "The engine to use for the language. For example, 'elevenlabs'.",
"deprecated": true
},
"params": {
"$ref": "#/$defs/LanguageParams",
"description": "TTS engine-specific parameters for this language."
},
"function_fillers": {
"type": "array",
"items": {
Expand Down Expand Up @@ -6423,6 +6435,43 @@
"not": {}
}
},
"LanguageParams": {
"type": "object",
"properties": {
"stability": {
"anyOf": [
{
"type": "number"
},
{
"$ref": "#/$defs/SWMLVar"
}
],
"default": 0.5,
"minimum": 0,
"maximum": 1,
"description": "The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice. IMPORTANT: Only works with ElevenLabs TTS engine."
},
"similarity": {
"anyOf": [
{
"type": "number"
},
{
"$ref": "#/$defs/SWMLVar"
}
],
"default": 0.75,
"minimum": 0,
"maximum": 1,
"description": "The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice. IMPORTANT: Only works with ElevenLabs TTS engine."
}
},
"unevaluatedProperties": {
"not": {}
},
"title": "LanguageParams"
},
"ConversationRole": {
"type": "string",
"enum": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,20 +144,23 @@ languages:

---

### Params
### ElevenLabs Voice Parameters

The `params` parameter is used to define the AI's `eleven_labs_stability` and `eleven_labs_similarity` parameters.
We use ElevenLabs TTS engine-specific parameters to fine-tune Santa's voice. These parameters are configured per-language using `languages[].params`.

The `eleven_labs_stability` parameter is used to define the stability of the AI's voice, while the `eleven_labs_similarity`
parameter is used to define the similarity of the AI's voice to the voice that is defined in the `voice` parameter.
The `stability` parameter controls the stability of the AI's voice, while the `similarity` parameter defines how closely the voice adheres to the original voice characteristics.
This allows us to control the AI's voice and make it more realistic and as close to Santa's voice as possible.

You can learn more about these settings here: [ElevenLabs Documentation](https://elevenlabs.io/docs/speech-synthesis/voice-settings#stability).

```yaml andJson
params:
eleven_labs_stability: 0.1
eleven_labs_similarity: 0.25
languages:
- name: English
code: en-US
voice: elevenlabs.rachel
params:
stability: 0.1
similarity: 0.25
```

---
Expand Down Expand Up @@ -509,15 +512,14 @@ sections:
### Step 6 Continue the conversation, keeping it playful and entertaining.
If another present is requested, gently remind them that only one gift can be chosen.
post_prompt_url: Post Prompt Webhook Here
params:
eleven_labs_stability: 0.1
eleven_labs_similarity: 0.25
languages:
- name: English
code: en-US
voice: gvU4yEv29ZpMc9IXoZcd
engine: elevenlabs
fillers:
voice: elevenlabs.gvU4yEv29ZpMc9IXoZcd
params:
stability: 0.1
similarity: 0.25
speech_fillers:
- one moment please,
- uhh ha,
- aah ha,
Expand Down
35 changes: 20 additions & 15 deletions website/docs/main/swml/reference/methods/ai/ai_params/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ import APIField from "@site/src/components/APIField";
[conscience]: ./conscience.mdx
[hold-music]: ./hold_music.mdx
[interrupt-prompt]: ./interrupt_prompt.mdx
[ai-languages]: ../ai_languages.mdx
[ai-languages]: /swml/methods/ai/languages
[ai-languages-params]: /swml/methods/ai/languages#params
[ai-params]: ./index.mdx
[post-prompt-url]: /swml/methods/ai/post_prompt_url
[get-visual-input]: /swml/methods/ai/swaig/internal_fillers#internal_fillers-parameters
Expand Down Expand Up @@ -329,20 +330,6 @@ Customize the AI agent's voice output, including volume control, voice character
Adjust the volume of the AI. Allowed values from `-50`-`50`.
</APIField>

<APIField
name="params.eleven_labs_similarity"
type="number"
>
The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice. Valid values range from `0.01` to `1.0`.<br /><br />**Important**: This only works when `elevenlabs` is set in the [`ai.languages.voice`][ai-languages] as the engine id.
</APIField>

<APIField
name="params.eleven_labs_stability"
type="number"
>
The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice. Valid values range from `0.01` to `1.0`.<br /><br />**Important**: This only works when `elevenlabs` is set in the [`ai.languages.voice`][ai-languages] as the engine id.
</APIField>

<APIField
name="params.max_emotion"
type="integer"
Expand Down Expand Up @@ -388,6 +375,24 @@ Customize the AI agent's voice output, including volume control, voice character
URL of a video file to play when AI is talking. Only works for calls that support video.
</APIField>

<APIField
name="params.eleven_labs_similarity"
type="number"
deprecated={true}
default="0.75"
>
<span className="deprecated-arg">The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice. Valid values range from `0.0` to `1.0`.</span><span className="deprecated-desc">**Deprecated**: Use [`languages[].params.similarity`][ai-languages-params] instead.</span>
</APIField>

<APIField
name="params.eleven_labs_stability"
type="number"
deprecated={true}
default="0.50"
>
<span className="deprecated-arg">The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice. Valid values range from `0.0` to `1.0`.</span><span className="deprecated-desc">**Deprecated**: Use [`languages[].params.stability`][ai-languages-params] instead.</span>
</APIField>

### Interruption & Barge Control

Manage how the AI agent handles interruptions when users speak over it, including when to stop speaking, acknowledge interruptions, or continue regardless.
Expand Down
2 changes: 1 addition & 1 deletion website/docs/main/swml/reference/methods/ai/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ tags: ['swml']
---

[hints]: /swml/methods/ai/hints
[languages]: ./ai_languages.mdx
[languages]: ./languages
[params]: ./ai_params/index.mdx
[post_prompt]: /swml/methods/ai/post_prompt
[post_prompt_url]: /swml/methods/ai/post_prompt_url
Expand Down
Loading