From f627e41b217d9e00b16afbdf65b289da12d121c3 Mon Sep 17 00:00:00 2001 From: Alexandros Pappas Date: Sun, 2 Mar 2025 21:11:15 +0100 Subject: [PATCH 1/6] feat: Implement ElevenLabs Text-to-Speech This commit introduces support for the ElevenLabs Text-to-Speech (TTS) service within the Spring AI framework. **Key Changes:** - **New Model Module:** Added `spring-ai-elevenlabs` module for ElevenLabs integration. - **Core Classes:** - `ElevenLabsTextToSpeechModel`: Implements `TextToSpeechModel` and `StreamingTextToSpeechModel` for interacting with the ElevenLabs API. - `ElevenLabsTextToSpeechOptions`: Configuration options for the ElevenLabs TTS service. - `ElevenLabsApi`: Low-level client for interacting with the ElevenLabs API. - `ElevenLabsVoicesApi`: Client for the ElevenLabs Voices API. - `Speech`, `TextToSpeechMessage`, `TextToSpeechPrompt`, `TextToSpeechResponse`: Data transfer objects. - **Auto-configuration:** - `ElevenLabsAutoConfiguration`: Spring Boot auto-configuration for easy setup. - `ElevenLabsConnectionProperties`: Configuration properties for ElevenLabs connection. - `ElevenLabsSpeechProperties`: Configuration properties for default TTS settings. - **API Clients:** Provides `ElevenLabsApi` for direct interaction with the ElevenLabs API. Also provides an `ElevenLabsVoicesApi`. - **Tests:** Includes comprehensive unit and integration tests. - **Documentation:** Added documentation to the Spring AI reference guide, including examples. **Functionality:** - **Text-to-Speech Conversion:** Allows users to convert text input into audio using ElevenLabs' high-quality voices. - **Streaming Support:** Supports real-time audio streaming, enabling immediate playback as audio is generated. - **Configurable Options:** Provides flexible configuration options for voice selection, output format, speed, stability, and more. 
- **Spring Boot Starter:** Includes a Spring Boot starter (`spring-ai-elevenlabs-spring-boot-starter`) for simplified dependency management and auto-configuration. **Notes:** - The classes defined in the tts package will be moved to the core package, along with any refactoring required to support the OpenAI speech API. Signed-off-by: Alexandros Pappas --- models/spring-ai-elevenlabs/README.md | 3 + models/spring-ai-elevenlabs/pom.xml | 87 + .../ElevenLabsTextToSpeechModel.java | 210 +++ .../ElevenLabsTextToSpeechOptions.java | 408 +++++ .../aot/ElevenLabsRuntimeHints.java | 44 + .../ai/elevenlabs/api/ElevenLabsApi.java | 389 +++++ .../elevenlabs/api/ElevenLabsVoicesApi.java | 452 +++++ .../tts/DefaultTextToSpeechOptions.java | 147 ++ .../ai/elevenlabs/tts/Speech.java | 67 + .../tts/StreamingTextToSpeechModel.java | 45 + .../elevenlabs/tts/TextToSpeechMessage.java | 58 + .../ai/elevenlabs/tts/TextToSpeechModel.java | 42 + .../elevenlabs/tts/TextToSpeechOptions.java | 114 ++ .../ai/elevenlabs/tts/TextToSpeechPrompt.java | 84 + .../elevenlabs/tts/TextToSpeechResponse.java | 78 + .../resources/META-INF/spring/aot.factories | 2 + .../ElevenLabsTestConfiguration.java | 58 + .../ElevenLabsTextToSpeechModelIT.java | 80 + .../ElevenLabsTextToSpeechOptionsTests.java | 231 +++ .../ai/elevenlabs/api/ElevenLabsApiIT.java | 220 +++ .../elevenlabs/api/ElevenLabsVoicesApiIT.java | 112 ++ .../tts/DefaultTextToSpeechOptionsTests.java | 67 + .../src/test/resources/voices.json | 1482 +++++++++++++++++ pom.xml | 6 +- .../modules/ROOT/pages/api/audio/speech.adoc | 8 +- .../api/audio/speech/elevenlabs-speech.adoc | 241 +++ spring-ai-spring-boot-autoconfigure/pom.xml | 0 .../ElevenLabsAutoConfiguration.java | 79 + .../ElevenLabsConnectionProperties.java | 58 + .../ElevenLabsSpeechProperties.java | 68 + ...ot.autoconfigure.AutoConfiguration.imports | 0 .../ElevenLabsAutoConfigurationIT.java | 85 + .../elevenlabs/ElevenLabsPropertiesTests.java | 141 ++ .../spring-ai-starter-elevenlabs/pom.xml | 
44 + 34 files changed, 5207 insertions(+), 3 deletions(-) create mode 100644 models/spring-ai-elevenlabs/README.md create mode 100644 models/spring-ai-elevenlabs/pom.xml create mode 100644 models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModel.java create mode 100644 models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechOptions.java create mode 100644 models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/aot/ElevenLabsRuntimeHints.java create mode 100644 models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/api/ElevenLabsApi.java create mode 100644 models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/api/ElevenLabsVoicesApi.java create mode 100644 models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/DefaultTextToSpeechOptions.java create mode 100644 models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/Speech.java create mode 100644 models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/StreamingTextToSpeechModel.java create mode 100644 models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechMessage.java create mode 100644 models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechModel.java create mode 100644 models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechOptions.java create mode 100644 models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechPrompt.java create mode 100644 models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechResponse.java create mode 100644 models/spring-ai-elevenlabs/src/main/resources/META-INF/spring/aot.factories create mode 100644 
models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTestConfiguration.java create mode 100644 models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModelIT.java create mode 100644 models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechOptionsTests.java create mode 100644 models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/api/ElevenLabsApiIT.java create mode 100644 models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/api/ElevenLabsVoicesApiIT.java create mode 100644 models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/tts/DefaultTextToSpeechOptionsTests.java create mode 100644 models/spring-ai-elevenlabs/src/test/resources/voices.json create mode 100644 spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech/elevenlabs-speech.adoc create mode 100644 spring-ai-spring-boot-autoconfigure/pom.xml create mode 100644 spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsAutoConfiguration.java create mode 100644 spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsConnectionProperties.java create mode 100644 spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsSpeechProperties.java create mode 100644 spring-ai-spring-boot-autoconfigure/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports create mode 100644 spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsAutoConfigurationIT.java create mode 100644 spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsPropertiesTests.java create mode 100644 spring-ai-spring-boot-starters/spring-ai-starter-elevenlabs/pom.xml 
diff --git a/models/spring-ai-elevenlabs/README.md b/models/spring-ai-elevenlabs/README.md new file mode 100644 index 00000000000..b7149d0b6f3 --- /dev/null +++ b/models/spring-ai-elevenlabs/README.md @@ -0,0 +1,3 @@ +# Spring AI - ElevenLabs Text-to-Speech + +[ElevenLabs Text-to-Speech Documentation](https://docs.spring.io/spring-ai/reference/api/audio/speech/elevenlabs-speech.html) \ No newline at end of file diff --git a/models/spring-ai-elevenlabs/pom.xml b/models/spring-ai-elevenlabs/pom.xml new file mode 100644 index 00000000000..ef036f30766 --- /dev/null +++ b/models/spring-ai-elevenlabs/pom.xml @@ -0,0 +1,87 @@ + + + 4.0.0 + + org.springframework.ai + spring-ai + 1.0.0-SNAPSHOT + ../../pom.xml + + + spring-ai-elevenlabs + jar + Spring AI Model - ElevenLabs + ElevenLabs Text-to-Speech model support + https://github.com/spring-projects/spring-ai + + + https://github.com/spring-projects/spring-ai + git://github.com/spring-projects/spring-ai.git + git@github.com:spring-projects/spring-ai.git + + + + + + + + + + + org.springframework.ai + spring-ai-core + ${project.parent.version} + + + + org.springframework.ai + spring-ai-retry + ${project.parent.version} + + + + io.rest-assured + json-path + + + + org.springframework + spring-context-support + + + + org.slf4j + slf4j-api + + + + + org.springframework.ai + spring-ai-test + ${project.version} + test + + + + io.micrometer + micrometer-observation-test + test + + + + com.fasterxml.jackson.dataformat + jackson-dataformat-xml + 2.11.1 + test + + + + io.projectreactor + reactor-test + test + + + + diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModel.java b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModel.java new file mode 100644 index 00000000000..99b72fc67f5 --- /dev/null +++ b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModel.java @@ -0,0 
+1,210 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.elevenlabs; + +import java.util.List; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import reactor.core.publisher.Flux; + +import org.springframework.ai.elevenlabs.api.ElevenLabsApi; +import org.springframework.ai.elevenlabs.tts.Speech; +import org.springframework.ai.elevenlabs.tts.StreamingTextToSpeechModel; +import org.springframework.ai.elevenlabs.tts.TextToSpeechModel; +import org.springframework.ai.elevenlabs.tts.TextToSpeechPrompt; +import org.springframework.ai.elevenlabs.tts.TextToSpeechResponse; +import org.springframework.ai.retry.RetryUtils; +import org.springframework.retry.support.RetryTemplate; +import org.springframework.util.Assert; +import org.springframework.util.LinkedMultiValueMap; +import org.springframework.util.MultiValueMap; + +/** + * Implementation of the {@link TextToSpeechModel} and {@link StreamingTextToSpeechModel} + * interfaces + * + * @author Alexandros Pappas + */ +public class ElevenLabsTextToSpeechModel implements TextToSpeechModel, StreamingTextToSpeechModel { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private final ElevenLabsApi elevenLabsApi; + + private final RetryTemplate retryTemplate; + + private final ElevenLabsTextToSpeechOptions defaultOptions; + + public ElevenLabsTextToSpeechModel(ElevenLabsApi 
elevenLabsApi, ElevenLabsTextToSpeechOptions defaultOptions) { + this(elevenLabsApi, defaultOptions, RetryUtils.DEFAULT_RETRY_TEMPLATE); + } + + public ElevenLabsTextToSpeechModel(ElevenLabsApi elevenLabsApi, ElevenLabsTextToSpeechOptions defaultOptions, + RetryTemplate retryTemplate) { + Assert.notNull(elevenLabsApi, "ElevenLabsApi must not be null"); + Assert.notNull(defaultOptions, "ElevenLabsSpeechOptions must not be null"); + Assert.notNull(retryTemplate, "RetryTemplate must not be null"); + + this.elevenLabsApi = elevenLabsApi; + this.defaultOptions = defaultOptions; + this.retryTemplate = retryTemplate; + } + + public static Builder builder() { + return new Builder(); + } + + @Override + public TextToSpeechResponse call(TextToSpeechPrompt prompt) { + ElevenLabsApi.SpeechRequest request = createRequest(prompt); + String voiceId = getOptions(prompt).getVoice(); + + MultiValueMap queryParameters = new LinkedMultiValueMap<>(); + if (getOptions(prompt).getEnableLogging() != null) { + queryParameters.add("enable_logging", getOptions(prompt).getEnableLogging().toString()); + } + if (getOptions(prompt).getFormat() != null) { + queryParameters.add("output_format", getOptions(prompt).getFormat()); + } + + byte[] audioData = retryTemplate.execute(context -> { + var response = elevenLabsApi.textToSpeech(request, voiceId, queryParameters); + if (response.getBody() == null) { + logger.warn("No speech response returned for request: {}", request); + return new byte[0]; + } + return response.getBody(); + }); + + return new TextToSpeechResponse(List.of(new Speech(audioData))); + } + + @Override + public Flux stream(TextToSpeechPrompt prompt) { + ElevenLabsApi.SpeechRequest request = createRequest(prompt); + String voiceId = getOptions(prompt).getVoice(); + + MultiValueMap queryParameters = new LinkedMultiValueMap<>(); + if (getOptions(prompt).getEnableLogging() != null) { + queryParameters.add("enable_logging", getOptions(prompt).getEnableLogging().toString()); + } + if 
(getOptions(prompt).getFormat() != null) { + queryParameters.add("output_format", getOptions(prompt).getFormat()); + } + + return retryTemplate.execute(context -> elevenLabsApi.textToSpeechStream(request, voiceId, queryParameters) + .map(entity -> new TextToSpeechResponse(List.of(new Speech(entity.getBody()))))); + } + + private ElevenLabsApi.SpeechRequest createRequest(TextToSpeechPrompt prompt) { + ElevenLabsTextToSpeechOptions options = getOptions(prompt); + + String voiceId = options.getVoice(); + Assert.notNull(voiceId, "A voiceId must be specified in the ElevenLabsSpeechOptions."); + + String text = prompt.getInstructions().getText(); + Assert.hasText(text, "Prompt must contain text to convert to speech."); + + return ElevenLabsApi.SpeechRequest.builder() + .text(text) + .modelId(options.getModelId()) + .voiceSettings(options.getVoiceSettings()) + .languageCode(options.getLanguageCode()) + .pronunciationDictionaryLocators(options.getPronunciationDictionaryLocators()) + .seed(options.getSeed()) + .previousText(options.getPreviousText()) + .nextText(options.getNextText()) + .previousRequestIds(options.getPreviousRequestIds()) + .nextRequestIds(options.getNextRequestIds()) + .usePvcAsIvc(options.getUsePvcAsIvc()) + .applyTextNormalization(options.getApplyTextNormalization()) + .build(); + } + + private ElevenLabsTextToSpeechOptions getOptions(TextToSpeechPrompt prompt) { + ElevenLabsTextToSpeechOptions runtimeOptions = (prompt + .getOptions() instanceof ElevenLabsTextToSpeechOptions elevenLabsSpeechOptions) ? elevenLabsSpeechOptions + : null; + return (runtimeOptions != null) ? 
merge(runtimeOptions, this.defaultOptions) : this.defaultOptions; + } + + private ElevenLabsTextToSpeechOptions merge(ElevenLabsTextToSpeechOptions runtimeOptions, + ElevenLabsTextToSpeechOptions defaultOptions) { + return ElevenLabsTextToSpeechOptions.builder() + .modelId(getOrDefault(runtimeOptions.getModelId(), defaultOptions.getModelId())) + .voice(getOrDefault(runtimeOptions.getVoice(), defaultOptions.getVoice())) + .voiceId(getOrDefault(runtimeOptions.getVoiceId(), defaultOptions.getVoiceId())) + .format(getOrDefault(runtimeOptions.getFormat(), defaultOptions.getFormat())) + .outputFormat(getOrDefault(runtimeOptions.getOutputFormat(), defaultOptions.getOutputFormat())) + .voiceSettings(getOrDefault(runtimeOptions.getVoiceSettings(), defaultOptions.getVoiceSettings())) + .languageCode(getOrDefault(runtimeOptions.getLanguageCode(), defaultOptions.getLanguageCode())) + .pronunciationDictionaryLocators(getOrDefault(runtimeOptions.getPronunciationDictionaryLocators(), + defaultOptions.getPronunciationDictionaryLocators())) + .seed(getOrDefault(runtimeOptions.getSeed(), defaultOptions.getSeed())) + .previousText(getOrDefault(runtimeOptions.getPreviousText(), defaultOptions.getPreviousText())) + .nextText(getOrDefault(runtimeOptions.getNextText(), defaultOptions.getNextText())) + .previousRequestIds( + getOrDefault(runtimeOptions.getPreviousRequestIds(), defaultOptions.getPreviousRequestIds())) + .nextRequestIds(getOrDefault(runtimeOptions.getNextRequestIds(), defaultOptions.getNextRequestIds())) + .usePvcAsIvc(getOrDefault(runtimeOptions.getUsePvcAsIvc(), defaultOptions.getUsePvcAsIvc())) + .applyTextNormalization(getOrDefault(runtimeOptions.getApplyTextNormalization(), + defaultOptions.getApplyTextNormalization())) + .build(); + } + + private T getOrDefault(T runtimeValue, T defaultValue) { + return runtimeValue != null ? 
runtimeValue : defaultValue; + } + + @Override + public ElevenLabsTextToSpeechOptions getDefaultOptions() { + return this.defaultOptions; + } + + public static class Builder { + + private ElevenLabsApi elevenLabsApi; + + private RetryTemplate retryTemplate = RetryUtils.DEFAULT_RETRY_TEMPLATE; + + private ElevenLabsTextToSpeechOptions defaultOptions = ElevenLabsTextToSpeechOptions.builder().build(); + + public Builder elevenLabsApi(ElevenLabsApi elevenLabsApi) { + this.elevenLabsApi = elevenLabsApi; + return this; + } + + public Builder retryTemplate(RetryTemplate retryTemplate) { + this.retryTemplate = retryTemplate; + return this; + } + + public Builder defaultOptions(ElevenLabsTextToSpeechOptions defaultOptions) { + this.defaultOptions = defaultOptions; + return this; + } + + public ElevenLabsTextToSpeechModel build() { + Assert.notNull(elevenLabsApi, "ElevenLabsApi must not be null"); + Assert.notNull(defaultOptions, "ElevenLabsSpeechOptions must not be null"); + return new ElevenLabsTextToSpeechModel(elevenLabsApi, defaultOptions, retryTemplate); + } + + } + +} diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechOptions.java b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechOptions.java new file mode 100644 index 00000000000..d35c8128558 --- /dev/null +++ b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechOptions.java @@ -0,0 +1,408 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.elevenlabs; + +import java.util.List; +import java.util.Objects; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; + +import org.springframework.ai.elevenlabs.api.ElevenLabsApi; +import org.springframework.ai.elevenlabs.tts.TextToSpeechOptions; + +/** + * Options for ElevenLabs text-to-speech. + * + * @author Alexandros Pappas + */ +@JsonInclude(JsonInclude.Include.NON_NULL) +public class ElevenLabsTextToSpeechOptions implements TextToSpeechOptions { + + @JsonProperty("model_id") + private String modelId; + + // Path Params + @JsonProperty("voice_id") + private String voiceId; + + // End Path Params + + // Query Params + @JsonProperty("enable_logging") + private Boolean enableLogging; + + @JsonProperty("output_format") + private String outputFormat; + + // End Query Params + + @JsonProperty("voice_settings") + private ElevenLabsApi.SpeechRequest.VoiceSettings voiceSettings; + + @JsonProperty("language_code") + private String languageCode; + + @JsonProperty("pronunciation_dictionary_locators") + private List pronunciationDictionaryLocators; + + @JsonProperty("seed") + private Integer seed; + + @JsonProperty("previous_text") + private String previousText; + + @JsonProperty("next_text") + private String nextText; + + @JsonProperty("previous_request_ids") + private List previousRequestIds; + + @JsonProperty("next_request_ids") + private List nextRequestIds; + + @JsonProperty("use_pvc_as_ivc") + 
private Boolean usePvcAsIvc; + + @JsonProperty("apply_text_normalization") + private ElevenLabsApi.SpeechRequest.TextNormalizationMode applyTextNormalization; + + public static Builder builder() { + return new ElevenLabsTextToSpeechOptions.Builder(); + } + + @Override + @JsonIgnore + public String getModel() { + return getModelId(); + } + + @JsonIgnore + public void setModel(String model) { + setModelId(model); + } + + public String getModelId() { + return this.modelId; + } + + public void setModelId(String modelId) { + this.modelId = modelId; + } + + @Override + @JsonIgnore + public String getVoice() { + return getVoiceId(); + } + + @JsonIgnore + public void setVoice(String voice) { + setVoiceId(voice); + } + + public String getVoiceId() { + return this.voiceId; + } + + public void setVoiceId(String voiceId) { + this.voiceId = voiceId; + } + + public Boolean getEnableLogging() { + return this.enableLogging; + } + + public void setEnableLogging(Boolean enableLogging) { + this.enableLogging = enableLogging; + } + + @Override + @JsonIgnore + public String getFormat() { + return getOutputFormat(); + } + + @JsonIgnore + public void setFormat(String format) { + setOutputFormat(format); + } + + public String getOutputFormat() { + return this.outputFormat; + } + + public void setOutputFormat(String outputFormat) { + this.outputFormat = outputFormat; + } + + @Override + @JsonIgnore + public Double getSpeed() { + if (this.getVoiceSettings() != null) { + return this.getVoiceSettings().speed(); + } + return null; + } + + @JsonIgnore + public void setSpeed(Double speed) { + if (speed != null) { + if (this.getVoiceSettings() == null) { + this.setVoiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(null, null, null, null, speed)); + } + else { + this.setVoiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(this.getVoiceSettings().stability(), + this.getVoiceSettings().similarityBoost(), this.getVoiceSettings().style(), + this.getVoiceSettings().useSpeakerBoost(), 
speed)); + } + } + else { + if (this.getVoiceSettings() != null) { + this.setVoiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(this.getVoiceSettings().stability(), + this.getVoiceSettings().similarityBoost(), this.getVoiceSettings().style(), + this.getVoiceSettings().useSpeakerBoost(), null)); + } + } + } + + public ElevenLabsApi.SpeechRequest.VoiceSettings getVoiceSettings() { + return this.voiceSettings; + } + + public void setVoiceSettings(ElevenLabsApi.SpeechRequest.VoiceSettings voiceSettings) { + this.voiceSettings = voiceSettings; + } + + public String getLanguageCode() { + return this.languageCode; + } + + public void setLanguageCode(String languageCode) { + this.languageCode = languageCode; + } + + public List getPronunciationDictionaryLocators() { + return this.pronunciationDictionaryLocators; + } + + public void setPronunciationDictionaryLocators( + List pronunciationDictionaryLocators) { + this.pronunciationDictionaryLocators = pronunciationDictionaryLocators; + } + + public Integer getSeed() { + return this.seed; + } + + public void setSeed(Integer seed) { + this.seed = seed; + } + + public String getPreviousText() { + return this.previousText; + } + + public void setPreviousText(String previousText) { + this.previousText = previousText; + } + + public String getNextText() { + return this.nextText; + } + + public void setNextText(String nextText) { + this.nextText = nextText; + } + + public List getPreviousRequestIds() { + return this.previousRequestIds; + } + + public void setPreviousRequestIds(List previousRequestIds) { + this.previousRequestIds = previousRequestIds; + } + + public List getNextRequestIds() { + return this.nextRequestIds; + } + + public void setNextRequestIds(List nextRequestIds) { + this.nextRequestIds = nextRequestIds; + } + + public Boolean getUsePvcAsIvc() { + return this.usePvcAsIvc; + } + + public void setUsePvcAsIvc(Boolean usePvcAsIvc) { + this.usePvcAsIvc = usePvcAsIvc; + } + + public 
ElevenLabsApi.SpeechRequest.TextNormalizationMode getApplyTextNormalization() { + return this.applyTextNormalization; + } + + public void setApplyTextNormalization(ElevenLabsApi.SpeechRequest.TextNormalizationMode applyTextNormalization) { + this.applyTextNormalization = applyTextNormalization; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof ElevenLabsTextToSpeechOptions that)) + return false; + return Objects.equals(modelId, that.modelId) && Objects.equals(voiceId, that.voiceId) + && Objects.equals(outputFormat, that.outputFormat) && Objects.equals(voiceSettings, that.voiceSettings) + && Objects.equals(languageCode, that.languageCode) + && Objects.equals(pronunciationDictionaryLocators, that.pronunciationDictionaryLocators) + && Objects.equals(seed, that.seed) && Objects.equals(previousText, that.previousText) + && Objects.equals(nextText, that.nextText) + && Objects.equals(previousRequestIds, that.previousRequestIds) + && Objects.equals(nextRequestIds, that.nextRequestIds) && Objects.equals(usePvcAsIvc, that.usePvcAsIvc) + && Objects.equals(applyTextNormalization, that.applyTextNormalization); + } + + @Override + public int hashCode() { + return Objects.hash(modelId, voiceId, outputFormat, voiceSettings, languageCode, + pronunciationDictionaryLocators, seed, previousText, nextText, previousRequestIds, nextRequestIds, + usePvcAsIvc, applyTextNormalization); + } + + @Override + public String toString() { + return "ElevenLabsSpeechOptions{" + "modelId='" + modelId + '\'' + ", voiceId='" + voiceId + '\'' + + ", outputFormat='" + outputFormat + '\'' + ", voiceSettings=" + voiceSettings + ", languageCode='" + + languageCode + '\'' + ", pronunciationDictionaryLocators=" + pronunciationDictionaryLocators + + ", seed=" + seed + ", previousText='" + previousText + '\'' + ", nextText='" + nextText + '\'' + + ", previousRequestIds=" + previousRequestIds + ", nextRequestIds=" + nextRequestIds + ", usePvcAsIvc=" + + 
usePvcAsIvc + ", applyTextNormalization=" + applyTextNormalization + '}'; + } + + @Override + @SuppressWarnings("unchecked") + public ElevenLabsTextToSpeechOptions copy() { + return ElevenLabsTextToSpeechOptions.builder() + .modelId(this.getModelId()) + .voice(this.getVoice()) + .voiceId(this.getVoiceId()) + .format(this.getFormat()) + .outputFormat(this.getOutputFormat()) + .voiceSettings(this.getVoiceSettings()) + .languageCode(this.getLanguageCode()) + .pronunciationDictionaryLocators(this.getPronunciationDictionaryLocators()) + .seed(this.getSeed()) + .previousText(this.getPreviousText()) + .nextText(this.getNextText()) + .previousRequestIds(this.getPreviousRequestIds()) + .nextRequestIds(this.getNextRequestIds()) + .usePvcAsIvc(this.getUsePvcAsIvc()) + .applyTextNormalization(this.getApplyTextNormalization()) + .build(); + } + + public static class Builder { + + private final ElevenLabsTextToSpeechOptions options = new ElevenLabsTextToSpeechOptions(); + + public Builder modelId(String modelId) { + options.setModelId(modelId); + return this; + } + + public Builder voice(String voice) { + options.setVoice(voice); + return this; + } + + public Builder voiceId(String voiceId) { + options.setVoiceId(voiceId); + return this; + } + + public Builder format(String format) { + options.setFormat(format); + return this; + } + + public Builder outputFormat(String outputFormat) { + options.setOutputFormat(outputFormat); + return this; + } + + public Builder voiceSettings(ElevenLabsApi.SpeechRequest.VoiceSettings voiceSettings) { + options.setVoiceSettings(voiceSettings); + return this; + } + + public Builder languageCode(String languageCode) { + options.setLanguageCode(languageCode); + return this; + } + + public Builder pronunciationDictionaryLocators( + List pronunciationDictionaryLocators) { + options.setPronunciationDictionaryLocators(pronunciationDictionaryLocators); + return this; + } + + public Builder seed(Integer seed) { + options.setSeed(seed); + return this; + } 
+ + public Builder previousText(String previousText) { + options.setPreviousText(previousText); + return this; + } + + public Builder nextText(String nextText) { + options.setNextText(nextText); + return this; + } + + public Builder previousRequestIds(List previousRequestIds) { + options.setPreviousRequestIds(previousRequestIds); + return this; + } + + public Builder nextRequestIds(List nextRequestIds) { + options.setNextRequestIds(nextRequestIds); + return this; + } + + public Builder usePvcAsIvc(Boolean usePvcAsIvc) { + options.setUsePvcAsIvc(usePvcAsIvc); + return this; + } + + public Builder applyTextNormalization( + ElevenLabsApi.SpeechRequest.TextNormalizationMode applyTextNormalization) { + options.setApplyTextNormalization(applyTextNormalization); + return this; + } + + public ElevenLabsTextToSpeechOptions build() { + return this.options; + } + + } + +} diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/aot/ElevenLabsRuntimeHints.java b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/aot/ElevenLabsRuntimeHints.java new file mode 100644 index 00000000000..143969c270a --- /dev/null +++ b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/aot/ElevenLabsRuntimeHints.java @@ -0,0 +1,44 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.ai.elevenlabs.aot; + +import static org.springframework.ai.aot.AiRuntimeHints.findJsonAnnotatedClassesInPackage; + +import org.springframework.ai.elevenlabs.api.ElevenLabsApi; +import org.springframework.aot.hint.MemberCategory; +import org.springframework.aot.hint.RuntimeHints; +import org.springframework.aot.hint.RuntimeHintsRegistrar; +import org.springframework.lang.NonNull; +import org.springframework.lang.Nullable; + +/** + * The ElevenLabsRuntimeHints class is responsible for registering runtime hints for + * ElevenLabs API classes. + * + * @author Alexandros Pappas + */ +public class ElevenLabsRuntimeHints implements RuntimeHintsRegistrar { + + @Override + public void registerHints(@NonNull RuntimeHints hints, @Nullable ClassLoader classLoader) { + var mcs = MemberCategory.values(); + for (var tr : findJsonAnnotatedClassesInPackage(ElevenLabsApi.class)) { + hints.reflection().registerType(tr, mcs); + } + } + +} diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/api/ElevenLabsApi.java b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/api/ElevenLabsApi.java new file mode 100644 index 00000000000..af749b8181b --- /dev/null +++ b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/api/ElevenLabsApi.java @@ -0,0 +1,389 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.ai.elevenlabs.api; + +import java.util.List; +import java.util.function.Consumer; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonValue; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; + +import org.springframework.ai.model.ApiKey; +import org.springframework.ai.model.NoopApiKey; +import org.springframework.ai.model.SimpleApiKey; +import org.springframework.ai.retry.RetryUtils; +import org.springframework.http.HttpHeaders; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.util.Assert; +import org.springframework.util.LinkedMultiValueMap; +import org.springframework.util.MultiValueMap; +import org.springframework.web.client.ResponseErrorHandler; +import org.springframework.web.client.RestClient; +import org.springframework.web.reactive.function.client.WebClient; +import org.springframework.web.util.UriComponentsBuilder; + +/** + * Client for the ElevenLabs Text-to-Speech API. + * + * @author Alexandros Pappas + */ +public class ElevenLabsApi { + + public static final String DEFAULT_BASE_URL = "https://api.elevenlabs.io"; + + private final RestClient restClient; + + private final WebClient webClient; + + /** + * Create a new ElevenLabs API client. + * @param baseUrl The base URL for the ElevenLabs API. + * @param apiKey Your ElevenLabs API key. + * @param headers the http headers to use. + * @param restClientBuilder A builder for the Spring RestClient. + * @param webClientBuilder A builder for the Spring WebClient. + * @param responseErrorHandler A custom error handler for API responses. 
+ */ + public ElevenLabsApi(String baseUrl, ApiKey apiKey, MultiValueMap headers, + RestClient.Builder restClientBuilder, WebClient.Builder webClientBuilder, + ResponseErrorHandler responseErrorHandler) { + + Consumer jsonContentHeaders = h -> { + if (!(apiKey instanceof NoopApiKey)) { + h.set("xi-api-key", apiKey.getValue()); + } + h.addAll(headers); + h.setContentType(MediaType.APPLICATION_JSON); + }; + + this.restClient = restClientBuilder.baseUrl(baseUrl) + .defaultHeaders(jsonContentHeaders) + .defaultStatusHandler(responseErrorHandler) + .build(); + + this.webClient = webClientBuilder.baseUrl(baseUrl).defaultHeaders(jsonContentHeaders).build(); + } + + public static Builder builder() { + return new Builder(); + } + + /** + * Convert text to speech using the specified voice and parameters. + * @param requestBody The request body containing text, model, and voice settings. + * @param voiceId The ID of the voice to use. Must not be null. + * @param queryParameters Additional query parameters for the API call. + * @return A ResponseEntity containing the generated audio as a byte array. + */ + public ResponseEntity textToSpeech(SpeechRequest requestBody, String voiceId, + MultiValueMap queryParameters) { + + Assert.notNull(voiceId, "voiceId must be provided. It cannot be null."); + Assert.notNull(requestBody, "requestBody can not be null."); + Assert.hasText(requestBody.text(), "requestBody.text must be provided. It cannot be null or empty."); + + UriComponentsBuilder uriBuilder = UriComponentsBuilder.fromPath("/v1/text-to-speech/{voice_id}") + .queryParams(queryParameters); + + return this.restClient.post() + .uri(uriBuilder.buildAndExpand(voiceId).toUriString()) + .body(requestBody) + .retrieve() + .toEntity(byte[].class); + } + + /** + * Convert text to speech using the specified voice and parameters, streaming the + * results. + * @param requestBody The request body containing text, model, and voice settings. + * @param voiceId The ID of the voice to use. 
Must not be null. + * @param queryParameters Additional query parameters for the API call. + * @return A Flux of ResponseEntity containing the generated audio chunks as byte + * arrays. + */ + public Flux> textToSpeechStream(SpeechRequest requestBody, String voiceId, + MultiValueMap queryParameters) { + Assert.notNull(voiceId, "voiceId must be provided for streaming. It cannot be null."); + Assert.notNull(requestBody, "requestBody can not be null."); + Assert.hasText(requestBody.text(), "requestBody.text must be provided. It cannot be null or empty."); + + UriComponentsBuilder uriBuilder = UriComponentsBuilder.fromPath("/v1/text-to-speech/{voice_id}/stream") + .queryParams(queryParameters); + + return this.webClient.post() + .uri(uriBuilder.buildAndExpand(voiceId).toUriString()) + .body(Mono.just(requestBody), SpeechRequest.class) + .accept(MediaType.APPLICATION_OCTET_STREAM) + .exchangeToFlux(clientResponse -> { + HttpHeaders headers = clientResponse.headers().asHttpHeaders(); + return clientResponse.bodyToFlux(byte[].class) + .map(bytes -> ResponseEntity.ok().headers(headers).body(bytes)); + }); + } + + /** + * The output format of the generated audio. + */ + public enum OutputFormat { + + MP3_22050_32("mp3_22050_32"), MP3_44100_32("mp3_44100_32"), MP3_44100_64("mp3_44100_64"), + MP3_44100_96("mp3_44100_96"), MP3_44100_128("mp3_44100_128"), MP3_44100_192("mp3_44100_192"), + PCM_16000("pcm_16000"), PCM_22050("pcm_22050"), PCM_24000("pcm_24000"), PCM_44100("pcm_44100"), + ULAW_8000("ulaw_8000"); + + private final String value; + + OutputFormat(String value) { + this.value = value; + } + + public String getValue() { + return this.value; + } + + } + + /** + * Represents a request to the ElevenLabs Text-to-Speech API. 
+ */ + @JsonInclude(JsonInclude.Include.NON_NULL) + public record SpeechRequest(@JsonProperty("text") String text, @JsonProperty("model_id") String modelId, + @JsonProperty("language_code") String languageCode, + @JsonProperty("voice_settings") VoiceSettings voiceSettings, + @JsonProperty("pronunciation_dictionary_locators") List pronunciationDictionaryLocators, + @JsonProperty("seed") Integer seed, @JsonProperty("previous_text") String previousText, + @JsonProperty("next_text") String nextText, + @JsonProperty("previous_request_ids") List previousRequestIds, + @JsonProperty("next_request_ids") List nextRequestIds, + @JsonProperty("use_pvc_as_ivc") Boolean usePvcAsIvc, + @JsonProperty("apply_text_normalization") TextNormalizationMode applyTextNormalization) { + + public static Builder builder() { + return new Builder(); + } + + /** + * Text normalization mode. + */ + public enum TextNormalizationMode { + + @JsonProperty("auto") + AUTO("auto"), @JsonProperty("on") + ON("on"), @JsonProperty("off") + OFF("off"); + + public final String value; + + TextNormalizationMode(String value) { + this.value = value; + } + + @JsonValue + public String getValue() { + return this.value; + } + + } + + /** + * Voice settings to override defaults for the given voice. + */ + @JsonInclude(JsonInclude.Include.NON_NULL) + public record VoiceSettings(@JsonProperty("stability") Double stability, + @JsonProperty("similarity_boost") Double similarityBoost, @JsonProperty("style") Double style, + @JsonProperty("use_speaker_boost") Boolean useSpeakerBoost, @JsonProperty("speed") Double speed) { + } + + /** + * Locator for a pronunciation dictionary. 
+ */ + @JsonInclude(JsonInclude.Include.NON_NULL) + public record PronunciationDictionaryLocator( + @JsonProperty("pronunciation_dictionary_id") String pronunciationDictionaryId, + @JsonProperty("version_id") String versionId) { + } + + public static class Builder { + + private String text; + + private String modelId; + + private String languageCode; + + private VoiceSettings voiceSettings; + + private List pronunciationDictionaryLocators; + + private Integer seed; + + private String previousText; + + private String nextText; + + private List previousRequestIds; + + private List nextRequestIds; + + private Boolean usePvcAsIvc; + + private TextNormalizationMode applyTextNormalization; + + public Builder text(String text) { + this.text = text; + return this; + } + + public Builder modelId(String modelId) { + this.modelId = modelId; + return this; + } + + public Builder languageCode(String languageCode) { + this.languageCode = languageCode; + return this; + } + + public Builder voiceSettings(VoiceSettings voiceSettings) { + this.voiceSettings = voiceSettings; + return this; + } + + public Builder pronunciationDictionaryLocators( + List pronunciationDictionaryLocators) { + this.pronunciationDictionaryLocators = pronunciationDictionaryLocators; + return this; + } + + public Builder seed(Integer seed) { + this.seed = seed; + return this; + } + + public Builder previousText(String previousText) { + this.previousText = previousText; + return this; + } + + public Builder nextText(String nextText) { + this.nextText = nextText; + return this; + } + + public Builder previousRequestIds(List previousRequestIds) { + this.previousRequestIds = previousRequestIds; + return this; + } + + public Builder nextRequestIds(List nextRequestIds) { + this.nextRequestIds = nextRequestIds; + return this; + } + + public Builder usePvcAsIvc(Boolean usePvcAsIvc) { + this.usePvcAsIvc = usePvcAsIvc; + return this; + } + + public Builder applyTextNormalization(TextNormalizationMode 
applyTextNormalization) { + this.applyTextNormalization = applyTextNormalization; + return this; + } + + public SpeechRequest build() { + Assert.hasText(text, "text must not be empty"); + return new SpeechRequest(text, modelId, languageCode, voiceSettings, pronunciationDictionaryLocators, + seed, previousText, nextText, previousRequestIds, nextRequestIds, usePvcAsIvc, + applyTextNormalization); + } + + } + + } + + /** + * Builder to construct {@link ElevenLabsApi} instance. + */ + public static class Builder { + + private String baseUrl = DEFAULT_BASE_URL; + + private ApiKey apiKey; + + private MultiValueMap headers = new LinkedMultiValueMap<>(); + + private RestClient.Builder restClientBuilder = RestClient.builder(); + + private WebClient.Builder webClientBuilder = WebClient.builder(); + + private ResponseErrorHandler responseErrorHandler = RetryUtils.DEFAULT_RESPONSE_ERROR_HANDLER; + + public Builder baseUrl(String baseUrl) { + Assert.hasText(baseUrl, "baseUrl cannot be null or empty"); + this.baseUrl = baseUrl; + return this; + } + + public Builder apiKey(ApiKey apiKey) { + Assert.notNull(apiKey, "apiKey cannot be null"); + this.apiKey = apiKey; + return this; + } + + public Builder apiKey(String simpleApiKey) { + Assert.notNull(simpleApiKey, "simpleApiKey cannot be null"); + this.apiKey = new SimpleApiKey(simpleApiKey); + return this; + } + + public Builder headers(MultiValueMap headers) { + Assert.notNull(headers, "headers cannot be null"); + this.headers = headers; + return this; + } + + public Builder restClientBuilder(RestClient.Builder restClientBuilder) { + Assert.notNull(restClientBuilder, "restClientBuilder cannot be null"); + this.restClientBuilder = restClientBuilder; + return this; + } + + public Builder webClientBuilder(WebClient.Builder webClientBuilder) { + Assert.notNull(webClientBuilder, "webClientBuilder cannot be null"); + this.webClientBuilder = webClientBuilder; + return this; + } + + public Builder responseErrorHandler(ResponseErrorHandler 
responseErrorHandler) { + Assert.notNull(responseErrorHandler, "responseErrorHandler cannot be null"); + this.responseErrorHandler = responseErrorHandler; + return this; + } + + public ElevenLabsApi build() { + Assert.notNull(this.apiKey, "apiKey must be set"); + return new ElevenLabsApi(this.baseUrl, this.apiKey, this.headers, this.restClientBuilder, + this.webClientBuilder, this.responseErrorHandler); + } + + } + +} diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/api/ElevenLabsVoicesApi.java b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/api/ElevenLabsVoicesApi.java new file mode 100644 index 00000000000..51df40c6d4f --- /dev/null +++ b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/api/ElevenLabsVoicesApi.java @@ -0,0 +1,452 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.ai.elevenlabs.api; + +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonValue; + +import org.springframework.ai.model.ApiKey; +import org.springframework.ai.model.NoopApiKey; +import org.springframework.ai.model.SimpleApiKey; +import org.springframework.ai.retry.RetryUtils; +import org.springframework.http.HttpHeaders; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.util.Assert; +import org.springframework.util.LinkedMultiValueMap; +import org.springframework.util.MultiValueMap; +import org.springframework.web.client.ResponseErrorHandler; +import org.springframework.web.client.RestClient; + +/** + * Client for the ElevenLabs Voices API. + * + * @author Alexandros Pappas + */ +public class ElevenLabsVoicesApi { + + private static final String DEFAULT_BASE_URL = "https://api.elevenlabs.io"; + + private final RestClient restClient; + + /** + * Create a new ElevenLabs Voices API client. + * @param baseUrl The base URL for the ElevenLabs API. + * @param apiKey Your ElevenLabs API key. + * @param headers the http headers to use. + * @param restClientBuilder A builder for the Spring RestClient. + * @param responseErrorHandler A custom error handler for API responses. 
+ */ + public ElevenLabsVoicesApi(String baseUrl, ApiKey apiKey, MultiValueMap headers, + RestClient.Builder restClientBuilder, ResponseErrorHandler responseErrorHandler) { + Consumer jsonContentHeaders = h -> { + if (!(apiKey instanceof NoopApiKey)) { + h.set("xi-api-key", apiKey.getValue()); + } + h.addAll(headers); + h.setContentType(MediaType.APPLICATION_JSON); + }; + + this.restClient = restClientBuilder.baseUrl(baseUrl) + .defaultHeaders(jsonContentHeaders) + .defaultStatusHandler(responseErrorHandler) + .build(); + + } + + public static Builder builder() { + return new Builder(); + } + + /** + * Retrieves a list of all available voices from the ElevenLabs API. + * @return A ResponseEntity containing a Voices object, which contains the list of + * voices. + */ + public ResponseEntity getVoices() { + return this.restClient.get().uri("/v1/voices").retrieve().toEntity(Voices.class); + } + + /** + * Gets the default settings for voices. "similarity_boost" corresponds to ”Clarity + + * Similarity Enhancement” in the web app and "stability" corresponds to "Stability" + * slider in the web app. + * @return {@link ResponseEntity} containing the {@link VoiceSettings} record. + */ + public ResponseEntity getDefaultVoiceSettings() { + return this.restClient.get().uri("/v1/voices/settings/default").retrieve().toEntity(VoiceSettings.class); + } + + /** + * Returns the settings for a specific voice. "similarity_boost" corresponds to + * "Clarity + Similarity Enhancement" in the web app and "stability" corresponds to + * the "Stability" slider in the web app. + * @param voiceId The ID of the voice to get settings for. Required. + * @return {@link ResponseEntity} containing the {@link VoiceSettings} record. 
+ */ + public ResponseEntity getVoiceSettings(String voiceId) { + Assert.hasText(voiceId, "voiceId cannot be null or empty"); + return this.restClient.get() + .uri("/v1/voices/{voiceId}/settings", voiceId) + .retrieve() + .toEntity(VoiceSettings.class); + } + + /** + * Returns metadata about a specific voice. + * @param voiceId ID of the voice to be used. You can use the Get voices endpoint list + * all the available voices. Required. + * @return {@link ResponseEntity} containing the {@link Voice} record. + */ + public ResponseEntity getVoice(String voiceId) { + Assert.hasText(voiceId, "voiceId cannot be null or empty"); + return this.restClient.get().uri("/v1/voices/{voiceId}", voiceId).retrieve().toEntity(Voice.class); + } + + public enum CategoryEnum { + + @JsonProperty("generated") + GENERATED("generated"), @JsonProperty("cloned") + CLONED("cloned"), @JsonProperty("premade") + PREMADE("premade"), @JsonProperty("professional") + PROFESSIONAL("professional"), @JsonProperty("famous") + FAMOUS("famous"), @JsonProperty("high_quality") + HIGH_QUALITY("high_quality"); + + public final String value; + + CategoryEnum(String value) { + this.value = value; + } + + @JsonValue + public String getValue() { + return this.value; + } + + } + + public enum SafetyControlEnum { + + @JsonProperty("NONE") + NONE("NONE"), @JsonProperty("BAN") + BAN("BAN"), @JsonProperty("CAPTCHA") + CAPTCHA("CAPTCHA"), @JsonProperty("CAPTCHA_AND_MODERATION") + CAPTCHA_AND_MODERATION("CAPTCHA_AND_MODERATION"), @JsonProperty("ENTERPRISE_BAN") + ENTERPRISE_BAN("ENTERPRISE_BAN"), @JsonProperty("ENTERPRISE_CAPTCHA") + ENTERPRISE_CAPTCHA("ENTERPRISE_CAPTCHA"); + + public final String value; + + SafetyControlEnum(String value) { + this.value = value; + } + + @JsonValue + public String getValue() { + return this.value; + } + + } + + /** + * Represents the response from the /v1/voices endpoint. + * + * @param voices A list of Voice objects representing the available voices. 
+ */ + @JsonInclude(JsonInclude.Include.NON_NULL) + public record Voices(@JsonProperty("voices") List voices) { + } + + /** + * Represents a single voice from the ElevenLabs API. + */ + @JsonInclude(JsonInclude.Include.NON_NULL) + public record Voice(@JsonProperty("voice_id") String voiceId, @JsonProperty("name") String name, + @JsonProperty("samples") List samples, @JsonProperty("category") CategoryEnum category, + @JsonProperty("fine_tuning") FineTuning fineTuning, @JsonProperty("labels") Map labels, + @JsonProperty("description") String description, @JsonProperty("preview_url") String previewUrl, + @JsonProperty("available_for_tiers") List availableForTiers, + @JsonProperty("settings") VoiceSettings settings, @JsonProperty("sharing") VoiceSharing sharing, + @JsonProperty("high_quality_base_model_ids") List highQualityBaseModelIds, + @JsonProperty("verified_languages") List verifiedLanguages, + @JsonProperty("safety_control") SafetyControlEnum safetyControl, + @JsonProperty("voice_verification") VoiceVerification voiceVerification, + @JsonProperty("permission_on_resource") String permissionOnResource, + @JsonProperty("is_owner") Boolean isOwner, @JsonProperty("is_legacy") Boolean isLegacy, + @JsonProperty("is_mixed") Boolean isMixed, @JsonProperty("created_at_unix") Integer createdAtUnix) { + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public record Sample(@JsonProperty("sample_id") String sampleId, @JsonProperty("file_name") String fileName, + @JsonProperty("mime_type") String mimeType, @JsonProperty("size_bytes") Integer sizeBytes, + @JsonProperty("hash") String hash) { + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public record FineTuning(@JsonProperty("is_allowed_to_fine_tune") Boolean isAllowedToFineTune, + @JsonProperty("state") Map state, + @JsonProperty("verification_failures") List verificationFailures, + @JsonProperty("verification_attempts_count") Integer verificationAttemptsCount, + @JsonProperty("manual_verification_requested") Boolean 
manualVerificationRequested, + @JsonProperty("language") String language, @JsonProperty("progress") Map progress, + @JsonProperty("message") Map message, + @JsonProperty("dataset_duration_seconds") Double datasetDurationSeconds, + @JsonProperty("verification_attempts") List verificationAttempts, + @JsonProperty("slice_ids") List sliceIds, + @JsonProperty("manual_verification") ManualVerification manualVerification, + @JsonProperty("max_verification_attempts") Integer maxVerificationAttempts, + @JsonProperty("next_max_verification_attempts_reset_unix_ms") Long nextMaxVerificationAttemptsResetUnixMs) { + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public record VoiceVerification(@JsonProperty("requires_verification") Boolean requiresVerification, + @JsonProperty("is_verified") Boolean isVerified, + @JsonProperty("verification_failures") List verificationFailures, + @JsonProperty("verification_attempts_count") Integer verificationAttemptsCount, + @JsonProperty("language") String language, + @JsonProperty("verification_attempts") List verificationAttempts) { + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public record VerificationAttempt(@JsonProperty("text") String text, @JsonProperty("date_unix") Integer dateUnix, + @JsonProperty("accepted") Boolean accepted, @JsonProperty("similarity") Double similarity, + @JsonProperty("levenshtein_distance") Double levenshteinDistance, + @JsonProperty("recording") Recording recording) { + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public record Recording(@JsonProperty("recording_id") String recordingId, + @JsonProperty("mime_type") String mimeType, @JsonProperty("size_bytes") Integer sizeBytes, + @JsonProperty("upload_date_unix") Integer uploadDateUnix, + @JsonProperty("transcription") String transcription) { + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public record ManualVerification(@JsonProperty("extra_text") String extraText, + @JsonProperty("request_time_unix") Integer requestTimeUnix, + 
@JsonProperty("files") List files) { + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public record ManualVerificationFile(@JsonProperty("file_id") String fileId, + @JsonProperty("file_name") String fileName, @JsonProperty("mime_type") String mimeType, + @JsonProperty("size_bytes") Integer sizeBytes, @JsonProperty("upload_date_unix") Integer uploadDateUnix) { + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public record VoiceSettings(@JsonProperty("stability") Double stability, + @JsonProperty("similarity_boost") Double similarityBoost, @JsonProperty("style") Double style, + @JsonProperty("use_speaker_boost") Boolean useSpeakerBoost, @JsonProperty("speed") Double speed) { + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public record VoiceSharing(@JsonProperty("status") StatusEnum status, + @JsonProperty("history_item_sample_id") String historyItemSampleId, + @JsonProperty("date_unix") Integer dateUnix, + @JsonProperty("whitelisted_emails") List whitelistedEmails, + @JsonProperty("public_owner_id") String publicOwnerId, + @JsonProperty("original_voice_id") String originalVoiceId, + @JsonProperty("financial_rewards_enabled") Boolean financialRewardsEnabled, + @JsonProperty("free_users_allowed") Boolean freeUsersAllowed, + @JsonProperty("live_moderation_enabled") Boolean liveModerationEnabled, @JsonProperty("rate") Double rate, + @JsonProperty("notice_period") Integer noticePeriod, @JsonProperty("disable_at_unix") Integer disableAtUnix, + @JsonProperty("voice_mixing_allowed") Boolean voiceMixingAllowed, + @JsonProperty("featured") Boolean featured, @JsonProperty("category") CategoryEnum category, + @JsonProperty("reader_app_enabled") Boolean readerAppEnabled, @JsonProperty("image_url") String imageUrl, + @JsonProperty("ban_reason") String banReason, @JsonProperty("liked_by_count") Integer likedByCount, + @JsonProperty("cloned_by_count") Integer clonedByCount, @JsonProperty("name") String name, + @JsonProperty("description") String description, 
@JsonProperty("labels") Map labels, + @JsonProperty("review_status") ReviewStatusEnum reviewStatus, + @JsonProperty("review_message") String reviewMessage, + @JsonProperty("enabled_in_library") Boolean enabledInLibrary, + @JsonProperty("instagram_username") String instagramUsername, + @JsonProperty("twitter_username") String twitterUsername, + @JsonProperty("youtube_username") String youtubeUsername, + @JsonProperty("tiktok_username") String tiktokUsername, + @JsonProperty("moderation_check") VoiceSharingModerationCheck moderationCheck, + @JsonProperty("reader_restricted_on") List readerRestrictedOn) { + public enum StatusEnum { + + @JsonProperty("enabled") + ENABLED("enabled"), @JsonProperty("disabled") + DISABLED("disabled"), @JsonProperty("copied") + COPIED("copied"), @JsonProperty("copied_disabled") + COPIED_DISABLED("copied_disabled"); + + public final String value; + + StatusEnum(String value) { + this.value = value; + } + + @JsonValue + public String getValue() { + return this.value; + } + + } + + public enum CategoryEnum { + + @JsonProperty("generated") + GENERATED("generated"), @JsonProperty("professional") + PROFESSIONAL("professional"), @JsonProperty("high_quality") + HIGH_QUALITY("high_quality"), @JsonProperty("famous") + FAMOUS("famous"); + + public final String value; + + CategoryEnum(String value) { + this.value = value; + } + + @JsonValue + public String getValue() { + return this.value; + } + + } + + public enum ReviewStatusEnum { + + @JsonProperty("not_requested") + NOT_REQUESTED("not_requested"), @JsonProperty("pending") + PENDING("pending"), @JsonProperty("declined") + DECLINED("declined"), @JsonProperty("allowed") + ALLOWED("allowed"), @JsonProperty("allowed_with_changes") + ALLOWED_WITH_CHANGES("allowed_with_changes"); + + public final String value; + + ReviewStatusEnum(String value) { + this.value = value; + } + + @JsonValue + public String getValue() { + return this.value; + } + + } + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public 
record VoiceSharingModerationCheck(@JsonProperty("date_checked_unix") Integer dateCheckedUnix, + @JsonProperty("name_value") String nameValue, @JsonProperty("name_check") Boolean nameCheck, + @JsonProperty("description_value") String descriptionValue, + @JsonProperty("description_check") Boolean descriptionCheck, + @JsonProperty("sample_ids") List sampleIds, + @JsonProperty("sample_checks") List sampleChecks, + @JsonProperty("captcha_ids") List captchaIds, + @JsonProperty("captcha_checks") List captchaChecks) { + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public record ReaderResource(@JsonProperty("resource_type") ResourceTypeEnum resourceType, + @JsonProperty("resource_id") String resourceId) { + + public enum ResourceTypeEnum { + + @JsonProperty("read") + READ("read"), @JsonProperty("collection") + COLLECTION("collection"); + + public final String value; + + ResourceTypeEnum(String value) { + this.value = value; + } + + @JsonValue + public String getValue() { + return this.value; + } + + } + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public record VerifiedVoiceLanguage(@JsonProperty("language") String language, + @JsonProperty("model_id") String modelId, @JsonProperty("accent") String accent) { + } + + /** + * Builder to construct {@link ElevenLabsVoicesApi} instance. 
+ */ + public static class Builder { + + private String baseUrl = DEFAULT_BASE_URL; + + private ApiKey apiKey; + + private MultiValueMap headers = new LinkedMultiValueMap<>(); + + private RestClient.Builder restClientBuilder = RestClient.builder(); + + private ResponseErrorHandler responseErrorHandler = RetryUtils.DEFAULT_RESPONSE_ERROR_HANDLER; + + public Builder baseUrl(String baseUrl) { + Assert.hasText(baseUrl, "baseUrl cannot be null or empty"); + this.baseUrl = baseUrl; + return this; + } + + public Builder apiKey(ApiKey apiKey) { + Assert.notNull(apiKey, "apiKey cannot be null"); + this.apiKey = apiKey; + return this; + } + + public Builder apiKey(String simpleApiKey) { + Assert.notNull(simpleApiKey, "simpleApiKey cannot be null"); + this.apiKey = new SimpleApiKey(simpleApiKey); + return this; + } + + public Builder headers(MultiValueMap headers) { + Assert.notNull(headers, "headers cannot be null"); + this.headers = headers; + return this; + } + + public Builder restClientBuilder(RestClient.Builder restClientBuilder) { + Assert.notNull(restClientBuilder, "restClientBuilder cannot be null"); + this.restClientBuilder = restClientBuilder; + return this; + } + + public Builder responseErrorHandler(ResponseErrorHandler responseErrorHandler) { + Assert.notNull(responseErrorHandler, "responseErrorHandler cannot be null"); + this.responseErrorHandler = responseErrorHandler; + return this; + } + + public ElevenLabsVoicesApi build() { + Assert.notNull(this.apiKey, "apiKey must be set"); + return new ElevenLabsVoicesApi(this.baseUrl, this.apiKey, this.headers, this.restClientBuilder, + this.responseErrorHandler); + } + + } + +} diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/DefaultTextToSpeechOptions.java b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/DefaultTextToSpeechOptions.java new file mode 100644 index 00000000000..96d55c841e2 --- /dev/null +++ 
b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/DefaultTextToSpeechOptions.java @@ -0,0 +1,147 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.elevenlabs.tts; + +import java.util.Objects; + +import com.fasterxml.jackson.annotation.JsonInclude; + +/** + * Default implementation of the {@link TextToSpeechOptions} interface. + * + * @author Alexandros Pappas + */ +@JsonInclude(JsonInclude.Include.NON_NULL) +public class DefaultTextToSpeechOptions implements TextToSpeechOptions { + + private final String model; + + private final String voice; + + private final String format; + + private final Double speed; + + private DefaultTextToSpeechOptions(String model, String voice, String format, Double speed) { + this.model = model; + this.voice = voice; + this.format = format; + this.speed = speed; + } + + public static Builder builder() { + return new Builder(); + } + + @Override + public String getModel() { + return this.model; + } + + @Override + public String getVoice() { + return this.voice; + } + + @Override + public String getFormat() { + return this.format; + } + + @Override + public Double getSpeed() { + return this.speed; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof DefaultTextToSpeechOptions that)) + return false; + return Objects.equals(model, that.model) && 
Objects.equals(voice, that.voice) + && Objects.equals(format, that.format) && Objects.equals(speed, that.speed); + } + + @Override + public int hashCode() { + return Objects.hash(model, voice, format, speed); + } + + @Override + public String toString() { + return "DefaultTextToSpeechOptions{" + "model='" + model + '\'' + ", voice='" + voice + '\'' + ", format='" + + format + '\'' + ", speed=" + speed + '}'; + } + + @Override + @SuppressWarnings("unchecked") + public DefaultTextToSpeechOptions copy() { + return new Builder(this).build(); + } + + public static class Builder implements TextToSpeechOptions.Builder { + + private String model; + + private String voice; + + private String format; + + private Double speed; + + public Builder() { + } + + private Builder(DefaultTextToSpeechOptions options) { + this.model = options.model; + this.voice = options.voice; + this.format = options.format; + this.speed = options.speed; + } + + @Override + public Builder model(String model) { + this.model = model; + return this; + } + + @Override + public Builder voice(String voice) { + this.voice = voice; + return this; + } + + @Override + public Builder format(String format) { + this.format = format; + return this; + } + + @Override + public Builder speed(Double speed) { + this.speed = speed; + return this; + } + + public DefaultTextToSpeechOptions build() { + return new DefaultTextToSpeechOptions(this.model, this.voice, this.format, this.speed); + } + + } + +} diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/Speech.java b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/Speech.java new file mode 100644 index 00000000000..2307b64d086 --- /dev/null +++ b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/Speech.java @@ -0,0 +1,67 @@ +/* + * Copyright 2025-2025 the original author or authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.elevenlabs.tts; + +import java.util.Arrays; +import java.util.Objects; + +import org.springframework.ai.model.ModelResult; +import org.springframework.ai.model.ResultMetadata; + +/** + * Implementation of the {@link ModelResult} interface for the speech model. + * + * @author Alexandros Pappas + */ +public class Speech implements ModelResult { + + private final byte[] speech; + + public Speech(byte[] speech) { + this.speech = speech; + } + + @Override + public byte[] getOutput() { + return this.speech; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof Speech speech1)) + return false; + return Arrays.equals(speech, speech1.speech); + } + + @Override + public int hashCode() { + return Objects.hash(Arrays.hashCode(speech)); + } + + @Override + public String toString() { + return "Speech{" + "speech=" + Arrays.toString(speech) + '}'; + } + + @Override + public ResultMetadata getMetadata() { + return null; + } + +} diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/StreamingTextToSpeechModel.java b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/StreamingTextToSpeechModel.java new file mode 100644 index 00000000000..409ed788f1d --- /dev/null +++ 
b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/StreamingTextToSpeechModel.java @@ -0,0 +1,45 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.elevenlabs.tts; + +import reactor.core.publisher.Flux; + +import org.springframework.ai.model.StreamingModel; + +/** + * Interface for the streaming text to speech model. + * + * @author Alexandros Pappas + */ +public interface StreamingTextToSpeechModel extends StreamingModel { + + default Flux stream(String text) { + TextToSpeechPrompt prompt = new TextToSpeechPrompt(text); + return stream(prompt).map(response -> (response.getResult() == null || response.getResult().getOutput() == null) + ? new byte[0] : response.getResult().getOutput()); + } + + default Flux stream(String text, TextToSpeechOptions options) { + TextToSpeechPrompt prompt = new TextToSpeechPrompt(text, options); + return stream(prompt).map(response -> (response.getResult() == null || response.getResult().getOutput() == null) + ? 
new byte[0] : response.getResult().getOutput()); + } + + @Override + Flux stream(TextToSpeechPrompt prompt); + +} diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechMessage.java b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechMessage.java new file mode 100644 index 00000000000..88997c42ea5 --- /dev/null +++ b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechMessage.java @@ -0,0 +1,58 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.elevenlabs.tts; + +import java.util.Objects; + +/** + * Message carrying the text to be converted to speech; used as the instructions of a + * {@link TextToSpeechPrompt}. 
+ * + * @author Alexandros Pappas + */ +public class TextToSpeechMessage { + + private final String text; + + public TextToSpeechMessage(String text) { + this.text = text; + } + + public String getText() { + return text; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof TextToSpeechMessage that)) + return false; + return Objects.equals(text, that.text); + } + + @Override + public int hashCode() { + return Objects.hash(text); + } + + @Override + public String toString() { + return "TextToSpeechMessage{" + "text='" + text + '\'' + '}'; + } + +} diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechModel.java b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechModel.java new file mode 100644 index 00000000000..6c6d6f327d6 --- /dev/null +++ b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechModel.java @@ -0,0 +1,42 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.elevenlabs.tts; + +import org.springframework.ai.model.Model; +import org.springframework.ai.model.ModelResult; + +/** + * Interface for the text to speech model. 
+ * + * @author Alexandros Pappas + */ +public interface TextToSpeechModel extends Model { + + default byte[] call(String text) { + TextToSpeechPrompt prompt = new TextToSpeechPrompt(text); + ModelResult result = call(prompt).getResult(); + return (result != null) ? result.getOutput() : new byte[0]; + } + + @Override + TextToSpeechResponse call(TextToSpeechPrompt prompt); + + default TextToSpeechOptions getDefaultOptions() { + return TextToSpeechOptions.builder().build(); + } + +} diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechOptions.java b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechOptions.java new file mode 100644 index 00000000000..d204a32212e --- /dev/null +++ b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechOptions.java @@ -0,0 +1,114 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.elevenlabs.tts; + +import org.springframework.ai.model.ModelOptions; +import org.springframework.lang.Nullable; + +/** + * Interface for text-to-speech model options. Defines the common, portable options that + * should be supported by all implementations. 
+ * + * @author Alexandros Pappas + */ +public interface TextToSpeechOptions extends ModelOptions { + + /** + * Creates a new {@link TextToSpeechOptions.Builder} to create the default + * {@link TextToSpeechOptions}. + * @return Returns a new {@link TextToSpeechOptions.Builder}. + */ + static TextToSpeechOptions.Builder builder() { + return new DefaultTextToSpeechOptions.Builder(); + } + + /** + * Returns the model to use for text-to-speech. + * @return The model name. + */ + @Nullable + String getModel(); + + /** + * Returns the voice to use for text-to-speech. + * @return The voice identifier. + */ + @Nullable + String getVoice(); + + /** + * Returns the output format for the generated audio. + * @return The output format (e.g., "mp3", "wav"). + */ + @Nullable + String getFormat(); + + /** + * Returns the speed of the generated speech. + * @return The speech speed. + */ + @Nullable + Double getSpeed(); + + /** + * Returns a copy of this {@link TextToSpeechOptions}. + * @return a copy of this {@link TextToSpeechOptions} + */ + T copy(); + + /** + * Builder for {@link TextToSpeechOptions}. + */ + interface Builder { + + /** + * Sets the model to use for text-to-speech. + * @param model The model name. + * @return This builder. + */ + Builder model(String model); + + /** + * Sets the voice to use for text-to-speech. + * @param voice The voice identifier. + * @return This builder. + */ + Builder voice(String voice); + + /** + * Sets the output format for the generated audio. + * @param format The output format (e.g., "mp3", "wav"). + * @return This builder. + */ + Builder format(String format); + + /** + * Sets the speed of the generated speech. + * @param speed The speech speed. + * @return This builder. + */ + Builder speed(Double speed); + + /** + * Builds the {@link TextToSpeechOptions}. + * @return The {@link TextToSpeechOptions}. 
+ */ + TextToSpeechOptions build(); + + } + +} diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechPrompt.java b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechPrompt.java new file mode 100644 index 00000000000..56b656e5457 --- /dev/null +++ b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechPrompt.java @@ -0,0 +1,84 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.elevenlabs.tts; + +import java.util.Objects; + +import org.springframework.ai.model.ModelRequest; + +/** + * Implementation of the {@link ModelRequest} interface for the text to speech prompt. 
+ * + * @author Alexandros Pappas + */ +public class TextToSpeechPrompt implements ModelRequest { + + private final TextToSpeechMessage message; + + private TextToSpeechOptions options; + + public TextToSpeechPrompt(String text) { + this(new TextToSpeechMessage(text), TextToSpeechOptions.builder().build()); + } + + public TextToSpeechPrompt(String text, TextToSpeechOptions options) { + this(new TextToSpeechMessage(text), options); + } + + public TextToSpeechPrompt(TextToSpeechMessage message) { + this(message, TextToSpeechOptions.builder().build()); + } + + public TextToSpeechPrompt(TextToSpeechMessage message, TextToSpeechOptions options) { + this.message = message; + this.options = options; + } + + @Override + public TextToSpeechMessage getInstructions() { + return this.message; + } + + @Override + public TextToSpeechOptions getOptions() { + return this.options; + } + + public void setOptions(TextToSpeechOptions options) { + this.options = options; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof TextToSpeechPrompt that)) + return false; + return Objects.equals(message, that.message) && Objects.equals(options, that.options); + } + + @Override + public int hashCode() { + return Objects.hash(message, options); + } + + @Override + public String toString() { + return "TextToSpeechPrompt{" + "message=" + message + ", options=" + options + '}'; + } + +} diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechResponse.java b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechResponse.java new file mode 100644 index 00000000000..3fd8f5c9dce --- /dev/null +++ b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechResponse.java @@ -0,0 +1,78 @@ +/* + * Copyright 2025-2025 the original author or authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.elevenlabs.tts; + +import java.util.List; +import java.util.Objects; + +import org.springframework.ai.model.ModelResponse; +import org.springframework.ai.model.ResponseMetadata; + +/** + * Implementation of the {@link ModelResponse} interface for the text to speech response. + * + * @author Alexandros Pappas + */ +public class TextToSpeechResponse implements ModelResponse { + + private final List results; + + private final ResponseMetadata metadata; + + public TextToSpeechResponse(List results) { + this(results, null); + } + + public TextToSpeechResponse(List results, ResponseMetadata metadata) { + this.results = results; + this.metadata = metadata; + } + + @Override + public List getResults() { + return this.results; + } + + public Speech getResult() { + return this.results.get(0); + } + + @Override + public ResponseMetadata getMetadata() { + return this.metadata; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof TextToSpeechResponse that)) + return false; + return Objects.equals(results, that.results); + } + + @Override + public int hashCode() { + return Objects.hash(results); + } + + @Override + public String toString() { + return "TextToSpeechResponse{" + "results=" + results + '}'; + } + +} diff --git a/models/spring-ai-elevenlabs/src/main/resources/META-INF/spring/aot.factories 
b/models/spring-ai-elevenlabs/src/main/resources/META-INF/spring/aot.factories new file mode 100644 index 00000000000..b2d77ead057 --- /dev/null +++ b/models/spring-ai-elevenlabs/src/main/resources/META-INF/spring/aot.factories @@ -0,0 +1,2 @@ +org.springframework.aot.hint.RuntimeHintsRegistrar=\ + org.springframework.ai.elevenlabs.aot.ElevenLabsRuntimeHints \ No newline at end of file diff --git a/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTestConfiguration.java b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTestConfiguration.java new file mode 100644 index 00000000000..e57b27dbfd2 --- /dev/null +++ b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTestConfiguration.java @@ -0,0 +1,58 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.elevenlabs; + +import org.springframework.ai.elevenlabs.api.ElevenLabsApi; +import org.springframework.ai.elevenlabs.api.ElevenLabsVoicesApi; +import org.springframework.ai.model.SimpleApiKey; +import org.springframework.boot.SpringBootConfiguration; +import org.springframework.context.annotation.Bean; +import org.springframework.util.StringUtils; + +/** + * Configuration class for the ElevenLabs API. 
+ * + * @author Alexandros Pappas + */ +@SpringBootConfiguration +public class ElevenLabsTestConfiguration { + + @Bean + public ElevenLabsApi elevenLabsApi() { + return ElevenLabsApi.builder().apiKey(getApiKey()).build(); + } + + @Bean + public ElevenLabsVoicesApi elevenLabsVoicesApi() { + return ElevenLabsVoicesApi.builder().apiKey(getApiKey()).build(); + } + + private SimpleApiKey getApiKey() { + String apiKey = System.getenv("ELEVEN_LABS_API_KEY"); + if (!StringUtils.hasText(apiKey)) { + throw new IllegalArgumentException( + "You must provide an API key. Put it in an environment variable under the name ELEVEN_LABS_API_KEY"); + } + return new SimpleApiKey(apiKey); + } + + @Bean + public ElevenLabsTextToSpeechModel elevenLabsSpeechModel() { + return ElevenLabsTextToSpeechModel.builder().elevenLabsApi(elevenLabsApi()).build(); + } + +} diff --git a/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModelIT.java b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModelIT.java new file mode 100644 index 00000000000..840d9b2b558 --- /dev/null +++ b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModelIT.java @@ -0,0 +1,80 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.ai.elevenlabs; + +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; +import reactor.core.publisher.Flux; + +import org.springframework.ai.elevenlabs.tts.Speech; +import org.springframework.ai.elevenlabs.tts.TextToSpeechPrompt; +import org.springframework.ai.elevenlabs.tts.TextToSpeechResponse; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; + +/** + * Integration tests for the {@link ElevenLabsTextToSpeechModel}. + * + *

+ * These tests require a valid ElevenLabs API key to be set as an environment variable + * named {@code ELEVEN_LABS_API_KEY}. + * + * @author Alexandros Pappas + */ +@SpringBootTest(classes = ElevenLabsTestConfiguration.class) +@EnabledIfEnvironmentVariable(named = "ELEVEN_LABS_API_KEY", matches = ".+") +public class ElevenLabsTextToSpeechModelIT { + + private static final String VOICE_ID = "9BWtsMINqrJLrRacOk9x"; + + @Autowired + private ElevenLabsTextToSpeechModel textToSpeechModel; + + @Test + void textToSpeechWithVoiceTest() { + ElevenLabsTextToSpeechOptions options = ElevenLabsTextToSpeechOptions.builder().voice(VOICE_ID).build(); + TextToSpeechPrompt prompt = new TextToSpeechPrompt("Hello, world!", options); + TextToSpeechResponse response = textToSpeechModel.call(prompt); + + assertThat(response).isNotNull(); + List results = response.getResults(); + assertThat(results).hasSize(1); + Speech speech = results.get(0); + assertThat(speech.getOutput()).isNotEmpty(); + } + + @Test + void textToSpeechStreamWithVoiceTest() { + ElevenLabsTextToSpeechOptions options = ElevenLabsTextToSpeechOptions.builder().voice(VOICE_ID).build(); + TextToSpeechPrompt prompt = new TextToSpeechPrompt( + "Hello, world! 
This is a test of streaming speech synthesis.", options); + Flux responseFlux = textToSpeechModel.stream(prompt); + + List responses = responseFlux.collectList().block(); + assertThat(responses).isNotNull().isNotEmpty(); + + responses.forEach(response -> { + assertThat(response).isNotNull(); + assertThat(response.getResults()).hasSize(1); + assertThat(response.getResults().get(0).getOutput()).isNotEmpty(); + }); + } + +} diff --git a/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechOptionsTests.java b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechOptionsTests.java new file mode 100644 index 00000000000..9ed39b8f602 --- /dev/null +++ b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechOptionsTests.java @@ -0,0 +1,231 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.elevenlabs; + +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; +import org.junit.jupiter.api.Test; + +import org.springframework.ai.elevenlabs.api.ElevenLabsApi; + +/** + * Tests for the {@link ElevenLabsTextToSpeechOptions}. + * + *

+ * These are plain unit tests that only build and inspect option objects; no + * ElevenLabs API key or network access is needed. + * + * @author Alexandros Pappas + */ +public class ElevenLabsTextToSpeechOptionsTests { + + @Test + public void testBuilderWithAllFields() { + ElevenLabsTextToSpeechOptions options = ElevenLabsTextToSpeechOptions.builder() + .modelId("test-model") + .voice("test-voice") + .voiceId("test-voice-id") // Test both voice and voiceId + .format("mp3_44100_128") + .outputFormat("mp3_44100_128") + .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.5, 0.8, 0.9, true, 1.2)) + .languageCode("en") + .pronunciationDictionaryLocators( + List.of(new ElevenLabsApi.SpeechRequest.PronunciationDictionaryLocator("dict1", "v1"))) + .seed(12345) + .previousText("previous") + .nextText("next") + .previousRequestIds(List.of("req1", "req2")) + .nextRequestIds(List.of("req3", "req4")) + .usePvcAsIvc(true) + .applyTextNormalization(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON) + .build(); + + assertThat(options.getModelId()).isEqualTo("test-model"); + assertThat(options.getVoice()).isEqualTo("test-voice-id"); + assertThat(options.getVoiceId()).isEqualTo("test-voice-id"); + assertThat(options.getFormat()).isEqualTo("mp3_44100_128"); + assertThat(options.getOutputFormat()).isEqualTo("mp3_44100_128"); + assertThat(options.getVoiceSettings()).isNotNull(); + assertThat(options.getVoiceSettings().stability()).isEqualTo(0.5); + assertThat(options.getVoiceSettings().similarityBoost()).isEqualTo(0.8); + assertThat(options.getVoiceSettings().style()).isEqualTo(0.9); + assertThat(options.getVoiceSettings().useSpeakerBoost()).isTrue(); + assertThat(options.getSpeed()).isEqualTo(1.2); // Check via getter + assertThat(options.getLanguageCode()).isEqualTo("en"); + assertThat(options.getPronunciationDictionaryLocators()).hasSize(1); + assertThat(options.getPronunciationDictionaryLocators().get(0).pronunciationDictionaryId()).isEqualTo("dict1"); + 
assertThat(options.getPronunciationDictionaryLocators().get(0).versionId()).isEqualTo("v1"); + assertThat(options.getSeed()).isEqualTo(12345); + assertThat(options.getPreviousText()).isEqualTo("previous"); + assertThat(options.getNextText()).isEqualTo("next"); + assertThat(options.getPreviousRequestIds()).containsExactly("req1", "req2"); + assertThat(options.getNextRequestIds()).containsExactly("req3", "req4"); + assertThat(options.getUsePvcAsIvc()).isTrue(); + assertThat(options.getApplyTextNormalization()).isEqualTo(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON); + } + + @Test + public void testCopy() { + ElevenLabsTextToSpeechOptions original = ElevenLabsTextToSpeechOptions.builder() + .modelId("test-model") + .voice("test-voice") + .format("mp3_44100_128") + .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.5, 0.8, null, null, null)) + .build(); + + ElevenLabsTextToSpeechOptions copied = original.copy(); + + assertThat(copied).isNotSameAs(original).isEqualTo(original); + + copied = ElevenLabsTextToSpeechOptions.builder().modelId("new-model").build(); + assertThat(original.getModelId()).isEqualTo("test-model"); + assertThat(copied.getModelId()).isEqualTo("new-model"); + } + + @Test + public void testSetters() { + ElevenLabsTextToSpeechOptions options = new ElevenLabsTextToSpeechOptions(); + options.setModelId("test-model"); + options.setVoice("test-voice"); + options.setVoiceId("test-voice-id"); + options.setOutputFormat("mp3_44100_128"); + options.setFormat("mp3_44100_128"); + options.setVoiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.5, 0.8, null, null, null)); + options.setLanguageCode("en"); + options.setPronunciationDictionaryLocators( + List.of(new ElevenLabsApi.SpeechRequest.PronunciationDictionaryLocator("dict1", "v1"))); + options.setSeed(12345); + options.setPreviousText("previous"); + options.setNextText("next"); + options.setPreviousRequestIds(List.of("req1", "req2")); + options.setNextRequestIds(List.of("req3", 
"req4")); + options.setUsePvcAsIvc(true); + options.setApplyTextNormalization(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON); + + assertThat(options.getModelId()).isEqualTo("test-model"); + assertThat(options.getVoice()).isEqualTo("test-voice-id"); + assertThat(options.getVoiceId()).isEqualTo("test-voice-id"); + assertThat(options.getFormat()).isEqualTo("mp3_44100_128"); + assertThat(options.getOutputFormat()).isEqualTo("mp3_44100_128"); + assertThat(options.getVoiceSettings()).isNotNull(); + assertThat(options.getVoiceSettings().stability()).isEqualTo(0.5); + assertThat(options.getVoiceSettings().similarityBoost()).isEqualTo(0.8); + assertThat(options.getLanguageCode()).isEqualTo("en"); + assertThat(options.getPronunciationDictionaryLocators()).hasSize(1); + assertThat(options.getPronunciationDictionaryLocators().get(0).pronunciationDictionaryId()).isEqualTo("dict1"); + assertThat(options.getPronunciationDictionaryLocators().get(0).versionId()).isEqualTo("v1"); + assertThat(options.getSeed()).isEqualTo(12345); + assertThat(options.getPreviousText()).isEqualTo("previous"); + assertThat(options.getNextText()).isEqualTo("next"); + assertThat(options.getPreviousRequestIds()).containsExactly("req1", "req2"); + assertThat(options.getNextRequestIds()).containsExactly("req3", "req4"); + assertThat(options.getUsePvcAsIvc()).isTrue(); + assertThat(options.getApplyTextNormalization()).isEqualTo(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON); + } + + @Test + public void testDefaultValues() { + ElevenLabsTextToSpeechOptions options = new ElevenLabsTextToSpeechOptions(); + assertThat(options.getModelId()).isNull(); + assertThat(options.getVoice()).isNull(); + assertThat(options.getVoiceId()).isNull(); + assertThat(options.getFormat()).isNull(); + assertThat(options.getOutputFormat()).isNull(); + assertThat(options.getSpeed()).isNull(); + assertThat(options.getVoiceSettings()).isNull(); + assertThat(options.getLanguageCode()).isNull(); + 
assertThat(options.getPronunciationDictionaryLocators()).isNull();
+		assertThat(options.getSeed()).isNull();
+		assertThat(options.getPreviousText()).isNull();
+		assertThat(options.getNextText()).isNull();
+		assertThat(options.getPreviousRequestIds()).isNull();
+		assertThat(options.getNextRequestIds()).isNull();
+		assertThat(options.getUsePvcAsIvc()).isNull();
+		assertThat(options.getApplyTextNormalization()).isNull();
+	}
+
+	/**
+	 * Checks that {@code getSpeed()} and {@code getVoiceSettings().speed()} agree,
+	 * whether the speed arrives through the builder's {@code voiceSettings(...)} or
+	 * through {@code setSpeed(...)}.
+	 *
+	 * <p>
+	 * All expected values are {@code double} literals so they have the same type as
+	 * the values under test.
+	 */
+	@Test
+	public void testSetSpeed() {
+		// 1. Setting speed via voiceSettings, no existing voiceSettings
+		ElevenLabsTextToSpeechOptions options = ElevenLabsTextToSpeechOptions.builder()
+			.voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(null, null, null, null, 1.5))
+			.build();
+		assertThat(options.getSpeed()).isEqualTo(1.5);
+		assertThat(options.getVoiceSettings()).isNotNull();
+		assertThat(options.getVoiceSettings().speed()).isEqualTo(1.5);
+
+		// 2. Setting speed via voiceSettings, existing voiceSettings
+		ElevenLabsTextToSpeechOptions options2 = ElevenLabsTextToSpeechOptions.builder()
+			.voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, null))
+			.voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, 2.0)) // Overwrite
+			.build();
+		assertThat(options2.getSpeed()).isEqualTo(2.0);
+		assertThat(options2.getVoiceSettings().speed()).isEqualTo(2.0);
+		assertThat(options2.getVoiceSettings().stability()).isEqualTo(0.1);
+
+		// 3. Setting voiceSettings with null speed, existing voiceSettings
+		ElevenLabsTextToSpeechOptions options3 = ElevenLabsTextToSpeechOptions.builder()
+			.voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, 2.0))
+			.voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, null)) // Overwrite
+			.build();
+		assertThat(options3.getSpeed()).isNull();
+		assertThat(options3.getVoiceSettings().speed()).isNull();
+		assertThat(options3.getVoiceSettings().stability()).isEqualTo(0.1);
+
+		// 4. No voiceSettings supplied at all (the builder must not create any)
+		ElevenLabsTextToSpeechOptions options4 = ElevenLabsTextToSpeechOptions.builder().build();
+		assertThat(options4.getSpeed()).isNull();
+		assertThat(options4.getVoiceSettings()).isNull();
+
+		// 5. Setting voiceSettings directly, with speed.
+		ElevenLabsTextToSpeechOptions options5 = ElevenLabsTextToSpeechOptions.builder()
+			.voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, 2.5))
+			.build();
+		assertThat(options5.getSpeed()).isEqualTo(2.5);
+		assertThat(options5.getVoiceSettings().speed()).isEqualTo(2.5);
+
+		// 6. Setting voiceSettings directly, without speed (speed should be null).
+		ElevenLabsTextToSpeechOptions options6 = ElevenLabsTextToSpeechOptions.builder()
+			.voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, null))
+			.build();
+		assertThat(options6.getSpeed()).isNull();
+		assertThat(options6.getVoiceSettings().speed()).isNull();
+
+		// 7. Setting voiceSettings to null, after previously setting it.
+		ElevenLabsTextToSpeechOptions options7 = ElevenLabsTextToSpeechOptions.builder()
+			.voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, 1.5))
+			.voiceSettings(null)
+			.build();
+		assertThat(options7.getSpeed()).isNull();
+		assertThat(options7.getVoiceSettings()).isNull();
+
+		// 8. Setting speed via setSpeed method
+		ElevenLabsTextToSpeechOptions options8 = ElevenLabsTextToSpeechOptions.builder().build();
+		options8.setSpeed(3.0);
+		assertThat(options8.getSpeed()).isEqualTo(3.0);
+		assertThat(options8.getVoiceSettings()).isNotNull();
+		assertThat(options8.getVoiceSettings().speed()).isEqualTo(3.0);
+
+		// 9. Setting speed to null via setSpeed method
+		options8.setSpeed(null);
+		assertThat(options8.getSpeed()).isNull();
+		assertThat(options8.getVoiceSettings().speed()).isNull();
+	}
+
+}
diff --git a/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/api/ElevenLabsApiIT.java b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/api/ElevenLabsApiIT.java
new file mode 100644
index 00000000000..e2637915663
--- /dev/null
+++ b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/api/ElevenLabsApiIT.java
@@ -0,0 +1,220 @@
+/*
+ * Copyright 2025-2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.elevenlabs.api;
+
+import java.util.concurrent.atomic.AtomicInteger;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
+import reactor.core.publisher.Flux;
+import reactor.test.StepVerifier;
+
+import org.springframework.ai.elevenlabs.ElevenLabsTestConfiguration;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.http.ResponseEntity;
+import org.springframework.util.LinkedMultiValueMap;
+import org.springframework.util.MultiValueMap;
+
+/**
+ * Integration tests for the {@link ElevenLabsApi}.
+ *
+ * <p>
+ * These tests require a valid ElevenLabs API key to be set as an environment variable
+ * named {@code ELEVEN_LABS_API_KEY}.
+ *
+ * @author Alexandros Pappas
+ */
+@SpringBootTest(classes = ElevenLabsTestConfiguration.class)
+@EnabledIfEnvironmentVariable(named = "ELEVEN_LABS_API_KEY", matches = ".+")
+public class ElevenLabsApiIT {
+
+	/** Voice ID passed by every test that calls the API with a non-null voice. */
+	private static final String VALID_VOICE_ID = "9BWtsMINqrJLrRacOk9x";
+
+	/** Model ID set on every request built in this class. */
+	private static final String MODEL_ID = "eleven_monolingual_v1";
+
+	@Autowired
+	private ElevenLabsApi elevenLabsApi;
+
+	/** Blocking call with only text and model set on the request. */
+	@Test
+	public void testTextToSpeech() {
+		ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder()
+			.text("Hello, world!")
+			.modelId(MODEL_ID)
+			.build();
+
+		ResponseEntity<byte[]> response = elevenLabsApi.textToSpeech(request, VALID_VOICE_ID, null);
+
+		assertSuccessfulAudio(response);
+	}
+
+	/** Blocking call with explicit voice settings on the request. */
+	@Test
+	public void testTextToSpeechWithVoiceSettings() {
+		ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder()
+			.text("Hello, with Voice settings!")
+			.modelId(MODEL_ID)
+			.voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.5, 0.7, 0.0, true, 1.0))
+			.build();
+
+		ResponseEntity<byte[]> response = elevenLabsApi.textToSpeech(request, VALID_VOICE_ID, null);
+
+		assertSuccessfulAudio(response);
+	}
+
+	/** Blocking call that also passes query parameters. */
+	@Test
+	public void testTextToSpeechWithQueryParams() {
+		ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder()
+			.text("Hello, testing query params!")
+			.modelId(MODEL_ID)
+			.build();
+
+		MultiValueMap<String, String> queryParams = new LinkedMultiValueMap<>();
+		queryParams.add("optimize_streaming_latency", "2");
+		queryParams.add("output_format", ElevenLabsApi.OutputFormat.MP3_22050_32.getValue());
+
+		ResponseEntity<byte[]> response = elevenLabsApi.textToSpeech(request, VALID_VOICE_ID, queryParams);
+
+		assertSuccessfulAudio(response);
+	}
+
+	/** A null voice ID must be rejected with the exact message asserted below. */
+	@Test
+	public void testTextToSpeechVoiceIdNull() {
+		ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder()
+			.text("This should fail.")
+			.modelId(MODEL_ID)
+			.build();
+
+		Exception exception = assertThrows(IllegalArgumentException.class,
+				() -> elevenLabsApi.textToSpeech(request, null, null));
+		assertThat(exception.getMessage()).isEqualTo("voiceId must be provided. It cannot be null.");
+	}
+
+	/** Building a request with empty text must fail before any API call is made. */
+	@Test
+	public void testTextToSpeechTextEmpty() {
+		Exception exception = assertThrows(IllegalArgumentException.class,
+				() -> ElevenLabsApi.SpeechRequest.builder().text("").modelId(MODEL_ID).build());
+		assertThat(exception.getMessage()).isEqualTo("text must not be empty");
+	}
+
+	// Streaming API tests
+
+	/** Streaming call: every chunk must be a successful, non-empty response. */
+	@Test
+	public void testTextToSpeechStream() {
+		ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder()
+			.text("This is a longer text to ensure multiple chunks are received through the streaming API.")
+			.modelId(MODEL_ID)
+			.build();
+
+		Flux<ResponseEntity<byte[]>> responseFlux = elevenLabsApi.textToSpeechStream(request, VALID_VOICE_ID,
+				null);
+
+		// Track the number of chunks received
+		AtomicInteger chunkCount = new AtomicInteger(0);
+
+		StepVerifier.create(responseFlux).thenConsumeWhile(response -> {
+			// Verify each chunk's response properties
+			assertSuccessfulAudio(response);
+			// Count this chunk
+			chunkCount.incrementAndGet();
+			return true;
+		}).verifyComplete();
+
+		// Verify we received at least one chunk
+		assertThat(chunkCount.get()).isPositive();
+	}
+
+	/** Streaming call with partially specified voice settings. */
+	@Test
+	public void testTextToSpeechStreamWithVoiceSettings() {
+		ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder()
+			.text("Hello, with Voice settings in streaming mode!")
+			.modelId(MODEL_ID)
+			.voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.5, 0.7, null, null, null))
+			.build();
+
+		Flux<ResponseEntity<byte[]>> responseFlux = elevenLabsApi.textToSpeechStream(request, VALID_VOICE_ID,
+				null);
+
+		StepVerifier.create(responseFlux).thenConsumeWhile(response -> {
+			assertSuccessfulAudio(response);
+			return true;
+		}).verifyComplete();
+	}
+
+	/** Streaming call that also passes query parameters. */
+	@Test
+	public void testTextToSpeechStreamWithQueryParams() {
+		ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder()
+			.text("Hello, testing streaming with query params!")
+			.modelId(MODEL_ID)
+			.build();
+
+		MultiValueMap<String, String> queryParams = new LinkedMultiValueMap<>();
+		queryParams.add("optimize_streaming_latency", "2");
+		queryParams.add("output_format", "mp3_44100_128");
+
+		Flux<ResponseEntity<byte[]>> responseFlux = elevenLabsApi.textToSpeechStream(request, VALID_VOICE_ID,
+				queryParams);
+
+		StepVerifier.create(responseFlux).thenConsumeWhile(response -> {
+			assertSuccessfulAudio(response);
+			return true;
+		}).verifyComplete();
+	}
+
+	/** A null voice ID must be rejected by the streaming entry point as well. */
+	@Test
+	public void testTextToSpeechStreamVoiceIdNull() {
+		ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder()
+			.text("This should fail.")
+			.modelId(MODEL_ID)
+			.build();
+
+		Exception exception = assertThrows(IllegalArgumentException.class,
+				() -> elevenLabsApi.textToSpeechStream(request, null, null));
+		assertThat(exception.getMessage()).isEqualTo("voiceId must be provided for streaming. It cannot be null.");
+	}
+
+	/** A null request body must be rejected by the streaming entry point. */
+	@Test
+	public void testTextToSpeechStreamRequestBodyNull() {
+		Exception exception = assertThrows(IllegalArgumentException.class,
+				() -> elevenLabsApi.textToSpeechStream(null, VALID_VOICE_ID, null));
+		assertThat(exception.getMessage()).isEqualTo("requestBody can not be null.");
+	}
+
+	/** Empty text must raise the "text must not be empty" error on the streaming path too. */
+	@Test
+	public void testTextToSpeechStreamTextEmpty() {
+		Exception exception = assertThrows(IllegalArgumentException.class, () -> {
+			ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder()
+				.text("")
+				.modelId(MODEL_ID)
+				.build();
+
+			elevenLabsApi.textToSpeechStream(request, VALID_VOICE_ID, null);
+		});
+		assertThat(exception.getMessage()).isEqualTo("text must not be empty");
+	}
+
+	/**
+	 * Asserts that a response has a 2xx status and a non-empty body.
+	 * @param response the response, or a single streamed chunk, to check
+	 */
+	private static void assertSuccessfulAudio(ResponseEntity<byte[]> response) {
+		assertThat(response.getStatusCode().is2xxSuccessful()).isTrue();
+		assertThat(response.getBody()).isNotNull().isNotEmpty();
+	}
+
+}
diff --git a/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/api/ElevenLabsVoicesApiIT.java b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/api/ElevenLabsVoicesApiIT.java
new file mode 100644
index 00000000000..e27d8d5047a
--- /dev/null
+++ b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/api/ElevenLabsVoicesApiIT.java
@@ -0,0 +1,112 @@
+/*
+ * Copyright 2025-2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.elevenlabs.api;
+
+import java.util.List;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
+
+import org.springframework.ai.elevenlabs.ElevenLabsTestConfiguration;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.http.ResponseEntity;
+
+/**
+ * Integration tests for the {@link ElevenLabsVoicesApi}.
+ *
+ * <p>
+ * These tests require a valid ElevenLabs API key to be set as an environment variable
+ * named {@code ELEVEN_LABS_API_KEY}.
+ *
+ * @author Alexandros Pappas
+ */
+@SpringBootTest(classes = ElevenLabsTestConfiguration.class)
+@EnabledIfEnvironmentVariable(named = "ELEVEN_LABS_API_KEY", matches = ".+")
+public class ElevenLabsVoicesApiIT {
+
+	@Autowired
+	private ElevenLabsVoicesApi voicesApi;
+
+	/** Listing voices returns a non-empty list in which every voice has an ID. */
+	@Test
+	void getVoices() {
+		ResponseEntity<ElevenLabsVoicesApi.Voices> response = voicesApi.getVoices();
+
+		assertThat(response.getStatusCode().is2xxSuccessful()).isTrue();
+		assertThat(response.getBody()).isNotNull();
+		ElevenLabsVoicesApi.Voices voicesResponse = response.getBody();
+
+		List<ElevenLabsVoicesApi.Voice> voices = voicesResponse.voices();
+		assertThat(voices).isNotNull().isNotEmpty();
+
+		for (ElevenLabsVoicesApi.Voice voice : voices) {
+			assertThat(voice.voiceId()).isNotBlank();
+		}
+	}
+
+	/** The default voice settings come back with all four asserted fields set. */
+	@Test
+	void getDefaultVoiceSettings() {
+		ResponseEntity<ElevenLabsVoicesApi.VoiceSettings> response = voicesApi.getDefaultVoiceSettings();
+		assertThat(response.getStatusCode().is2xxSuccessful()).isTrue();
+		assertThat(response.getBody()).isNotNull();
+
+		assertVoiceSettingsPopulated(response.getBody());
+	}
+
+	/** The settings of the first listed voice come back with all four asserted fields set. */
+	@Test
+	void getVoiceSettings() {
+		String voiceId = firstVoiceId();
+
+		ResponseEntity<ElevenLabsVoicesApi.VoiceSettings> settingsResponse = voicesApi.getVoiceSettings(voiceId);
+		assertThat(settingsResponse.getStatusCode().is2xxSuccessful()).isTrue();
+		assertThat(settingsResponse.getBody()).isNotNull();
+
+		assertVoiceSettingsPopulated(settingsResponse.getBody());
+	}
+
+	/** Fetching a single voice by ID returns that voice, with a name. */
+	@Test
+	void getVoice() {
+		String voiceId = firstVoiceId();
+
+		ResponseEntity<ElevenLabsVoicesApi.Voice> voiceResponse = voicesApi.getVoice(voiceId);
+		assertThat(voiceResponse.getStatusCode().is2xxSuccessful()).isTrue();
+		assertThat(voiceResponse.getBody()).isNotNull();
+
+		ElevenLabsVoicesApi.Voice voice = voiceResponse.getBody();
+		assertThat(voice.voiceId()).isEqualTo(voiceId);
+		assertThat(voice.name()).isNotBlank();
+	}
+
+	/**
+	 * Lists the voices and returns the ID of the first one.
+	 * @return the ID of the first voice in the listing
+	 */
+	private String firstVoiceId() {
+		ResponseEntity<ElevenLabsVoicesApi.Voices> voicesResponse = voicesApi.getVoices();
+		assertThat(voicesResponse.getStatusCode().is2xxSuccessful()).isTrue();
+		// Fail with an assertion rather than a NullPointerException if the body is missing.
+		assertThat(voicesResponse.getBody()).isNotNull();
+		List<ElevenLabsVoicesApi.Voice> voices = voicesResponse.getBody().voices();
+		assertThat(voices).isNotEmpty();
+		return voices.get(0).voiceId();
+	}
+
+	/**
+	 * Asserts that stability, similarity boost, style and speaker boost are all set.
+	 * @param settings the voice settings to check
+	 */
+	private static void assertVoiceSettingsPopulated(ElevenLabsVoicesApi.VoiceSettings settings) {
+		assertThat(settings.stability()).isNotNull();
+		assertThat(settings.similarityBoost()).isNotNull();
+		assertThat(settings.style()).isNotNull();
+		assertThat(settings.useSpeakerBoost()).isNotNull();
+	}
+
+}
diff --git a/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/tts/DefaultTextToSpeechOptionsTests.java b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/tts/DefaultTextToSpeechOptionsTests.java
new file mode 100644
index 00000000000..bc6ff9b81e3
--- /dev/null
+++ b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/tts/DefaultTextToSpeechOptionsTests.java
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2025-2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.elevenlabs.tts;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.within;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Unit tests for {@link DefaultTextToSpeechOptions}.
+ *
+ * @author Alexandros Pappas
+ */
+class DefaultTextToSpeechOptionsTests {
+
+	/** Every value set on the builder is readable from the built options. */
+	@Test
+	void testBuilderWithAllFields() {
+		TextToSpeechOptions built = populatedOptions();
+
+		assertThat(built.getModel()).isEqualTo("test-model");
+		assertThat(built.getVoice()).isEqualTo("test-voice");
+		assertThat(built.getFormat()).isEqualTo("test-format");
+		assertThat(built.getSpeed()).isCloseTo(0.8, within(0.0001));
+	}
+
+	/** {@code copy()} returns a distinct instance that is equal to its source. */
+	@Test
+	void testCopy() {
+		TextToSpeechOptions source = populatedOptions();
+
+		DefaultTextToSpeechOptions duplicate = source.copy();
+
+		assertThat(duplicate).isNotSameAs(source);
+		assertThat(duplicate).isEqualTo(source);
+	}
+
+	/** An untouched builder leaves every field {@code null}. */
+	@Test
+	void testDefaultValues() {
+		DefaultTextToSpeechOptions empty = DefaultTextToSpeechOptions.builder().build();
+
+		assertThat(empty.getModel()).isNull();
+		assertThat(empty.getVoice()).isNull();
+		assertThat(empty.getFormat()).isNull();
+		assertThat(empty.getSpeed()).isNull();
+	}
+
+	/**
+	 * Builds options with model, voice, format and speed all set.
+	 * @return the populated options
+	 */
+	private static TextToSpeechOptions populatedOptions() {
+		return DefaultTextToSpeechOptions.builder()
+			.model("test-model")
+			.voice("test-voice")
+			.format("test-format")
+			.speed(0.8)
+			.build();
+	}
+
+}
diff --git a/models/spring-ai-elevenlabs/src/test/resources/voices.json b/models/spring-ai-elevenlabs/src/test/resources/voices.json
new file mode 100644
index 00000000000..da6b3ffcb97
--- /dev/null
+++ b/models/spring-ai-elevenlabs/src/test/resources/voices.json
@@ -0,0 +1,1482 @@
+{
+  "voices": [
+    {
+      "voice_id": "9BWtsMINqrJLrRacOk9x",
+      "name": "Aria",
+      "samples": null,
+      "category": "premade",
+      "fine_tuning": {
+        "is_allowed_to_fine_tune": true,
+        "state": {
+          "eleven_multilingual_v2": "fine_tuned",
+          "eleven_turbo_v2_5": "fine_tuned", +
"eleven_flash_v2_5": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + "eleven_v2_flash": "Done!", + "eleven_flash_v2": "Done!", + "eleven_v2_5_flash": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "expressive", + "age": "middle-aged", + "gender": "female", + "use_case": "social media" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/9BWtsMINqrJLrRacOk9x/405766b8-1f4e-4d3c-aba1-6f25333823ec.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "CwhRBWXzGAHq8TQ4Fs17", + "name": "Roger", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_multilingual_v2": "fine_tuned", + 
"eleven_turbo_v2_5": "failed", + "eleven_flash_v2_5": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + "eleven_v2_flash": "Done!", + "eleven_flash_v2": "Done!", + "eleven_v2_5_flash": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "confident", + "age": "middle-aged", + "gender": "male", + "use_case": "social media" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/CwhRBWXzGAHq8TQ4Fs17/58ee3ff5-f6f2-4628-93b8-e38eb31806b0.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "EXAVITQu4vr4xnSDxMaL", + "name": "Sarah", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": {}, + 
"verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": {}, + "message": {}, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "american", + "description": "soft", + "age": "young", + "gender": "female", + "use_case": "news" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/EXAVITQu4vr4xnSDxMaL/01a3e33c-6e99-4ee7-8543-ff2216a32186.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_turbo_v2", + "eleven_multilingual_v2", + "eleven_turbo_v2_5" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "FGY2WhTYpPnrIDTdsKH5", + "name": "Laura", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_multilingual_v2": "fine_tuned", + "eleven_turbo_v2_5": "fine_tuned", + "eleven_flash_v2_5": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + 
"eleven_v2_flash": "Done!", + "eleven_flash_v2": "Done!", + "eleven_v2_5_flash": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "upbeat", + "age": "young", + "gender": "female", + "use_case": "social media" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/FGY2WhTYpPnrIDTdsKH5/67341759-ad08-41a5-be6e-de12fe448618.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "IKne3meq5aSn9XLyUdCD", + "name": "Charlie", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_flash_v2_5": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + "eleven_turbo_v2": "", + "eleven_flash_v2": "Done!", + 
"eleven_v2_flash": "Done!", + "eleven_v2_5_flash": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "Australian", + "description": "natural", + "age": "middle aged", + "gender": "male", + "use_case": "conversational" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/IKne3meq5aSn9XLyUdCD/102de6f2-22ed-43e0-a1f1-111fa75c5481.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_multilingual_v1", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "JBFqnCBsd6RMkjVDRZzb", + "name": "George", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_turbo_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_v2_flash": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_turbo_v2": "", + "eleven_v2_flash": "Done!", + "eleven_v2_5_flash": "Done!" 
+ }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "British", + "description": "warm", + "age": "middle aged", + "gender": "male", + "use_case": "narration" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/JBFqnCBsd6RMkjVDRZzb/e6206d1a-0721-4787-aafb-06a6e705cac5.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "N2lVS1w4EtoT3dr4eOWO", + "name": "Callum", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_flash_v2_5": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + "eleven_turbo_v2": "", + "eleven_flash_v2": "Done!", + "eleven_v2_flash": "Done!", + "eleven_v2_5_flash": "Done!" 
+ }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "Transatlantic", + "description": "intense", + "age": "middle-aged", + "gender": "male", + "use_case": "characters" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/N2lVS1w4EtoT3dr4eOWO/ac833bd8-ffda-4938-9ebc-b0f99ca25481.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_multilingual_v1", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "SAz9YHcvj6GT2YYXdXww", + "name": "River", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_multilingual_v2": "fine_tuned", + "eleven_turbo_v2_5": "fine_tuned", + "eleven_flash_v2_5": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned", + "eleven_multilingual_sts_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned", + "eleven_turbo_v2": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": 
"Done!", + "eleven_v2_flash": "Done!", + "eleven_flash_v2": "Done!", + "eleven_v2_5_flash": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "confident", + "age": "middle-aged", + "gender": "non-binary", + "use_case": "social media" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/SAz9YHcvj6GT2YYXdXww/e6c95f0b-2227-491a-b3d7-2249240decb7.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_sts_v2", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "TX3LPaxmHKxFdv7VOQHJ", + "name": "Liam", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_turbo_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_v2_flash": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_turbo_v2": "", + "eleven_v2_flash": "Done!", + "eleven_v2_5_flash": "Done!" 
+ }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "articulate", + "age": "young", + "gender": "male", + "use_case": "narration" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/TX3LPaxmHKxFdv7VOQHJ/63148076-6363-42db-aea8-31424308b92c.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_multilingual_v1", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "XB0fDUnXU5powFXDhCwa", + "name": "Charlotte", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_flash_v2_5": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_multilingual_v2": "", + "eleven_turbo_v2_5": "", + "eleven_flash_v2_5": "Done!", + "eleven_v2_flash": "Done!", + "eleven_v2_5_flash": "Done!", 
+ "eleven_turbo_v2": "", + "eleven_flash_v2": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "Swedish", + "description": "seductive", + "age": "young", + "gender": "female", + "use_case": "characters" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/XB0fDUnXU5powFXDhCwa/942356dc-f10d-4d89-bda5-4f8505ee038b.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_multilingual_v1", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "Xb7hH8MSUJpSbSDYk0k2", + "name": "Alice", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_flash_v2_5": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + "eleven_turbo_v2": "", + "eleven_flash_v2": "Done!", + 
"eleven_v2_flash": "Done!", + "eleven_v2_5_flash": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "British", + "description": "confident", + "age": "middle-aged", + "gender": "female", + "use_case": "news" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/Xb7hH8MSUJpSbSDYk0k2/d10f7534-11f6-41fe-a012-2de1e482d336.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "XrExE9yKIg1WjnnlVkGX", + "name": "Matilda", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_turbo_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_v2_flash": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_turbo_v2": "", + "eleven_v2_flash": "Done!", + "eleven_v2_5_flash": "Done!" 
+ }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "friendly", + "age": "middle-aged", + "gender": "female", + "use_case": "narration" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/XrExE9yKIg1WjnnlVkGX/b930e18d-6b4d-466e-bab2-0ae97c6d8535.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_multilingual_v1", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "bIHbv24MWmeRgasZH58o", + "name": "Will", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_multilingual_v2": "fine_tuned", + "eleven_turbo_v2_5": "fine_tuned", + "eleven_flash_v2_5": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + "eleven_v2_flash": "Done!", + 
"eleven_flash_v2": "Done!", + "eleven_v2_5_flash": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "friendly", + "age": "young", + "gender": "male", + "use_case": "social media" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/bIHbv24MWmeRgasZH58o/8caf8f3d-ad29-4980-af41-53f20c72d7a4.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "cgSgspJ2msm6clMCkdW9", + "name": "Jessica", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_multilingual_v2": "fine_tuned", + "eleven_turbo_v2_5": "fine_tuned", + "eleven_flash_v2_5": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + 
"eleven_v2_flash": "Done!", + "eleven_flash_v2": "Done!", + "eleven_v2_5_flash": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "expressive", + "age": "young", + "gender": "female", + "use_case": "conversational" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/cgSgspJ2msm6clMCkdW9/56a97bf8-b69b-448f-846c-c3a11683d45a.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "cjVigY5qzO86Huf0OWal", + "name": "Eric", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_multilingual_v2": "fine_tuned", + "eleven_turbo_v2_5": "fine_tuned", + "eleven_flash_v2_5": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + 
"eleven_flash_v2_5": "Done!", + "eleven_v2_flash": "Done!", + "eleven_flash_v2": "Done!", + "eleven_v2_5_flash": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "friendly", + "age": "middle-aged", + "gender": "male", + "use_case": "conversational" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/cjVigY5qzO86Huf0OWal/d098fda0-6456-4030-b3d8-63aa048c9070.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "iP95p4xoKVk53GoZ742B", + "name": "Chris", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_flash_v2_5": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + "eleven_turbo_v2": 
"", + "eleven_flash_v2": "Done!", + "eleven_v2_flash": "Done!", + "eleven_v2_5_flash": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "casual", + "age": "middle-aged", + "gender": "male", + "use_case": "conversational" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/iP95p4xoKVk53GoZ742B/3f4bde72-cc48-40dd-829f-57fbf906f4d7.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "nPczCjzI2devNBz1zQrb", + "name": "Brian", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_flash_v2_5": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + "eleven_turbo_v2": "", + "eleven_flash_v2": 
"Done!", + "eleven_v2_flash": "Done!", + "eleven_v2_5_flash": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "deep", + "age": "middle-aged", + "gender": "male", + "use_case": "narration" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/nPczCjzI2devNBz1zQrb/2dd3e72c-4fd3-42f1-93ea-abc5d4e5aa1d.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "onwK4e9ZLuTAKqWW03F9", + "name": "Daniel", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_flash_v2_5": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + "eleven_turbo_v2": "", + "eleven_flash_v2": "Done!", + "eleven_v2_flash": 
"Done!", + "eleven_v2_5_flash": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "British", + "description": "authoritative", + "age": "middle-aged", + "gender": "male", + "use_case": "news" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/onwK4e9ZLuTAKqWW03F9/7eee0236-1a72-4b86-b303-5dcadc007ba9.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_multilingual_v1", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "pFZP5JQG7iQjIQuC4Bku", + "name": "Lily", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_flash_v2_5": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + "eleven_turbo_v2": "", + "eleven_flash_v2": "Done!", + "eleven_v2_flash": 
"Done!", + "eleven_v2_5_flash": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "British", + "description": "warm", + "age": "middle-aged", + "gender": "female", + "use_case": "narration" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/pFZP5JQG7iQjIQuC4Bku/89b68b35-b3dd-4348-a84a-a3c13a3c2b30.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "pqHfZKP75CvOlQylNhV4", + "name": "Bill", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_flash_v2_5": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + "eleven_turbo_v2": "", + "eleven_flash_v2": "Done!", + "eleven_v2_flash": "Done!", + "eleven_v2_5_flash": 
"Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "trustworthy", + "age": "old", + "gender": "male", + "use_case": "narration" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/pqHfZKP75CvOlQylNhV4/d782b3ff-84ba-4029-848c-acf01285524d.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + } + ] +} \ No newline at end of file diff --git a/pom.xml b/pom.xml index b403eb5ef62..8762799dd10 100644 --- a/pom.xml +++ b/pom.xml @@ -162,6 +162,7 @@ models/spring-ai-azure-openai models/spring-ai-bedrock models/spring-ai-bedrock-converse + models/spring-ai-elevenlabs models/spring-ai-huggingface models/spring-ai-minimax models/spring-ai-mistral-ai @@ -180,6 +181,7 @@ spring-ai-spring-boot-starters/spring-ai-starter-model-azure-openai spring-ai-spring-boot-starters/spring-ai-starter-model-bedrock spring-ai-spring-boot-starters/spring-ai-starter-model-bedrock-converse + spring-ai-spring-boot-starters/spring-ai-starter-elevenlabs spring-ai-spring-boot-starters/spring-ai-starter-model-huggingface spring-ai-spring-boot-starters/spring-ai-starter-model-minimax 
spring-ai-spring-boot-starters/spring-ai-starter-model-mistral-ai @@ -711,7 +713,8 @@ org.springframework.ai.anthropic/**/*IT.java org.springframework.ai.azure.openai/**/*IT.java org.springframework.ai.bedrock/**/*IT.java - org.springframework.ai.bedrock.converse/**/*IT.java + org.springframework.ai.bedrock.converse/**/*IT.java + org.springframework.ai.elevenlabs/**/*IT.java org.springframework.ai.huggingface/**/*IT.java org.springframework.ai.minimax/**/*IT.java org.springframework.ai.mistralai/**/*IT.java @@ -759,6 +762,7 @@ org.springframework.ai.autoconfigure.huggingface/**/**IT.java org.springframework.ai.autoconfigure.chat/**/**IT.java + org.springframework.ai.autoconfigure.elevenlabs/**/**IT.java org.springframework.ai.autoconfigure.embedding/**/**IT.java org.springframework.ai.autoconfigure.image/**/**IT.java diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech.adoc index adabcd80c04..52de29ff2a2 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech.adoc @@ -1,5 +1,9 @@ [[Speech]] = Text-To-Speech (TTS) API -Spring AI provides support for OpenAI's Speech API. -When additional providers for Speech are implemented, a common `SpeechModel` and `StreamingSpeechModel` interface will be extracted. \ No newline at end of file +Spring AI provides support for the following Text-To-Speech (TTS) providers: + +- xref:api/audio/speech/openai-speech.adoc[OpenAI's Speech API] +- xref:api/audio/speech/elevenlabs-speech.adoc[ElevenLabs Text-to-Speech API] + +Future enhancements may introduce additional providers, at which point common `TextToSpeechModel` and `StreamingTextToSpeechModel` interfaces will be extracted. 
\ No newline at end of file diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech/elevenlabs-speech.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech/elevenlabs-speech.adoc new file mode 100644 index 00000000000..ca499c0543c --- /dev/null +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech/elevenlabs-speech.adoc @@ -0,0 +1,241 @@ += ElevenLabs Text-to-Speech (TTS) + +== Introduction + +ElevenLabs provides natural-sounding speech synthesis software using deep learning. Its AI audio models generate realistic, versatile, and contextually aware speech, voices, and sound effects across 32 languages. The ElevenLabs Text-to-Speech API enables users to bring any book, article, PDF, newsletter, or text to life with ultra-realistic AI narration. + +== Prerequisites + +. Create an ElevenLabs account and obtain an API key. You can sign up at the https://elevenlabs.io/sign-up[ElevenLabs signup page]. Your API key can be found on your profile page after logging in. +. Add the `spring-ai-elevenlabs` dependency to your project's build file. For more information, refer to the xref:getting-started.adoc#dependency-management[Dependency Management] section. + +== Auto-configuration + +Spring AI provides Spring Boot auto-configuration for the ElevenLabs Text-to-Speech Client. +To enable it, add the following dependency to your project's Maven `pom.xml` file: + +[source,xml] +---- +<dependency> + <groupId>org.springframework.ai</groupId> + <artifactId>spring-ai-elevenlabs-spring-boot-starter</artifactId> +</dependency> +---- + +or to your Gradle `build.gradle` build file: + +[source,groovy] +---- +dependencies { + implementation 'org.springframework.ai:spring-ai-elevenlabs-spring-boot-starter' +} +---- + +TIP: Refer to the xref:getting-started.adoc#dependency-management[Dependency Management] section to add the Spring AI BOM to your build file. 
+ +== Speech Properties + +=== Connection Properties + +The prefix `spring.ai.elevenlabs` is used as the property prefix for *all* ElevenLabs-related configuration (both connection and TTS-specific settings). This is defined in `ElevenLabsConnectionProperties`. + +[cols="3,5,1"] +|==== +| Property | Description | Default +| spring.ai.elevenlabs.base-url | The base URL for the ElevenLabs API. | https://api.elevenlabs.io +| spring.ai.elevenlabs.api-key | Your ElevenLabs API key. | - +|==== + +=== Configuration Properties + +The prefix `spring.ai.elevenlabs.tts` is used as the property prefix specifically for configuring the ElevenLabs Text-to-Speech client. This is defined in `ElevenLabsSpeechProperties`. + +[cols="3,5,2"] +|==== +| Property | Description | Default + +| spring.ai.elevenlabs.tts.options.model-id | The ID of the model to use. | eleven_monolingual_v1 +| spring.ai.elevenlabs.tts.options.voice-id | The ID of the voice to use. This is the *voice ID*, not the voice name. | 9BWtsMINqrJLrRacOk9x +| spring.ai.elevenlabs.tts.options.output-format | The output format for the generated audio. See <<output-formats,Output Formats>> below. | mp3_22050_32 +| spring.ai.elevenlabs.tts.enabled | Enable or disable the ElevenLabs Text-to-Speech client. | true +|==== + +NOTE: The base URL and API key can also be configured *specifically* for TTS using `spring.ai.elevenlabs.tts.base-url` and `spring.ai.elevenlabs.tts.api-key`. However, it is generally recommended to use the global `spring.ai.elevenlabs` prefix for simplicity, unless you have a specific reason to use different credentials for different ElevenLabs services. The more specific `tts` properties will override the global ones. + +TIP: All properties prefixed with `spring.ai.elevenlabs.tts.options` can be overridden at runtime. 
+ +[[output-formats]] +.Available Output Formats +[cols="1,1"] +|==== +| Enum Value | Description +| MP3_22050_32 | MP3, 22.05 kHz, 32 kbps +| MP3_44100_32 | MP3, 44.1 kHz, 32 kbps +| MP3_44100_64 | MP3, 44.1 kHz, 64 kbps +| MP3_44100_96 | MP3, 44.1 kHz, 96 kbps +| MP3_44100_128 | MP3, 44.1 kHz, 128 kbps +| MP3_44100_192 | MP3, 44.1 kHz, 192 kbps +| PCM_16000 | PCM, 16 kHz +| PCM_22050 | PCM, 22.05 kHz +| PCM_24000 | PCM, 24 kHz +| PCM_44100 | PCM, 44.1 kHz +| ULAW_8000 | µ-law, 8 kHz +|==== + + +== Runtime Options [[speech-options]] + +The `ElevenLabsTextToSpeechOptions` class provides options to use when making a text-to-speech request. On start-up, the options specified by `spring.ai.elevenlabs.tts.options` are used, but you can override these at runtime. The following options are available: + +* `modelId`: The ID of the model to use. +* `voiceId`: The ID of the voice to use. +* `outputFormat`: The output format of the generated audio. +* `voiceSettings`: An object containing voice settings such as `stability`, `similarityBoost`, `style`, `useSpeakerBoost`, and `speed`. +* `languageCode`: The language code of the input text (e.g., "en" for English). +* `pronunciationDictionaryLocators`: A list of pronunciation dictionary locators. +* `seed`: A seed for random number generation, for reproducibility. +* `previousText`: Text before the main text, for context in multi-turn conversations (advanced use). +* `nextText`: Text after the main text, for context in multi-turn conversations (advanced use). +* `previousRequestIds`: Request IDs from previous turns in a conversation (advanced use). +* `nextRequestIds`: Request IDs for subsequent turns in a conversation (advanced use). +* `usePvcAsIvc`: If `true`, generates with the Instant Voice Clone (IVC) version of the voice instead of its Professional Voice Clone (PVC) version (advanced use). +* `applyTextNormalization`: Apply text normalization ("auto", "on", or "off"). 
+ +For example: + +[source,java] +---- +ElevenLabsTextToSpeechOptions speechOptions = ElevenLabsTextToSpeechOptions.builder() + .modelId("eleven_multilingual_v2") + .voiceId("your_voice_id") + .outputFormat(ElevenLabsApi.OutputFormat.MP3_44100_128.getValue()) + .build(); + +TextToSpeechPrompt speechPrompt = new TextToSpeechPrompt("Hello, this is a text-to-speech example.", speechOptions); +TextToSpeechResponse response = elevenLabsSpeechModel.call(speechPrompt); +---- + +== Manual Configuration + +Add the `spring-ai-elevenlabs` dependency to your project's Maven `pom.xml` file: + +[source,xml] +---- +<dependency> + <groupId>org.springframework.ai</groupId> + <artifactId>spring-ai-elevenlabs</artifactId> +</dependency> +---- + +or to your Gradle `build.gradle` build file: + +[source,groovy] +---- +dependencies { + implementation 'org.springframework.ai:spring-ai-elevenlabs' +} +---- + +TIP: Refer to the xref:getting-started.adoc#dependency-management[Dependency Management] section to add the Spring AI BOM to your build file. + +Next, create an `ElevenLabsTextToSpeechModel`: + +[source,java] +---- +ElevenLabsApi elevenLabsApi = ElevenLabsApi.builder() + .apiKey(System.getenv("ELEVEN_LABS_API_KEY")) + .build(); + +ElevenLabsTextToSpeechOptions defaultOptions = ElevenLabsTextToSpeechOptions.builder() + .modelId("eleven_monolingual_v1") + .voiceId("your_voice_id") + .outputFormat("mp3_44100_128") + .build(); + +ElevenLabsTextToSpeechModel elevenLabsSpeechModel = ElevenLabsTextToSpeechModel.builder() + .elevenLabsApi(elevenLabsApi) + .defaultOptions(defaultOptions) + .build(); + + +TextToSpeechPrompt speechPrompt = new TextToSpeechPrompt("Hello, this is a text-to-speech example.", defaultOptions); +TextToSpeechResponse response = elevenLabsSpeechModel.call(speechPrompt); + + +byte[] responseAsBytes = response.getResult().getOutput(); +---- + +== Streaming Real-time Audio + +The ElevenLabs Speech API supports real-time audio streaming using chunked transfer encoding. This allows audio playback to begin before the entire audio file is generated. 
+ +[source,java] +---- +ElevenLabsApi elevenLabsApi = ElevenLabsApi.builder() + .apiKey(System.getenv("ELEVEN_LABS_API_KEY")) + .build(); + +ElevenLabsTextToSpeechOptions defaultOptions = ElevenLabsTextToSpeechOptions.builder() + .modelId("eleven_monolingual_v1") + .voiceId("your_voice_id") + .outputFormat("mp3_44100_128") + .build(); + +ElevenLabsTextToSpeechModel elevenLabsSpeechModel = ElevenLabsTextToSpeechModel.builder() + .elevenLabsApi(elevenLabsApi) + .defaultOptions(defaultOptions) + .build(); + + +TextToSpeechPrompt speechPrompt = new TextToSpeechPrompt("Today is a wonderful day to build something people love!", defaultOptions); + +Flux<TextToSpeechResponse> responseStream = elevenLabsSpeechModel.stream(speechPrompt); + +// Process the stream, e.g., play the audio chunks +responseStream.subscribe(speechResponse -> { + byte[] audioChunk = speechResponse.getResult().getOutput(); + // Play the audioChunk +}); + +---- + +== Voices API + +The ElevenLabs Voices API allows you to retrieve information about available voices, their settings, and default voice settings. + +To use the Voices API, you'll need to create an instance of `ElevenLabsVoicesApi`: + +[source,java] +---- +ElevenLabsVoicesApi voicesApi = ElevenLabsVoicesApi.builder() + .apiKey(System.getenv("ELEVEN_LABS_API_KEY")) + .build(); +---- + +You can then use the following methods: + +* `getVoices()`: Retrieves a list of all available voices. +* `getDefaultVoiceSettings()`: Gets the default settings for voices. +* `getVoiceSettings(String voiceId)`: Returns the settings for a specific voice. +* `getVoice(String voiceId)`: Returns metadata about a specific voice. 
+ +Example: + +[source,java] +---- +// Get all voices +ResponseEntity<ElevenLabsVoicesApi.Voices> voicesResponse = voicesApi.getVoices(); +List<ElevenLabsVoicesApi.Voice> voices = voicesResponse.getBody().voices(); + +// Get default voice settings +ResponseEntity<ElevenLabsVoicesApi.VoiceSettings> defaultSettingsResponse = voicesApi.getDefaultVoiceSettings(); +ElevenLabsVoicesApi.VoiceSettings defaultSettings = defaultSettingsResponse.getBody(); + +// Get settings for a specific voice +ResponseEntity<ElevenLabsVoicesApi.VoiceSettings> voiceSettingsResponse = voicesApi.getVoiceSettings(voiceId); +ElevenLabsVoicesApi.VoiceSettings voiceSettings = voiceSettingsResponse.getBody(); + +// Get details for a specific voice +ResponseEntity<ElevenLabsVoicesApi.Voice> voiceDetailsResponse = voicesApi.getVoice(voiceId); +ElevenLabsVoicesApi.Voice voiceDetails = voiceDetailsResponse.getBody(); +---- \ No newline at end of file diff --git a/spring-ai-spring-boot-autoconfigure/pom.xml b/spring-ai-spring-boot-autoconfigure/pom.xml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsAutoConfiguration.java b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsAutoConfiguration.java new file mode 100644 index 00000000000..fb8ae6ed724 --- /dev/null +++ b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsAutoConfiguration.java @@ -0,0 +1,79 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.autoconfigure.elevenlabs; + +import org.springframework.ai.autoconfigure.retry.SpringAiRetryAutoConfiguration; +import org.springframework.ai.elevenlabs.ElevenLabsTextToSpeechModel; +import org.springframework.ai.elevenlabs.api.ElevenLabsApi; +import org.springframework.beans.factory.ObjectProvider; +import org.springframework.boot.autoconfigure.AutoConfiguration; +import org.springframework.boot.autoconfigure.ImportAutoConfiguration; +import org.springframework.boot.autoconfigure.condition.ConditionalOnClass; +import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.boot.autoconfigure.web.client.RestClientAutoConfiguration; +import org.springframework.boot.autoconfigure.web.reactive.function.client.WebClientAutoConfiguration; +import org.springframework.boot.context.properties.EnableConfigurationProperties; +import org.springframework.context.annotation.Bean; +import org.springframework.retry.support.RetryTemplate; +import org.springframework.web.client.ResponseErrorHandler; +import org.springframework.web.client.RestClient; +import org.springframework.web.reactive.function.client.WebClient; + +/** + * {@link AutoConfiguration Auto-configuration} for ElevenLabs. 
+ * + * @author Alexandros Pappas + */ +@AutoConfiguration(after = { RestClientAutoConfiguration.class, SpringAiRetryAutoConfiguration.class, + WebClientAutoConfiguration.class }) +@ConditionalOnClass(ElevenLabsApi.class) +@EnableConfigurationProperties({ ElevenLabsSpeechProperties.class, ElevenLabsConnectionProperties.class }) +@ConditionalOnProperty(prefix = ElevenLabsSpeechProperties.CONFIG_PREFIX, name = "enabled", havingValue = "true", + matchIfMissing = true) +@ImportAutoConfiguration(classes = { SpringAiRetryAutoConfiguration.class, RestClientAutoConfiguration.class, + WebClientAutoConfiguration.class }) +public class ElevenLabsAutoConfiguration { + + @Bean + @ConditionalOnMissingBean + public ElevenLabsApi elevenLabsApi(ElevenLabsConnectionProperties connectionProperties, + ObjectProvider restClientBuilderProvider, + ObjectProvider webClientBuilderProvider, ResponseErrorHandler responseErrorHandler) { + + return ElevenLabsApi.builder() + .baseUrl(connectionProperties.getBaseUrl()) + .apiKey(connectionProperties.getApiKey()) + .restClientBuilder(restClientBuilderProvider.getIfAvailable(RestClient::builder)) + .webClientBuilder(webClientBuilderProvider.getIfAvailable(WebClient::builder)) + .responseErrorHandler(responseErrorHandler) + .build(); + } + + @Bean + @ConditionalOnMissingBean + public ElevenLabsTextToSpeechModel elevenLabsSpeechModel(ElevenLabsApi elevenLabsApi, + ElevenLabsSpeechProperties speechProperties, RetryTemplate retryTemplate) { + + return ElevenLabsTextToSpeechModel.builder() + .elevenLabsApi(elevenLabsApi) + .defaultOptions(speechProperties.getOptions()) + .retryTemplate(retryTemplate) + .build(); + } + +} diff --git a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsConnectionProperties.java b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsConnectionProperties.java new file mode 100644 index 00000000000..643a817fd63 
--- /dev/null +++ b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsConnectionProperties.java @@ -0,0 +1,58 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.autoconfigure.elevenlabs; + +import org.springframework.ai.elevenlabs.api.ElevenLabsApi; +import org.springframework.boot.context.properties.ConfigurationProperties; + +/** + * Configuration properties for the ElevenLabs API connection. + * + * @author Alexandros Pappas + */ +@ConfigurationProperties(ElevenLabsConnectionProperties.CONFIG_PREFIX) +public class ElevenLabsConnectionProperties { + + public static final String CONFIG_PREFIX = "spring.ai.elevenlabs"; + + /** + * ElevenLabs API access key. + */ + private String apiKey; + + /** + * ElevenLabs API base URL. 
+ */ + private String baseUrl = ElevenLabsApi.DEFAULT_BASE_URL; + + public String getApiKey() { + return this.apiKey; + } + + public void setApiKey(String apiKey) { + this.apiKey = apiKey; + } + + public String getBaseUrl() { + return this.baseUrl; + } + + public void setBaseUrl(String baseUrl) { + this.baseUrl = baseUrl; + } + +} diff --git a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsSpeechProperties.java b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsSpeechProperties.java new file mode 100644 index 00000000000..2a330afe8d6 --- /dev/null +++ b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsSpeechProperties.java @@ -0,0 +1,68 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.autoconfigure.elevenlabs; + +import org.springframework.ai.elevenlabs.ElevenLabsTextToSpeechOptions; +import org.springframework.ai.elevenlabs.api.ElevenLabsApi; +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.boot.context.properties.NestedConfigurationProperty; + +/** + * Configuration properties for the ElevenLabs Text-to-Speech API. 
+ * + * @author Alexandros Pappas + */ +@ConfigurationProperties(ElevenLabsSpeechProperties.CONFIG_PREFIX) +public class ElevenLabsSpeechProperties { + + public static final String CONFIG_PREFIX = "spring.ai.elevenlabs.tts"; + + public static final String DEFAULT_MODEL_ID = "eleven_monolingual_v1"; + + private static final String DEFAULT_VOICE_ID = "9BWtsMINqrJLrRacOk9x"; + + private static final ElevenLabsApi.OutputFormat DEFAULT_OUTPUT_FORMAT = ElevenLabsApi.OutputFormat.MP3_22050_32; + + /** + * Enable ElevenLabs speech model. + */ + private boolean enabled = true; + + @NestedConfigurationProperty + private ElevenLabsTextToSpeechOptions options = ElevenLabsTextToSpeechOptions.builder() + .modelId(DEFAULT_MODEL_ID) + .voiceId(DEFAULT_VOICE_ID) + .outputFormat(DEFAULT_OUTPUT_FORMAT.getValue()) + .build(); + + public ElevenLabsTextToSpeechOptions getOptions() { + return this.options; + } + + public void setOptions(ElevenLabsTextToSpeechOptions options) { + this.options = options; + } + + public boolean isEnabled() { + return this.enabled; + } + + public void setEnabled(boolean enabled) { + this.enabled = enabled; + } + +} diff --git a/spring-ai-spring-boot-autoconfigure/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports b/spring-ai-spring-boot-autoconfigure/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports new file mode 100644 index 00000000000..e69de29bb2d diff --git a/spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsAutoConfigurationIT.java b/spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsAutoConfigurationIT.java new file mode 100644 index 00000000000..1c96a924c09 --- /dev/null +++ b/spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsAutoConfigurationIT.java @@ -0,0 +1,85 @@ +/* + * Copyright 
2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.autoconfigure.elevenlabs; + +import java.util.Arrays; + +import static org.assertj.core.api.Assertions.assertThat; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; + +import org.springframework.ai.elevenlabs.ElevenLabsTextToSpeechModel; +import org.springframework.boot.autoconfigure.AutoConfigurations; +import org.springframework.boot.test.context.runner.ApplicationContextRunner; + +/** + * Integration tests for the {@link ElevenLabsAutoConfiguration}. 
+ * + * @author Alexandros Pappas + */ +@EnabledIfEnvironmentVariable(named = "ELEVEN_LABS_API_KEY", matches = ".*") +public class ElevenLabsAutoConfigurationIT { + + private static final org.apache.commons.logging.Log logger = org.apache.commons.logging.LogFactory + .getLog(ElevenLabsAutoConfigurationIT.class); + + private final ApplicationContextRunner contextRunner = new ApplicationContextRunner() + .withPropertyValues("spring.ai.elevenlabs.api-key=" + System.getenv("ELEVEN_LABS_API_KEY")) + .withConfiguration(AutoConfigurations.of(ElevenLabsAutoConfiguration.class)); + + @Test + void speech() { + this.contextRunner.run(context -> { + ElevenLabsTextToSpeechModel speechModel = context.getBean(ElevenLabsTextToSpeechModel.class); + byte[] response = speechModel.call("H"); + assertThat(response).isNotNull(); + assertThat(verifyMp3FrameHeader(response)) + .withFailMessage("Expected MP3 frame header to be present in the response, but it was not found.") + .isTrue(); + assertThat(response).isNotEmpty(); + + logger.debug("Response: " + Arrays.toString(response)); + }); + } + + @Test + void speechStream() { + this.contextRunner.run(context -> { + ElevenLabsTextToSpeechModel speechModel = context.getBean(ElevenLabsTextToSpeechModel.class); + byte[] response = speechModel.call("Hello"); + assertThat(response).isNotNull(); + assertThat(verifyMp3FrameHeader(response)) + .withFailMessage("Expected MP3 frame header to be present in the response, but it was not found.") + .isTrue(); + assertThat(response).isNotEmpty(); + + logger.debug("Response: " + Arrays.toString(response)); + }); + } + + public boolean verifyMp3FrameHeader(byte[] audioResponse) { + // Check if the response is null or too short to contain a frame header + if (audioResponse == null || audioResponse.length < 2) { + return false; + } + // Check for the MP3 frame header + // The MP3 frame sync word is 11 consecutive set bits (mask 0xFFE0): all 8 bits of + // byte 0 plus the top 3 bits of byte 1 + return (audioResponse[0] & 0xFF) == 0xFF &&
(audioResponse[1] & 0xE0) == 0xE0; + } + +} diff --git a/spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsPropertiesTests.java b/spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsPropertiesTests.java new file mode 100644 index 00000000000..a4a80433a79 --- /dev/null +++ b/spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsPropertiesTests.java @@ -0,0 +1,141 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.autoconfigure.elevenlabs; + +import static org.assertj.core.api.Assertions.assertThat; +import org.junit.jupiter.api.Test; + +import org.springframework.ai.elevenlabs.ElevenLabsTextToSpeechModel; +import org.springframework.ai.elevenlabs.api.ElevenLabsApi; +import org.springframework.boot.autoconfigure.AutoConfigurations; +import org.springframework.boot.test.context.runner.ApplicationContextRunner; + +/** + * Tests for the {@link ElevenLabsSpeechProperties} and + * {@link ElevenLabsConnectionProperties}. 
+ * + * @author Alexandros Pappas + */ +public class ElevenLabsPropertiesTests { + + @Test + public void connectionProperties() { + new ApplicationContextRunner().withPropertyValues( + // @formatter:off + "spring.ai.elevenlabs.api-key=YOUR_API_KEY", + "spring.ai.elevenlabs.base-url=https://custom.api.elevenlabs.io", + "spring.ai.elevenlabs.tts.options.model-id=custom-model", + "spring.ai.elevenlabs.tts.options.voice=custom-voice", + "spring.ai.elevenlabs.tts.options.voice-settings.stability=0.6", + "spring.ai.elevenlabs.tts.options.voice-settings.similarity-boost=0.8", + "spring.ai.elevenlabs.tts.options.voice-settings.style=0.2", + "spring.ai.elevenlabs.tts.options.voice-settings.use-speaker-boost=false", + "spring.ai.elevenlabs.tts.options.voice-settings.speed=1.5" + // @formatter:on + ).withConfiguration(AutoConfigurations.of(ElevenLabsAutoConfiguration.class)).run(context -> { + var speechProperties = context.getBean(ElevenLabsSpeechProperties.class); + var connectionProperties = context.getBean(ElevenLabsConnectionProperties.class); + + assertThat(connectionProperties.getApiKey()).isEqualTo("YOUR_API_KEY"); + assertThat(connectionProperties.getBaseUrl()).isEqualTo("https://custom.api.elevenlabs.io"); + + assertThat(speechProperties.getOptions().getModelId()).isEqualTo("custom-model"); + assertThat(speechProperties.getOptions().getVoice()).isEqualTo("custom-voice"); + assertThat(speechProperties.getOptions().getVoiceSettings().stability()).isEqualTo(0.6); + assertThat(speechProperties.getOptions().getVoiceSettings().similarityBoost()).isEqualTo(0.8); + assertThat(speechProperties.getOptions().getVoiceSettings().style()).isEqualTo(0.2); + assertThat(speechProperties.getOptions().getVoiceSettings().useSpeakerBoost()).isFalse(); + assertThat(speechProperties.getOptions().getSpeed()).isEqualTo(1.5f); + + // enabled is true by default + assertThat(speechProperties.isEnabled()).isTrue(); + }); + } + + @Test + public void speechOptionsTest() { + new 
ApplicationContextRunner().withPropertyValues( + // @formatter:off + "spring.ai.elevenlabs.api-key=YOUR_API_KEY", + "spring.ai.elevenlabs.tts.options.model-id=custom-model", + "spring.ai.elevenlabs.tts.options.voice=custom-voice", + "spring.ai.elevenlabs.tts.options.format=pcm_44100", + "spring.ai.elevenlabs.tts.options.voice-settings.stability=0.6", + "spring.ai.elevenlabs.tts.options.voice-settings.similarity-boost=0.8", + "spring.ai.elevenlabs.tts.options.voice-settings.style=0.2", + "spring.ai.elevenlabs.tts.options.voice-settings.use-speaker-boost=false", + "spring.ai.elevenlabs.tts.options.voice-settings.speed=1.2", + "spring.ai.elevenlabs.tts.options.language-code=en", + "spring.ai.elevenlabs.tts.options.seed=12345", + "spring.ai.elevenlabs.tts.options.previous-text=previous", + "spring.ai.elevenlabs.tts.options.next-text=next", + "spring.ai.elevenlabs.tts.options.use-pvc-as-ivc=true", + "spring.ai.elevenlabs.tts.options.apply-text-normalization=ON" + // @formatter:on + ).withConfiguration(AutoConfigurations.of(ElevenLabsAutoConfiguration.class)).run(context -> { + var speechProperties = context.getBean(ElevenLabsSpeechProperties.class); + + assertThat(speechProperties.getOptions().getModelId()).isEqualTo("custom-model"); + assertThat(speechProperties.getOptions().getVoice()).isEqualTo("custom-voice"); + assertThat(speechProperties.getOptions().getFormat()).isEqualTo("pcm_44100"); + assertThat(speechProperties.getOptions().getVoiceSettings().stability()).isEqualTo(0.6); + assertThat(speechProperties.getOptions().getVoiceSettings().similarityBoost()).isEqualTo(0.8); + assertThat(speechProperties.getOptions().getVoiceSettings().style()).isEqualTo(0.2); + assertThat(speechProperties.getOptions().getVoiceSettings().useSpeakerBoost()).isFalse(); + assertThat(speechProperties.getOptions().getVoiceSettings().speed()).isEqualTo(1.2); + assertThat(speechProperties.getOptions().getSpeed()).isEqualTo(1.2); + 
assertThat(speechProperties.getOptions().getLanguageCode()).isEqualTo("en"); + assertThat(speechProperties.getOptions().getSeed()).isEqualTo(12345); + assertThat(speechProperties.getOptions().getPreviousText()).isEqualTo("previous"); + assertThat(speechProperties.getOptions().getNextText()).isEqualTo("next"); + assertThat(speechProperties.getOptions().getUsePvcAsIvc()).isTrue(); + assertThat(speechProperties.getOptions().getApplyTextNormalization()) + .isEqualTo(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON); + }); + } + + @Test + public void speechActivation() { + + // It is enabled by default + new ApplicationContextRunner().withPropertyValues("spring.ai.elevenlabs.api-key=YOUR_API_KEY") + .withConfiguration(AutoConfigurations.of(ElevenLabsAutoConfiguration.class)) + .run(context -> { + assertThat(context.getBeansOfType(ElevenLabsSpeechProperties.class)).isNotEmpty(); + assertThat(context.getBeansOfType(ElevenLabsTextToSpeechModel.class)).isNotEmpty(); + }); + + // Explicitly enable the text-to-speech autoconfiguration. + new ApplicationContextRunner() + .withPropertyValues("spring.ai.elevenlabs.api-key=YOUR_API_KEY", "spring.ai.elevenlabs.tts.enabled=true") + .withConfiguration(AutoConfigurations.of(ElevenLabsAutoConfiguration.class)) + .run(context -> { + assertThat(context.getBeansOfType(ElevenLabsSpeechProperties.class)).isNotEmpty(); + assertThat(context.getBeansOfType(ElevenLabsTextToSpeechModel.class)).isNotEmpty(); + }); + + // Explicitly disable the text-to-speech autoconfiguration. 
+ new ApplicationContextRunner() + .withPropertyValues("spring.ai.elevenlabs.api-key=YOUR_API_KEY", "spring.ai.elevenlabs.tts.enabled=false") + .withConfiguration(AutoConfigurations.of(ElevenLabsAutoConfiguration.class)) + .run(context -> { + assertThat(context.getBeansOfType(ElevenLabsSpeechProperties.class)).isEmpty(); + assertThat(context.getBeansOfType(ElevenLabsTextToSpeechModel.class)).isEmpty(); + }); + } + +} diff --git a/spring-ai-spring-boot-starters/spring-ai-starter-elevenlabs/pom.xml b/spring-ai-spring-boot-starters/spring-ai-starter-elevenlabs/pom.xml new file mode 100644 index 00000000000..6ad6673146e --- /dev/null +++ b/spring-ai-spring-boot-starters/spring-ai-starter-elevenlabs/pom.xml @@ -0,0 +1,44 @@ + + + 4.0.0 + + org.springframework.ai + spring-ai + 1.0.0-SNAPSHOT + ../../pom.xml + + spring-ai-elevenlabs-spring-boot-starter + jar + Spring AI Starter - ElevenLabs + Spring AI ElevenLabs Auto Configuration + https://github.com/spring-projects/spring-ai + + + https://github.com/spring-projects/spring-ai + git://github.com/spring-projects/spring-ai.git + git@github.com:spring-projects/spring-ai.git + + + + + + org.springframework.boot + spring-boot-starter + + + + org.springframework.ai + spring-ai-spring-boot-autoconfigure + ${project.parent.version} + + + + org.springframework.ai + spring-ai-elevenlabs + ${project.parent.version} + + + + From 01507f39639d2b6111a1c143d4a880e047368017 Mon Sep 17 00:00:00 2001 From: Alexandros Pappas Date: Tue, 10 Jun 2025 12:56:57 +0200 Subject: [PATCH 2/6] feat: rebase with master and update the code Signed-off-by: Alexandros Pappas --- .../pom.xml | 90 +++++++++++++++++++ .../ElevenLabsAutoConfiguration.java | 4 +- .../ElevenLabsConnectionProperties.java | 2 +- .../ElevenLabsSpeechProperties.java | 4 +- ...ot.autoconfigure.AutoConfiguration.imports | 16 ++++ .../ElevenLabsAutoConfigurationIT.java | 13 ++- .../ElevenLabsPropertiesTests.java | 8 +- models/spring-ai-elevenlabs/pom.xml | 6 +- 
.../ElevenLabsTextToSpeechModel.java | 15 ++-- .../ElevenLabsTextToSpeechOptions.java | 48 +++++----- .../ai/elevenlabs/api/ElevenLabsApi.java | 26 +++--- .../ElevenLabsTextToSpeechModelIT.java | 6 +- .../ElevenLabsTextToSpeechOptionsTests.java | 10 +-- .../ai/elevenlabs/api/ElevenLabsApiIT.java | 25 +++--- .../ai/openai/audio/speech/Speech.java | 3 + .../ai/openai/audio/speech/SpeechMessage.java | 3 + .../ai/openai/audio/speech/SpeechModel.java | 3 + .../ai/openai/audio/speech/SpeechPrompt.java | 3 + .../openai/audio/speech/SpeechResponse.java | 3 + .../audio/speech/StreamingSpeechModel.java | 3 + pom.xml | 3 +- spring-ai-bom/pom.xml | 8 +- .../api/audio/speech/elevenlabs-speech.adoc | 16 ++-- .../tts/DefaultTextToSpeechOptions.java | 2 +- .../springframework/ai/audio}/tts/Speech.java | 2 +- .../tts/StreamingTextToSpeechModel.java | 2 +- .../ai/audio}/tts/TextToSpeechMessage.java | 2 +- .../ai/audio}/tts/TextToSpeechModel.java | 2 +- .../ai/audio}/tts/TextToSpeechOptions.java | 2 +- .../ai/audio}/tts/TextToSpeechPrompt.java | 2 +- .../ai/audio}/tts/TextToSpeechResponse.java | 2 +- .../tts/DefaultTextToSpeechOptionsTests.java | 2 +- spring-ai-spring-boot-autoconfigure/pom.xml | 0 ...ot.autoconfigure.AutoConfiguration.imports | 0 .../pom.xml | 12 +-- 35 files changed, 240 insertions(+), 108 deletions(-) create mode 100644 auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/pom.xml rename {spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs => auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/java/org/springframework/ai/model/elevenlabs/autoconfigure}/ElevenLabsAutoConfiguration.java (96%) rename {spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs => auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/java/org/springframework/ai/model/elevenlabs/autoconfigure}/ElevenLabsConnectionProperties.java (95%) rename 
{spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs => auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/java/org/springframework/ai/model/elevenlabs/autoconfigure}/ElevenLabsSpeechProperties.java (93%) create mode 100644 auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports rename {spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/elevenlabs => auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/test/java/org/springframework/ai/model/elevenlabs/autoconfigure}/ElevenLabsAutoConfigurationIT.java (87%) rename {spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/elevenlabs => auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/test/java/org/springframework/ai/model/elevenlabs/autoconfigure}/ElevenLabsPropertiesTests.java (96%) rename {models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs => spring-ai-model/src/main/java/org/springframework/ai/audio}/tts/DefaultTextToSpeechOptions.java (98%) rename {models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs => spring-ai-model/src/main/java/org/springframework/ai/audio}/tts/Speech.java (97%) rename {models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs => spring-ai-model/src/main/java/org/springframework/ai/audio}/tts/StreamingTextToSpeechModel.java (97%) rename {models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs => spring-ai-model/src/main/java/org/springframework/ai/audio}/tts/TextToSpeechMessage.java (96%) rename {models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs => spring-ai-model/src/main/java/org/springframework/ai/audio}/tts/TextToSpeechModel.java (96%) rename 
{models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs => spring-ai-model/src/main/java/org/springframework/ai/audio}/tts/TextToSpeechOptions.java (98%) rename {models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs => spring-ai-model/src/main/java/org/springframework/ai/audio}/tts/TextToSpeechPrompt.java (97%) rename {models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs => spring-ai-model/src/main/java/org/springframework/ai/audio}/tts/TextToSpeechResponse.java (97%) rename {models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs => spring-ai-model/src/test/java/org/springframework/ai/audio}/tts/DefaultTextToSpeechOptionsTests.java (97%) delete mode 100644 spring-ai-spring-boot-autoconfigure/pom.xml delete mode 100644 spring-ai-spring-boot-autoconfigure/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports rename spring-ai-spring-boot-starters/{spring-ai-starter-elevenlabs => spring-ai-starter-model-elevenlabs}/pom.xml (78%) diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/pom.xml b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/pom.xml new file mode 100644 index 00000000000..bc09ef1f5b4 --- /dev/null +++ b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/pom.xml @@ -0,0 +1,90 @@ + + + 4.0.0 + + org.springframework.ai + spring-ai-parent + 1.1.0-SNAPSHOT + ../../../pom.xml + + spring-ai-autoconfigure-model-elevenlabs + jar + Spring AI ElevenLabs Auto Configuration + Spring AI ElevenLabs Auto Configuration + https://github.com/spring-projects/spring-ai + + + https://github.com/spring-projects/spring-ai + git://github.com/spring-projects/spring-ai.git + git@github.com:spring-projects/spring-ai.git + + + + + + + + + org.springframework.ai + spring-ai-elevenlabs + ${project.parent.version} + true + + + + + + org.springframework.ai + spring-ai-autoconfigure-model-tool + 
${project.parent.version} + + + + org.springframework.ai + spring-ai-autoconfigure-retry + ${project.parent.version} + + + + + org.springframework.boot + spring-boot-starter + true + + + + org.springframework.boot + spring-boot-configuration-processor + true + + + + org.springframework.boot + spring-boot-autoconfigure-processor + true + + + + + org.springframework.ai + spring-ai-test + ${project.parent.version} + test + + + + org.springframework.boot + spring-boot-starter-test + test + + + + org.mockito + mockito-core + test + + + + diff --git a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsAutoConfiguration.java b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsAutoConfiguration.java similarity index 96% rename from spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsAutoConfiguration.java rename to auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsAutoConfiguration.java index fb8ae6ed724..b2578a93939 100644 --- a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsAutoConfiguration.java +++ b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsAutoConfiguration.java @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -package org.springframework.ai.autoconfigure.elevenlabs; +package org.springframework.ai.model.elevenlabs.autoconfigure; -import org.springframework.ai.autoconfigure.retry.SpringAiRetryAutoConfiguration; import org.springframework.ai.elevenlabs.ElevenLabsTextToSpeechModel; import org.springframework.ai.elevenlabs.api.ElevenLabsApi; +import org.springframework.ai.retry.autoconfigure.SpringAiRetryAutoConfiguration; import org.springframework.beans.factory.ObjectProvider; import org.springframework.boot.autoconfigure.AutoConfiguration; import org.springframework.boot.autoconfigure.ImportAutoConfiguration; diff --git a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsConnectionProperties.java b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsConnectionProperties.java similarity index 95% rename from spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsConnectionProperties.java rename to auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsConnectionProperties.java index 643a817fd63..4f2b299142e 100644 --- a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsConnectionProperties.java +++ b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsConnectionProperties.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package org.springframework.ai.autoconfigure.elevenlabs; +package org.springframework.ai.model.elevenlabs.autoconfigure; import org.springframework.ai.elevenlabs.api.ElevenLabsApi; import org.springframework.boot.context.properties.ConfigurationProperties; diff --git a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsSpeechProperties.java b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsSpeechProperties.java similarity index 93% rename from spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsSpeechProperties.java rename to auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsSpeechProperties.java index 2a330afe8d6..7614f3070ab 100644 --- a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsSpeechProperties.java +++ b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsSpeechProperties.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package org.springframework.ai.autoconfigure.elevenlabs; +package org.springframework.ai.model.elevenlabs.autoconfigure; import org.springframework.ai.elevenlabs.ElevenLabsTextToSpeechOptions; import org.springframework.ai.elevenlabs.api.ElevenLabsApi; @@ -31,7 +31,7 @@ public class ElevenLabsSpeechProperties { public static final String CONFIG_PREFIX = "spring.ai.elevenlabs.tts"; - public static final String DEFAULT_MODEL_ID = "eleven_monolingual_v1"; + public static final String DEFAULT_MODEL_ID = "eleven_turbo_v2_5"; private static final String DEFAULT_VOICE_ID = "9BWtsMINqrJLrRacOk9x"; diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports new file mode 100644 index 00000000000..82784c92262 --- /dev/null +++ b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports @@ -0,0 +1,16 @@ +# +# Copyright 2025-2025 the original author or authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +org.springframework.ai.model.elevenlabs.autoconfigure.ElevenLabsAutoConfiguration diff --git a/spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsAutoConfigurationIT.java b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/test/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsAutoConfigurationIT.java similarity index 87% rename from spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsAutoConfigurationIT.java rename to auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/test/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsAutoConfigurationIT.java index 1c96a924c09..4c1fc68ec04 100644 --- a/spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsAutoConfigurationIT.java +++ b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/test/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsAutoConfigurationIT.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package org.springframework.ai.autoconfigure.elevenlabs; +package org.springframework.ai.model.elevenlabs.autoconfigure; import java.util.Arrays; @@ -72,14 +72,13 @@ void speechStream() { } public boolean verifyMp3FrameHeader(byte[] audioResponse) { - // Check if the response is null or too short to contain a frame header - if (audioResponse == null || audioResponse.length < 2) { + if (audioResponse == null || audioResponse.length < 3) { return false; } - // Check for the MP3 frame header - // 0xFFE0 is the sync word for an MP3 frame (11 bits set to 1 followed by 3 bits - // set to 0) - return (audioResponse[0] & 0xFF) == 0xFF && (audioResponse[1] & 0xE0) == 0xE0; + // Accept ID3 tag (MP3 metadata) or MP3 frame header + boolean hasId3 = audioResponse[0] == 'I' && audioResponse[1] == 'D' && audioResponse[2] == '3'; + boolean hasFrame = (audioResponse[0] & 0xFF) == 0xFF && (audioResponse[1] & 0xE0) == 0xE0; + return hasId3 || hasFrame; } } diff --git a/spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsPropertiesTests.java b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/test/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsPropertiesTests.java similarity index 96% rename from spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsPropertiesTests.java rename to auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/test/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsPropertiesTests.java index a4a80433a79..e8ab28a2dcc 100644 --- a/spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/elevenlabs/ElevenLabsPropertiesTests.java +++ b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/test/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsPropertiesTests.java @@ -14,7 +14,7 @@ * limitations under the 
License. */ -package org.springframework.ai.autoconfigure.elevenlabs; +package org.springframework.ai.model.elevenlabs.autoconfigure; import static org.assertj.core.api.Assertions.assertThat; import org.junit.jupiter.api.Test; @@ -83,8 +83,8 @@ public void speechOptionsTest() { "spring.ai.elevenlabs.tts.options.seed=12345", "spring.ai.elevenlabs.tts.options.previous-text=previous", "spring.ai.elevenlabs.tts.options.next-text=next", - "spring.ai.elevenlabs.tts.options.use-pvc-as-ivc=true", - "spring.ai.elevenlabs.tts.options.apply-text-normalization=ON" + "spring.ai.elevenlabs.tts.options.apply-text-normalization=ON", + "spring.ai.elevenlabs.tts.options.apply-language-text-normalization=true" // @formatter:on ).withConfiguration(AutoConfigurations.of(ElevenLabsAutoConfiguration.class)).run(context -> { var speechProperties = context.getBean(ElevenLabsSpeechProperties.class); @@ -102,9 +102,9 @@ public void speechOptionsTest() { assertThat(speechProperties.getOptions().getSeed()).isEqualTo(12345); assertThat(speechProperties.getOptions().getPreviousText()).isEqualTo("previous"); assertThat(speechProperties.getOptions().getNextText()).isEqualTo("next"); - assertThat(speechProperties.getOptions().getUsePvcAsIvc()).isTrue(); assertThat(speechProperties.getOptions().getApplyTextNormalization()) .isEqualTo(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON); + assertThat(speechProperties.getOptions().getApplyLanguageTextNormalization()).isTrue(); }); } diff --git a/models/spring-ai-elevenlabs/pom.xml b/models/spring-ai-elevenlabs/pom.xml index ef036f30766..172da0099e8 100644 --- a/models/spring-ai-elevenlabs/pom.xml +++ b/models/spring-ai-elevenlabs/pom.xml @@ -5,8 +5,8 @@ 4.0.0 org.springframework.ai - spring-ai - 1.0.0-SNAPSHOT + spring-ai-parent + 1.1.0-SNAPSHOT ../../pom.xml @@ -31,7 +31,7 @@ org.springframework.ai - spring-ai-core + spring-ai-model ${project.parent.version} diff --git 
a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModel.java b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModel.java index 99b72fc67f5..3fb179ed804 100644 --- a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModel.java +++ b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModel.java @@ -23,11 +23,11 @@ import reactor.core.publisher.Flux; import org.springframework.ai.elevenlabs.api.ElevenLabsApi; -import org.springframework.ai.elevenlabs.tts.Speech; -import org.springframework.ai.elevenlabs.tts.StreamingTextToSpeechModel; -import org.springframework.ai.elevenlabs.tts.TextToSpeechModel; -import org.springframework.ai.elevenlabs.tts.TextToSpeechPrompt; -import org.springframework.ai.elevenlabs.tts.TextToSpeechResponse; +import org.springframework.ai.audio.tts.Speech; +import org.springframework.ai.audio.tts.StreamingTextToSpeechModel; +import org.springframework.ai.audio.tts.TextToSpeechModel; +import org.springframework.ai.audio.tts.TextToSpeechPrompt; +import org.springframework.ai.audio.tts.TextToSpeechResponse; import org.springframework.ai.retry.RetryUtils; import org.springframework.retry.support.RetryTemplate; import org.springframework.util.Assert; @@ -131,8 +131,8 @@ private ElevenLabsApi.SpeechRequest createRequest(TextToSpeechPrompt prompt) { .nextText(options.getNextText()) .previousRequestIds(options.getPreviousRequestIds()) .nextRequestIds(options.getNextRequestIds()) - .usePvcAsIvc(options.getUsePvcAsIvc()) .applyTextNormalization(options.getApplyTextNormalization()) + .applyLanguageTextNormalization(options.getApplyLanguageTextNormalization()) .build(); } @@ -161,9 +161,10 @@ private ElevenLabsTextToSpeechOptions merge(ElevenLabsTextToSpeechOptions runtim .previousRequestIds( getOrDefault(runtimeOptions.getPreviousRequestIds(), 
defaultOptions.getPreviousRequestIds())) .nextRequestIds(getOrDefault(runtimeOptions.getNextRequestIds(), defaultOptions.getNextRequestIds())) - .usePvcAsIvc(getOrDefault(runtimeOptions.getUsePvcAsIvc(), defaultOptions.getUsePvcAsIvc())) .applyTextNormalization(getOrDefault(runtimeOptions.getApplyTextNormalization(), defaultOptions.getApplyTextNormalization())) + .applyLanguageTextNormalization(getOrDefault(runtimeOptions.getApplyLanguageTextNormalization(), + defaultOptions.getApplyLanguageTextNormalization())) .build(); } diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechOptions.java b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechOptions.java index d35c8128558..20adb6af3b0 100644 --- a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechOptions.java +++ b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechOptions.java @@ -24,7 +24,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import org.springframework.ai.elevenlabs.api.ElevenLabsApi; -import org.springframework.ai.elevenlabs.tts.TextToSpeechOptions; +import org.springframework.ai.audio.tts.TextToSpeechOptions; /** * Options for ElevenLabs text-to-speech. 
@@ -76,12 +76,12 @@ public class ElevenLabsTextToSpeechOptions implements TextToSpeechOptions { @JsonProperty("next_request_ids") private List nextRequestIds; - @JsonProperty("use_pvc_as_ivc") - private Boolean usePvcAsIvc; - @JsonProperty("apply_text_normalization") private ElevenLabsApi.SpeechRequest.TextNormalizationMode applyTextNormalization; + @JsonProperty("apply_language_text_normalization") + private Boolean applyLanguageTextNormalization; + public static Builder builder() { return new ElevenLabsTextToSpeechOptions.Builder(); } @@ -246,14 +246,6 @@ public void setNextRequestIds(List nextRequestIds) { this.nextRequestIds = nextRequestIds; } - public Boolean getUsePvcAsIvc() { - return this.usePvcAsIvc; - } - - public void setUsePvcAsIvc(Boolean usePvcAsIvc) { - this.usePvcAsIvc = usePvcAsIvc; - } - public ElevenLabsApi.SpeechRequest.TextNormalizationMode getApplyTextNormalization() { return this.applyTextNormalization; } @@ -262,6 +254,14 @@ public void setApplyTextNormalization(ElevenLabsApi.SpeechRequest.TextNormalizat this.applyTextNormalization = applyTextNormalization; } + public Boolean getApplyLanguageTextNormalization() { + return this.applyLanguageTextNormalization; + } + + public void setApplyLanguageTextNormalization(Boolean applyLanguageTextNormalization) { + this.applyLanguageTextNormalization = applyLanguageTextNormalization; + } + @Override public boolean equals(Object o) { if (this == o) @@ -275,15 +275,16 @@ public boolean equals(Object o) { && Objects.equals(seed, that.seed) && Objects.equals(previousText, that.previousText) && Objects.equals(nextText, that.nextText) && Objects.equals(previousRequestIds, that.previousRequestIds) - && Objects.equals(nextRequestIds, that.nextRequestIds) && Objects.equals(usePvcAsIvc, that.usePvcAsIvc) - && Objects.equals(applyTextNormalization, that.applyTextNormalization); + && Objects.equals(applyTextNormalization, that.applyTextNormalization) + && Objects.equals(nextRequestIds, that.nextRequestIds) + && 
Objects.equals(applyLanguageTextNormalization, that.applyLanguageTextNormalization); } @Override public int hashCode() { return Objects.hash(modelId, voiceId, outputFormat, voiceSettings, languageCode, pronunciationDictionaryLocators, seed, previousText, nextText, previousRequestIds, nextRequestIds, - usePvcAsIvc, applyTextNormalization); + applyTextNormalization, applyLanguageTextNormalization); } @Override @@ -292,8 +293,9 @@ public String toString() { + ", outputFormat='" + outputFormat + '\'' + ", voiceSettings=" + voiceSettings + ", languageCode='" + languageCode + '\'' + ", pronunciationDictionaryLocators=" + pronunciationDictionaryLocators + ", seed=" + seed + ", previousText='" + previousText + '\'' + ", nextText='" + nextText + '\'' - + ", previousRequestIds=" + previousRequestIds + ", nextRequestIds=" + nextRequestIds + ", usePvcAsIvc=" - + usePvcAsIvc + ", applyTextNormalization=" + applyTextNormalization + '}'; + + ", previousRequestIds=" + previousRequestIds + ", nextRequestIds=" + nextRequestIds + + ", applyTextNormalization=" + applyTextNormalization + ", applyLanguageTextNormalization=" + + applyLanguageTextNormalization + '}'; } @Override @@ -313,8 +315,8 @@ public ElevenLabsTextToSpeechOptions copy() { .nextText(this.getNextText()) .previousRequestIds(this.getPreviousRequestIds()) .nextRequestIds(this.getNextRequestIds()) - .usePvcAsIvc(this.getUsePvcAsIvc()) .applyTextNormalization(this.getApplyTextNormalization()) + .applyLanguageTextNormalization(this.getApplyLanguageTextNormalization()) .build(); } @@ -388,17 +390,17 @@ public Builder nextRequestIds(List nextRequestIds) { return this; } - public Builder usePvcAsIvc(Boolean usePvcAsIvc) { - options.setUsePvcAsIvc(usePvcAsIvc); - return this; - } - public Builder applyTextNormalization( ElevenLabsApi.SpeechRequest.TextNormalizationMode applyTextNormalization) { options.setApplyTextNormalization(applyTextNormalization); return this; } + public Builder applyLanguageTextNormalization(Boolean 
applyLanguageTextNormalization) { + options.setApplyLanguageTextNormalization(applyLanguageTextNormalization); + return this; + } + public ElevenLabsTextToSpeechOptions build() { return this.options; } diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/api/ElevenLabsApi.java b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/api/ElevenLabsApi.java index af749b8181b..a7fd65d621d 100644 --- a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/api/ElevenLabsApi.java +++ b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/api/ElevenLabsApi.java @@ -146,8 +146,10 @@ public enum OutputFormat { MP3_22050_32("mp3_22050_32"), MP3_44100_32("mp3_44100_32"), MP3_44100_64("mp3_44100_64"), MP3_44100_96("mp3_44100_96"), MP3_44100_128("mp3_44100_128"), MP3_44100_192("mp3_44100_192"), - PCM_16000("pcm_16000"), PCM_22050("pcm_22050"), PCM_24000("pcm_24000"), PCM_44100("pcm_44100"), - ULAW_8000("ulaw_8000"); + PCM_8000("pcm_8000"), PCM_16000("pcm_16000"), PCM_22050("pcm_22050"), PCM_24000("pcm_24000"), + PCM_44100("pcm_44100"), PCM_48000("pcm_48000"), ULAW_8000("ulaw_8000"), ALAW_8000("alaw_8000"), + OPUS_48000_32("opus_48000_32"), OPUS_48000_64("opus_48000_64"), OPUS_48000_96("opus_48000_96"), + OPUS_48000_128("opus_48000_128"), OPUS_48000_192("opus_48000_192"); private final String value; @@ -173,8 +175,8 @@ public record SpeechRequest(@JsonProperty("text") String text, @JsonProperty("mo @JsonProperty("next_text") String nextText, @JsonProperty("previous_request_ids") List previousRequestIds, @JsonProperty("next_request_ids") List nextRequestIds, - @JsonProperty("use_pvc_as_ivc") Boolean usePvcAsIvc, - @JsonProperty("apply_text_normalization") TextNormalizationMode applyTextNormalization) { + @JsonProperty("apply_text_normalization") TextNormalizationMode applyTextNormalization, + @JsonProperty("apply_language_text_normalization") Boolean 
applyLanguageTextNormalization) { public static Builder builder() { return new Builder(); @@ -243,10 +245,10 @@ public static class Builder { private List nextRequestIds; - private Boolean usePvcAsIvc; - private TextNormalizationMode applyTextNormalization; + private Boolean applyLanguageTextNormalization = false; + public Builder text(String text) { this.text = text; return this; @@ -298,21 +300,21 @@ public Builder nextRequestIds(List nextRequestIds) { return this; } - public Builder usePvcAsIvc(Boolean usePvcAsIvc) { - this.usePvcAsIvc = usePvcAsIvc; + public Builder applyTextNormalization(TextNormalizationMode applyTextNormalization) { + this.applyTextNormalization = applyTextNormalization; return this; } - public Builder applyTextNormalization(TextNormalizationMode applyTextNormalization) { - this.applyTextNormalization = applyTextNormalization; + public Builder applyLanguageTextNormalization(Boolean applyLanguageTextNormalization) { + this.applyLanguageTextNormalization = applyLanguageTextNormalization; return this; } public SpeechRequest build() { Assert.hasText(text, "text must not be empty"); return new SpeechRequest(text, modelId, languageCode, voiceSettings, pronunciationDictionaryLocators, - seed, previousText, nextText, previousRequestIds, nextRequestIds, usePvcAsIvc, - applyTextNormalization); + seed, previousText, nextText, previousRequestIds, nextRequestIds, applyTextNormalization, + applyLanguageTextNormalization); } } diff --git a/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModelIT.java b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModelIT.java index 840d9b2b558..013dcc97e2f 100644 --- a/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModelIT.java +++ b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModelIT.java @@ -23,9 +23,9 @@ import 
org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; import reactor.core.publisher.Flux; -import org.springframework.ai.elevenlabs.tts.Speech; -import org.springframework.ai.elevenlabs.tts.TextToSpeechPrompt; -import org.springframework.ai.elevenlabs.tts.TextToSpeechResponse; +import org.springframework.ai.audio.tts.Speech; +import org.springframework.ai.audio.tts.TextToSpeechPrompt; +import org.springframework.ai.audio.tts.TextToSpeechResponse; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; diff --git a/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechOptionsTests.java b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechOptionsTests.java index 9ed39b8f602..a0164c81d8c 100644 --- a/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechOptionsTests.java +++ b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechOptionsTests.java @@ -51,8 +51,8 @@ public void testBuilderWithAllFields() { .nextText("next") .previousRequestIds(List.of("req1", "req2")) .nextRequestIds(List.of("req3", "req4")) - .usePvcAsIvc(true) .applyTextNormalization(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON) + .applyLanguageTextNormalization(true) .build(); assertThat(options.getModelId()).isEqualTo("test-model"); @@ -75,8 +75,8 @@ public void testBuilderWithAllFields() { assertThat(options.getNextText()).isEqualTo("next"); assertThat(options.getPreviousRequestIds()).containsExactly("req1", "req2"); assertThat(options.getNextRequestIds()).containsExactly("req3", "req4"); - assertThat(options.getUsePvcAsIvc()).isTrue(); assertThat(options.getApplyTextNormalization()).isEqualTo(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON); + assertThat(options.getApplyLanguageTextNormalization()).isTrue(); } @Test @@ -114,8 +114,8 
@@ public void testSetters() { options.setNextText("next"); options.setPreviousRequestIds(List.of("req1", "req2")); options.setNextRequestIds(List.of("req3", "req4")); - options.setUsePvcAsIvc(true); options.setApplyTextNormalization(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON); + options.setApplyLanguageTextNormalization(true); assertThat(options.getModelId()).isEqualTo("test-model"); assertThat(options.getVoice()).isEqualTo("test-voice-id"); @@ -134,8 +134,8 @@ public void testSetters() { assertThat(options.getNextText()).isEqualTo("next"); assertThat(options.getPreviousRequestIds()).containsExactly("req1", "req2"); assertThat(options.getNextRequestIds()).containsExactly("req3", "req4"); - assertThat(options.getUsePvcAsIvc()).isTrue(); assertThat(options.getApplyTextNormalization()).isEqualTo(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON); + assertThat(options.getApplyLanguageTextNormalization()).isTrue(); } @Test @@ -155,8 +155,8 @@ public void testDefaultValues() { assertThat(options.getNextText()).isNull(); assertThat(options.getPreviousRequestIds()).isNull(); assertThat(options.getNextRequestIds()).isNull(); - assertThat(options.getUsePvcAsIvc()).isNull(); assertThat(options.getApplyTextNormalization()).isNull(); + assertThat(options.getApplyLanguageTextNormalization()).isNull(); } @Test diff --git a/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/api/ElevenLabsApiIT.java b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/api/ElevenLabsApiIT.java index e2637915663..399dc9156f6 100644 --- a/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/api/ElevenLabsApiIT.java +++ b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/api/ElevenLabsApiIT.java @@ -16,6 +16,7 @@ package org.springframework.ai.elevenlabs.api; +import java.io.IOException; import java.util.concurrent.atomic.AtomicInteger; import static 
org.assertj.core.api.Assertions.assertThat; @@ -49,10 +50,10 @@ public class ElevenLabsApiIT { private ElevenLabsApi elevenLabsApi; @Test - public void testTextToSpeech() { + public void testTextToSpeech() throws IOException { ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder() .text("Hello, world!") - .modelId("eleven_monolingual_v1") + .modelId("eleven_turbo_v2_5") .build(); String validVoiceId = "9BWtsMINqrJLrRacOk9x"; @@ -66,7 +67,7 @@ public void testTextToSpeech() { public void testTextToSpeechWithVoiceSettings() { ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder() .text("Hello, with Voice settings!") - .modelId("eleven_monolingual_v1") + .modelId("eleven_turbo_v2_5") .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.5, 0.7, 0.0, true, 1.0)) .build(); @@ -81,12 +82,13 @@ public void testTextToSpeechWithVoiceSettings() { public void testTextToSpeechWithQueryParams() { ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder() .text("Hello, testing query params!") - .modelId("eleven_monolingual_v1") + .modelId("eleven_turbo_v2_5") .build(); String validVoiceId = "9BWtsMINqrJLrRacOk9x"; MultiValueMap queryParams = new LinkedMultiValueMap<>(); queryParams.add("optimize_streaming_latency", "2"); + queryParams.add("enable_logging", "true"); queryParams.add("output_format", ElevenLabsApi.OutputFormat.MP3_22050_32.getValue()); ResponseEntity response = elevenLabsApi.textToSpeech(request, validVoiceId, queryParams); @@ -99,7 +101,7 @@ public void testTextToSpeechWithQueryParams() { public void testTextToSpeechVoiceIdNull() { ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder() .text("This should fail.") - .modelId("eleven_monolingual_v1") + .modelId("eleven_turbo_v2_5") .build(); Exception exception = assertThrows(IllegalArgumentException.class, @@ -110,7 +112,7 @@ public void testTextToSpeechVoiceIdNull() { @Test public void testTextToSpeechTextEmpty() { Exception 
exception = assertThrows(IllegalArgumentException.class, - () -> ElevenLabsApi.SpeechRequest.builder().text("").modelId("eleven_monolingual_v1").build()); + () -> ElevenLabsApi.SpeechRequest.builder().text("").modelId("eleven_turbo_v2_5").build()); assertThat(exception.getMessage()).isEqualTo("text must not be empty"); } @@ -120,7 +122,7 @@ public void testTextToSpeechTextEmpty() { public void testTextToSpeechStream() { ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder() .text("This is a longer text to ensure multiple chunks are received through the streaming API.") - .modelId("eleven_monolingual_v1") + .modelId("eleven_turbo_v2_5") .build(); String validVoiceId = "9BWtsMINqrJLrRacOk9x"; @@ -146,7 +148,7 @@ public void testTextToSpeechStream() { public void testTextToSpeechStreamWithVoiceSettings() { ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder() .text("Hello, with Voice settings in streaming mode!") - .modelId("eleven_monolingual_v1") + .modelId("eleven_turbo_v2_5") .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.5, 0.7, null, null, null)) .build(); @@ -164,12 +166,13 @@ public void testTextToSpeechStreamWithVoiceSettings() { public void testTextToSpeechStreamWithQueryParams() { ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder() .text("Hello, testing streaming with query params!") - .modelId("eleven_monolingual_v1") + .modelId("eleven_turbo_v2_5") .build(); String validVoiceId = "9BWtsMINqrJLrRacOk9x"; MultiValueMap queryParams = new LinkedMultiValueMap<>(); queryParams.add("optimize_streaming_latency", "2"); + queryParams.add("enable_logging", "true"); queryParams.add("output_format", "mp3_44100_128"); Flux> responseFlux = elevenLabsApi.textToSpeechStream(request, validVoiceId, @@ -186,7 +189,7 @@ public void testTextToSpeechStreamWithQueryParams() { public void testTextToSpeechStreamVoiceIdNull() { ElevenLabsApi.SpeechRequest request = 
ElevenLabsApi.SpeechRequest.builder() .text("This should fail.") - .modelId("eleven_monolingual_v1") + .modelId("eleven_turbo_v2_5") .build(); Exception exception = assertThrows(IllegalArgumentException.class, @@ -208,7 +211,7 @@ public void testTextToSpeechStreamTextEmpty() { Exception exception = assertThrows(IllegalArgumentException.class, () -> { ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder() .text("") - .modelId("eleven_monolingual_v1") + .modelId("eleven_turbo_v2_5") .build(); String validVoiceId = "9BWtsMINqrJLrRacOk9x"; diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/Speech.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/Speech.java index 93ae1cba3c5..9ca15158460 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/Speech.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/Speech.java @@ -29,7 +29,10 @@ * * @author Ahmed Yousri * @since 1.0.0-M1 + * @deprecated Use {@link org.springframework.ai.audio.tts.Speech} from the core package instead. + * This class will be removed in a future release. */ +@Deprecated public class Speech implements ModelResult { private final byte[] audio; diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechMessage.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechMessage.java index dde419268b9..8b3c96f5f53 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechMessage.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechMessage.java @@ -24,7 +24,10 @@ * * @author Ahmed Yousri * @since 1.0.0-M1 + * @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechMessage} from the core package instead. + * This class will be removed in a future release. 
*/ +@Deprecated public class SpeechMessage { private String text; diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechModel.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechModel.java index f03370ce434..9e443df80e1 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechModel.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechModel.java @@ -25,7 +25,10 @@ * * @author Ahmed Yousri * @since 1.0.0-M1 + * @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechModel} from the core package instead. + * This interface will be removed in a future release. */ +@Deprecated @FunctionalInterface public interface SpeechModel extends Model { diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechPrompt.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechPrompt.java index 03fb07d6e89..e5fa1bb66a5 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechPrompt.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechPrompt.java @@ -29,7 +29,10 @@ * * @author Ahmed Yousri * @since 1.0.0-M1 + * @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechPrompt} from the core package instead. + * This class will be removed in a future release. 
*/ +@Deprecated public class SpeechPrompt implements ModelRequest { private final SpeechMessage message; diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechResponse.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechResponse.java index 5b92fe770b1..27a7b675cf3 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechResponse.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechResponse.java @@ -28,7 +28,10 @@ * * @author Ahmed Yousri * @since 1.0.0-M1 + * @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechResponse} from the core package instead. + * This class will be removed in a future release. */ +@Deprecated public class SpeechResponse implements ModelResponse { private final Speech speech; diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/StreamingSpeechModel.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/StreamingSpeechModel.java index 6743637948d..17c1b3bc1d9 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/StreamingSpeechModel.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/StreamingSpeechModel.java @@ -27,7 +27,10 @@ * * @author Ahmed Yousri * @since 1.0.0-M1 + * @deprecated Use {@link org.springframework.ai.audio.tts.StreamingTextToSpeechModel} from the core package instead. + * This interface will be removed in a future release. 
*/ +@Deprecated @FunctionalInterface public interface StreamingSpeechModel extends StreamingModel { diff --git a/pom.xml b/pom.xml index 8762799dd10..be86d9dc04c 100644 --- a/pom.xml +++ b/pom.xml @@ -99,6 +99,7 @@ auto-configurations/models/spring-ai-autoconfigure-model-anthropic auto-configurations/models/spring-ai-autoconfigure-model-azure-openai auto-configurations/models/spring-ai-autoconfigure-model-bedrock-ai + auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs auto-configurations/models/spring-ai-autoconfigure-model-huggingface auto-configurations/models/spring-ai-autoconfigure-model-openai auto-configurations/models/spring-ai-autoconfigure-model-minimax @@ -181,7 +182,7 @@ spring-ai-spring-boot-starters/spring-ai-starter-model-azure-openai spring-ai-spring-boot-starters/spring-ai-starter-model-bedrock spring-ai-spring-boot-starters/spring-ai-starter-model-bedrock-converse - spring-ai-spring-boot-starters/spring-ai-starter-elevenlabs + spring-ai-spring-boot-starters/spring-ai-starter-model-elevenlabs spring-ai-spring-boot-starters/spring-ai-starter-model-huggingface spring-ai-spring-boot-starters/spring-ai-starter-model-minimax spring-ai-spring-boot-starters/spring-ai-starter-model-mistral-ai diff --git a/spring-ai-bom/pom.xml b/spring-ai-bom/pom.xml index e145d0bc89f..5128d6f4f48 100644 --- a/spring-ai-bom/pom.xml +++ b/spring-ai-bom/pom.xml @@ -243,6 +243,13 @@ ${project.version} + + org.springframework.ai + spring-ai-elevenlabs + ${project.version} + true + + org.springframework.ai spring-ai-huggingface @@ -310,7 +317,6 @@ ${project.version} - org.springframework.ai spring-ai-zhipuai diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech/elevenlabs-speech.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech/elevenlabs-speech.adoc index ca499c0543c..2abf8cfb9b0 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech/elevenlabs-speech.adoc +++ 
b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech/elevenlabs-speech.adoc @@ -54,7 +54,7 @@ The prefix `spring.ai.elevenlabs.tts` is used as the property prefix to configur |==== | Property | Description | Default -| spring.ai.elevenlabs.tts.options.model-id | The ID of the model to use. | eleven_monolingual_v1 +| spring.ai.elevenlabs.tts.options.model-id | The ID of the model to use. | eleven_turbo_v2_5 | spring.ai.elevenlabs.tts.options.voice-id | The ID of the voice to use. This is the *voice ID*, not the voice name. | 9BWtsMINqrJLrRacOk9x | spring.ai.elevenlabs.tts.options.output-format | The output format for the generated audio. See xref:elevenlabs-tts.adoc#output-formats[Output Formats] below. | mp3_22050_32 | spring.ai.elevenlabs.tts.enabled | Enable or disable the ElevenLabs Text-to-Speech client. | true @@ -94,12 +94,12 @@ The `ElevenLabsSpeechOptions` class provides options to use when making a text-t * `languageCode`: The language code of the input text (e.g., "en" for English). * `pronunciationDictionaryLocators`: A list of pronunciation dictionary locators. * `seed`: A seed for random number generation, for reproducibility. -* `previousText`: Text before the main text, for context in multi-turn conversations (advanced use). -* `nextText`: Text after the main text, for context in multi-turn conversations (advanced use). -* `previousRequestIds`: Request IDs from previous turns in a conversation (advanced use). -* `nextRequestIds`: Request IDs for subsequent turns in a conversation (advanced use). -* `usePvcAsIvc`: Use PVC as IVC (advanced use). +* `previousText`: Text before the main text, for context in multi-turn conversations. +* `nextText`: Text after the main text, for context in multi-turn conversations. +* `previousRequestIds`: Request IDs from previous turns in a conversation. +* `nextRequestIds`: Request IDs for subsequent turns in a conversation. * `applyTextNormalization`: Apply text normalization ("auto", "on", or "off"). 
+* `applyTextNormalizationForVoice`: Apply language text normalization. For example: @@ -147,7 +147,7 @@ ElevenLabsApi elevenLabsApi = ElevenLabsApi.builder() .build(); ElevenLabsSpeechOptions defaultOptions = ElevenLabsSpeechOptions.builder() - .modelId("eleven_monolingual_v1") + .modelId("eleven_turbo_v2_5") .voiceId("your_voice_id") .outputFormat("mp3_44100_128") .build(); @@ -176,7 +176,7 @@ ElevenLabsApi elevenLabsApi = ElevenLabsApi.builder() .build(); ElevenLabsSpeechOptions defaultOptions = ElevenLabsSpeechOptions.builder() - .modelId("eleven_monolingual_v1") + .modelId("eleven_turbo_v2_5") .voiceId("your_voice_id") .outputFormat("mp3_44100_128") .build(); diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/DefaultTextToSpeechOptions.java b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/DefaultTextToSpeechOptions.java similarity index 98% rename from models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/DefaultTextToSpeechOptions.java rename to spring-ai-model/src/main/java/org/springframework/ai/audio/tts/DefaultTextToSpeechOptions.java index 96d55c841e2..48a23433608 100644 --- a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/DefaultTextToSpeechOptions.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/DefaultTextToSpeechOptions.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package org.springframework.ai.elevenlabs.tts; +package org.springframework.ai.audio.tts; import java.util.Objects; diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/Speech.java b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/Speech.java similarity index 97% rename from models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/Speech.java rename to spring-ai-model/src/main/java/org/springframework/ai/audio/tts/Speech.java index 2307b64d086..794d2a2b390 100644 --- a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/Speech.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/Speech.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package org.springframework.ai.elevenlabs.tts; +package org.springframework.ai.audio.tts; import java.util.Arrays; import java.util.Objects; diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/StreamingTextToSpeechModel.java b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/StreamingTextToSpeechModel.java similarity index 97% rename from models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/StreamingTextToSpeechModel.java rename to spring-ai-model/src/main/java/org/springframework/ai/audio/tts/StreamingTextToSpeechModel.java index 409ed788f1d..f342b0fb0aa 100644 --- a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/StreamingTextToSpeechModel.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/StreamingTextToSpeechModel.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package org.springframework.ai.elevenlabs.tts; +package org.springframework.ai.audio.tts; import reactor.core.publisher.Flux; diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechMessage.java b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechMessage.java similarity index 96% rename from models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechMessage.java rename to spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechMessage.java index 88997c42ea5..d6d299a26bb 100644 --- a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechMessage.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechMessage.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package org.springframework.ai.elevenlabs.tts; +package org.springframework.ai.audio.tts; import java.util.Objects; diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechModel.java b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechModel.java similarity index 96% rename from models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechModel.java rename to spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechModel.java index 6c6d6f327d6..1f417992acd 100644 --- a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechModel.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechModel.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package org.springframework.ai.elevenlabs.tts; +package org.springframework.ai.audio.tts; import org.springframework.ai.model.Model; import org.springframework.ai.model.ModelResult; diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechOptions.java b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechOptions.java similarity index 98% rename from models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechOptions.java rename to spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechOptions.java index d204a32212e..9a3e8de1a1b 100644 --- a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechOptions.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechOptions.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package org.springframework.ai.elevenlabs.tts; +package org.springframework.ai.audio.tts; import org.springframework.ai.model.ModelOptions; import org.springframework.lang.Nullable; diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechPrompt.java b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechPrompt.java similarity index 97% rename from models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechPrompt.java rename to spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechPrompt.java index 56b656e5457..f679018df51 100644 --- a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechPrompt.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechPrompt.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package org.springframework.ai.elevenlabs.tts; +package org.springframework.ai.audio.tts; import java.util.Objects; diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechResponse.java b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechResponse.java similarity index 97% rename from models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechResponse.java rename to spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechResponse.java index 3fd8f5c9dce..cf2f6358a85 100644 --- a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/tts/TextToSpeechResponse.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechResponse.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package org.springframework.ai.elevenlabs.tts; +package org.springframework.ai.audio.tts; import java.util.List; import java.util.Objects; diff --git a/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/tts/DefaultTextToSpeechOptionsTests.java b/spring-ai-model/src/test/java/org/springframework/ai/audio/tts/DefaultTextToSpeechOptionsTests.java similarity index 97% rename from models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/tts/DefaultTextToSpeechOptionsTests.java rename to spring-ai-model/src/test/java/org/springframework/ai/audio/tts/DefaultTextToSpeechOptionsTests.java index bc6ff9b81e3..7194a42214e 100644 --- a/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/tts/DefaultTextToSpeechOptionsTests.java +++ b/spring-ai-model/src/test/java/org/springframework/ai/audio/tts/DefaultTextToSpeechOptionsTests.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package org.springframework.ai.elevenlabs.tts; +package org.springframework.ai.audio.tts; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.within; diff --git a/spring-ai-spring-boot-autoconfigure/pom.xml b/spring-ai-spring-boot-autoconfigure/pom.xml deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/spring-ai-spring-boot-autoconfigure/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports b/spring-ai-spring-boot-autoconfigure/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/spring-ai-spring-boot-starters/spring-ai-starter-elevenlabs/pom.xml b/spring-ai-spring-boot-starters/spring-ai-starter-model-elevenlabs/pom.xml similarity index 78% rename from spring-ai-spring-boot-starters/spring-ai-starter-elevenlabs/pom.xml rename to spring-ai-spring-boot-starters/spring-ai-starter-model-elevenlabs/pom.xml index 6ad6673146e..929f371e8cf 100644 --- a/spring-ai-spring-boot-starters/spring-ai-starter-elevenlabs/pom.xml +++ b/spring-ai-spring-boot-starters/spring-ai-starter-model-elevenlabs/pom.xml @@ -5,11 +5,11 @@ 4.0.0 org.springframework.ai - spring-ai - 1.0.0-SNAPSHOT + spring-ai-parent + 1.1.0-SNAPSHOT ../../pom.xml - spring-ai-elevenlabs-spring-boot-starter + spring-ai-starter-model-elevenlabs jar Spring AI Starter - ElevenLabs Spring AI ElevenLabs Auto Configuration @@ -28,12 +28,6 @@ spring-boot-starter - - org.springframework.ai - spring-ai-spring-boot-autoconfigure - ${project.parent.version} - - org.springframework.ai spring-ai-elevenlabs From 779160a37d35afce3a09fccb13bba86d79d62226 Mon Sep 17 00:00:00 2001 From: Alexandros Pappas Date: Tue, 10 Jun 2025 15:29:44 +0200 Subject: [PATCH 3/6] feat: format code and add TextToSpeechResponseMetadata Signed-off-by: Alexandros Pappas --- .../ai/openai/audio/speech/Speech.java | 4 ++-- 
.../ai/openai/audio/speech/SpeechMessage.java | 4 ++-- .../ai/openai/audio/speech/SpeechModel.java | 4 ++-- .../ai/openai/audio/speech/SpeechPrompt.java | 4 ++-- .../ai/openai/audio/speech/SpeechResponse.java | 4 ++-- .../ai/openai/audio/speech/StreamingSpeechModel.java | 4 ++-- .../audio/OpenAiAudioSpeechResponseMetadata.java | 4 ++-- .../ai/audio/tts/TextToSpeechResponse.java | 10 +++++----- .../ai/audio/tts/TextToSpeechResponseMetadata.java | 12 ++++++++++++ 9 files changed, 31 insertions(+), 19 deletions(-) create mode 100644 spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechResponseMetadata.java diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/Speech.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/Speech.java index 9ca15158460..66e8dd53c23 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/Speech.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/Speech.java @@ -29,8 +29,8 @@ * * @author Ahmed Yousri * @since 1.0.0-M1 - * @deprecated Use {@link org.springframework.ai.audio.tts.Speech} from the core package instead. - * This class will be removed in a future release. + * @deprecated Use {@link org.springframework.ai.audio.tts.Speech} from the core package + * instead. This class will be removed in a future release. 
*/ @Deprecated public class Speech implements ModelResult { diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechMessage.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechMessage.java index 8b3c96f5f53..8de55fe4f11 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechMessage.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechMessage.java @@ -24,8 +24,8 @@ * * @author Ahmed Yousri * @since 1.0.0-M1 - * @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechMessage} from the core package instead. - * This class will be removed in a future release. + * @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechMessage} from the + * core package instead. This class will be removed in a future release. */ @Deprecated public class SpeechMessage { diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechModel.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechModel.java index 9e443df80e1..98161933814 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechModel.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechModel.java @@ -25,8 +25,8 @@ * * @author Ahmed Yousri * @since 1.0.0-M1 - * @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechModel} from the core package instead. - * This interface will be removed in a future release. + * @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechModel} from the + * core package instead. This interface will be removed in a future release. 
*/ @Deprecated @FunctionalInterface diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechPrompt.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechPrompt.java index e5fa1bb66a5..bfce1e311ee 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechPrompt.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechPrompt.java @@ -29,8 +29,8 @@ * * @author Ahmed Yousri * @since 1.0.0-M1 - * @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechPrompt} from the core package instead. - * This class will be removed in a future release. + * @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechPrompt} from the + * core package instead. This class will be removed in a future release. */ @Deprecated public class SpeechPrompt implements ModelRequest { diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechResponse.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechResponse.java index 27a7b675cf3..9662764aec5 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechResponse.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechResponse.java @@ -28,8 +28,8 @@ * * @author Ahmed Yousri * @since 1.0.0-M1 - * @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechResponse} from the core package instead. - * This class will be removed in a future release. + * @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechResponse} from the + * core package instead. This class will be removed in a future release. 
*/ @Deprecated public class SpeechResponse implements ModelResponse { diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/StreamingSpeechModel.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/StreamingSpeechModel.java index 17c1b3bc1d9..fa8daadf159 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/StreamingSpeechModel.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/StreamingSpeechModel.java @@ -27,8 +27,8 @@ * * @author Ahmed Yousri * @since 1.0.0-M1 - * @deprecated Use {@link org.springframework.ai.audio.tts.StreamingTextToSpeechModel} from the core package instead. - * This interface will be removed in a future release. + * @deprecated Use {@link org.springframework.ai.audio.tts.StreamingTextToSpeechModel} + * from the core package instead. This interface will be removed in a future release. */ @Deprecated @FunctionalInterface diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/metadata/audio/OpenAiAudioSpeechResponseMetadata.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/metadata/audio/OpenAiAudioSpeechResponseMetadata.java index e90c4097d71..412b0775ea9 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/metadata/audio/OpenAiAudioSpeechResponseMetadata.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/metadata/audio/OpenAiAudioSpeechResponseMetadata.java @@ -16,9 +16,9 @@ package org.springframework.ai.openai.metadata.audio; +import org.springframework.ai.audio.tts.TextToSpeechResponseMetadata; import org.springframework.ai.chat.metadata.EmptyRateLimit; import org.springframework.ai.chat.metadata.RateLimit; -import org.springframework.ai.model.MutableResponseMetadata; import org.springframework.ai.openai.api.OpenAiAudioApi; import org.springframework.lang.Nullable; import 
org.springframework.util.Assert; @@ -29,7 +29,7 @@ * @author Ahmed Yousri * @see RateLimit */ -public class OpenAiAudioSpeechResponseMetadata extends MutableResponseMetadata { +public class OpenAiAudioSpeechResponseMetadata extends TextToSpeechResponseMetadata { public static final OpenAiAudioSpeechResponseMetadata NULL = new OpenAiAudioSpeechResponseMetadata() { diff --git a/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechResponse.java b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechResponse.java index cf2f6358a85..00cfab133fc 100644 --- a/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechResponse.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechResponse.java @@ -31,15 +31,15 @@ public class TextToSpeechResponse implements ModelResponse { private final List results; - private final ResponseMetadata metadata; + private final TextToSpeechResponseMetadata textToSpeechResponseMetadata; public TextToSpeechResponse(List results) { this(results, null); } - public TextToSpeechResponse(List results, ResponseMetadata metadata) { + public TextToSpeechResponse(List results, TextToSpeechResponseMetadata textToSpeechResponseMetadata) { this.results = results; - this.metadata = metadata; + this.textToSpeechResponseMetadata = textToSpeechResponseMetadata; } @Override @@ -52,8 +52,8 @@ public Speech getResult() { } @Override - public ResponseMetadata getMetadata() { - return this.metadata; + public TextToSpeechResponseMetadata getMetadata() { + return this.textToSpeechResponseMetadata; } @Override diff --git a/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechResponseMetadata.java b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechResponseMetadata.java new file mode 100644 index 00000000000..f581b167064 --- /dev/null +++ 
b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechResponseMetadata.java @@ -0,0 +1,12 @@ +package org.springframework.ai.audio.tts; + +import org.springframework.ai.model.MutableResponseMetadata; + +/** + * Metadata associated with a text-to-speech response. + * + * @author Alexandros Pappas + */ +public class TextToSpeechResponseMetadata extends MutableResponseMetadata { + +} From 6446920f6dd4900b4854e3b800d4dc50f062108b Mon Sep 17 00:00:00 2001 From: Alexandros Pappas Date: Tue, 10 Jun 2025 16:30:52 +0200 Subject: [PATCH 4/6] refactor ElevenLabsTextToSpeechModel Signed-off-by: Alexandros Pappas --- .../ElevenLabsTextToSpeechModel.java | 48 +++++++++++-------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModel.java b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModel.java index 3fb179ed804..ddd7cb91df1 100644 --- a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModel.java +++ b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModel.java @@ -71,21 +71,13 @@ public static Builder builder() { @Override public TextToSpeechResponse call(TextToSpeechPrompt prompt) { - ElevenLabsApi.SpeechRequest request = createRequest(prompt); - String voiceId = getOptions(prompt).getVoice(); - - MultiValueMap queryParameters = new LinkedMultiValueMap<>(); - if (getOptions(prompt).getEnableLogging() != null) { - queryParameters.add("enable_logging", getOptions(prompt).getEnableLogging().toString()); - } - if (getOptions(prompt).getFormat() != null) { - queryParameters.add("output_format", getOptions(prompt).getFormat()); - } + RequestContext requestContext = prepareRequest(prompt); byte[] audioData = retryTemplate.execute(context -> { - var response = elevenLabsApi.textToSpeech(request, 
voiceId, queryParameters); + var response = elevenLabsApi.textToSpeech(requestContext.request, requestContext.voiceId, + requestContext.queryParameters); if (response.getBody() == null) { - logger.warn("No speech response returned for request: {}", request); + logger.warn("No speech response returned for request: {}", requestContext.request); return new byte[0]; } return response.getBody(); @@ -96,19 +88,35 @@ public TextToSpeechResponse call(TextToSpeechPrompt prompt) { @Override public Flux stream(TextToSpeechPrompt prompt) { + RequestContext requestContext = prepareRequest(prompt); + + return retryTemplate.execute(context -> elevenLabsApi + .textToSpeechStream(requestContext.request, requestContext.voiceId, requestContext.queryParameters) + .map(entity -> new TextToSpeechResponse(List.of(new Speech(entity.getBody()))))); + } + + private RequestContext prepareRequest(TextToSpeechPrompt prompt) { ElevenLabsApi.SpeechRequest request = createRequest(prompt); - String voiceId = getOptions(prompt).getVoice(); + ElevenLabsTextToSpeechOptions options = getOptions(prompt); + String voiceId = options.getVoice(); + MultiValueMap queryParameters = buildQueryParameters(options); + + return new RequestContext(request, voiceId, queryParameters); + } + private record RequestContext(ElevenLabsApi.SpeechRequest request, String voiceId, + MultiValueMap queryParameters) { + } + + private MultiValueMap buildQueryParameters(ElevenLabsTextToSpeechOptions options) { MultiValueMap queryParameters = new LinkedMultiValueMap<>(); - if (getOptions(prompt).getEnableLogging() != null) { - queryParameters.add("enable_logging", getOptions(prompt).getEnableLogging().toString()); + if (options.getEnableLogging() != null) { + queryParameters.add("enable_logging", options.getEnableLogging().toString()); } - if (getOptions(prompt).getFormat() != null) { - queryParameters.add("output_format", getOptions(prompt).getFormat()); + if (options.getFormat() != null) { + queryParameters.add("output_format", 
options.getFormat()); } - - return retryTemplate.execute(context -> elevenLabsApi.textToSpeechStream(request, voiceId, queryParameters) - .map(entity -> new TextToSpeechResponse(List.of(new Speech(entity.getBody()))))); + return queryParameters; } private ElevenLabsApi.SpeechRequest createRequest(TextToSpeechPrompt prompt) { From 661e282fa67e61b0670b15f6867534de66d81a4e Mon Sep 17 00:00:00 2001 From: Alexandros Pappas Date: Tue, 10 Jun 2025 16:46:18 +0200 Subject: [PATCH 5/6] update doc and pom xml files Signed-off-by: Alexandros Pappas --- spring-ai-bom/pom.xml | 10 ++++++ .../api/audio/speech/elevenlabs-speech.adoc | 36 +++++++++++-------- .../pom.xml | 6 ++++ 3 files changed, 38 insertions(+), 14 deletions(-) diff --git a/spring-ai-bom/pom.xml b/spring-ai-bom/pom.xml index 5128d6f4f48..b4218a2dc94 100644 --- a/spring-ai-bom/pom.xml +++ b/spring-ai-bom/pom.xml @@ -571,6 +571,11 @@ ${project.version} + + org.springframework.ai + spring-ai-autoconfigure-model-elevenlabs + + org.springframework.ai spring-ai-autoconfigure-model-huggingface @@ -920,6 +925,11 @@ ${project.version} + + org.springframework.ai + spring-ai-starter-model-elevenlabs + + org.springframework.ai spring-ai-starter-model-minimax diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech/elevenlabs-speech.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech/elevenlabs-speech.adoc index 2abf8cfb9b0..09629529b3e 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech/elevenlabs-speech.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech/elevenlabs-speech.adoc @@ -18,7 +18,7 @@ To enable it, add the following dependency to your project's Maven `pom.xml` fil ---- org.springframework.ai - spring-ai-elevenlabs-spring-boot-starter + spring-ai-starter-model-elevenlabs ---- @@ -27,7 +27,7 @@ or to your Gradle `build.gradle` build file: [source,groovy] ---- dependencies { - implementation 
'org.springframework.ai:spring-ai-elevenlabs-spring-boot-starter' + implementation 'org.springframework.ai:spring-ai-starter-model-elevenlabs' } ---- @@ -68,18 +68,26 @@ TIP: All properties prefixed with `spring.ai.elevenlabs.tts.options` can be over .Available Output Formats [cols="1,1"] |==== -| Enum Value | Description -| MP3_22050_32 | MP3, 22.05 kHz, 32 kbps -| MP3_44100_32 | MP3, 44.1 kHz, 32 kbps -| MP3_44100_64 | MP3, 44.1 kHz, 64 kbps -| MP3_44100_96 | MP3, 44.1 kHz, 96 kbps -| MP3_44100_128 | MP3, 44.1 kHz, 128 kbps -| MP3_44100_192 | MP3, 44.1 kHz, 192 kbps -| PCM_16000 | PCM, 16 kHz -| PCM_22050 | PCM, 22.05 kHz -| PCM_24000 | PCM, 24 kHz -| PCM_44100 | PCM, 44 kHz -| ULAW_8000 | µ-law, 8 kHz +| Enum Value | Description +| MP3_22050_32 | MP3, 22.05 kHz, 32 kbps +| MP3_44100_32 | MP3, 44.1 kHz, 32 kbps +| MP3_44100_64 | MP3, 44.1 kHz, 64 kbps +| MP3_44100_96 | MP3, 44.1 kHz, 96 kbps +| MP3_44100_128 | MP3, 44.1 kHz, 128 kbps +| MP3_44100_192 | MP3, 44.1 kHz, 192 kbps +| PCM_8000 | PCM, 8 kHz +| PCM_16000 | PCM, 16 kHz +| PCM_22050 | PCM, 22.05 kHz +| PCM_24000 | PCM, 24 kHz +| PCM_44100 | PCM, 44.1 kHz +| PCM_48000 | PCM, 48 kHz +| ULAW_8000 | µ-law, 8 kHz +| ALAW_8000 | A-law, 8 kHz +| OPUS_48000_32 | Opus, 48 kHz, 32 kbps +| OPUS_48000_64 | Opus, 48 kHz, 64 kbps +| OPUS_48000_96 | Opus, 48 kHz, 96 kbps +| OPUS_48000_128 | Opus, 48 kHz, 128 kbps +| OPUS_48000_192 | Opus, 48 kHz, 192 kbps |==== diff --git a/spring-ai-spring-boot-starters/spring-ai-starter-model-elevenlabs/pom.xml b/spring-ai-spring-boot-starters/spring-ai-starter-model-elevenlabs/pom.xml index 929f371e8cf..a9961ab0a87 100644 --- a/spring-ai-spring-boot-starters/spring-ai-starter-model-elevenlabs/pom.xml +++ b/spring-ai-spring-boot-starters/spring-ai-starter-model-elevenlabs/pom.xml @@ -28,6 +28,12 @@ spring-boot-starter + + org.springframework.ai + spring-ai-autoconfigure-model-elevenlabs + ${project.parent.version} + + org.springframework.ai spring-ai-elevenlabs From 
9291c303b55d3291a50fa04e95e8760879edd20d Mon Sep 17 00:00:00 2001 From: Alexandros Pappas Date: Tue, 10 Jun 2025 16:58:28 +0200 Subject: [PATCH 6/6] explicitly add spring-webflux dependency Signed-off-by: Alexandros Pappas --- models/spring-ai-elevenlabs/pom.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/models/spring-ai-elevenlabs/pom.xml b/models/spring-ai-elevenlabs/pom.xml index 172da0099e8..85f8c513dfd 100644 --- a/models/spring-ai-elevenlabs/pom.xml +++ b/models/spring-ai-elevenlabs/pom.xml @@ -51,6 +51,11 @@ spring-context-support + + org.springframework + spring-webflux + + org.slf4j slf4j-api