diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/pom.xml b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/pom.xml new file mode 100644 index 00000000000..bc09ef1f5b4 --- /dev/null +++ b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/pom.xml @@ -0,0 +1,90 @@ + + + 4.0.0 + + org.springframework.ai + spring-ai-parent + 1.1.0-SNAPSHOT + ../../../pom.xml + + spring-ai-autoconfigure-model-elevenlabs + jar + Spring AI ElevenLabs Auto Configuration + Spring AI ElevenLabs Auto Configuration + https://github.com/spring-projects/spring-ai + + + https://github.com/spring-projects/spring-ai + git://github.com/spring-projects/spring-ai.git + git@github.com:spring-projects/spring-ai.git + + + + + + + + + org.springframework.ai + spring-ai-elevenlabs + ${project.parent.version} + true + + + + + + org.springframework.ai + spring-ai-autoconfigure-model-tool + ${project.parent.version} + + + + org.springframework.ai + spring-ai-autoconfigure-retry + ${project.parent.version} + + + + + org.springframework.boot + spring-boot-starter + true + + + + org.springframework.boot + spring-boot-configuration-processor + true + + + + org.springframework.boot + spring-boot-autoconfigure-processor + true + + + + + org.springframework.ai + spring-ai-test + ${project.parent.version} + test + + + + org.springframework.boot + spring-boot-starter-test + test + + + + org.mockito + mockito-core + test + + + + diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsAutoConfiguration.java b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsAutoConfiguration.java new file mode 100644 index 00000000000..b2578a93939 --- /dev/null +++ 
b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsAutoConfiguration.java @@ -0,0 +1,79 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.model.elevenlabs.autoconfigure; + +import org.springframework.ai.elevenlabs.ElevenLabsTextToSpeechModel; +import org.springframework.ai.elevenlabs.api.ElevenLabsApi; +import org.springframework.ai.retry.autoconfigure.SpringAiRetryAutoConfiguration; +import org.springframework.beans.factory.ObjectProvider; +import org.springframework.boot.autoconfigure.AutoConfiguration; +import org.springframework.boot.autoconfigure.ImportAutoConfiguration; +import org.springframework.boot.autoconfigure.condition.ConditionalOnClass; +import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.boot.autoconfigure.web.client.RestClientAutoConfiguration; +import org.springframework.boot.autoconfigure.web.reactive.function.client.WebClientAutoConfiguration; +import org.springframework.boot.context.properties.EnableConfigurationProperties; +import org.springframework.context.annotation.Bean; +import org.springframework.retry.support.RetryTemplate; +import org.springframework.web.client.ResponseErrorHandler; +import 
org.springframework.web.client.RestClient; +import org.springframework.web.reactive.function.client.WebClient; + +/** + * {@link AutoConfiguration Auto-configuration} for ElevenLabs. + * + * @author Alexandros Pappas + */ +@AutoConfiguration(after = { RestClientAutoConfiguration.class, SpringAiRetryAutoConfiguration.class, + WebClientAutoConfiguration.class }) +@ConditionalOnClass(ElevenLabsApi.class) +@EnableConfigurationProperties({ ElevenLabsSpeechProperties.class, ElevenLabsConnectionProperties.class }) +@ConditionalOnProperty(prefix = ElevenLabsSpeechProperties.CONFIG_PREFIX, name = "enabled", havingValue = "true", + matchIfMissing = true) +@ImportAutoConfiguration(classes = { SpringAiRetryAutoConfiguration.class, RestClientAutoConfiguration.class, + WebClientAutoConfiguration.class }) +public class ElevenLabsAutoConfiguration { + + @Bean + @ConditionalOnMissingBean + public ElevenLabsApi elevenLabsApi(ElevenLabsConnectionProperties connectionProperties, + ObjectProvider restClientBuilderProvider, + ObjectProvider webClientBuilderProvider, ResponseErrorHandler responseErrorHandler) { + + return ElevenLabsApi.builder() + .baseUrl(connectionProperties.getBaseUrl()) + .apiKey(connectionProperties.getApiKey()) + .restClientBuilder(restClientBuilderProvider.getIfAvailable(RestClient::builder)) + .webClientBuilder(webClientBuilderProvider.getIfAvailable(WebClient::builder)) + .responseErrorHandler(responseErrorHandler) + .build(); + } + + @Bean + @ConditionalOnMissingBean + public ElevenLabsTextToSpeechModel elevenLabsSpeechModel(ElevenLabsApi elevenLabsApi, + ElevenLabsSpeechProperties speechProperties, RetryTemplate retryTemplate) { + + return ElevenLabsTextToSpeechModel.builder() + .elevenLabsApi(elevenLabsApi) + .defaultOptions(speechProperties.getOptions()) + .retryTemplate(retryTemplate) + .build(); + } + +} diff --git 
a/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsConnectionProperties.java b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsConnectionProperties.java new file mode 100644 index 00000000000..4f2b299142e --- /dev/null +++ b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsConnectionProperties.java @@ -0,0 +1,58 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.model.elevenlabs.autoconfigure; + +import org.springframework.ai.elevenlabs.api.ElevenLabsApi; +import org.springframework.boot.context.properties.ConfigurationProperties; + +/** + * Configuration properties for the ElevenLabs API connection. + * + * @author Alexandros Pappas + */ +@ConfigurationProperties(ElevenLabsConnectionProperties.CONFIG_PREFIX) +public class ElevenLabsConnectionProperties { + + public static final String CONFIG_PREFIX = "spring.ai.elevenlabs"; + + /** + * ElevenLabs API access key. + */ + private String apiKey; + + /** + * ElevenLabs API base URL. 
+ */ + private String baseUrl = ElevenLabsApi.DEFAULT_BASE_URL; + + public String getApiKey() { + return this.apiKey; + } + + public void setApiKey(String apiKey) { + this.apiKey = apiKey; + } + + public String getBaseUrl() { + return this.baseUrl; + } + + public void setBaseUrl(String baseUrl) { + this.baseUrl = baseUrl; + } + +} diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsSpeechProperties.java b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsSpeechProperties.java new file mode 100644 index 00000000000..7614f3070ab --- /dev/null +++ b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsSpeechProperties.java @@ -0,0 +1,68 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.ai.model.elevenlabs.autoconfigure; + +import org.springframework.ai.elevenlabs.ElevenLabsTextToSpeechOptions; +import org.springframework.ai.elevenlabs.api.ElevenLabsApi; +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.boot.context.properties.NestedConfigurationProperty; + +/** + * Configuration properties for the ElevenLabs Text-to-Speech API. + * + * @author Alexandros Pappas + */ +@ConfigurationProperties(ElevenLabsSpeechProperties.CONFIG_PREFIX) +public class ElevenLabsSpeechProperties { + + public static final String CONFIG_PREFIX = "spring.ai.elevenlabs.tts"; + + public static final String DEFAULT_MODEL_ID = "eleven_turbo_v2_5"; + + private static final String DEFAULT_VOICE_ID = "9BWtsMINqrJLrRacOk9x"; + + private static final ElevenLabsApi.OutputFormat DEFAULT_OUTPUT_FORMAT = ElevenLabsApi.OutputFormat.MP3_22050_32; + + /** + * Enable ElevenLabs speech model. + */ + private boolean enabled = true; + + @NestedConfigurationProperty + private ElevenLabsTextToSpeechOptions options = ElevenLabsTextToSpeechOptions.builder() + .modelId(DEFAULT_MODEL_ID) + .voiceId(DEFAULT_VOICE_ID) + .outputFormat(DEFAULT_OUTPUT_FORMAT.getValue()) + .build(); + + public ElevenLabsTextToSpeechOptions getOptions() { + return this.options; + } + + public void setOptions(ElevenLabsTextToSpeechOptions options) { + this.options = options; + } + + public boolean isEnabled() { + return this.enabled; + } + + public void setEnabled(boolean enabled) { + this.enabled = enabled; + } + +} diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports new file mode 100644 index 00000000000..82784c92262 --- /dev/null +++ 
b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports @@ -0,0 +1,16 @@ +# +# Copyright 2025-2025 the original author or authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +org.springframework.ai.model.elevenlabs.autoconfigure.ElevenLabsAutoConfiguration diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/test/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsAutoConfigurationIT.java b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/test/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsAutoConfigurationIT.java new file mode 100644 index 00000000000..4c1fc68ec04 --- /dev/null +++ b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/test/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsAutoConfigurationIT.java @@ -0,0 +1,84 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.model.elevenlabs.autoconfigure; + +import java.util.Arrays; + +import static org.assertj.core.api.Assertions.assertThat; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; + +import org.springframework.ai.elevenlabs.ElevenLabsTextToSpeechModel; +import org.springframework.boot.autoconfigure.AutoConfigurations; +import org.springframework.boot.test.context.runner.ApplicationContextRunner; + +/** + * Integration tests for the {@link ElevenLabsAutoConfiguration}. + * + * @author Alexandros Pappas + */ +@EnabledIfEnvironmentVariable(named = "ELEVEN_LABS_API_KEY", matches = ".*") +public class ElevenLabsAutoConfigurationIT { + + private static final org.apache.commons.logging.Log logger = org.apache.commons.logging.LogFactory + .getLog(ElevenLabsAutoConfigurationIT.class); + + private final ApplicationContextRunner contextRunner = new ApplicationContextRunner() + .withPropertyValues("spring.ai.elevenlabs.api-key=" + System.getenv("ELEVEN_LABS_API_KEY")) + .withConfiguration(AutoConfigurations.of(ElevenLabsAutoConfiguration.class)); + + @Test + void speech() { + this.contextRunner.run(context -> { + ElevenLabsTextToSpeechModel speechModel = context.getBean(ElevenLabsTextToSpeechModel.class); + byte[] response = speechModel.call("H"); + assertThat(response).isNotNull(); + assertThat(verifyMp3FrameHeader(response)) + .withFailMessage("Expected MP3 frame header to be present in the response, but it was not found.") + .isTrue(); + assertThat(response).isNotEmpty(); 
+ + logger.debug("Response: " + Arrays.toString(response)); + }); + } + + @Test + void speechStream() { + this.contextRunner.run(context -> { + ElevenLabsTextToSpeechModel speechModel = context.getBean(ElevenLabsTextToSpeechModel.class); + byte[] response = speechModel.call("Hello"); + assertThat(response).isNotNull(); + assertThat(verifyMp3FrameHeader(response)) + .withFailMessage("Expected MP3 frame header to be present in the response, but it was not found.") + .isTrue(); + assertThat(response).isNotEmpty(); + + logger.debug("Response: " + Arrays.toString(response)); + }); + } + + public boolean verifyMp3FrameHeader(byte[] audioResponse) { + if (audioResponse == null || audioResponse.length < 3) { + return false; + } + // Accept ID3 tag (MP3 metadata) or MP3 frame header + boolean hasId3 = audioResponse[0] == 'I' && audioResponse[1] == 'D' && audioResponse[2] == '3'; + boolean hasFrame = (audioResponse[0] & 0xFF) == 0xFF && (audioResponse[1] & 0xE0) == 0xE0; + return hasId3 || hasFrame; + } + +} diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/test/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsPropertiesTests.java b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/test/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsPropertiesTests.java new file mode 100644 index 00000000000..e8ab28a2dcc --- /dev/null +++ b/auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs/src/test/java/org/springframework/ai/model/elevenlabs/autoconfigure/ElevenLabsPropertiesTests.java @@ -0,0 +1,141 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.model.elevenlabs.autoconfigure; + +import static org.assertj.core.api.Assertions.assertThat; +import org.junit.jupiter.api.Test; + +import org.springframework.ai.elevenlabs.ElevenLabsTextToSpeechModel; +import org.springframework.ai.elevenlabs.api.ElevenLabsApi; +import org.springframework.boot.autoconfigure.AutoConfigurations; +import org.springframework.boot.test.context.runner.ApplicationContextRunner; + +/** + * Tests for the {@link ElevenLabsSpeechProperties} and + * {@link ElevenLabsConnectionProperties}. + * + * @author Alexandros Pappas + */ +public class ElevenLabsPropertiesTests { + + @Test + public void connectionProperties() { + new ApplicationContextRunner().withPropertyValues( + // @formatter:off + "spring.ai.elevenlabs.api-key=YOUR_API_KEY", + "spring.ai.elevenlabs.base-url=https://custom.api.elevenlabs.io", + "spring.ai.elevenlabs.tts.options.model-id=custom-model", + "spring.ai.elevenlabs.tts.options.voice=custom-voice", + "spring.ai.elevenlabs.tts.options.voice-settings.stability=0.6", + "spring.ai.elevenlabs.tts.options.voice-settings.similarity-boost=0.8", + "spring.ai.elevenlabs.tts.options.voice-settings.style=0.2", + "spring.ai.elevenlabs.tts.options.voice-settings.use-speaker-boost=false", + "spring.ai.elevenlabs.tts.options.voice-settings.speed=1.5" + // @formatter:on + ).withConfiguration(AutoConfigurations.of(ElevenLabsAutoConfiguration.class)).run(context -> { + var speechProperties = context.getBean(ElevenLabsSpeechProperties.class); + var connectionProperties = 
context.getBean(ElevenLabsConnectionProperties.class); + + assertThat(connectionProperties.getApiKey()).isEqualTo("YOUR_API_KEY"); + assertThat(connectionProperties.getBaseUrl()).isEqualTo("https://custom.api.elevenlabs.io"); + + assertThat(speechProperties.getOptions().getModelId()).isEqualTo("custom-model"); + assertThat(speechProperties.getOptions().getVoice()).isEqualTo("custom-voice"); + assertThat(speechProperties.getOptions().getVoiceSettings().stability()).isEqualTo(0.6); + assertThat(speechProperties.getOptions().getVoiceSettings().similarityBoost()).isEqualTo(0.8); + assertThat(speechProperties.getOptions().getVoiceSettings().style()).isEqualTo(0.2); + assertThat(speechProperties.getOptions().getVoiceSettings().useSpeakerBoost()).isFalse(); + assertThat(speechProperties.getOptions().getSpeed()).isEqualTo(1.5f); + + // enabled is true by default + assertThat(speechProperties.isEnabled()).isTrue(); + }); + } + + @Test + public void speechOptionsTest() { + new ApplicationContextRunner().withPropertyValues( + // @formatter:off + "spring.ai.elevenlabs.api-key=YOUR_API_KEY", + "spring.ai.elevenlabs.tts.options.model-id=custom-model", + "spring.ai.elevenlabs.tts.options.voice=custom-voice", + "spring.ai.elevenlabs.tts.options.format=pcm_44100", + "spring.ai.elevenlabs.tts.options.voice-settings.stability=0.6", + "spring.ai.elevenlabs.tts.options.voice-settings.similarity-boost=0.8", + "spring.ai.elevenlabs.tts.options.voice-settings.style=0.2", + "spring.ai.elevenlabs.tts.options.voice-settings.use-speaker-boost=false", + "spring.ai.elevenlabs.tts.options.voice-settings.speed=1.2", + "spring.ai.elevenlabs.tts.options.language-code=en", + "spring.ai.elevenlabs.tts.options.seed=12345", + "spring.ai.elevenlabs.tts.options.previous-text=previous", + "spring.ai.elevenlabs.tts.options.next-text=next", + "spring.ai.elevenlabs.tts.options.apply-text-normalization=ON", + "spring.ai.elevenlabs.tts.options.apply-language-text-normalization=true" + // @formatter:on + 
).withConfiguration(AutoConfigurations.of(ElevenLabsAutoConfiguration.class)).run(context -> { + var speechProperties = context.getBean(ElevenLabsSpeechProperties.class); + + assertThat(speechProperties.getOptions().getModelId()).isEqualTo("custom-model"); + assertThat(speechProperties.getOptions().getVoice()).isEqualTo("custom-voice"); + assertThat(speechProperties.getOptions().getFormat()).isEqualTo("pcm_44100"); + assertThat(speechProperties.getOptions().getVoiceSettings().stability()).isEqualTo(0.6); + assertThat(speechProperties.getOptions().getVoiceSettings().similarityBoost()).isEqualTo(0.8); + assertThat(speechProperties.getOptions().getVoiceSettings().style()).isEqualTo(0.2); + assertThat(speechProperties.getOptions().getVoiceSettings().useSpeakerBoost()).isFalse(); + assertThat(speechProperties.getOptions().getVoiceSettings().speed()).isEqualTo(1.2); + assertThat(speechProperties.getOptions().getSpeed()).isEqualTo(1.2); + assertThat(speechProperties.getOptions().getLanguageCode()).isEqualTo("en"); + assertThat(speechProperties.getOptions().getSeed()).isEqualTo(12345); + assertThat(speechProperties.getOptions().getPreviousText()).isEqualTo("previous"); + assertThat(speechProperties.getOptions().getNextText()).isEqualTo("next"); + assertThat(speechProperties.getOptions().getApplyTextNormalization()) + .isEqualTo(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON); + assertThat(speechProperties.getOptions().getApplyLanguageTextNormalization()).isTrue(); + }); + } + + @Test + public void speechActivation() { + + // It is enabled by default + new ApplicationContextRunner().withPropertyValues("spring.ai.elevenlabs.api-key=YOUR_API_KEY") + .withConfiguration(AutoConfigurations.of(ElevenLabsAutoConfiguration.class)) + .run(context -> { + assertThat(context.getBeansOfType(ElevenLabsSpeechProperties.class)).isNotEmpty(); + assertThat(context.getBeansOfType(ElevenLabsTextToSpeechModel.class)).isNotEmpty(); + }); + + // Explicitly enable the text-to-speech 
autoconfiguration. + new ApplicationContextRunner() + .withPropertyValues("spring.ai.elevenlabs.api-key=YOUR_API_KEY", "spring.ai.elevenlabs.tts.enabled=true") + .withConfiguration(AutoConfigurations.of(ElevenLabsAutoConfiguration.class)) + .run(context -> { + assertThat(context.getBeansOfType(ElevenLabsSpeechProperties.class)).isNotEmpty(); + assertThat(context.getBeansOfType(ElevenLabsTextToSpeechModel.class)).isNotEmpty(); + }); + + // Explicitly disable the text-to-speech autoconfiguration. + new ApplicationContextRunner() + .withPropertyValues("spring.ai.elevenlabs.api-key=YOUR_API_KEY", "spring.ai.elevenlabs.tts.enabled=false") + .withConfiguration(AutoConfigurations.of(ElevenLabsAutoConfiguration.class)) + .run(context -> { + assertThat(context.getBeansOfType(ElevenLabsSpeechProperties.class)).isEmpty(); + assertThat(context.getBeansOfType(ElevenLabsTextToSpeechModel.class)).isEmpty(); + }); + } + +} diff --git a/models/spring-ai-elevenlabs/README.md b/models/spring-ai-elevenlabs/README.md new file mode 100644 index 00000000000..b7149d0b6f3 --- /dev/null +++ b/models/spring-ai-elevenlabs/README.md @@ -0,0 +1,3 @@ +# Spring AI - ElevenLabs Text-to-Speech + +[ElevenLabs Text-to-Speech Documentation](https://docs.spring.io/spring-ai/reference/api/audio/speech/elevenlabs-speech.html) \ No newline at end of file diff --git a/models/spring-ai-elevenlabs/pom.xml b/models/spring-ai-elevenlabs/pom.xml new file mode 100644 index 00000000000..85f8c513dfd --- /dev/null +++ b/models/spring-ai-elevenlabs/pom.xml @@ -0,0 +1,92 @@ + + + 4.0.0 + + org.springframework.ai + spring-ai-parent + 1.1.0-SNAPSHOT + ../../pom.xml + + + spring-ai-elevenlabs + jar + Spring AI Model - ElevenLabs + ElevenLabs Text-to-Speech model support + https://github.com/spring-projects/spring-ai + + + https://github.com/spring-projects/spring-ai + git://github.com/spring-projects/spring-ai.git + git@github.com:spring-projects/spring-ai.git + + + + + + + + + + + org.springframework.ai + 
spring-ai-model + ${project.parent.version} + + + + org.springframework.ai + spring-ai-retry + ${project.parent.version} + + + + io.rest-assured + json-path + + + + org.springframework + spring-context-support + + + + org.springframework + spring-webflux + + + + org.slf4j + slf4j-api + + + + + org.springframework.ai + spring-ai-test + ${project.version} + test + + + + io.micrometer + micrometer-observation-test + test + + + + com.fasterxml.jackson.dataformat + jackson-dataformat-xml + 2.11.1 + test + + + + io.projectreactor + reactor-test + test + + + + diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModel.java b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModel.java new file mode 100644 index 00000000000..ddd7cb91df1 --- /dev/null +++ b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModel.java @@ -0,0 +1,219 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.ai.elevenlabs; + +import java.util.List; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import reactor.core.publisher.Flux; + +import org.springframework.ai.elevenlabs.api.ElevenLabsApi; +import org.springframework.ai.audio.tts.Speech; +import org.springframework.ai.audio.tts.StreamingTextToSpeechModel; +import org.springframework.ai.audio.tts.TextToSpeechModel; +import org.springframework.ai.audio.tts.TextToSpeechPrompt; +import org.springframework.ai.audio.tts.TextToSpeechResponse; +import org.springframework.ai.retry.RetryUtils; +import org.springframework.retry.support.RetryTemplate; +import org.springframework.util.Assert; +import org.springframework.util.LinkedMultiValueMap; +import org.springframework.util.MultiValueMap; + +/** + * Implementation of the {@link TextToSpeechModel} and {@link StreamingTextToSpeechModel} + * interfaces + * + * @author Alexandros Pappas + */ +public class ElevenLabsTextToSpeechModel implements TextToSpeechModel, StreamingTextToSpeechModel { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private final ElevenLabsApi elevenLabsApi; + + private final RetryTemplate retryTemplate; + + private final ElevenLabsTextToSpeechOptions defaultOptions; + + public ElevenLabsTextToSpeechModel(ElevenLabsApi elevenLabsApi, ElevenLabsTextToSpeechOptions defaultOptions) { + this(elevenLabsApi, defaultOptions, RetryUtils.DEFAULT_RETRY_TEMPLATE); + } + + public ElevenLabsTextToSpeechModel(ElevenLabsApi elevenLabsApi, ElevenLabsTextToSpeechOptions defaultOptions, + RetryTemplate retryTemplate) { + Assert.notNull(elevenLabsApi, "ElevenLabsApi must not be null"); + Assert.notNull(defaultOptions, "ElevenLabsSpeechOptions must not be null"); + Assert.notNull(retryTemplate, "RetryTemplate must not be null"); + + this.elevenLabsApi = elevenLabsApi; + this.defaultOptions = defaultOptions; + this.retryTemplate = retryTemplate; + } + + public static Builder builder() { + return new 
Builder(); + } + + @Override + public TextToSpeechResponse call(TextToSpeechPrompt prompt) { + RequestContext requestContext = prepareRequest(prompt); + + byte[] audioData = retryTemplate.execute(context -> { + var response = elevenLabsApi.textToSpeech(requestContext.request, requestContext.voiceId, + requestContext.queryParameters); + if (response.getBody() == null) { + logger.warn("No speech response returned for request: {}", requestContext.request); + return new byte[0]; + } + return response.getBody(); + }); + + return new TextToSpeechResponse(List.of(new Speech(audioData))); + } + + @Override + public Flux stream(TextToSpeechPrompt prompt) { + RequestContext requestContext = prepareRequest(prompt); + + return retryTemplate.execute(context -> elevenLabsApi + .textToSpeechStream(requestContext.request, requestContext.voiceId, requestContext.queryParameters) + .map(entity -> new TextToSpeechResponse(List.of(new Speech(entity.getBody()))))); + } + + private RequestContext prepareRequest(TextToSpeechPrompt prompt) { + ElevenLabsApi.SpeechRequest request = createRequest(prompt); + ElevenLabsTextToSpeechOptions options = getOptions(prompt); + String voiceId = options.getVoice(); + MultiValueMap queryParameters = buildQueryParameters(options); + + return new RequestContext(request, voiceId, queryParameters); + } + + private record RequestContext(ElevenLabsApi.SpeechRequest request, String voiceId, + MultiValueMap queryParameters) { + } + + private MultiValueMap buildQueryParameters(ElevenLabsTextToSpeechOptions options) { + MultiValueMap queryParameters = new LinkedMultiValueMap<>(); + if (options.getEnableLogging() != null) { + queryParameters.add("enable_logging", options.getEnableLogging().toString()); + } + if (options.getFormat() != null) { + queryParameters.add("output_format", options.getFormat()); + } + return queryParameters; + } + + private ElevenLabsApi.SpeechRequest createRequest(TextToSpeechPrompt prompt) { + ElevenLabsTextToSpeechOptions options = 
getOptions(prompt); + + String voiceId = options.getVoice(); + Assert.notNull(voiceId, "A voiceId must be specified in the ElevenLabsSpeechOptions."); + + String text = prompt.getInstructions().getText(); + Assert.hasText(text, "Prompt must contain text to convert to speech."); + + return ElevenLabsApi.SpeechRequest.builder() + .text(text) + .modelId(options.getModelId()) + .voiceSettings(options.getVoiceSettings()) + .languageCode(options.getLanguageCode()) + .pronunciationDictionaryLocators(options.getPronunciationDictionaryLocators()) + .seed(options.getSeed()) + .previousText(options.getPreviousText()) + .nextText(options.getNextText()) + .previousRequestIds(options.getPreviousRequestIds()) + .nextRequestIds(options.getNextRequestIds()) + .applyTextNormalization(options.getApplyTextNormalization()) + .applyLanguageTextNormalization(options.getApplyLanguageTextNormalization()) + .build(); + } + + private ElevenLabsTextToSpeechOptions getOptions(TextToSpeechPrompt prompt) { + ElevenLabsTextToSpeechOptions runtimeOptions = (prompt + .getOptions() instanceof ElevenLabsTextToSpeechOptions elevenLabsSpeechOptions) ? elevenLabsSpeechOptions + : null; + return (runtimeOptions != null) ? 
merge(runtimeOptions, this.defaultOptions) : this.defaultOptions; + } + + private ElevenLabsTextToSpeechOptions merge(ElevenLabsTextToSpeechOptions runtimeOptions, + ElevenLabsTextToSpeechOptions defaultOptions) { + return ElevenLabsTextToSpeechOptions.builder() + .modelId(getOrDefault(runtimeOptions.getModelId(), defaultOptions.getModelId())) + .voice(getOrDefault(runtimeOptions.getVoice(), defaultOptions.getVoice())) + .voiceId(getOrDefault(runtimeOptions.getVoiceId(), defaultOptions.getVoiceId())) + .format(getOrDefault(runtimeOptions.getFormat(), defaultOptions.getFormat())) + .outputFormat(getOrDefault(runtimeOptions.getOutputFormat(), defaultOptions.getOutputFormat())) + .voiceSettings(getOrDefault(runtimeOptions.getVoiceSettings(), defaultOptions.getVoiceSettings())) + .languageCode(getOrDefault(runtimeOptions.getLanguageCode(), defaultOptions.getLanguageCode())) + .pronunciationDictionaryLocators(getOrDefault(runtimeOptions.getPronunciationDictionaryLocators(), + defaultOptions.getPronunciationDictionaryLocators())) + .seed(getOrDefault(runtimeOptions.getSeed(), defaultOptions.getSeed())) + .previousText(getOrDefault(runtimeOptions.getPreviousText(), defaultOptions.getPreviousText())) + .nextText(getOrDefault(runtimeOptions.getNextText(), defaultOptions.getNextText())) + .previousRequestIds( + getOrDefault(runtimeOptions.getPreviousRequestIds(), defaultOptions.getPreviousRequestIds())) + .nextRequestIds(getOrDefault(runtimeOptions.getNextRequestIds(), defaultOptions.getNextRequestIds())) + .applyTextNormalization(getOrDefault(runtimeOptions.getApplyTextNormalization(), + defaultOptions.getApplyTextNormalization())) + .applyLanguageTextNormalization(getOrDefault(runtimeOptions.getApplyLanguageTextNormalization(), + defaultOptions.getApplyLanguageTextNormalization())) + .build(); + } + + private T getOrDefault(T runtimeValue, T defaultValue) { + return runtimeValue != null ? 
runtimeValue : defaultValue; + } + + @Override + public ElevenLabsTextToSpeechOptions getDefaultOptions() { + return this.defaultOptions; + } + + public static class Builder { + + private ElevenLabsApi elevenLabsApi; + + private RetryTemplate retryTemplate = RetryUtils.DEFAULT_RETRY_TEMPLATE; + + private ElevenLabsTextToSpeechOptions defaultOptions = ElevenLabsTextToSpeechOptions.builder().build(); + + public Builder elevenLabsApi(ElevenLabsApi elevenLabsApi) { + this.elevenLabsApi = elevenLabsApi; + return this; + } + + public Builder retryTemplate(RetryTemplate retryTemplate) { + this.retryTemplate = retryTemplate; + return this; + } + + public Builder defaultOptions(ElevenLabsTextToSpeechOptions defaultOptions) { + this.defaultOptions = defaultOptions; + return this; + } + + public ElevenLabsTextToSpeechModel build() { + Assert.notNull(elevenLabsApi, "ElevenLabsApi must not be null"); + Assert.notNull(defaultOptions, "ElevenLabsSpeechOptions must not be null"); + return new ElevenLabsTextToSpeechModel(elevenLabsApi, defaultOptions, retryTemplate); + } + + } + +} diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechOptions.java b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechOptions.java new file mode 100644 index 00000000000..20adb6af3b0 --- /dev/null +++ b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechOptions.java @@ -0,0 +1,410 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.elevenlabs; + +import java.util.List; +import java.util.Objects; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; + +import org.springframework.ai.elevenlabs.api.ElevenLabsApi; +import org.springframework.ai.audio.tts.TextToSpeechOptions; + +/** + * Options for ElevenLabs text-to-speech. + * + * @author Alexandros Pappas + */ +@JsonInclude(JsonInclude.Include.NON_NULL) +public class ElevenLabsTextToSpeechOptions implements TextToSpeechOptions { + + @JsonProperty("model_id") + private String modelId; + + // Path Params + @JsonProperty("voice_id") + private String voiceId; + + // End Path Params + + // Query Params + @JsonProperty("enable_logging") + private Boolean enableLogging; + + @JsonProperty("output_format") + private String outputFormat; + + // End Query Params + + @JsonProperty("voice_settings") + private ElevenLabsApi.SpeechRequest.VoiceSettings voiceSettings; + + @JsonProperty("language_code") + private String languageCode; + + @JsonProperty("pronunciation_dictionary_locators") + private List pronunciationDictionaryLocators; + + @JsonProperty("seed") + private Integer seed; + + @JsonProperty("previous_text") + private String previousText; + + @JsonProperty("next_text") + private String nextText; + + @JsonProperty("previous_request_ids") + private List previousRequestIds; + + @JsonProperty("next_request_ids") + private List nextRequestIds; + + @JsonProperty("apply_text_normalization") + 
private ElevenLabsApi.SpeechRequest.TextNormalizationMode applyTextNormalization; + + @JsonProperty("apply_language_text_normalization") + private Boolean applyLanguageTextNormalization; + + public static Builder builder() { + return new ElevenLabsTextToSpeechOptions.Builder(); + } + + @Override + @JsonIgnore + public String getModel() { + return getModelId(); + } + + @JsonIgnore + public void setModel(String model) { + setModelId(model); + } + + public String getModelId() { + return this.modelId; + } + + public void setModelId(String modelId) { + this.modelId = modelId; + } + + @Override + @JsonIgnore + public String getVoice() { + return getVoiceId(); + } + + @JsonIgnore + public void setVoice(String voice) { + setVoiceId(voice); + } + + public String getVoiceId() { + return this.voiceId; + } + + public void setVoiceId(String voiceId) { + this.voiceId = voiceId; + } + + public Boolean getEnableLogging() { + return this.enableLogging; + } + + public void setEnableLogging(Boolean enableLogging) { + this.enableLogging = enableLogging; + } + + @Override + @JsonIgnore + public String getFormat() { + return getOutputFormat(); + } + + @JsonIgnore + public void setFormat(String format) { + setOutputFormat(format); + } + + public String getOutputFormat() { + return this.outputFormat; + } + + public void setOutputFormat(String outputFormat) { + this.outputFormat = outputFormat; + } + + @Override + @JsonIgnore + public Double getSpeed() { + if (this.getVoiceSettings() != null) { + return this.getVoiceSettings().speed(); + } + return null; + } + + @JsonIgnore + public void setSpeed(Double speed) { + if (speed != null) { + if (this.getVoiceSettings() == null) { + this.setVoiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(null, null, null, null, speed)); + } + else { + this.setVoiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(this.getVoiceSettings().stability(), + this.getVoiceSettings().similarityBoost(), this.getVoiceSettings().style(), + 
this.getVoiceSettings().useSpeakerBoost(), speed)); + } + } + else { + if (this.getVoiceSettings() != null) { + this.setVoiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(this.getVoiceSettings().stability(), + this.getVoiceSettings().similarityBoost(), this.getVoiceSettings().style(), + this.getVoiceSettings().useSpeakerBoost(), null)); + } + } + } + + public ElevenLabsApi.SpeechRequest.VoiceSettings getVoiceSettings() { + return this.voiceSettings; + } + + public void setVoiceSettings(ElevenLabsApi.SpeechRequest.VoiceSettings voiceSettings) { + this.voiceSettings = voiceSettings; + } + + public String getLanguageCode() { + return this.languageCode; + } + + public void setLanguageCode(String languageCode) { + this.languageCode = languageCode; + } + + public List getPronunciationDictionaryLocators() { + return this.pronunciationDictionaryLocators; + } + + public void setPronunciationDictionaryLocators( + List pronunciationDictionaryLocators) { + this.pronunciationDictionaryLocators = pronunciationDictionaryLocators; + } + + public Integer getSeed() { + return this.seed; + } + + public void setSeed(Integer seed) { + this.seed = seed; + } + + public String getPreviousText() { + return this.previousText; + } + + public void setPreviousText(String previousText) { + this.previousText = previousText; + } + + public String getNextText() { + return this.nextText; + } + + public void setNextText(String nextText) { + this.nextText = nextText; + } + + public List getPreviousRequestIds() { + return this.previousRequestIds; + } + + public void setPreviousRequestIds(List previousRequestIds) { + this.previousRequestIds = previousRequestIds; + } + + public List getNextRequestIds() { + return this.nextRequestIds; + } + + public void setNextRequestIds(List nextRequestIds) { + this.nextRequestIds = nextRequestIds; + } + + public ElevenLabsApi.SpeechRequest.TextNormalizationMode getApplyTextNormalization() { + return this.applyTextNormalization; + } + + public void 
setApplyTextNormalization(ElevenLabsApi.SpeechRequest.TextNormalizationMode applyTextNormalization) { + this.applyTextNormalization = applyTextNormalization; + } + + public Boolean getApplyLanguageTextNormalization() { + return this.applyLanguageTextNormalization; + } + + public void setApplyLanguageTextNormalization(Boolean applyLanguageTextNormalization) { + this.applyLanguageTextNormalization = applyLanguageTextNormalization; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof ElevenLabsTextToSpeechOptions that)) + return false; + return Objects.equals(modelId, that.modelId) && Objects.equals(voiceId, that.voiceId) + && Objects.equals(outputFormat, that.outputFormat) && Objects.equals(voiceSettings, that.voiceSettings) + && Objects.equals(languageCode, that.languageCode) + && Objects.equals(pronunciationDictionaryLocators, that.pronunciationDictionaryLocators) + && Objects.equals(seed, that.seed) && Objects.equals(previousText, that.previousText) + && Objects.equals(nextText, that.nextText) + && Objects.equals(previousRequestIds, that.previousRequestIds) + && Objects.equals(applyTextNormalization, that.applyTextNormalization) + && Objects.equals(nextRequestIds, that.nextRequestIds) + && Objects.equals(applyLanguageTextNormalization, that.applyLanguageTextNormalization); + } + + @Override + public int hashCode() { + return Objects.hash(modelId, voiceId, outputFormat, voiceSettings, languageCode, + pronunciationDictionaryLocators, seed, previousText, nextText, previousRequestIds, nextRequestIds, + applyTextNormalization, applyLanguageTextNormalization); + } + + @Override + public String toString() { + return "ElevenLabsSpeechOptions{" + "modelId='" + modelId + '\'' + ", voiceId='" + voiceId + '\'' + + ", outputFormat='" + outputFormat + '\'' + ", voiceSettings=" + voiceSettings + ", languageCode='" + + languageCode + '\'' + ", pronunciationDictionaryLocators=" + pronunciationDictionaryLocators + + ", seed=" + 
seed + ", previousText='" + previousText + '\'' + ", nextText='" + nextText + '\'' + + ", previousRequestIds=" + previousRequestIds + ", nextRequestIds=" + nextRequestIds + + ", applyTextNormalization=" + applyTextNormalization + ", applyLanguageTextNormalization=" + + applyLanguageTextNormalization + '}'; + } + + @Override + @SuppressWarnings("unchecked") + public ElevenLabsTextToSpeechOptions copy() { + return ElevenLabsTextToSpeechOptions.builder() + .modelId(this.getModelId()) + .voice(this.getVoice()) + .voiceId(this.getVoiceId()) + .format(this.getFormat()) + .outputFormat(this.getOutputFormat()) + .voiceSettings(this.getVoiceSettings()) + .languageCode(this.getLanguageCode()) + .pronunciationDictionaryLocators(this.getPronunciationDictionaryLocators()) + .seed(this.getSeed()) + .previousText(this.getPreviousText()) + .nextText(this.getNextText()) + .previousRequestIds(this.getPreviousRequestIds()) + .nextRequestIds(this.getNextRequestIds()) + .applyTextNormalization(this.getApplyTextNormalization()) + .applyLanguageTextNormalization(this.getApplyLanguageTextNormalization()) + .build(); + } + + public static class Builder { + + private final ElevenLabsTextToSpeechOptions options = new ElevenLabsTextToSpeechOptions(); + + public Builder modelId(String modelId) { + options.setModelId(modelId); + return this; + } + + public Builder voice(String voice) { + options.setVoice(voice); + return this; + } + + public Builder voiceId(String voiceId) { + options.setVoiceId(voiceId); + return this; + } + + public Builder format(String format) { + options.setFormat(format); + return this; + } + + public Builder outputFormat(String outputFormat) { + options.setOutputFormat(outputFormat); + return this; + } + + public Builder voiceSettings(ElevenLabsApi.SpeechRequest.VoiceSettings voiceSettings) { + options.setVoiceSettings(voiceSettings); + return this; + } + + public Builder languageCode(String languageCode) { + options.setLanguageCode(languageCode); + return this; + } + + 
public Builder pronunciationDictionaryLocators( + List pronunciationDictionaryLocators) { + options.setPronunciationDictionaryLocators(pronunciationDictionaryLocators); + return this; + } + + public Builder seed(Integer seed) { + options.setSeed(seed); + return this; + } + + public Builder previousText(String previousText) { + options.setPreviousText(previousText); + return this; + } + + public Builder nextText(String nextText) { + options.setNextText(nextText); + return this; + } + + public Builder previousRequestIds(List previousRequestIds) { + options.setPreviousRequestIds(previousRequestIds); + return this; + } + + public Builder nextRequestIds(List nextRequestIds) { + options.setNextRequestIds(nextRequestIds); + return this; + } + + public Builder applyTextNormalization( + ElevenLabsApi.SpeechRequest.TextNormalizationMode applyTextNormalization) { + options.setApplyTextNormalization(applyTextNormalization); + return this; + } + + public Builder applyLanguageTextNormalization(Boolean applyLanguageTextNormalization) { + options.setApplyLanguageTextNormalization(applyLanguageTextNormalization); + return this; + } + + public ElevenLabsTextToSpeechOptions build() { + return this.options; + } + + } + +} diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/aot/ElevenLabsRuntimeHints.java b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/aot/ElevenLabsRuntimeHints.java new file mode 100644 index 00000000000..143969c270a --- /dev/null +++ b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/aot/ElevenLabsRuntimeHints.java @@ -0,0 +1,44 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.elevenlabs.aot; + +import static org.springframework.ai.aot.AiRuntimeHints.findJsonAnnotatedClassesInPackage; + +import org.springframework.ai.elevenlabs.api.ElevenLabsApi; +import org.springframework.aot.hint.MemberCategory; +import org.springframework.aot.hint.RuntimeHints; +import org.springframework.aot.hint.RuntimeHintsRegistrar; +import org.springframework.lang.NonNull; +import org.springframework.lang.Nullable; + +/** + * The ElevenLabsRuntimeHints class is responsible for registering runtime hints for + * ElevenLabs API classes. + * + * @author Alexandros Pappas + */ +public class ElevenLabsRuntimeHints implements RuntimeHintsRegistrar { + + @Override + public void registerHints(@NonNull RuntimeHints hints, @Nullable ClassLoader classLoader) { + var mcs = MemberCategory.values(); + for (var tr : findJsonAnnotatedClassesInPackage(ElevenLabsApi.class)) { + hints.reflection().registerType(tr, mcs); + } + } + +} diff --git a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/api/ElevenLabsApi.java b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/api/ElevenLabsApi.java new file mode 100644 index 00000000000..a7fd65d621d --- /dev/null +++ b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/api/ElevenLabsApi.java @@ -0,0 +1,391 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.elevenlabs.api; + +import java.util.List; +import java.util.function.Consumer; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonValue; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; + +import org.springframework.ai.model.ApiKey; +import org.springframework.ai.model.NoopApiKey; +import org.springframework.ai.model.SimpleApiKey; +import org.springframework.ai.retry.RetryUtils; +import org.springframework.http.HttpHeaders; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.util.Assert; +import org.springframework.util.LinkedMultiValueMap; +import org.springframework.util.MultiValueMap; +import org.springframework.web.client.ResponseErrorHandler; +import org.springframework.web.client.RestClient; +import org.springframework.web.reactive.function.client.WebClient; +import org.springframework.web.util.UriComponentsBuilder; + +/** + * Client for the ElevenLabs Text-to-Speech API. + * + * @author Alexandros Pappas + */ +public class ElevenLabsApi { + + public static final String DEFAULT_BASE_URL = "https://api.elevenlabs.io"; + + private final RestClient restClient; + + private final WebClient webClient; + + /** + * Create a new ElevenLabs API client. + * @param baseUrl The base URL for the ElevenLabs API. + * @param apiKey Your ElevenLabs API key. + * @param headers the http headers to use. 
+ * @param restClientBuilder A builder for the Spring RestClient. + * @param webClientBuilder A builder for the Spring WebClient. + * @param responseErrorHandler A custom error handler for API responses. + */ + public ElevenLabsApi(String baseUrl, ApiKey apiKey, MultiValueMap headers, + RestClient.Builder restClientBuilder, WebClient.Builder webClientBuilder, + ResponseErrorHandler responseErrorHandler) { + + Consumer jsonContentHeaders = h -> { + if (!(apiKey instanceof NoopApiKey)) { + h.set("xi-api-key", apiKey.getValue()); + } + h.addAll(headers); + h.setContentType(MediaType.APPLICATION_JSON); + }; + + this.restClient = restClientBuilder.baseUrl(baseUrl) + .defaultHeaders(jsonContentHeaders) + .defaultStatusHandler(responseErrorHandler) + .build(); + + this.webClient = webClientBuilder.baseUrl(baseUrl).defaultHeaders(jsonContentHeaders).build(); + } + + public static Builder builder() { + return new Builder(); + } + + /** + * Convert text to speech using the specified voice and parameters. + * @param requestBody The request body containing text, model, and voice settings. + * @param voiceId The ID of the voice to use. Must not be null. + * @param queryParameters Additional query parameters for the API call. + * @return A ResponseEntity containing the generated audio as a byte array. + */ + public ResponseEntity textToSpeech(SpeechRequest requestBody, String voiceId, + MultiValueMap queryParameters) { + + Assert.notNull(voiceId, "voiceId must be provided. It cannot be null."); + Assert.notNull(requestBody, "requestBody can not be null."); + Assert.hasText(requestBody.text(), "requestBody.text must be provided. 
It cannot be null or empty."); + + UriComponentsBuilder uriBuilder = UriComponentsBuilder.fromPath("/v1/text-to-speech/{voice_id}") + .queryParams(queryParameters); + + return this.restClient.post() + .uri(uriBuilder.buildAndExpand(voiceId).toUriString()) + .body(requestBody) + .retrieve() + .toEntity(byte[].class); + } + + /** + * Convert text to speech using the specified voice and parameters, streaming the + * results. + * @param requestBody The request body containing text, model, and voice settings. + * @param voiceId The ID of the voice to use. Must not be null. + * @param queryParameters Additional query parameters for the API call. + * @return A Flux of ResponseEntity containing the generated audio chunks as byte + * arrays. + */ + public Flux> textToSpeechStream(SpeechRequest requestBody, String voiceId, + MultiValueMap queryParameters) { + Assert.notNull(voiceId, "voiceId must be provided for streaming. It cannot be null."); + Assert.notNull(requestBody, "requestBody can not be null."); + Assert.hasText(requestBody.text(), "requestBody.text must be provided. It cannot be null or empty."); + + UriComponentsBuilder uriBuilder = UriComponentsBuilder.fromPath("/v1/text-to-speech/{voice_id}/stream") + .queryParams(queryParameters); + + return this.webClient.post() + .uri(uriBuilder.buildAndExpand(voiceId).toUriString()) + .body(Mono.just(requestBody), SpeechRequest.class) + .accept(MediaType.APPLICATION_OCTET_STREAM) + .exchangeToFlux(clientResponse -> { + HttpHeaders headers = clientResponse.headers().asHttpHeaders(); + return clientResponse.bodyToFlux(byte[].class) + .map(bytes -> ResponseEntity.ok().headers(headers).body(bytes)); + }); + } + + /** + * The output format of the generated audio. 
+ */ + public enum OutputFormat { + + MP3_22050_32("mp3_22050_32"), MP3_44100_32("mp3_44100_32"), MP3_44100_64("mp3_44100_64"), + MP3_44100_96("mp3_44100_96"), MP3_44100_128("mp3_44100_128"), MP3_44100_192("mp3_44100_192"), + PCM_8000("pcm_8000"), PCM_16000("pcm_16000"), PCM_22050("pcm_22050"), PCM_24000("pcm_24000"), + PCM_44100("pcm_44100"), PCM_48000("pcm_48000"), ULAW_8000("ulaw_8000"), ALAW_8000("alaw_8000"), + OPUS_48000_32("opus_48000_32"), OPUS_48000_64("opus_48000_64"), OPUS_48000_96("opus_48000_96"), + OPUS_48000_128("opus_48000_128"), OPUS_48000_192("opus_48000_192"); + + private final String value; + + OutputFormat(String value) { + this.value = value; + } + + public String getValue() { + return this.value; + } + + } + + /** + * Represents a request to the ElevenLabs Text-to-Speech API. + */ + @JsonInclude(JsonInclude.Include.NON_NULL) + public record SpeechRequest(@JsonProperty("text") String text, @JsonProperty("model_id") String modelId, + @JsonProperty("language_code") String languageCode, + @JsonProperty("voice_settings") VoiceSettings voiceSettings, + @JsonProperty("pronunciation_dictionary_locators") List pronunciationDictionaryLocators, + @JsonProperty("seed") Integer seed, @JsonProperty("previous_text") String previousText, + @JsonProperty("next_text") String nextText, + @JsonProperty("previous_request_ids") List previousRequestIds, + @JsonProperty("next_request_ids") List nextRequestIds, + @JsonProperty("apply_text_normalization") TextNormalizationMode applyTextNormalization, + @JsonProperty("apply_language_text_normalization") Boolean applyLanguageTextNormalization) { + + public static Builder builder() { + return new Builder(); + } + + /** + * Text normalization mode. 
+ */ + public enum TextNormalizationMode { + + @JsonProperty("auto") + AUTO("auto"), @JsonProperty("on") + ON("on"), @JsonProperty("off") + OFF("off"); + + public final String value; + + TextNormalizationMode(String value) { + this.value = value; + } + + @JsonValue + public String getValue() { + return this.value; + } + + } + + /** + * Voice settings to override defaults for the given voice. + */ + @JsonInclude(JsonInclude.Include.NON_NULL) + public record VoiceSettings(@JsonProperty("stability") Double stability, + @JsonProperty("similarity_boost") Double similarityBoost, @JsonProperty("style") Double style, + @JsonProperty("use_speaker_boost") Boolean useSpeakerBoost, @JsonProperty("speed") Double speed) { + } + + /** + * Locator for a pronunciation dictionary. + */ + @JsonInclude(JsonInclude.Include.NON_NULL) + public record PronunciationDictionaryLocator( + @JsonProperty("pronunciation_dictionary_id") String pronunciationDictionaryId, + @JsonProperty("version_id") String versionId) { + } + + public static class Builder { + + private String text; + + private String modelId; + + private String languageCode; + + private VoiceSettings voiceSettings; + + private List pronunciationDictionaryLocators; + + private Integer seed; + + private String previousText; + + private String nextText; + + private List previousRequestIds; + + private List nextRequestIds; + + private TextNormalizationMode applyTextNormalization; + + private Boolean applyLanguageTextNormalization = false; + + public Builder text(String text) { + this.text = text; + return this; + } + + public Builder modelId(String modelId) { + this.modelId = modelId; + return this; + } + + public Builder languageCode(String languageCode) { + this.languageCode = languageCode; + return this; + } + + public Builder voiceSettings(VoiceSettings voiceSettings) { + this.voiceSettings = voiceSettings; + return this; + } + + public Builder pronunciationDictionaryLocators( + List pronunciationDictionaryLocators) { + 
this.pronunciationDictionaryLocators = pronunciationDictionaryLocators; + return this; + } + + public Builder seed(Integer seed) { + this.seed = seed; + return this; + } + + public Builder previousText(String previousText) { + this.previousText = previousText; + return this; + } + + public Builder nextText(String nextText) { + this.nextText = nextText; + return this; + } + + public Builder previousRequestIds(List previousRequestIds) { + this.previousRequestIds = previousRequestIds; + return this; + } + + public Builder nextRequestIds(List nextRequestIds) { + this.nextRequestIds = nextRequestIds; + return this; + } + + public Builder applyTextNormalization(TextNormalizationMode applyTextNormalization) { + this.applyTextNormalization = applyTextNormalization; + return this; + } + + public Builder applyLanguageTextNormalization(Boolean applyLanguageTextNormalization) { + this.applyLanguageTextNormalization = applyLanguageTextNormalization; + return this; + } + + public SpeechRequest build() { + Assert.hasText(text, "text must not be empty"); + return new SpeechRequest(text, modelId, languageCode, voiceSettings, pronunciationDictionaryLocators, + seed, previousText, nextText, previousRequestIds, nextRequestIds, applyTextNormalization, + applyLanguageTextNormalization); + } + + } + + } + + /** + * Builder to construct {@link ElevenLabsApi} instance. 
+ */ + public static class Builder { + + private String baseUrl = DEFAULT_BASE_URL; + + private ApiKey apiKey; + + private MultiValueMap headers = new LinkedMultiValueMap<>(); + + private RestClient.Builder restClientBuilder = RestClient.builder(); + + private WebClient.Builder webClientBuilder = WebClient.builder(); + + private ResponseErrorHandler responseErrorHandler = RetryUtils.DEFAULT_RESPONSE_ERROR_HANDLER; + + public Builder baseUrl(String baseUrl) { + Assert.hasText(baseUrl, "baseUrl cannot be null or empty"); + this.baseUrl = baseUrl; + return this; + } + + public Builder apiKey(ApiKey apiKey) { + Assert.notNull(apiKey, "apiKey cannot be null"); + this.apiKey = apiKey; + return this; + } + + public Builder apiKey(String simpleApiKey) { + Assert.notNull(simpleApiKey, "simpleApiKey cannot be null"); + this.apiKey = new SimpleApiKey(simpleApiKey); + return this; + } + + public Builder headers(MultiValueMap headers) { + Assert.notNull(headers, "headers cannot be null"); + this.headers = headers; + return this; + } + + public Builder restClientBuilder(RestClient.Builder restClientBuilder) { + Assert.notNull(restClientBuilder, "restClientBuilder cannot be null"); + this.restClientBuilder = restClientBuilder; + return this; + } + + public Builder webClientBuilder(WebClient.Builder webClientBuilder) { + Assert.notNull(webClientBuilder, "webClientBuilder cannot be null"); + this.webClientBuilder = webClientBuilder; + return this; + } + + public Builder responseErrorHandler(ResponseErrorHandler responseErrorHandler) { + Assert.notNull(responseErrorHandler, "responseErrorHandler cannot be null"); + this.responseErrorHandler = responseErrorHandler; + return this; + } + + public ElevenLabsApi build() { + Assert.notNull(this.apiKey, "apiKey must be set"); + return new ElevenLabsApi(this.baseUrl, this.apiKey, this.headers, this.restClientBuilder, + this.webClientBuilder, this.responseErrorHandler); + } + + } + +} diff --git 
a/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/api/ElevenLabsVoicesApi.java b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/api/ElevenLabsVoicesApi.java new file mode 100644 index 00000000000..51df40c6d4f --- /dev/null +++ b/models/spring-ai-elevenlabs/src/main/java/org/springframework/ai/elevenlabs/api/ElevenLabsVoicesApi.java @@ -0,0 +1,452 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.elevenlabs.api; + +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonValue; + +import org.springframework.ai.model.ApiKey; +import org.springframework.ai.model.NoopApiKey; +import org.springframework.ai.model.SimpleApiKey; +import org.springframework.ai.retry.RetryUtils; +import org.springframework.http.HttpHeaders; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.util.Assert; +import org.springframework.util.LinkedMultiValueMap; +import org.springframework.util.MultiValueMap; +import org.springframework.web.client.ResponseErrorHandler; +import org.springframework.web.client.RestClient; + +/** + * Client for the ElevenLabs Voices API. 
+ * + * @author Alexandros Pappas + */ +public class ElevenLabsVoicesApi { + + private static final String DEFAULT_BASE_URL = "https://api.elevenlabs.io"; + + private final RestClient restClient; + + /** + * Create a new ElevenLabs Voices API client. + * @param baseUrl The base URL for the ElevenLabs API. + * @param apiKey Your ElevenLabs API key. + * @param headers the http headers to use. + * @param restClientBuilder A builder for the Spring RestClient. + * @param responseErrorHandler A custom error handler for API responses. + */ + public ElevenLabsVoicesApi(String baseUrl, ApiKey apiKey, MultiValueMap headers, + RestClient.Builder restClientBuilder, ResponseErrorHandler responseErrorHandler) { + Consumer jsonContentHeaders = h -> { + if (!(apiKey instanceof NoopApiKey)) { + h.set("xi-api-key", apiKey.getValue()); + } + h.addAll(headers); + h.setContentType(MediaType.APPLICATION_JSON); + }; + + this.restClient = restClientBuilder.baseUrl(baseUrl) + .defaultHeaders(jsonContentHeaders) + .defaultStatusHandler(responseErrorHandler) + .build(); + + } + + public static Builder builder() { + return new Builder(); + } + + /** + * Retrieves a list of all available voices from the ElevenLabs API. + * @return A ResponseEntity containing a Voices object, which contains the list of + * voices. + */ + public ResponseEntity getVoices() { + return this.restClient.get().uri("/v1/voices").retrieve().toEntity(Voices.class); + } + + /** + * Gets the default settings for voices. "similarity_boost" corresponds to "Clarity + + * Similarity Enhancement" in the web app and "stability" corresponds to the "Stability" + * slider in the web app. + * @return {@link ResponseEntity} containing the {@link VoiceSettings} record. + */ + public ResponseEntity getDefaultVoiceSettings() { + return this.restClient.get().uri("/v1/voices/settings/default").retrieve().toEntity(VoiceSettings.class); + } + + /** + * Returns the settings for a specific voice. 
"similarity_boost" corresponds to + * "Clarity + Similarity Enhancement" in the web app and "stability" corresponds to + * the "Stability" slider in the web app. + * @param voiceId The ID of the voice to get settings for. Required. + * @return {@link ResponseEntity} containing the {@link VoiceSettings} record. + */ + public ResponseEntity getVoiceSettings(String voiceId) { + Assert.hasText(voiceId, "voiceId cannot be null or empty"); + return this.restClient.get() + .uri("/v1/voices/{voiceId}/settings", voiceId) + .retrieve() + .toEntity(VoiceSettings.class); + } + + /** + * Returns metadata about a specific voice. + * @param voiceId ID of the voice to be used. You can use the Get voices endpoint list + * all the available voices. Required. + * @return {@link ResponseEntity} containing the {@link Voice} record. + */ + public ResponseEntity getVoice(String voiceId) { + Assert.hasText(voiceId, "voiceId cannot be null or empty"); + return this.restClient.get().uri("/v1/voices/{voiceId}", voiceId).retrieve().toEntity(Voice.class); + } + + public enum CategoryEnum { + + @JsonProperty("generated") + GENERATED("generated"), @JsonProperty("cloned") + CLONED("cloned"), @JsonProperty("premade") + PREMADE("premade"), @JsonProperty("professional") + PROFESSIONAL("professional"), @JsonProperty("famous") + FAMOUS("famous"), @JsonProperty("high_quality") + HIGH_QUALITY("high_quality"); + + public final String value; + + CategoryEnum(String value) { + this.value = value; + } + + @JsonValue + public String getValue() { + return this.value; + } + + } + + public enum SafetyControlEnum { + + @JsonProperty("NONE") + NONE("NONE"), @JsonProperty("BAN") + BAN("BAN"), @JsonProperty("CAPTCHA") + CAPTCHA("CAPTCHA"), @JsonProperty("CAPTCHA_AND_MODERATION") + CAPTCHA_AND_MODERATION("CAPTCHA_AND_MODERATION"), @JsonProperty("ENTERPRISE_BAN") + ENTERPRISE_BAN("ENTERPRISE_BAN"), @JsonProperty("ENTERPRISE_CAPTCHA") + ENTERPRISE_CAPTCHA("ENTERPRISE_CAPTCHA"); + + public final String value; + + 
SafetyControlEnum(String value) { + this.value = value; + } + + @JsonValue + public String getValue() { + return this.value; + } + + } + + /** + * Represents the response from the /v1/voices endpoint. + * + * @param voices A list of Voice objects representing the available voices. + */ + @JsonInclude(JsonInclude.Include.NON_NULL) + public record Voices(@JsonProperty("voices") List voices) { + } + + /** + * Represents a single voice from the ElevenLabs API. + */ + @JsonInclude(JsonInclude.Include.NON_NULL) + public record Voice(@JsonProperty("voice_id") String voiceId, @JsonProperty("name") String name, + @JsonProperty("samples") List samples, @JsonProperty("category") CategoryEnum category, + @JsonProperty("fine_tuning") FineTuning fineTuning, @JsonProperty("labels") Map labels, + @JsonProperty("description") String description, @JsonProperty("preview_url") String previewUrl, + @JsonProperty("available_for_tiers") List availableForTiers, + @JsonProperty("settings") VoiceSettings settings, @JsonProperty("sharing") VoiceSharing sharing, + @JsonProperty("high_quality_base_model_ids") List highQualityBaseModelIds, + @JsonProperty("verified_languages") List verifiedLanguages, + @JsonProperty("safety_control") SafetyControlEnum safetyControl, + @JsonProperty("voice_verification") VoiceVerification voiceVerification, + @JsonProperty("permission_on_resource") String permissionOnResource, + @JsonProperty("is_owner") Boolean isOwner, @JsonProperty("is_legacy") Boolean isLegacy, + @JsonProperty("is_mixed") Boolean isMixed, @JsonProperty("created_at_unix") Integer createdAtUnix) { + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public record Sample(@JsonProperty("sample_id") String sampleId, @JsonProperty("file_name") String fileName, + @JsonProperty("mime_type") String mimeType, @JsonProperty("size_bytes") Integer sizeBytes, + @JsonProperty("hash") String hash) { + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public record 
FineTuning(@JsonProperty("is_allowed_to_fine_tune") Boolean isAllowedToFineTune, + @JsonProperty("state") Map state, + @JsonProperty("verification_failures") List verificationFailures, + @JsonProperty("verification_attempts_count") Integer verificationAttemptsCount, + @JsonProperty("manual_verification_requested") Boolean manualVerificationRequested, + @JsonProperty("language") String language, @JsonProperty("progress") Map progress, + @JsonProperty("message") Map message, + @JsonProperty("dataset_duration_seconds") Double datasetDurationSeconds, + @JsonProperty("verification_attempts") List verificationAttempts, + @JsonProperty("slice_ids") List sliceIds, + @JsonProperty("manual_verification") ManualVerification manualVerification, + @JsonProperty("max_verification_attempts") Integer maxVerificationAttempts, + @JsonProperty("next_max_verification_attempts_reset_unix_ms") Long nextMaxVerificationAttemptsResetUnixMs) { + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public record VoiceVerification(@JsonProperty("requires_verification") Boolean requiresVerification, + @JsonProperty("is_verified") Boolean isVerified, + @JsonProperty("verification_failures") List verificationFailures, + @JsonProperty("verification_attempts_count") Integer verificationAttemptsCount, + @JsonProperty("language") String language, + @JsonProperty("verification_attempts") List verificationAttempts) { + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public record VerificationAttempt(@JsonProperty("text") String text, @JsonProperty("date_unix") Integer dateUnix, + @JsonProperty("accepted") Boolean accepted, @JsonProperty("similarity") Double similarity, + @JsonProperty("levenshtein_distance") Double levenshteinDistance, + @JsonProperty("recording") Recording recording) { + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public record Recording(@JsonProperty("recording_id") String recordingId, + @JsonProperty("mime_type") String mimeType, @JsonProperty("size_bytes") Integer 
sizeBytes, + @JsonProperty("upload_date_unix") Integer uploadDateUnix, + @JsonProperty("transcription") String transcription) { + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public record ManualVerification(@JsonProperty("extra_text") String extraText, + @JsonProperty("request_time_unix") Integer requestTimeUnix, + @JsonProperty("files") List files) { + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public record ManualVerificationFile(@JsonProperty("file_id") String fileId, + @JsonProperty("file_name") String fileName, @JsonProperty("mime_type") String mimeType, + @JsonProperty("size_bytes") Integer sizeBytes, @JsonProperty("upload_date_unix") Integer uploadDateUnix) { + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public record VoiceSettings(@JsonProperty("stability") Double stability, + @JsonProperty("similarity_boost") Double similarityBoost, @JsonProperty("style") Double style, + @JsonProperty("use_speaker_boost") Boolean useSpeakerBoost, @JsonProperty("speed") Double speed) { + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public record VoiceSharing(@JsonProperty("status") StatusEnum status, + @JsonProperty("history_item_sample_id") String historyItemSampleId, + @JsonProperty("date_unix") Integer dateUnix, + @JsonProperty("whitelisted_emails") List whitelistedEmails, + @JsonProperty("public_owner_id") String publicOwnerId, + @JsonProperty("original_voice_id") String originalVoiceId, + @JsonProperty("financial_rewards_enabled") Boolean financialRewardsEnabled, + @JsonProperty("free_users_allowed") Boolean freeUsersAllowed, + @JsonProperty("live_moderation_enabled") Boolean liveModerationEnabled, @JsonProperty("rate") Double rate, + @JsonProperty("notice_period") Integer noticePeriod, @JsonProperty("disable_at_unix") Integer disableAtUnix, + @JsonProperty("voice_mixing_allowed") Boolean voiceMixingAllowed, + @JsonProperty("featured") Boolean featured, @JsonProperty("category") CategoryEnum category, + @JsonProperty("reader_app_enabled") 
Boolean readerAppEnabled, @JsonProperty("image_url") String imageUrl, + @JsonProperty("ban_reason") String banReason, @JsonProperty("liked_by_count") Integer likedByCount, + @JsonProperty("cloned_by_count") Integer clonedByCount, @JsonProperty("name") String name, + @JsonProperty("description") String description, @JsonProperty("labels") Map labels, + @JsonProperty("review_status") ReviewStatusEnum reviewStatus, + @JsonProperty("review_message") String reviewMessage, + @JsonProperty("enabled_in_library") Boolean enabledInLibrary, + @JsonProperty("instagram_username") String instagramUsername, + @JsonProperty("twitter_username") String twitterUsername, + @JsonProperty("youtube_username") String youtubeUsername, + @JsonProperty("tiktok_username") String tiktokUsername, + @JsonProperty("moderation_check") VoiceSharingModerationCheck moderationCheck, + @JsonProperty("reader_restricted_on") List readerRestrictedOn) { + public enum StatusEnum { + + @JsonProperty("enabled") + ENABLED("enabled"), @JsonProperty("disabled") + DISABLED("disabled"), @JsonProperty("copied") + COPIED("copied"), @JsonProperty("copied_disabled") + COPIED_DISABLED("copied_disabled"); + + public final String value; + + StatusEnum(String value) { + this.value = value; + } + + @JsonValue + public String getValue() { + return this.value; + } + + } + + public enum CategoryEnum { + + @JsonProperty("generated") + GENERATED("generated"), @JsonProperty("professional") + PROFESSIONAL("professional"), @JsonProperty("high_quality") + HIGH_QUALITY("high_quality"), @JsonProperty("famous") + FAMOUS("famous"); + + public final String value; + + CategoryEnum(String value) { + this.value = value; + } + + @JsonValue + public String getValue() { + return this.value; + } + + } + + public enum ReviewStatusEnum { + + @JsonProperty("not_requested") + NOT_REQUESTED("not_requested"), @JsonProperty("pending") + PENDING("pending"), @JsonProperty("declined") + DECLINED("declined"), @JsonProperty("allowed") + 
ALLOWED("allowed"), @JsonProperty("allowed_with_changes") + ALLOWED_WITH_CHANGES("allowed_with_changes"); + + public final String value; + + ReviewStatusEnum(String value) { + this.value = value; + } + + @JsonValue + public String getValue() { + return this.value; + } + + } + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public record VoiceSharingModerationCheck(@JsonProperty("date_checked_unix") Integer dateCheckedUnix, + @JsonProperty("name_value") String nameValue, @JsonProperty("name_check") Boolean nameCheck, + @JsonProperty("description_value") String descriptionValue, + @JsonProperty("description_check") Boolean descriptionCheck, + @JsonProperty("sample_ids") List sampleIds, + @JsonProperty("sample_checks") List sampleChecks, + @JsonProperty("captcha_ids") List captchaIds, + @JsonProperty("captcha_checks") List captchaChecks) { + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public record ReaderResource(@JsonProperty("resource_type") ResourceTypeEnum resourceType, + @JsonProperty("resource_id") String resourceId) { + + public enum ResourceTypeEnum { + + @JsonProperty("read") + READ("read"), @JsonProperty("collection") + COLLECTION("collection"); + + public final String value; + + ResourceTypeEnum(String value) { + this.value = value; + } + + @JsonValue + public String getValue() { + return this.value; + } + + } + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public record VerifiedVoiceLanguage(@JsonProperty("language") String language, + @JsonProperty("model_id") String modelId, @JsonProperty("accent") String accent) { + } + + /** + * Builder to construct {@link ElevenLabsVoicesApi} instance. 
+ */ + public static class Builder { + + private String baseUrl = DEFAULT_BASE_URL; + + private ApiKey apiKey; + + private MultiValueMap headers = new LinkedMultiValueMap<>(); + + private RestClient.Builder restClientBuilder = RestClient.builder(); + + private ResponseErrorHandler responseErrorHandler = RetryUtils.DEFAULT_RESPONSE_ERROR_HANDLER; + + public Builder baseUrl(String baseUrl) { + Assert.hasText(baseUrl, "baseUrl cannot be null or empty"); + this.baseUrl = baseUrl; + return this; + } + + public Builder apiKey(ApiKey apiKey) { + Assert.notNull(apiKey, "apiKey cannot be null"); + this.apiKey = apiKey; + return this; + } + + public Builder apiKey(String simpleApiKey) { + Assert.notNull(simpleApiKey, "simpleApiKey cannot be null"); + this.apiKey = new SimpleApiKey(simpleApiKey); + return this; + } + + public Builder headers(MultiValueMap headers) { + Assert.notNull(headers, "headers cannot be null"); + this.headers = headers; + return this; + } + + public Builder restClientBuilder(RestClient.Builder restClientBuilder) { + Assert.notNull(restClientBuilder, "restClientBuilder cannot be null"); + this.restClientBuilder = restClientBuilder; + return this; + } + + public Builder responseErrorHandler(ResponseErrorHandler responseErrorHandler) { + Assert.notNull(responseErrorHandler, "responseErrorHandler cannot be null"); + this.responseErrorHandler = responseErrorHandler; + return this; + } + + public ElevenLabsVoicesApi build() { + Assert.notNull(this.apiKey, "apiKey must be set"); + return new ElevenLabsVoicesApi(this.baseUrl, this.apiKey, this.headers, this.restClientBuilder, + this.responseErrorHandler); + } + + } + +} diff --git a/models/spring-ai-elevenlabs/src/main/resources/META-INF/spring/aot.factories b/models/spring-ai-elevenlabs/src/main/resources/META-INF/spring/aot.factories new file mode 100644 index 00000000000..b2d77ead057 --- /dev/null +++ b/models/spring-ai-elevenlabs/src/main/resources/META-INF/spring/aot.factories @@ -0,0 +1,2 @@ 
+org.springframework.aot.hint.RuntimeHintsRegistrar=\ + org.springframework.ai.elevenlabs.aot.ElevenLabsRuntimeHints \ No newline at end of file diff --git a/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTestConfiguration.java b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTestConfiguration.java new file mode 100644 index 00000000000..e57b27dbfd2 --- /dev/null +++ b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTestConfiguration.java @@ -0,0 +1,58 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.elevenlabs; + +import org.springframework.ai.elevenlabs.api.ElevenLabsApi; +import org.springframework.ai.elevenlabs.api.ElevenLabsVoicesApi; +import org.springframework.ai.model.SimpleApiKey; +import org.springframework.boot.SpringBootConfiguration; +import org.springframework.context.annotation.Bean; +import org.springframework.util.StringUtils; + +/** + * Configuration class for the ElevenLabs API. 
+ * + * @author Alexandros Pappas + */ +@SpringBootConfiguration +public class ElevenLabsTestConfiguration { + + @Bean + public ElevenLabsApi elevenLabsApi() { + return ElevenLabsApi.builder().apiKey(getApiKey()).build(); + } + + @Bean + public ElevenLabsVoicesApi elevenLabsVoicesApi() { + return ElevenLabsVoicesApi.builder().apiKey(getApiKey()).build(); + } + + private SimpleApiKey getApiKey() { + String apiKey = System.getenv("ELEVEN_LABS_API_KEY"); + if (!StringUtils.hasText(apiKey)) { + throw new IllegalArgumentException( + "You must provide an API key. Put it in an environment variable under the name ELEVEN_LABS_API_KEY"); + } + return new SimpleApiKey(apiKey); + } + + @Bean + public ElevenLabsTextToSpeechModel elevenLabsSpeechModel() { + return ElevenLabsTextToSpeechModel.builder().elevenLabsApi(elevenLabsApi()).build(); + } + +} diff --git a/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModelIT.java b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModelIT.java new file mode 100644 index 00000000000..013dcc97e2f --- /dev/null +++ b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModelIT.java @@ -0,0 +1,80 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.ai.elevenlabs; + +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; +import reactor.core.publisher.Flux; + +import org.springframework.ai.audio.tts.Speech; +import org.springframework.ai.audio.tts.TextToSpeechPrompt; +import org.springframework.ai.audio.tts.TextToSpeechResponse; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; + +/** + * Integration tests for the {@link ElevenLabsTextToSpeechModel}. + * + *
<p>
+ * These tests require a valid ElevenLabs API key to be set as an environment variable + * named {@code ELEVEN_LABS_API_KEY}. + * + * @author Alexandros Pappas + */ +@SpringBootTest(classes = ElevenLabsTestConfiguration.class) +@EnabledIfEnvironmentVariable(named = "ELEVEN_LABS_API_KEY", matches = ".+") +public class ElevenLabsTextToSpeechModelIT { + + private static final String VOICE_ID = "9BWtsMINqrJLrRacOk9x"; + + @Autowired + private ElevenLabsTextToSpeechModel textToSpeechModel; + + @Test + void textToSpeechWithVoiceTest() { + ElevenLabsTextToSpeechOptions options = ElevenLabsTextToSpeechOptions.builder().voice(VOICE_ID).build(); + TextToSpeechPrompt prompt = new TextToSpeechPrompt("Hello, world!", options); + TextToSpeechResponse response = textToSpeechModel.call(prompt); + + assertThat(response).isNotNull(); + List results = response.getResults(); + assertThat(results).hasSize(1); + Speech speech = results.get(0); + assertThat(speech.getOutput()).isNotEmpty(); + } + + @Test + void textToSpeechStreamWithVoiceTest() { + ElevenLabsTextToSpeechOptions options = ElevenLabsTextToSpeechOptions.builder().voice(VOICE_ID).build(); + TextToSpeechPrompt prompt = new TextToSpeechPrompt( + "Hello, world! 
This is a test of streaming speech synthesis.", options); + Flux responseFlux = textToSpeechModel.stream(prompt); + + List responses = responseFlux.collectList().block(); + assertThat(responses).isNotNull().isNotEmpty(); + + responses.forEach(response -> { + assertThat(response).isNotNull(); + assertThat(response.getResults()).hasSize(1); + assertThat(response.getResults().get(0).getOutput()).isNotEmpty(); + }); + } + +} diff --git a/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechOptionsTests.java b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechOptionsTests.java new file mode 100644 index 00000000000..a0164c81d8c --- /dev/null +++ b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechOptionsTests.java @@ -0,0 +1,231 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.elevenlabs; + +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; +import org.junit.jupiter.api.Test; + +import org.springframework.ai.elevenlabs.api.ElevenLabsApi; + +/** + * Tests for the {@link ElevenLabsTextToSpeechOptions}. + * + *
<p>
+ * These are plain unit tests: they only build and inspect option objects in memory, + * so no ElevenLabs API key or network access is required. + * + * @author Alexandros Pappas + */ +public class ElevenLabsTextToSpeechOptionsTests { + + @Test + public void testBuilderWithAllFields() { + ElevenLabsTextToSpeechOptions options = ElevenLabsTextToSpeechOptions.builder() + .modelId("test-model") + .voice("test-voice") + .voiceId("test-voice-id") // Test both voice and voiceId + .format("mp3_44100_128") + .outputFormat("mp3_44100_128") + .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.5, 0.8, 0.9, true, 1.2)) + .languageCode("en") + .pronunciationDictionaryLocators( + List.of(new ElevenLabsApi.SpeechRequest.PronunciationDictionaryLocator("dict1", "v1"))) + .seed(12345) + .previousText("previous") + .nextText("next") + .previousRequestIds(List.of("req1", "req2")) + .nextRequestIds(List.of("req3", "req4")) + .applyTextNormalization(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON) + .applyLanguageTextNormalization(true) + .build(); + + assertThat(options.getModelId()).isEqualTo("test-model"); + assertThat(options.getVoice()).isEqualTo("test-voice-id"); + assertThat(options.getVoiceId()).isEqualTo("test-voice-id"); + assertThat(options.getFormat()).isEqualTo("mp3_44100_128"); + assertThat(options.getOutputFormat()).isEqualTo("mp3_44100_128"); + assertThat(options.getVoiceSettings()).isNotNull(); + assertThat(options.getVoiceSettings().stability()).isEqualTo(0.5); + assertThat(options.getVoiceSettings().similarityBoost()).isEqualTo(0.8); + assertThat(options.getVoiceSettings().style()).isEqualTo(0.9); + assertThat(options.getVoiceSettings().useSpeakerBoost()).isTrue(); + assertThat(options.getSpeed()).isEqualTo(1.2); // Check via getter + assertThat(options.getLanguageCode()).isEqualTo("en"); + assertThat(options.getPronunciationDictionaryLocators()).hasSize(1); + 
assertThat(options.getPronunciationDictionaryLocators().get(0).pronunciationDictionaryId()).isEqualTo("dict1"); + assertThat(options.getPronunciationDictionaryLocators().get(0).versionId()).isEqualTo("v1"); + assertThat(options.getSeed()).isEqualTo(12345); + assertThat(options.getPreviousText()).isEqualTo("previous"); + assertThat(options.getNextText()).isEqualTo("next"); + assertThat(options.getPreviousRequestIds()).containsExactly("req1", "req2"); + assertThat(options.getNextRequestIds()).containsExactly("req3", "req4"); + assertThat(options.getApplyTextNormalization()).isEqualTo(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON); + assertThat(options.getApplyLanguageTextNormalization()).isTrue(); + } + + @Test + public void testCopy() { + ElevenLabsTextToSpeechOptions original = ElevenLabsTextToSpeechOptions.builder() + .modelId("test-model") + .voice("test-voice") + .format("mp3_44100_128") + .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.5, 0.8, null, null, null)) + .build(); + + ElevenLabsTextToSpeechOptions copied = original.copy(); + + assertThat(copied).isNotSameAs(original).isEqualTo(original); + + copied = ElevenLabsTextToSpeechOptions.builder().modelId("new-model").build(); + assertThat(original.getModelId()).isEqualTo("test-model"); + assertThat(copied.getModelId()).isEqualTo("new-model"); + } + + @Test + public void testSetters() { + ElevenLabsTextToSpeechOptions options = new ElevenLabsTextToSpeechOptions(); + options.setModelId("test-model"); + options.setVoice("test-voice"); + options.setVoiceId("test-voice-id"); + options.setOutputFormat("mp3_44100_128"); + options.setFormat("mp3_44100_128"); + options.setVoiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.5, 0.8, null, null, null)); + options.setLanguageCode("en"); + options.setPronunciationDictionaryLocators( + List.of(new ElevenLabsApi.SpeechRequest.PronunciationDictionaryLocator("dict1", "v1"))); + options.setSeed(12345); + options.setPreviousText("previous"); + 
options.setNextText("next"); + options.setPreviousRequestIds(List.of("req1", "req2")); + options.setNextRequestIds(List.of("req3", "req4")); + options.setApplyTextNormalization(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON); + options.setApplyLanguageTextNormalization(true); + + assertThat(options.getModelId()).isEqualTo("test-model"); + assertThat(options.getVoice()).isEqualTo("test-voice-id"); + assertThat(options.getVoiceId()).isEqualTo("test-voice-id"); + assertThat(options.getFormat()).isEqualTo("mp3_44100_128"); + assertThat(options.getOutputFormat()).isEqualTo("mp3_44100_128"); + assertThat(options.getVoiceSettings()).isNotNull(); + assertThat(options.getVoiceSettings().stability()).isEqualTo(0.5); + assertThat(options.getVoiceSettings().similarityBoost()).isEqualTo(0.8); + assertThat(options.getLanguageCode()).isEqualTo("en"); + assertThat(options.getPronunciationDictionaryLocators()).hasSize(1); + assertThat(options.getPronunciationDictionaryLocators().get(0).pronunciationDictionaryId()).isEqualTo("dict1"); + assertThat(options.getPronunciationDictionaryLocators().get(0).versionId()).isEqualTo("v1"); + assertThat(options.getSeed()).isEqualTo(12345); + assertThat(options.getPreviousText()).isEqualTo("previous"); + assertThat(options.getNextText()).isEqualTo("next"); + assertThat(options.getPreviousRequestIds()).containsExactly("req1", "req2"); + assertThat(options.getNextRequestIds()).containsExactly("req3", "req4"); + assertThat(options.getApplyTextNormalization()).isEqualTo(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON); + assertThat(options.getApplyLanguageTextNormalization()).isTrue(); + } + + @Test + public void testDefaultValues() { + ElevenLabsTextToSpeechOptions options = new ElevenLabsTextToSpeechOptions(); + assertThat(options.getModelId()).isNull(); + assertThat(options.getVoice()).isNull(); + assertThat(options.getVoiceId()).isNull(); + assertThat(options.getFormat()).isNull(); + assertThat(options.getOutputFormat()).isNull(); + 
assertThat(options.getSpeed()).isNull(); + assertThat(options.getVoiceSettings()).isNull(); + assertThat(options.getLanguageCode()).isNull(); + assertThat(options.getPronunciationDictionaryLocators()).isNull(); + assertThat(options.getSeed()).isNull(); + assertThat(options.getPreviousText()).isNull(); + assertThat(options.getNextText()).isNull(); + assertThat(options.getPreviousRequestIds()).isNull(); + assertThat(options.getNextRequestIds()).isNull(); + assertThat(options.getApplyTextNormalization()).isNull(); + assertThat(options.getApplyLanguageTextNormalization()).isNull(); + } + + @Test + public void testSetSpeed() { + // 1. Setting speed via voiceSettings, no existing voiceSettings + ElevenLabsTextToSpeechOptions options = ElevenLabsTextToSpeechOptions.builder() + .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(null, null, null, null, 1.5)) + .build(); + assertThat(options.getSpeed()).isEqualTo(1.5); + assertThat(options.getVoiceSettings()).isNotNull(); + assertThat(options.getVoiceSettings().speed()).isEqualTo(1.5); + + // 2. Setting speed via voiceSettings, existing voiceSettings + ElevenLabsTextToSpeechOptions options2 = ElevenLabsTextToSpeechOptions.builder() + .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, null)) + .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, 2.0)) // Overwrite + .build(); + assertThat(options2.getSpeed()).isEqualTo(2.0f); + assertThat(options2.getVoiceSettings().speed()).isEqualTo(2.0f); + assertThat(options2.getVoiceSettings().stability()).isEqualTo(0.1); + + // 3. 
Setting voiceSettings with null speed, existing voiceSettings + ElevenLabsTextToSpeechOptions options3 = ElevenLabsTextToSpeechOptions.builder() + .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, 2.0)) + .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, null)) // Overwrite + .build(); + assertThat(options3.getSpeed()).isNull(); + assertThat(options3.getVoiceSettings().speed()).isNull(); + assertThat(options3.getVoiceSettings().stability()).isEqualTo(0.1); + + // 4. Setting voiceSettings to null, no existing voiceSettings (shouldn't create + // voiceSettings) + ElevenLabsTextToSpeechOptions options4 = ElevenLabsTextToSpeechOptions.builder().build(); + assertThat(options4.getSpeed()).isNull(); + assertThat(options4.getVoiceSettings()).isNull(); + + // 5. Setting voiceSettings directly, with speed. + ElevenLabsTextToSpeechOptions options5 = ElevenLabsTextToSpeechOptions.builder() + .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, 2.5)) + .build(); + assertThat(options5.getSpeed()).isEqualTo(2.5f); + assertThat(options5.getVoiceSettings().speed()).isEqualTo(2.5f); + + // 6. Setting voiceSettings directly, without speed (speed should be null). + ElevenLabsTextToSpeechOptions options6 = ElevenLabsTextToSpeechOptions.builder() + .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, null)) + .build(); + assertThat(options6.getSpeed()).isNull(); + assertThat(options6.getVoiceSettings().speed()).isNull(); + + // 7. Setting voiceSettings to null, after previously setting it. + ElevenLabsTextToSpeechOptions options7 = ElevenLabsTextToSpeechOptions.builder() + .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, 1.5)) + .voiceSettings(null) + .build(); + assertThat(options7.getSpeed()).isNull(); + assertThat(options7.getVoiceSettings()).isNull(); + + // 8. 
Setting speed via setSpeed method + ElevenLabsTextToSpeechOptions options8 = ElevenLabsTextToSpeechOptions.builder().build(); + options8.setSpeed(3.0); + assertThat(options8.getSpeed()).isEqualTo(3.0); + assertThat(options8.getVoiceSettings()).isNotNull(); + assertThat(options8.getVoiceSettings().speed()).isEqualTo(3.0); + + // 9. Setting speed to null via setSpeed method + options8.setSpeed(null); + assertThat(options8.getSpeed()).isNull(); + assertThat(options8.getVoiceSettings().speed()).isNull(); + } + +} diff --git a/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/api/ElevenLabsApiIT.java b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/api/ElevenLabsApiIT.java new file mode 100644 index 00000000000..399dc9156f6 --- /dev/null +++ b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/api/ElevenLabsApiIT.java @@ -0,0 +1,223 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.ai.elevenlabs.api; + +import java.io.IOException; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; +import reactor.core.publisher.Flux; +import reactor.test.StepVerifier; + +import org.springframework.ai.elevenlabs.ElevenLabsTestConfiguration; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.http.ResponseEntity; +import org.springframework.util.LinkedMultiValueMap; +import org.springframework.util.MultiValueMap; + +/** + * Integration tests for the {@link ElevenLabsApi}. + * + *
<p>
+ * These tests require a valid ElevenLabs API key to be set as an environment variable + * named {@code ELEVEN_LABS_API_KEY}. + * + * @author Alexandros Pappas + */ +@SpringBootTest(classes = ElevenLabsTestConfiguration.class) +@EnabledIfEnvironmentVariable(named = "ELEVEN_LABS_API_KEY", matches = ".+") +public class ElevenLabsApiIT { + + @Autowired + private ElevenLabsApi elevenLabsApi; + + @Test + public void testTextToSpeech() throws IOException { + ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder() + .text("Hello, world!") + .modelId("eleven_turbo_v2_5") + .build(); + + String validVoiceId = "9BWtsMINqrJLrRacOk9x"; + ResponseEntity response = elevenLabsApi.textToSpeech(request, validVoiceId, null); + + assertThat(response.getStatusCode().is2xxSuccessful()).isTrue(); + assertThat(response.getBody()).isNotNull().isNotEmpty(); + } + + @Test + public void testTextToSpeechWithVoiceSettings() { + ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder() + .text("Hello, with Voice settings!") + .modelId("eleven_turbo_v2_5") + .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.5, 0.7, 0.0, true, 1.0)) + .build(); + + String validVoiceId = "9BWtsMINqrJLrRacOk9x"; + ResponseEntity response = elevenLabsApi.textToSpeech(request, validVoiceId, null); + + assertThat(response.getStatusCode().is2xxSuccessful()).isTrue(); + assertThat(response.getBody()).isNotNull().isNotEmpty(); + } + + @Test + public void testTextToSpeechWithQueryParams() { + ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder() + .text("Hello, testing query params!") + .modelId("eleven_turbo_v2_5") + .build(); + + String validVoiceId = "9BWtsMINqrJLrRacOk9x"; + MultiValueMap queryParams = new LinkedMultiValueMap<>(); + queryParams.add("optimize_streaming_latency", "2"); + queryParams.add("enable_logging", "true"); + queryParams.add("output_format", ElevenLabsApi.OutputFormat.MP3_22050_32.getValue()); + + ResponseEntity response = 
elevenLabsApi.textToSpeech(request, validVoiceId, queryParams); + + assertThat(response.getStatusCode().is2xxSuccessful()).isTrue(); + assertThat(response.getBody()).isNotNull().isNotEmpty(); + } + + @Test + public void testTextToSpeechVoiceIdNull() { + ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder() + .text("This should fail.") + .modelId("eleven_turbo_v2_5") + .build(); + + Exception exception = assertThrows(IllegalArgumentException.class, + () -> elevenLabsApi.textToSpeech(request, null, null)); + assertThat(exception.getMessage()).isEqualTo("voiceId must be provided. It cannot be null."); + } + + @Test + public void testTextToSpeechTextEmpty() { + Exception exception = assertThrows(IllegalArgumentException.class, + () -> ElevenLabsApi.SpeechRequest.builder().text("").modelId("eleven_turbo_v2_5").build()); + assertThat(exception.getMessage()).isEqualTo("text must not be empty"); + } + + // Streaming API tests + + @Test + public void testTextToSpeechStream() { + ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder() + .text("This is a longer text to ensure multiple chunks are received through the streaming API.") + .modelId("eleven_turbo_v2_5") + .build(); + + String validVoiceId = "9BWtsMINqrJLrRacOk9x"; + Flux> responseFlux = elevenLabsApi.textToSpeechStream(request, validVoiceId, null); + + // Track the number of chunks received + AtomicInteger chunkCount = new AtomicInteger(0); + + StepVerifier.create(responseFlux).thenConsumeWhile(response -> { + // Verify each chunk's response properties + assertThat(response.getStatusCode().is2xxSuccessful()).isTrue(); + assertThat(response.getBody()).isNotNull().isNotEmpty(); + // Count this chunk + chunkCount.incrementAndGet(); + return true; + }).verifyComplete(); + + // Verify we received at least one chunk + assertThat(chunkCount.get()).isPositive(); + } + + @Test + public void testTextToSpeechStreamWithVoiceSettings() { + ElevenLabsApi.SpeechRequest request = 
ElevenLabsApi.SpeechRequest.builder() + .text("Hello, with Voice settings in streaming mode!") + .modelId("eleven_turbo_v2_5") + .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.5, 0.7, null, null, null)) + .build(); + + String validVoiceId = "9BWtsMINqrJLrRacOk9x"; + Flux> responseFlux = elevenLabsApi.textToSpeechStream(request, validVoiceId, null); + + StepVerifier.create(responseFlux).thenConsumeWhile(response -> { + assertThat(response.getStatusCode().is2xxSuccessful()).isTrue(); + assertThat(response.getBody()).isNotNull().isNotEmpty(); + return true; + }).verifyComplete(); + } + + @Test + public void testTextToSpeechStreamWithQueryParams() { + ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder() + .text("Hello, testing streaming with query params!") + .modelId("eleven_turbo_v2_5") + .build(); + + String validVoiceId = "9BWtsMINqrJLrRacOk9x"; + MultiValueMap queryParams = new LinkedMultiValueMap<>(); + queryParams.add("optimize_streaming_latency", "2"); + queryParams.add("enable_logging", "true"); + queryParams.add("output_format", "mp3_44100_128"); + + Flux> responseFlux = elevenLabsApi.textToSpeechStream(request, validVoiceId, + queryParams); + + StepVerifier.create(responseFlux).thenConsumeWhile(response -> { + assertThat(response.getStatusCode().is2xxSuccessful()).isTrue(); + assertThat(response.getBody()).isNotNull().isNotEmpty(); + return true; + }).verifyComplete(); + } + + @Test + public void testTextToSpeechStreamVoiceIdNull() { + ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder() + .text("This should fail.") + .modelId("eleven_turbo_v2_5") + .build(); + + Exception exception = assertThrows(IllegalArgumentException.class, + () -> elevenLabsApi.textToSpeechStream(request, null, null)); + assertThat(exception.getMessage()).isEqualTo("voiceId must be provided for streaming. 
It cannot be null."); + } + + @Test + public void testTextToSpeechStreamRequestBodyNull() { + String validVoiceId = "9BWtsMINqrJLrRacOk9x"; + + Exception exception = assertThrows(IllegalArgumentException.class, + () -> elevenLabsApi.textToSpeechStream(null, validVoiceId, null)); + assertThat(exception.getMessage()).isEqualTo("requestBody can not be null."); + } + + @Test + public void testTextToSpeechStreamTextEmpty() { + Exception exception = assertThrows(IllegalArgumentException.class, () -> { + ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder() + .text("") + .modelId("eleven_turbo_v2_5") + .build(); + + String validVoiceId = "9BWtsMINqrJLrRacOk9x"; + elevenLabsApi.textToSpeechStream(request, validVoiceId, null); + }); + assertThat(exception.getMessage()).isEqualTo("text must not be empty"); + } + +} diff --git a/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/api/ElevenLabsVoicesApiIT.java b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/api/ElevenLabsVoicesApiIT.java new file mode 100644 index 00000000000..e27d8d5047a --- /dev/null +++ b/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/api/ElevenLabsVoicesApiIT.java @@ -0,0 +1,112 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.ai.elevenlabs.api; + +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; + +import org.springframework.ai.elevenlabs.ElevenLabsTestConfiguration; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.http.ResponseEntity; + +/** + * Integration tests for the {@link ElevenLabsVoicesApi}. + * + *

+ * These tests require a valid ElevenLabs API key to be set as an environment variable + * named {@code ELEVEN_LABS_API_KEY}. + * + * @author Alexandros Pappas + */ +@SpringBootTest(classes = ElevenLabsTestConfiguration.class) +@EnabledIfEnvironmentVariable(named = "ELEVEN_LABS_API_KEY", matches = ".+") +public class ElevenLabsVoicesApiIT { + + @Autowired + private ElevenLabsVoicesApi voicesApi; + + @Test + void getVoices() { + ResponseEntity response = voicesApi.getVoices(); + System.out.println("Response: " + response); + + assertThat(response.getStatusCode().is2xxSuccessful()).isTrue(); + assertThat(response.getBody()).isNotNull(); + ElevenLabsVoicesApi.Voices voicesResponse = response.getBody(); + + List voices = voicesResponse.voices(); + assertThat(voices).isNotNull().isNotEmpty(); + + for (ElevenLabsVoicesApi.Voice voice : voices) { + assertThat(voice.voiceId()).isNotBlank(); + } + } + + @Test + void getDefaultVoiceSettings() { + ResponseEntity response = voicesApi.getDefaultVoiceSettings(); + assertThat(response.getStatusCode().is2xxSuccessful()).isTrue(); + assertThat(response.getBody()).isNotNull(); + + ElevenLabsVoicesApi.VoiceSettings settings = response.getBody(); + assertThat(settings.stability()).isNotNull(); + assertThat(settings.similarityBoost()).isNotNull(); + assertThat(settings.style()).isNotNull(); + assertThat(settings.useSpeakerBoost()).isNotNull(); + } + + @Test + void getVoiceSettings() { + ResponseEntity voicesResponse = voicesApi.getVoices(); + assertThat(voicesResponse.getStatusCode().is2xxSuccessful()).isTrue(); + List voices = voicesResponse.getBody().voices(); + assertThat(voices).isNotEmpty(); + String voiceId = voices.get(0).voiceId(); + + ResponseEntity settingsResponse = voicesApi.getVoiceSettings(voiceId); + assertThat(settingsResponse.getStatusCode().is2xxSuccessful()).isTrue(); + assertThat(settingsResponse.getBody()).isNotNull(); + + ElevenLabsVoicesApi.VoiceSettings settings = settingsResponse.getBody(); + 
assertThat(settings.stability()).isNotNull(); + assertThat(settings.similarityBoost()).isNotNull(); + assertThat(settings.style()).isNotNull(); + assertThat(settings.useSpeakerBoost()).isNotNull(); + } + + @Test + void getVoice() { + ResponseEntity voicesResponse = voicesApi.getVoices(); + assertThat(voicesResponse.getStatusCode().is2xxSuccessful()).isTrue(); + List voices = voicesResponse.getBody().voices(); + assertThat(voices).isNotEmpty(); + String voiceId = voices.get(0).voiceId(); + + ResponseEntity voiceResponse = voicesApi.getVoice(voiceId); + assertThat(voiceResponse.getStatusCode().is2xxSuccessful()).isTrue(); + assertThat(voiceResponse.getBody()).isNotNull(); + + ElevenLabsVoicesApi.Voice voice = voiceResponse.getBody(); + assertThat(voice.voiceId()).isEqualTo(voiceId); + assertThat(voice.name()).isNotBlank(); + } + +} diff --git a/models/spring-ai-elevenlabs/src/test/resources/voices.json b/models/spring-ai-elevenlabs/src/test/resources/voices.json new file mode 100644 index 00000000000..da6b3ffcb97 --- /dev/null +++ b/models/spring-ai-elevenlabs/src/test/resources/voices.json @@ -0,0 +1,1482 @@ +{ + "voices": [ + { + "voice_id": "9BWtsMINqrJLrRacOk9x", + "name": "Aria", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_multilingual_v2": "fine_tuned", + "eleven_turbo_v2_5": "fine_tuned", + "eleven_flash_v2_5": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + "eleven_v2_flash": "Done!", + "eleven_flash_v2": "Done!", + "eleven_v2_5_flash": "Done!" 
+ }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "expressive", + "age": "middle-aged", + "gender": "female", + "use_case": "social media" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/9BWtsMINqrJLrRacOk9x/405766b8-1f4e-4d3c-aba1-6f25333823ec.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "CwhRBWXzGAHq8TQ4Fs17", + "name": "Roger", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_multilingual_v2": "fine_tuned", + "eleven_turbo_v2_5": "failed", + "eleven_flash_v2_5": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + "eleven_v2_flash": "Done!", + "eleven_flash_v2": "Done!", + 
"eleven_v2_5_flash": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "confident", + "age": "middle-aged", + "gender": "male", + "use_case": "social media" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/CwhRBWXzGAHq8TQ4Fs17/58ee3ff5-f6f2-4628-93b8-e38eb31806b0.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "EXAVITQu4vr4xnSDxMaL", + "name": "Sarah", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": {}, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": {}, + "message": {}, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "american", + "description": "soft", + "age": "young", + "gender": "female", + "use_case": "news" + }, + "description": null, + "preview_url": 
"https://storage.googleapis.com/eleven-public-prod/premade/voices/EXAVITQu4vr4xnSDxMaL/01a3e33c-6e99-4ee7-8543-ff2216a32186.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_turbo_v2", + "eleven_multilingual_v2", + "eleven_turbo_v2_5" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "FGY2WhTYpPnrIDTdsKH5", + "name": "Laura", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_multilingual_v2": "fine_tuned", + "eleven_turbo_v2_5": "fine_tuned", + "eleven_flash_v2_5": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + "eleven_v2_flash": "Done!", + "eleven_flash_v2": "Done!", + "eleven_v2_5_flash": "Done!" 
+ }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "upbeat", + "age": "young", + "gender": "female", + "use_case": "social media" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/FGY2WhTYpPnrIDTdsKH5/67341759-ad08-41a5-be6e-de12fe448618.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "IKne3meq5aSn9XLyUdCD", + "name": "Charlie", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_flash_v2_5": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + "eleven_turbo_v2": "", + "eleven_flash_v2": "Done!", + "eleven_v2_flash": "Done!", + "eleven_v2_5_flash": "Done!" 
+ }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "Australian", + "description": "natural", + "age": "middle aged", + "gender": "male", + "use_case": "conversational" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/IKne3meq5aSn9XLyUdCD/102de6f2-22ed-43e0-a1f1-111fa75c5481.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_multilingual_v1", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "JBFqnCBsd6RMkjVDRZzb", + "name": "George", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_turbo_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_v2_flash": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_turbo_v2": "", + "eleven_v2_flash": "Done!", + "eleven_v2_5_flash": "Done!" 
+ }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "British", + "description": "warm", + "age": "middle aged", + "gender": "male", + "use_case": "narration" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/JBFqnCBsd6RMkjVDRZzb/e6206d1a-0721-4787-aafb-06a6e705cac5.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "N2lVS1w4EtoT3dr4eOWO", + "name": "Callum", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_flash_v2_5": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + "eleven_turbo_v2": "", + "eleven_flash_v2": "Done!", + "eleven_v2_flash": "Done!", + "eleven_v2_5_flash": "Done!" 
+ }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "Transatlantic", + "description": "intense", + "age": "middle-aged", + "gender": "male", + "use_case": "characters" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/N2lVS1w4EtoT3dr4eOWO/ac833bd8-ffda-4938-9ebc-b0f99ca25481.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_multilingual_v1", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "SAz9YHcvj6GT2YYXdXww", + "name": "River", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_multilingual_v2": "fine_tuned", + "eleven_turbo_v2_5": "fine_tuned", + "eleven_flash_v2_5": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned", + "eleven_multilingual_sts_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned", + "eleven_turbo_v2": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": 
"Done!", + "eleven_v2_flash": "Done!", + "eleven_flash_v2": "Done!", + "eleven_v2_5_flash": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "confident", + "age": "middle-aged", + "gender": "non-binary", + "use_case": "social media" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/SAz9YHcvj6GT2YYXdXww/e6c95f0b-2227-491a-b3d7-2249240decb7.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_sts_v2", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "TX3LPaxmHKxFdv7VOQHJ", + "name": "Liam", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_turbo_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_v2_flash": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_turbo_v2": "", + "eleven_v2_flash": "Done!", + "eleven_v2_5_flash": "Done!" 
+ }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "articulate", + "age": "young", + "gender": "male", + "use_case": "narration" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/TX3LPaxmHKxFdv7VOQHJ/63148076-6363-42db-aea8-31424308b92c.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_multilingual_v1", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "XB0fDUnXU5powFXDhCwa", + "name": "Charlotte", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_flash_v2_5": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_multilingual_v2": "", + "eleven_turbo_v2_5": "", + "eleven_flash_v2_5": "Done!", + "eleven_v2_flash": "Done!", + "eleven_v2_5_flash": "Done!", 
+ "eleven_turbo_v2": "", + "eleven_flash_v2": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "Swedish", + "description": "seductive", + "age": "young", + "gender": "female", + "use_case": "characters" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/XB0fDUnXU5powFXDhCwa/942356dc-f10d-4d89-bda5-4f8505ee038b.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_multilingual_v1", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "Xb7hH8MSUJpSbSDYk0k2", + "name": "Alice", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_flash_v2_5": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + "eleven_turbo_v2": "", + "eleven_flash_v2": "Done!", + 
"eleven_v2_flash": "Done!", + "eleven_v2_5_flash": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "British", + "description": "confident", + "age": "middle-aged", + "gender": "female", + "use_case": "news" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/Xb7hH8MSUJpSbSDYk0k2/d10f7534-11f6-41fe-a012-2de1e482d336.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "XrExE9yKIg1WjnnlVkGX", + "name": "Matilda", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_turbo_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_v2_flash": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_turbo_v2": "", + "eleven_v2_flash": "Done!", + "eleven_v2_5_flash": "Done!" 
+ }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "friendly", + "age": "middle-aged", + "gender": "female", + "use_case": "narration" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/XrExE9yKIg1WjnnlVkGX/b930e18d-6b4d-466e-bab2-0ae97c6d8535.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_multilingual_v1", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "bIHbv24MWmeRgasZH58o", + "name": "Will", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_multilingual_v2": "fine_tuned", + "eleven_turbo_v2_5": "fine_tuned", + "eleven_flash_v2_5": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + "eleven_v2_flash": "Done!", + 
"eleven_flash_v2": "Done!", + "eleven_v2_5_flash": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "friendly", + "age": "young", + "gender": "male", + "use_case": "social media" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/bIHbv24MWmeRgasZH58o/8caf8f3d-ad29-4980-af41-53f20c72d7a4.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "cgSgspJ2msm6clMCkdW9", + "name": "Jessica", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_multilingual_v2": "fine_tuned", + "eleven_turbo_v2_5": "fine_tuned", + "eleven_flash_v2_5": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + 
"eleven_v2_flash": "Done!", + "eleven_flash_v2": "Done!", + "eleven_v2_5_flash": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "expressive", + "age": "young", + "gender": "female", + "use_case": "conversational" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/cgSgspJ2msm6clMCkdW9/56a97bf8-b69b-448f-846c-c3a11683d45a.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "cjVigY5qzO86Huf0OWal", + "name": "Eric", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_multilingual_v2": "fine_tuned", + "eleven_turbo_v2_5": "fine_tuned", + "eleven_flash_v2_5": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + 
"eleven_flash_v2_5": "Done!", + "eleven_v2_flash": "Done!", + "eleven_flash_v2": "Done!", + "eleven_v2_5_flash": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "friendly", + "age": "middle-aged", + "gender": "male", + "use_case": "conversational" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/cjVigY5qzO86Huf0OWal/d098fda0-6456-4030-b3d8-63aa048c9070.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "iP95p4xoKVk53GoZ742B", + "name": "Chris", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_flash_v2_5": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + "eleven_turbo_v2": 
"", + "eleven_flash_v2": "Done!", + "eleven_v2_flash": "Done!", + "eleven_v2_5_flash": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "casual", + "age": "middle-aged", + "gender": "male", + "use_case": "conversational" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/iP95p4xoKVk53GoZ742B/3f4bde72-cc48-40dd-829f-57fbf906f4d7.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "nPczCjzI2devNBz1zQrb", + "name": "Brian", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_flash_v2_5": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + "eleven_turbo_v2": "", + "eleven_flash_v2": 
"Done!", + "eleven_v2_flash": "Done!", + "eleven_v2_5_flash": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "deep", + "age": "middle-aged", + "gender": "male", + "use_case": "narration" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/nPczCjzI2devNBz1zQrb/2dd3e72c-4fd3-42f1-93ea-abc5d4e5aa1d.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "onwK4e9ZLuTAKqWW03F9", + "name": "Daniel", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_flash_v2_5": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + "eleven_turbo_v2": "", + "eleven_flash_v2": "Done!", + "eleven_v2_flash": 
"Done!", + "eleven_v2_5_flash": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "British", + "description": "authoritative", + "age": "middle-aged", + "gender": "male", + "use_case": "news" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/onwK4e9ZLuTAKqWW03F9/7eee0236-1a72-4b86-b303-5dcadc007ba9.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_multilingual_v1", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "pFZP5JQG7iQjIQuC4Bku", + "name": "Lily", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_flash_v2_5": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + "eleven_turbo_v2": "", + "eleven_flash_v2": "Done!", + "eleven_v2_flash": 
"Done!", + "eleven_v2_5_flash": "Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "British", + "description": "warm", + "age": "middle-aged", + "gender": "female", + "use_case": "narration" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/pFZP5JQG7iQjIQuC4Bku/89b68b35-b3dd-4348-a84a-a3c13a3c2b30.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + }, + { + "voice_id": "pqHfZKP75CvOlQylNhV4", + "name": "Bill", + "samples": null, + "category": "premade", + "fine_tuning": { + "is_allowed_to_fine_tune": true, + "state": { + "eleven_flash_v2_5": "fine_tuned", + "eleven_turbo_v2": "fine_tuned", + "eleven_flash_v2": "fine_tuned", + "eleven_v2_flash": "fine_tuned", + "eleven_v2_5_flash": "fine_tuned" + }, + "verification_failures": [], + "verification_attempts_count": 0, + "manual_verification_requested": false, + "language": "en", + "progress": { + "eleven_flash_v2_5": 1, + "eleven_v2_flash": 1, + "eleven_flash_v2": 1, + "eleven_v2_5_flash": 1 + }, + "message": { + "eleven_flash_v2_5": "Done!", + "eleven_turbo_v2": "", + "eleven_flash_v2": "Done!", + "eleven_v2_flash": "Done!", + "eleven_v2_5_flash": 
"Done!" + }, + "dataset_duration_seconds": null, + "verification_attempts": null, + "slice_ids": null, + "manual_verification": null, + "max_verification_attempts": 5, + "next_max_verification_attempts_reset_unix_ms": 1700000000000 + }, + "labels": { + "accent": "American", + "description": "trustworthy", + "age": "old", + "gender": "male", + "use_case": "narration" + }, + "description": null, + "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/pqHfZKP75CvOlQylNhV4/d782b3ff-84ba-4029-848c-acf01285524d.mp3", + "available_for_tiers": [], + "settings": null, + "sharing": null, + "high_quality_base_model_ids": [ + "eleven_v2_flash", + "eleven_flash_v2", + "eleven_turbo_v2_5", + "eleven_multilingual_v2", + "eleven_v2_5_flash", + "eleven_flash_v2_5", + "eleven_turbo_v2" + ], + "verified_languages": [], + "safety_control": null, + "voice_verification": { + "requires_verification": false, + "is_verified": false, + "verification_failures": [], + "verification_attempts_count": 0, + "language": null, + "verification_attempts": null + }, + "permission_on_resource": null, + "is_owner": false, + "is_legacy": false, + "is_mixed": false, + "created_at_unix": null + } + ] +} \ No newline at end of file diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/Speech.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/Speech.java index 93ae1cba3c5..66e8dd53c23 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/Speech.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/Speech.java @@ -29,7 +29,10 @@ * * @author Ahmed Yousri * @since 1.0.0-M1 + * @deprecated Use {@link org.springframework.ai.audio.tts.Speech} from the core package + * instead. This class will be removed in a future release. 
*/ +@Deprecated public class Speech implements ModelResult { private final byte[] audio; diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechMessage.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechMessage.java index dde419268b9..8de55fe4f11 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechMessage.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechMessage.java @@ -24,7 +24,10 @@ * * @author Ahmed Yousri * @since 1.0.0-M1 + * @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechMessage} from the + * core package instead. This class will be removed in a future release. */ +@Deprecated public class SpeechMessage { private String text; diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechModel.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechModel.java index f03370ce434..98161933814 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechModel.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechModel.java @@ -25,7 +25,10 @@ * * @author Ahmed Yousri * @since 1.0.0-M1 + * @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechModel} from the + * core package instead. This interface will be removed in a future release. 
*/ +@Deprecated @FunctionalInterface public interface SpeechModel extends Model { diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechPrompt.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechPrompt.java index 03fb07d6e89..bfce1e311ee 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechPrompt.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechPrompt.java @@ -29,7 +29,10 @@ * * @author Ahmed Yousri * @since 1.0.0-M1 + * @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechPrompt} from the + * core package instead. This class will be removed in a future release. */ +@Deprecated public class SpeechPrompt implements ModelRequest { private final SpeechMessage message; diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechResponse.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechResponse.java index 5b92fe770b1..9662764aec5 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechResponse.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechResponse.java @@ -28,7 +28,10 @@ * * @author Ahmed Yousri * @since 1.0.0-M1 + * @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechResponse} from the + * core package instead. This class will be removed in a future release. 
*/ +@Deprecated public class SpeechResponse implements ModelResponse { private final Speech speech; diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/StreamingSpeechModel.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/StreamingSpeechModel.java index 6743637948d..fa8daadf159 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/StreamingSpeechModel.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/StreamingSpeechModel.java @@ -27,7 +27,10 @@ * * @author Ahmed Yousri * @since 1.0.0-M1 + * @deprecated Use {@link org.springframework.ai.audio.tts.StreamingTextToSpeechModel} + * from the core package instead. This interface will be removed in a future release. */ +@Deprecated @FunctionalInterface public interface StreamingSpeechModel extends StreamingModel { diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/metadata/audio/OpenAiAudioSpeechResponseMetadata.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/metadata/audio/OpenAiAudioSpeechResponseMetadata.java index e90c4097d71..412b0775ea9 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/metadata/audio/OpenAiAudioSpeechResponseMetadata.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/metadata/audio/OpenAiAudioSpeechResponseMetadata.java @@ -16,9 +16,9 @@ package org.springframework.ai.openai.metadata.audio; +import org.springframework.ai.audio.tts.TextToSpeechResponseMetadata; import org.springframework.ai.chat.metadata.EmptyRateLimit; import org.springframework.ai.chat.metadata.RateLimit; -import org.springframework.ai.model.MutableResponseMetadata; import org.springframework.ai.openai.api.OpenAiAudioApi; import org.springframework.lang.Nullable; import org.springframework.util.Assert; @@ -29,7 +29,7 @@ * @author Ahmed Yousri * @see RateLimit 
*/ -public class OpenAiAudioSpeechResponseMetadata extends MutableResponseMetadata { +public class OpenAiAudioSpeechResponseMetadata extends TextToSpeechResponseMetadata { public static final OpenAiAudioSpeechResponseMetadata NULL = new OpenAiAudioSpeechResponseMetadata() { diff --git a/pom.xml b/pom.xml index b403eb5ef62..be86d9dc04c 100644 --- a/pom.xml +++ b/pom.xml @@ -99,6 +99,7 @@ auto-configurations/models/spring-ai-autoconfigure-model-anthropic auto-configurations/models/spring-ai-autoconfigure-model-azure-openai auto-configurations/models/spring-ai-autoconfigure-model-bedrock-ai + auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs auto-configurations/models/spring-ai-autoconfigure-model-huggingface auto-configurations/models/spring-ai-autoconfigure-model-openai auto-configurations/models/spring-ai-autoconfigure-model-minimax @@ -162,6 +163,7 @@ models/spring-ai-azure-openai models/spring-ai-bedrock models/spring-ai-bedrock-converse + models/spring-ai-elevenlabs models/spring-ai-huggingface models/spring-ai-minimax models/spring-ai-mistral-ai @@ -180,6 +182,7 @@ spring-ai-spring-boot-starters/spring-ai-starter-model-azure-openai spring-ai-spring-boot-starters/spring-ai-starter-model-bedrock spring-ai-spring-boot-starters/spring-ai-starter-model-bedrock-converse + spring-ai-spring-boot-starters/spring-ai-starter-model-elevenlabs spring-ai-spring-boot-starters/spring-ai-starter-model-huggingface spring-ai-spring-boot-starters/spring-ai-starter-model-minimax spring-ai-spring-boot-starters/spring-ai-starter-model-mistral-ai @@ -711,7 +714,8 @@ org.springframework.ai.anthropic/**/*IT.java org.springframework.ai.azure.openai/**/*IT.java org.springframework.ai.bedrock/**/*IT.java - org.springframework.ai.bedrock.converse/**/*IT.java + org.springframework.ai.bedrock.converse/**/*IT.java + org.springframework.ai.elevenlabs/**/*IT.java org.springframework.ai.huggingface/**/*IT.java org.springframework.ai.minimax/**/*IT.java 
org.springframework.ai.mistralai/**/*IT.java @@ -759,6 +763,7 @@ org.springframework.ai.autoconfigure.huggingface/**/**IT.java org.springframework.ai.autoconfigure.chat/**/**IT.java + org.springframework.ai.autoconfigure.elevenlabs/**/**IT.java org.springframework.ai.autoconfigure.embedding/**/**IT.java org.springframework.ai.autoconfigure.image/**/**IT.java diff --git a/spring-ai-bom/pom.xml b/spring-ai-bom/pom.xml index e145d0bc89f..b4218a2dc94 100644 --- a/spring-ai-bom/pom.xml +++ b/spring-ai-bom/pom.xml @@ -243,6 +243,13 @@ ${project.version} + + org.springframework.ai + spring-ai-elevenlabs + ${project.version} + true + + org.springframework.ai spring-ai-huggingface @@ -310,7 +317,6 @@ ${project.version} - org.springframework.ai spring-ai-zhipuai @@ -565,6 +571,11 @@ ${project.version} + + org.springframework.ai + spring-ai-autoconfigure-model-elevenlabs + + org.springframework.ai spring-ai-autoconfigure-model-huggingface @@ -914,6 +925,11 @@ ${project.version} + + org.springframework.ai + spring-ai-starter-model-elevenlabs + + org.springframework.ai spring-ai-starter-model-minimax diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech.adoc index adabcd80c04..52de29ff2a2 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech.adoc @@ -1,5 +1,9 @@ [[Speech]] = Text-To-Speech (TTS) API -Spring AI provides support for OpenAI's Speech API. -When additional providers for Speech are implemented, a common `SpeechModel` and `StreamingSpeechModel` interface will be extracted. 
\ No newline at end of file +Spring AI provides support for the following Text-To-Speech (TTS) providers: + +- xref:api/audio/speech/openai-speech.adoc[OpenAI's Speech API] +- xref:api/audio/speech/elevenlabs-speech.adoc[ElevenLabs Text-to-Speech API] + +A common `TextToSpeechModel` and `StreamingTextToSpeechModel` interface is available in the `org.springframework.ai.audio.tts` package; future enhancements may introduce additional providers that implement it. \ No newline at end of file diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech/elevenlabs-speech.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech/elevenlabs-speech.adoc new file mode 100644 index 00000000000..09629529b3e --- /dev/null +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech/elevenlabs-speech.adoc @@ -0,0 +1,249 @@ += ElevenLabs Text-to-Speech (TTS) + +== Introduction + +ElevenLabs provides natural-sounding speech synthesis software using deep learning. Its AI audio models generate realistic, versatile, and contextually-aware speech, voices, and sound effects across 32 languages. The ElevenLabs Text-to-Speech API enables users to bring any book, article, PDF, newsletter, or text to life with ultra-realistic AI narration. + +== Prerequisites + +. Create an ElevenLabs account and obtain an API key. You can sign up at the https://elevenlabs.io/sign-up[ElevenLabs signup page]. Your API key can be found on your profile page after logging in. +. Add the `spring-ai-elevenlabs` dependency to your project's build file. For more information, refer to the xref:getting-started.adoc#dependency-management[Dependency Management] section. + +== Auto-configuration + +Spring AI provides Spring Boot auto-configuration for the ElevenLabs Text-to-Speech Client. 
+To enable it, add the following dependency to your project's Maven `pom.xml` file: + +[source,xml] +---- +<dependency> + <groupId>org.springframework.ai</groupId> + <artifactId>spring-ai-starter-model-elevenlabs</artifactId> +</dependency> +---- + +or to your Gradle `build.gradle` build file: + +[source,groovy] +---- +dependencies { + implementation 'org.springframework.ai:spring-ai-starter-model-elevenlabs' +} +---- + +TIP: Refer to the xref:getting-started.adoc#dependency-management[Dependency Management] section to add the Spring AI BOM to your build file. + +== Speech Properties + +=== Connection Properties + +The prefix `spring.ai.elevenlabs` is used as the property prefix for *all* ElevenLabs related configurations (both connection and TTS specific settings). This is defined in `ElevenLabsConnectionProperties`. + +[cols="3,5,1"] +|==== +| Property | Description | Default +| spring.ai.elevenlabs.base-url | The base URL for the ElevenLabs API. | https://api.elevenlabs.io +| spring.ai.elevenlabs.api-key | Your ElevenLabs API key. | - +|==== + +=== Configuration Properties + +The prefix `spring.ai.elevenlabs.tts` is used as the property prefix to configure the ElevenLabs Text-to-Speech client, specifically. This is defined in `ElevenLabsSpeechProperties`. + +[cols="3,5,2"] +|==== +| Property | Description | Default + +| spring.ai.elevenlabs.tts.options.model-id | The ID of the model to use. | eleven_turbo_v2_5 +| spring.ai.elevenlabs.tts.options.voice-id | The ID of the voice to use. This is the *voice ID*, not the voice name. | 9BWtsMINqrJLrRacOk9x +| spring.ai.elevenlabs.tts.options.output-format | The output format for the generated audio. See xref:api/audio/speech/elevenlabs-speech.adoc#output-formats[Output Formats] below. | mp3_22050_32 +| spring.ai.elevenlabs.tts.enabled | Enable or disable the ElevenLabs Text-to-Speech client. | true +|==== + +NOTE: The base URL and API key can also be configured *specifically* for TTS using `spring.ai.elevenlabs.tts.base-url` and `spring.ai.elevenlabs.tts.api-key`. 
However, it is generally recommended to use the global `spring.ai.elevenlabs` prefix for simplicity, unless you have a specific reason to use different credentials for different ElevenLabs services. The more specific `tts` properties will override the global ones. + +TIP: All properties prefixed with `spring.ai.elevenlabs.tts.options` can be overridden at runtime. + +[[output-formats]] +.Available Output Formats +[cols="1,1"] +|==== +| Enum Value | Description +| MP3_22050_32 | MP3, 22.05 kHz, 32 kbps +| MP3_44100_32 | MP3, 44.1 kHz, 32 kbps +| MP3_44100_64 | MP3, 44.1 kHz, 64 kbps +| MP3_44100_96 | MP3, 44.1 kHz, 96 kbps +| MP3_44100_128 | MP3, 44.1 kHz, 128 kbps +| MP3_44100_192 | MP3, 44.1 kHz, 192 kbps +| PCM_8000 | PCM, 8 kHz +| PCM_16000 | PCM, 16 kHz +| PCM_22050 | PCM, 22.05 kHz +| PCM_24000 | PCM, 24 kHz +| PCM_44100 | PCM, 44.1 kHz +| PCM_48000 | PCM, 48 kHz +| ULAW_8000 | µ-law, 8 kHz +| ALAW_8000 | A-law, 8 kHz +| OPUS_48000_32 | Opus, 48 kHz, 32 kbps +| OPUS_48000_64 | Opus, 48 kHz, 64 kbps +| OPUS_48000_96 | Opus, 48 kHz, 96 kbps +| OPUS_48000_128 | Opus, 48 kHz, 128 kbps +| OPUS_48000_192 | Opus, 48 kHz, 192 kbps +|==== + + +== Runtime Options [[speech-options]] + +The `ElevenLabsSpeechOptions` class provides options to use when making a text-to-speech request. On start-up, the options specified under `spring.ai.elevenlabs.tts.options` are used, but you can override these at runtime. The following options are available: + +* `modelId`: The ID of the model to use. +* `voiceId`: The ID of the voice to use. +* `outputFormat`: The output format of the generated audio. +* `voiceSettings`: An object containing voice settings such as `stability`, `similarityBoost`, `style`, `useSpeakerBoost`, and `speed`. +* `languageCode`: The language code of the input text (e.g., "en" for English). +* `pronunciationDictionaryLocators`: A list of pronunciation dictionary locators. +* `seed`: A seed for random number generation, for reproducibility. 
+* `previousText`: Text before the main text, for context in multi-turn conversations. +* `nextText`: Text after the main text, for context in multi-turn conversations. +* `previousRequestIds`: Request IDs from previous turns in a conversation. +* `nextRequestIds`: Request IDs for subsequent turns in a conversation. +* `applyTextNormalization`: Apply text normalization ("auto", "on", or "off"). +* `applyTextNormalizationForVoice`: Apply language text normalization. + +For example: + +[source,java] +---- +ElevenLabsSpeechOptions speechOptions = ElevenLabsSpeechOptions.builder() + .modelId("eleven_multilingual_v2") + .voiceId("your_voice_id") + .outputFormat(ElevenLabsApi.OutputFormat.MP3_44100_128.getValue()) + .build(); + +TextToSpeechPrompt speechPrompt = new TextToSpeechPrompt("Hello, this is a text-to-speech example.", speechOptions); +TextToSpeechResponse response = elevenLabsSpeechModel.call(speechPrompt); +---- + +== Manual Configuration + +Add the `spring-ai-elevenlabs` dependency to your project's Maven `pom.xml` file: + +[source,xml] +---- +<dependency> + <groupId>org.springframework.ai</groupId> + <artifactId>spring-ai-elevenlabs</artifactId> +</dependency> +---- + +or to your Gradle `build.gradle` build file: + +[source,groovy] +---- +dependencies { + implementation 'org.springframework.ai:spring-ai-elevenlabs' +} +---- + +TIP: Refer to the xref:getting-started.adoc#dependency-management[Dependency Management] section to add the Spring AI BOM to your build file. 
+ +Next, create an `ElevenLabsTextToSpeechModel`: + +[source,java] +---- +ElevenLabsApi elevenLabsApi = ElevenLabsApi.builder() + .apiKey(System.getenv("ELEVEN_LABS_API_KEY")) + .build(); + +ElevenLabsSpeechOptions defaultOptions = ElevenLabsSpeechOptions.builder() + .modelId("eleven_turbo_v2_5") + .voiceId("your_voice_id") + .outputFormat("mp3_44100_128") + .build(); + +ElevenLabsTextToSpeechModel elevenLabsSpeechModel = ElevenLabsTextToSpeechModel.builder() + .elevenLabsApi(elevenLabsApi) + .defaultOptions(defaultOptions) + .build(); + + +TextToSpeechPrompt speechPrompt = new TextToSpeechPrompt("Hello, this is a text-to-speech example.", defaultOptions); +TextToSpeechResponse response = elevenLabsSpeechModel.call(speechPrompt); + + +byte[] responseAsBytes = response.getResult().get(0).getOutput(); +---- + +== Streaming Real-time Audio + +The ElevenLabs Speech API supports real-time audio streaming using chunked transfer encoding. This allows audio playback to begin before the entire audio file is generated. 
+ +[source,java] +---- +ElevenLabsApi elevenLabsApi = ElevenLabsApi.builder() + .apiKey(System.getenv("ELEVEN_LABS_API_KEY")) + .build(); + +ElevenLabsSpeechOptions defaultOptions = ElevenLabsSpeechOptions.builder() + .modelId("eleven_turbo_v2_5") + .voiceId("your_voice_id") + .outputFormat("mp3_44100_128") + .build(); + +ElevenLabsTextToSpeechModel elevenLabsSpeechModel = ElevenLabsTextToSpeechModel.builder() + .elevenLabsApi(elevenLabsApi) + .defaultOptions(defaultOptions) + .build(); + + +TextToSpeechPrompt speechPrompt = new TextToSpeechPrompt("Today is a wonderful day to build something people love!", defaultOptions); + +Flux<TextToSpeechResponse> responseStream = elevenLabsSpeechModel.stream(speechPrompt); + +// Process the stream, e.g., play the audio chunks +responseStream.subscribe(speechResponse -> { + byte[] audioChunk = speechResponse.getResult().getOutput(); + // Play the audioChunk +}); + +---- + +== Voices API + +The ElevenLabs Voices API allows you to retrieve information about available voices, their settings, and default voice settings. + +To use the Voices API, you'll need to create an instance of `ElevenLabsVoicesApi`: + +[source,java] +---- +ElevenLabsVoicesApi voicesApi = ElevenLabsVoicesApi.builder() + .apiKey(System.getenv("ELEVEN_LABS_API_KEY")) + .build(); +---- + +You can then use the following methods: + +* `getVoices()`: Retrieves a list of all available voices. +* `getDefaultVoiceSettings()`: Gets the default settings for voices. +* `getVoiceSettings(String voiceId)`: Returns the settings for a specific voice. +* `getVoice(String voiceId)`: Returns metadata about a specific voice. 
+ +Example: + +[source,java] +---- +// Get all voices +ResponseEntity<ElevenLabsVoicesApi.Voices> voicesResponse = voicesApi.getVoices(); +List<ElevenLabsVoicesApi.Voice> voices = voicesResponse.getBody().voices(); + +// Get default voice settings +ResponseEntity<ElevenLabsVoicesApi.VoiceSettings> defaultSettingsResponse = voicesApi.getDefaultVoiceSettings(); +ElevenLabsVoicesApi.VoiceSettings defaultSettings = defaultSettingsResponse.getBody(); + +// Get settings for a specific voice +ResponseEntity<ElevenLabsVoicesApi.VoiceSettings> voiceSettingsResponse = voicesApi.getVoiceSettings(voiceId); +ElevenLabsVoicesApi.VoiceSettings voiceSettings = voiceSettingsResponse.getBody(); + +// Get details for a specific voice +ResponseEntity<ElevenLabsVoicesApi.Voice> voiceDetailsResponse = voicesApi.getVoice(voiceId); +ElevenLabsVoicesApi.Voice voiceDetails = voiceDetailsResponse.getBody(); +---- \ No newline at end of file diff --git a/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/DefaultTextToSpeechOptions.java b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/DefaultTextToSpeechOptions.java new file mode 100644 index 00000000000..48a23433608 --- /dev/null +++ b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/DefaultTextToSpeechOptions.java @@ -0,0 +1,147 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.ai.audio.tts; + +import java.util.Objects; + +import com.fasterxml.jackson.annotation.JsonInclude; + +/** + * Default implementation of the {@link TextToSpeechOptions} interface. + * + * @author Alexandros Pappas + */ +@JsonInclude(JsonInclude.Include.NON_NULL) +public class DefaultTextToSpeechOptions implements TextToSpeechOptions { + + private final String model; + + private final String voice; + + private final String format; + + private final Double speed; + + private DefaultTextToSpeechOptions(String model, String voice, String format, Double speed) { + this.model = model; + this.voice = voice; + this.format = format; + this.speed = speed; + } + + public static Builder builder() { + return new Builder(); + } + + @Override + public String getModel() { + return this.model; + } + + @Override + public String getVoice() { + return this.voice; + } + + @Override + public String getFormat() { + return this.format; + } + + @Override + public Double getSpeed() { + return this.speed; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof DefaultTextToSpeechOptions that)) + return false; + return Objects.equals(model, that.model) && Objects.equals(voice, that.voice) + && Objects.equals(format, that.format) && Objects.equals(speed, that.speed); + } + + @Override + public int hashCode() { + return Objects.hash(model, voice, format, speed); + } + + @Override + public String toString() { + return "DefaultTextToSpeechOptions{" + "model='" + model + '\'' + ", voice='" + voice + '\'' + ", format='" + + format + '\'' + ", speed=" + speed + '}'; + } + + @Override + @SuppressWarnings("unchecked") + public DefaultTextToSpeechOptions copy() { + return new Builder(this).build(); + } + + public static class Builder implements TextToSpeechOptions.Builder { + + private String model; + + private String voice; + + private String format; + + private Double speed; + + public Builder() { + } + + private 
Builder(DefaultTextToSpeechOptions options) { + this.model = options.model; + this.voice = options.voice; + this.format = options.format; + this.speed = options.speed; + } + + @Override + public Builder model(String model) { + this.model = model; + return this; + } + + @Override + public Builder voice(String voice) { + this.voice = voice; + return this; + } + + @Override + public Builder format(String format) { + this.format = format; + return this; + } + + @Override + public Builder speed(Double speed) { + this.speed = speed; + return this; + } + + public DefaultTextToSpeechOptions build() { + return new DefaultTextToSpeechOptions(this.model, this.voice, this.format, this.speed); + } + + } + +} diff --git a/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/Speech.java b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/Speech.java new file mode 100644 index 00000000000..794d2a2b390 --- /dev/null +++ b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/Speech.java @@ -0,0 +1,67 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.audio.tts; + +import java.util.Arrays; +import java.util.Objects; + +import org.springframework.ai.model.ModelResult; +import org.springframework.ai.model.ResultMetadata; + +/** + * Implementation of the {@link ModelResult} interface for the speech model. 
+ * + * @author Alexandros Pappas + */ +public class Speech implements ModelResult { + + private final byte[] speech; + + public Speech(byte[] speech) { + this.speech = speech; + } + + @Override + public byte[] getOutput() { + return this.speech; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof Speech speech1)) + return false; + return Arrays.equals(speech, speech1.speech); + } + + @Override + public int hashCode() { + return Objects.hash(Arrays.hashCode(speech)); + } + + @Override + public String toString() { + return "Speech{" + "speech=" + Arrays.toString(speech) + '}'; + } + + @Override + public ResultMetadata getMetadata() { + return null; + } + +} diff --git a/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/StreamingTextToSpeechModel.java b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/StreamingTextToSpeechModel.java new file mode 100644 index 00000000000..f342b0fb0aa --- /dev/null +++ b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/StreamingTextToSpeechModel.java @@ -0,0 +1,45 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.audio.tts; + +import reactor.core.publisher.Flux; + +import org.springframework.ai.model.StreamingModel; + +/** + * Interface for the streaming text to speech model. 
+ * + * @author Alexandros Pappas + */ +public interface StreamingTextToSpeechModel extends StreamingModel { + + default Flux stream(String text) { + TextToSpeechPrompt prompt = new TextToSpeechPrompt(text); + return stream(prompt).map(response -> (response.getResult() == null || response.getResult().getOutput() == null) + ? new byte[0] : response.getResult().getOutput()); + } + + default Flux stream(String text, TextToSpeechOptions options) { + TextToSpeechPrompt prompt = new TextToSpeechPrompt(text, options); + return stream(prompt).map(response -> (response.getResult() == null || response.getResult().getOutput() == null) + ? new byte[0] : response.getResult().getOutput()); + } + + @Override + Flux stream(TextToSpeechPrompt prompt); + +} diff --git a/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechMessage.java b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechMessage.java new file mode 100644 index 00000000000..d6d299a26bb --- /dev/null +++ b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechMessage.java @@ -0,0 +1,58 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.audio.tts; + +import java.util.Objects; + +/** + * A plain-text message holding the text to be converted to speech; it is the + * instruction payload of a {@link TextToSpeechPrompt}. 
+ * + * @author Alexandros Pappas + */ +public class TextToSpeechMessage { + + private final String text; + + public TextToSpeechMessage(String text) { + this.text = text; + } + + public String getText() { + return text; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof TextToSpeechMessage that)) + return false; + return Objects.equals(text, that.text); + } + + @Override + public int hashCode() { + return Objects.hash(text); + } + + @Override + public String toString() { + return "TextToSpeechMessage{" + "text='" + text + '\'' + '}'; + } + +} diff --git a/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechModel.java b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechModel.java new file mode 100644 index 00000000000..1f417992acd --- /dev/null +++ b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechModel.java @@ -0,0 +1,42 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.audio.tts; + +import org.springframework.ai.model.Model; +import org.springframework.ai.model.ModelResult; + +/** + * Interface for the text to speech model. 
+ * + * @author Alexandros Pappas + */ +public interface TextToSpeechModel extends Model { + + default byte[] call(String text) { + TextToSpeechPrompt prompt = new TextToSpeechPrompt(text); + ModelResult result = call(prompt).getResult(); + return (result != null) ? result.getOutput() : new byte[0]; + } + + @Override + TextToSpeechResponse call(TextToSpeechPrompt prompt); + + default TextToSpeechOptions getDefaultOptions() { + return TextToSpeechOptions.builder().build(); + } + +} diff --git a/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechOptions.java b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechOptions.java new file mode 100644 index 00000000000..9a3e8de1a1b --- /dev/null +++ b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechOptions.java @@ -0,0 +1,114 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.audio.tts; + +import org.springframework.ai.model.ModelOptions; +import org.springframework.lang.Nullable; + +/** + * Interface for text-to-speech model options. Defines the common, portable options that + * should be supported by all implementations. + * + * @author Alexandros Pappas + */ +public interface TextToSpeechOptions extends ModelOptions { + + /** + * Creates a new {@link TextToSpeechOptions.Builder} to create the default + * {@link TextToSpeechOptions}. 
+ * @return Returns a new {@link TextToSpeechOptions.Builder}. + */ + static TextToSpeechOptions.Builder builder() { + return new DefaultTextToSpeechOptions.Builder(); + } + + /** + * Returns the model to use for text-to-speech. + * @return The model name. + */ + @Nullable + String getModel(); + + /** + * Returns the voice to use for text-to-speech. + * @return The voice identifier. + */ + @Nullable + String getVoice(); + + /** + * Returns the output format for the generated audio. + * @return The output format (e.g., "mp3", "wav"). + */ + @Nullable + String getFormat(); + + /** + * Returns the speed of the generated speech. + * @return The speech speed. + */ + @Nullable + Double getSpeed(); + + /** + * Returns a copy of this {@link TextToSpeechOptions}. + * @return a copy of this {@link TextToSpeechOptions} + */ + T copy(); + + /** + * Builder for {@link TextToSpeechOptions}. + */ + interface Builder { + + /** + * Sets the model to use for text-to-speech. + * @param model The model name. + * @return This builder. + */ + Builder model(String model); + + /** + * Sets the voice to use for text-to-speech. + * @param voice The voice identifier. + * @return This builder. + */ + Builder voice(String voice); + + /** + * Sets the output format for the generated audio. + * @param format The output format (e.g., "mp3", "wav"). + * @return This builder. + */ + Builder format(String format); + + /** + * Sets the speed of the generated speech. + * @param speed The speech speed. + * @return This builder. + */ + Builder speed(Double speed); + + /** + * Builds the {@link TextToSpeechOptions}. + * @return The {@link TextToSpeechOptions}. 
+ */ + TextToSpeechOptions build(); + + } + +} diff --git a/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechPrompt.java b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechPrompt.java new file mode 100644 index 00000000000..f679018df51 --- /dev/null +++ b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechPrompt.java @@ -0,0 +1,84 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.audio.tts; + +import java.util.Objects; + +import org.springframework.ai.model.ModelRequest; + +/** + * Implementation of the {@link ModelRequest} interface for the text to speech prompt. 
+ * + * @author Alexandros Pappas + */ +public class TextToSpeechPrompt implements ModelRequest { + + private final TextToSpeechMessage message; + + private TextToSpeechOptions options; + + public TextToSpeechPrompt(String text) { + this(new TextToSpeechMessage(text), TextToSpeechOptions.builder().build()); + } + + public TextToSpeechPrompt(String text, TextToSpeechOptions options) { + this(new TextToSpeechMessage(text), options); + } + + public TextToSpeechPrompt(TextToSpeechMessage message) { + this(message, TextToSpeechOptions.builder().build()); + } + + public TextToSpeechPrompt(TextToSpeechMessage message, TextToSpeechOptions options) { + this.message = message; + this.options = options; + } + + @Override + public TextToSpeechMessage getInstructions() { + return this.message; + } + + @Override + public TextToSpeechOptions getOptions() { + return this.options; + } + + public void setOptions(TextToSpeechOptions options) { + this.options = options; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof TextToSpeechPrompt that)) + return false; + return Objects.equals(message, that.message) && Objects.equals(options, that.options); + } + + @Override + public int hashCode() { + return Objects.hash(message, options); + } + + @Override + public String toString() { + return "TextToSpeechPrompt{" + "message=" + message + ", options=" + options + '}'; + } + +} diff --git a/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechResponse.java b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechResponse.java new file mode 100644 index 00000000000..00cfab133fc --- /dev/null +++ b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechResponse.java @@ -0,0 +1,78 @@ +/* + * Copyright 2025-2025 the original author or authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.audio.tts; + +import java.util.List; +import java.util.Objects; + +import org.springframework.ai.model.ModelResponse; +import org.springframework.ai.model.ResponseMetadata; + +/** + * Implementation of the {@link ModelResponse} interface for the text to speech response. + * + * @author Alexandros Pappas + */ +public class TextToSpeechResponse implements ModelResponse { + + private final List results; + + private final TextToSpeechResponseMetadata textToSpeechResponseMetadata; + + public TextToSpeechResponse(List results) { + this(results, null); + } + + public TextToSpeechResponse(List results, TextToSpeechResponseMetadata textToSpeechResponseMetadata) { + this.results = results; + this.textToSpeechResponseMetadata = textToSpeechResponseMetadata; + } + + @Override + public List getResults() { + return this.results; + } + + public Speech getResult() { + return this.results.get(0); + } + + @Override + public TextToSpeechResponseMetadata getMetadata() { + return this.textToSpeechResponseMetadata; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof TextToSpeechResponse that)) + return false; + return Objects.equals(results, that.results); + } + + @Override + public int hashCode() { + return Objects.hash(results); + } + + @Override + public String toString() { + return "TextToSpeechResponse{" + "results=" + results + '}'; 
+ } + +} diff --git a/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechResponseMetadata.java b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechResponseMetadata.java new file mode 100644 index 00000000000..f581b167064 --- /dev/null +++ b/spring-ai-model/src/main/java/org/springframework/ai/audio/tts/TextToSpeechResponseMetadata.java @@ -0,0 +1,12 @@ +package org.springframework.ai.audio.tts; + +import org.springframework.ai.model.MutableResponseMetadata; + +/** + * Metadata associated with a text-to-speech response. + * + * @author Alexandros Pappas + */ +public class TextToSpeechResponseMetadata extends MutableResponseMetadata { + +} diff --git a/spring-ai-model/src/test/java/org/springframework/ai/audio/tts/DefaultTextToSpeechOptionsTests.java b/spring-ai-model/src/test/java/org/springframework/ai/audio/tts/DefaultTextToSpeechOptionsTests.java new file mode 100644 index 00000000000..7194a42214e --- /dev/null +++ b/spring-ai-model/src/test/java/org/springframework/ai/audio/tts/DefaultTextToSpeechOptionsTests.java @@ -0,0 +1,67 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.audio.tts; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.within; +import org.junit.jupiter.api.Test; + +/** + * Unit tests for {@link DefaultTextToSpeechOptions}. 
+ * + * @author Alexandros Pappas + */ +class DefaultTextToSpeechOptionsTests { + + @Test + void testBuilderWithAllFields() { + TextToSpeechOptions options = DefaultTextToSpeechOptions.builder() + .model("test-model") + .voice("test-voice") + .format("test-format") + .speed(0.8) + .build(); + + assertThat(options.getModel()).isEqualTo("test-model"); + assertThat(options.getVoice()).isEqualTo("test-voice"); + assertThat(options.getFormat()).isEqualTo("test-format"); + assertThat(options.getSpeed()).isCloseTo(0.8, within(0.0001)); + } + + @Test + void testCopy() { + TextToSpeechOptions original = DefaultTextToSpeechOptions.builder() + .model("test-model") + .voice("test-voice") + .format("test-format") + .speed(0.8) + .build(); + + DefaultTextToSpeechOptions copied = original.copy(); + assertThat(copied).isNotSameAs(original).isEqualTo(original); + } + + @Test + void testDefaultValues() { + DefaultTextToSpeechOptions options = DefaultTextToSpeechOptions.builder().build(); + assertThat(options.getModel()).isNull(); + assertThat(options.getVoice()).isNull(); + assertThat(options.getFormat()).isNull(); + assertThat(options.getSpeed()).isNull(); + } + +} diff --git a/spring-ai-spring-boot-starters/spring-ai-starter-model-elevenlabs/pom.xml b/spring-ai-spring-boot-starters/spring-ai-starter-model-elevenlabs/pom.xml new file mode 100644 index 00000000000..a9961ab0a87 --- /dev/null +++ b/spring-ai-spring-boot-starters/spring-ai-starter-model-elevenlabs/pom.xml @@ -0,0 +1,44 @@ + + + 4.0.0 + + org.springframework.ai + spring-ai-parent + 1.1.0-SNAPSHOT + ../../pom.xml + + spring-ai-starter-model-elevenlabs + jar + Spring AI Starter - ElevenLabs + Spring AI ElevenLabs Auto Configuration + https://github.com/spring-projects/spring-ai + + + https://github.com/spring-projects/spring-ai + git://github.com/spring-projects/spring-ai.git + git@github.com:spring-projects/spring-ai.git + + + + + + org.springframework.boot + spring-boot-starter + + + + org.springframework.ai + 
spring-ai-autoconfigure-model-elevenlabs + ${project.parent.version} + + + + org.springframework.ai + spring-ai-elevenlabs + ${project.parent.version} + + + +