Skip to content

Commit

Permalink
feat(tts): add elevenlabs support
Browse files Browse the repository at this point in the history
  • Loading branch information
sogehige committed Jan 2, 2024
1 parent 1c23226 commit 901c151
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 3 deletions.
3 changes: 2 additions & 1 deletion d.ts/src/helpers/socket.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,8 @@ export type ClientToServerEventsWithNamespace = {
'events::remove': (eventId: Required<Event['id']>, cb: (error: Error | string | null | unknown) => void) => void,
},
'/core/tts': GenericEvents & {
'speak': (opts: { service: TTSService, text: string, key: string, voice: string; volume: number; rate: number; pitch: number; triggerTTSByHighlightedMessage?: boolean; }, cb: (error: Error | string | null | unknown, b64mp3?: string) => void) => void,
'speak': (opts: { service: TTSService, text: string, key: string, voice: string; volume: number; rate: number; pitch: number; triggerTTSByHighlightedMessage?: boolean; } |
{ service: TTSService.ELEVENLABS, text: string, key: string, voice: string; volume: number; clarity: number; stability: number; exaggeration: number; triggerTTSByHighlightedMessage?: boolean; }, cb: (error: Error | string | null | unknown, b64mp3?: string) => void) => void,
},
'/core/ui': GenericEvents & {
'configuration': (cb: (error: Error | string | null | unknown, data?: Configuration) => void) => void,
Expand Down
8 changes: 8 additions & 0 deletions src/database/entity/overlay.ts
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,7 @@ export enum TTSService {
RESPONSIVEVOICE = '0',
GOOGLE = '1',
SPEECHSYNTHESIS = '2',
ELEVENLABS = '3',
}

export interface Alerts {
Expand All @@ -360,6 +361,13 @@ export interface Alerts {
// we store the TTS settings of each service here, so that when the service is changed we can reuse its previously set settings
services: {
[TTSService.NONE]?: null,
[TTSService.ELEVENLABS]?: {
voice: string;
clarity: number;
stability: number;
exaggeration: number;
volume: number;
},
[TTSService.SPEECHSYNTHESIS]?: {
voice: string;
pitch: number;
Expand Down
55 changes: 53 additions & 2 deletions src/tts.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { randomUUID } from 'crypto';

import axios from 'axios';
import { JWT } from 'google-auth-library';
import { google } from 'googleapis';

Expand Down Expand Up @@ -44,6 +45,9 @@ class TTS extends Core {
@settings()
googleVoices: string[] = [];

@settings()
elevenlabsApiKey = '';

sockets() {
adminEndpoint('/core/tts', 'settings.refresh', async () => {
this.initializeTTSServices(); // reset settings
Expand All @@ -52,8 +56,16 @@ class TTS extends Core {
publicEndpoint('/core/tts', 'speak', async (opts, cb) => {
if (secureKeys.has(opts.key)) {
secureKeys.delete(opts.key);
if (opts.service === TTSService.ELEVENLABS) {
const audioContent = await this.elevenlabsSpeak(opts as any);

if (opts.service === TTSService.GOOGLE) {
if (!audioContent) {
throw new Error('Something went wrong');
}
if (cb) {
cb(null, audioContent);
}
} else if (opts.service === TTSService.GOOGLE) {
const audioContent = await this.googleSpeak(opts);

if (!audioContent) {
Expand All @@ -72,7 +84,16 @@ class TTS extends Core {

adminEndpoint('/core/tts', 'speak', async (opts, cb) => {
try {
if (opts.service === TTSService.GOOGLE) {
if (opts.service === TTSService.ELEVENLABS) {
const audioContent = await this.elevenlabsSpeak(opts as any);

if (!audioContent) {
throw new Error('Something went wrong');
}
if (cb) {
cb(null, audioContent);
}
} else if (opts.service === TTSService.GOOGLE) {
const audioContent = await this.googleSpeak(opts);

if (!audioContent) {
Expand Down Expand Up @@ -190,6 +211,36 @@ class TTS extends Core {
this.initializeResponsiveVoiceTTS();
}

/**
 * Synthesize speech through the ElevenLabs text-to-speech HTTP API.
 *
 * Sends `opts.text` to the voice identified by `opts.voice` using the
 * `eleven_multilingual_v2` model and authenticates with the
 * `elevenlabsApiKey` setting.
 *
 * @param opts.voice        ElevenLabs voice id; used as a URL path segment.
 * @param opts.text         Text to be spoken.
 * @param opts.clarity      Forwarded as `voice_settings.similarity_boost`.
 * @param opts.stability    Forwarded as `voice_settings.stability`.
 * @param opts.exaggeration Forwarded as `voice_settings.style`.
 * @returns The response body (requested as MP3) encoded as a base64 string.
 * @throws Whatever `axios` rejects with (network failure, non-2xx status).
 */
async elevenlabsSpeak(opts: {
  voice: string;
  text: string;
  clarity: number;
  stability: number;
  exaggeration: number;
}): Promise<string> {
  // The voice id comes from a socket client, so escape it before it becomes
  // part of the request path.
  const voiceId = encodeURIComponent(opts.voice);

  const response = await axios(`https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`, {
    method: 'POST',
    headers: {
      'xi-api-key': this.elevenlabsApiKey,
      'Content-Type': 'application/json',
    },
    // we need raw bytes, not a decoded string, to be able to base64 them
    responseType: 'arraybuffer',
    data: {
      model_id: 'eleven_multilingual_v2',
      text: opts.text,
      // NOTE(review): ElevenLabs appears to document output_format as a query
      // parameter rather than a body field - confirm against the API reference.
      output_format: 'mp3_44100_128', // default, but for clarity
      voice_settings: {
        similarity_boost: opts.clarity,
        stability: opts.stability,
        style: opts.exaggeration,
        use_speaker_boost: true,
      },
    },
  });

  // Encode through Buffer instead of btoa(String.fromCharCode(...bytes)):
  // spreading every byte as a call argument throws RangeError once the audio
  // is larger than the engine's argument limit (a few seconds of MP3).
  return Buffer.from(response.data).toString('base64');
}

async googleSpeak(opts: {
volume: number;
pitch: number;
Expand Down

0 comments on commit 901c151

Please sign in to comment.