Skip to content

Commit

Permalink
feat: external openai tts support
Browse files Browse the repository at this point in the history
  • Loading branch information
tjbck committed Apr 20, 2024
1 parent 713934e commit cbd18ec
Show file tree
Hide file tree
Showing 5 changed files with 184 additions and 71 deletions.
86 changes: 41 additions & 45 deletions backend/apps/audio/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,61 +101,57 @@ async def update_openai_config(

@app.post("/speech")
async def speech(request: Request, user=Depends(get_verified_user)):
idx = None
try:
body = await request.body()
name = hashlib.sha256(body).hexdigest()

file_path = SPEECH_CACHE_DIR.joinpath(f"{name}.mp3")
file_body_path = SPEECH_CACHE_DIR.joinpath(f"{name}.json")
body = await request.body()
name = hashlib.sha256(body).hexdigest()

# Check if the file already exists in the cache
if file_path.is_file():
return FileResponse(file_path)
file_path = SPEECH_CACHE_DIR.joinpath(f"{name}.mp3")
file_body_path = SPEECH_CACHE_DIR.joinpath(f"{name}.json")

headers = {}
headers["Authorization"] = f"Bearer {app.state.OPENAI_API_KEY}"
headers["Content-Type"] = "application/json"
# Check if the file already exists in the cache
if file_path.is_file():
return FileResponse(file_path)

r = None
try:
r = requests.post(
url=f"{app.state.OPENAI_API_BASE_URL}/audio/speech",
data=body,
headers=headers,
stream=True,
)
headers = {}
headers["Authorization"] = f"Bearer {app.state.OPENAI_API_KEY}"
headers["Content-Type"] = "application/json"

r.raise_for_status()
r = None
try:
r = requests.post(
url=f"{app.state.OPENAI_API_BASE_URL}/audio/speech",
data=body,
headers=headers,
stream=True,
)

# Save the streaming content to a file
with open(file_path, "wb") as f:
for chunk in r.iter_content(chunk_size=8192):
f.write(chunk)
r.raise_for_status()

with open(file_body_path, "w") as f:
json.dump(json.loads(body.decode("utf-8")), f)
# Save the streaming content to a file
with open(file_path, "wb") as f:
for chunk in r.iter_content(chunk_size=8192):
f.write(chunk)

# Return the saved file
return FileResponse(file_path)
with open(file_body_path, "w") as f:
json.dump(json.loads(body.decode("utf-8")), f)

except Exception as e:
log.exception(e)
error_detail = "Open WebUI: Server Connection Error"
if r is not None:
try:
res = r.json()
if "error" in res:
error_detail = f"External: {res['error']}"
except:
error_detail = f"External: {e}"
# Return the saved file
return FileResponse(file_path)

raise HTTPException(
status_code=r.status_code if r else 500, detail=error_detail
)
except Exception as e:
log.exception(e)
error_detail = "Open WebUI: Server Connection Error"
if r is not None:
try:
res = r.json()
if "error" in res:
error_detail = f"External: {res['error']['message']}"
except:
error_detail = f"External: {e}"

except ValueError:
raise HTTPException(status_code=401, detail=ERROR_MESSAGES.OPENAI_NOT_FOUND)
raise HTTPException(
status_code=r.status_code if r != None else 500,
detail=error_detail,
)


@app.post("/transcriptions")
Expand Down
78 changes: 73 additions & 5 deletions src/lib/apis/audio/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,67 @@
import { AUDIO_API_BASE_URL } from '$lib/constants';

/**
 * Requests the audio configuration from `${AUDIO_API_BASE_URL}/config`.
 *
 * @param token - Bearer token placed in the `Authorization` header.
 * @returns The parsed JSON body of a successful response. Resolves to `null`
 *   when the request fails in a way that yields no truthy `detail` (for
 *   example a rejected `fetch` or a body that is not valid JSON).
 * @throws The `detail` value of the parsed error body when the response is
 *   not OK and that value is truthy.
 */
export const getAudioConfig = async (token: string) => {
	let config = null;
	let failureDetail = null;

	try {
		const response = await fetch(`${AUDIO_API_BASE_URL}/config`, {
			method: 'GET',
			headers: {
				'Content-Type': 'application/json',
				Authorization: `Bearer ${token}`
			}
		});

		// The body is parsed for both outcomes: it is the result on success
		// and the error payload (carrying `detail`) on failure.
		const body = await response.json();
		if (!response.ok) throw body;
		config = body;
	} catch (err: any) {
		// `err` may be the parsed error body or a runtime error raised by
		// fetch()/json(); only the former is expected to carry `detail`.
		console.log(err);
		failureDetail = err.detail;
	}

	if (failureDetail) {
		throw failureDetail;
	}
	return config;
};

/**
 * Shape of the payload that `updateAudioConfig` serialises into the body of
 * its POST request to `/config/update`.
 */
type OpenAIConfigForm = {
// Value submitted as `url`; the settings form binds it to its "API Base URL" input.
url: string;
// Value submitted as `key`; the settings form binds it to its "API Key" input.
key: string;
};

/**
 * Posts a new audio configuration to `${AUDIO_API_BASE_URL}/config/update`.
 *
 * @param token - Bearer token placed in the `Authorization` header.
 * @param payload - Fields to send; a shallow copy is JSON-encoded as the
 *   request body.
 * @returns The parsed JSON body of a successful response. Resolves to `null`
 *   when the request fails in a way that yields no truthy `detail` (for
 *   example a rejected `fetch` or a body that is not valid JSON).
 * @throws The `detail` value of the parsed error body when the response is
 *   not OK and that value is truthy.
 */
export const updateAudioConfig = async (token: string, payload: OpenAIConfigForm) => {
	let updatedConfig = null;
	let failureDetail = null;

	try {
		const response = await fetch(`${AUDIO_API_BASE_URL}/config/update`, {
			method: 'POST',
			headers: {
				'Content-Type': 'application/json',
				Authorization: `Bearer ${token}`
			},
			body: JSON.stringify({ ...payload })
		});

		// The body is parsed for both outcomes: it is the result on success
		// and the error payload (carrying `detail`) on failure.
		const body = await response.json();
		if (!response.ok) throw body;
		updatedConfig = body;
	} catch (err: any) {
		// `err` may be the parsed error body or a runtime error raised by
		// fetch()/json(); only the former is expected to carry `detail`.
		console.log(err);
		failureDetail = err.detail;
	}

	if (failureDetail) {
		throw failureDetail;
	}
	return updatedConfig;
};

export const transcribeAudio = async (token: string, file: File) => {
const data = new FormData();
data.append('file', file);
Expand Down Expand Up @@ -48,11 +110,17 @@ export const synthesizeOpenAISpeech = async (
input: text,
voice: speaker
})
}).catch((err) => {
console.log(err);
error = err;
return null;
});
})
.then(async (res) => {
if (!res.ok) throw await res.json();
return res;
})
.catch((err) => {
error = err.detail;
console.log(err);

return null;
});

if (error) {
throw error;
Expand Down
13 changes: 9 additions & 4 deletions src/lib/components/chat/Messages/ResponseMessage.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,12 @@
const toggleSpeakMessage = async () => {
if (speaking) {
speechSynthesis.cancel();
try {
speechSynthesis.cancel();
sentencesAudio[speakingIdx].pause();
sentencesAudio[speakingIdx].currentTime = 0;
sentencesAudio[speakingIdx].pause();
sentencesAudio[speakingIdx].currentTime = 0;
} catch {}
speaking = null;
speakingIdx = null;
Expand Down Expand Up @@ -221,6 +223,10 @@
sentence
).catch((error) => {
toast.error(error);
speaking = null;
loadingSpeech = false;
return null;
});
Expand All @@ -230,7 +236,6 @@
const audio = new Audio(blobUrl);
sentencesAudio[idx] = audio;
loadingSpeech = false;
lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
}
}
Expand Down
60 changes: 52 additions & 8 deletions src/lib/components/chat/Settings/Audio.svelte
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
<script lang="ts">
import { getAudioConfig, updateAudioConfig } from '$lib/apis/audio';
import { createEventDispatcher, onMount, getContext } from 'svelte';
import { toast } from 'svelte-sonner';
const dispatch = createEventDispatcher();
Expand All @@ -9,6 +10,9 @@
// Audio
let OpenAIUrl = '';
let OpenAIKey = '';
let STTEngines = ['', 'openai'];
let STTEngine = '';
Expand Down Expand Up @@ -69,6 +73,18 @@
saveSettings({ speechAutoSend: speechAutoSend });
};
// Sends the URL/key currently held in the form to the backend and, when a
// response object comes back, mirrors the stored values into the bound inputs.
// Rejections from updateAudioConfig are not caught here and reach the caller.
const updateConfigHandler = async () => {
	const { token } = localStorage;
	const saved = await updateAudioConfig(token, { url: OpenAIUrl, key: OpenAIKey });

	// A falsy result means there is nothing to sync back.
	if (!saved) return;

	OpenAIUrl = saved.OPENAI_API_BASE_URL;
	OpenAIKey = saved.OPENAI_API_KEY;
};
onMount(async () => {
let settings = JSON.parse(localStorage.getItem('settings') ?? '{}');
Expand All @@ -85,12 +101,20 @@
} else {
getWebAPIVoices();
}
const res = await getAudioConfig(localStorage.token);
if (res) {
OpenAIUrl = res.OPENAI_API_BASE_URL;
OpenAIKey = res.OPENAI_API_KEY;
}
});
</script>

<form
class="flex flex-col h-full justify-between space-y-3 text-sm"
on:submit|preventDefault={() => {
on:submit|preventDefault={async () => {
await updateConfigHandler();
saveSettings({
audio: {
STTEngine: STTEngine !== '' ? STTEngine : undefined,
Expand All @@ -101,7 +125,7 @@
dispatch('save');
}}
>
<div class=" space-y-3 pr-1.5 overflow-y-scroll max-h-80">
<div class=" space-y-3 pr-1.5 overflow-y-scroll max-h-[22rem]">
<div>
<div class=" mb-1 text-sm font-medium">{$i18n.t('STT Settings')}</div>

Expand Down Expand Up @@ -196,6 +220,24 @@
</div>
</div>

{#if TTSEngine === 'openai'}
<div class="mt-1 flex gap-2 mb-1">
<input
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
placeholder={$i18n.t('API Base URL')}
bind:value={OpenAIUrl}
required
/>

<input
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
placeholder={$i18n.t('API Key')}
bind:value={OpenAIKey}
required
/>
</div>
{/if}

<div class=" py-0.5 flex w-full justify-between">
<div class=" self-center text-xs font-medium">{$i18n.t('Auto-playback response')}</div>

Expand Down Expand Up @@ -241,16 +283,18 @@
<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div>
<div class="flex w-full">
<div class="flex-1">
<select
class="w-full rounded py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-800 outline-none"
<input
list="voice-list"
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
bind:value={speaker}
placeholder="Select a voice"
>
/>

<datalist id="voice-list">
{#each voices as voice}
<option value={voice.name} class="bg-gray-100 dark:bg-gray-700">{voice.name}</option
>
<option value={voice.name} />
{/each}
</select>
</datalist>
</div>
</div>
</div>
Expand Down
Loading

0 comments on commit cbd18ec

Please sign in to comment.