✨ feat: Add new section to README.md and create new API file
- Add a new section to the README.md file
- Create a new file in the API directory
- Implement functions and interfaces for fetching and processing speech data from various APIs
- Modify functions related to speech synthesis
- Update the utterance object based on provided options
- Handle dependencies for the useEffect hook
- Update the voiceList state when the voices change

These changes introduce new features and improve speech data fetching, processing, and synthesis.
canisminor1990 committed Nov 7, 2023
1 parent 58770b5 commit 630b586
Showing 19 changed files with 161 additions and 183 deletions.
16 changes: 16 additions & 0 deletions README.md
@@ -33,6 +33,7 @@ A high-quality & reliable TTS React Hooks library
- [Compile with Next.js](#compile-with-nextjs)
- [🛳 Self Hosting](#-self-hosting)
- [Deploy to Vercel](#deploy-to-vercel)
- [Environment Variable](#environment-variable)
- [⌨️ Local Development](#️-local-development)
- [🤝 Contributing](#-contributing)
- [🔗 More Products](#-more-products)
@@ -81,6 +82,21 @@ Click the button below to deploy your private plugins' gateway.

[![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https%3A%2F%2Fgithub.com%2Flobehub%2Flobe-tts&project-name=lobe-tts&repository-name=lobe-tts)

### Environment Variable

This project provides additional configuration items that can be set with environment variables:

| Environment Variable | Description | Example |
| --- | --- | --- |
| `OPENAI_API_KEY` | The API key obtained from your OpenAI account page | `sk-xxxxxx...xxxxxx` |
| `OPENAI_PROXY_URL` | Overrides the default OpenAI API request base URL when you route requests through a proxy. Defaults to `https://api.openai.com/v1` | `https://api.chatanywhere.cn/v1` |
| `AZURE_SPEECH_KEY` | The API key of the Azure Speech Service | |
| `AZURE_SPEECH_REGION` | The region of the Azure Speech Service | |
| `MICROSOFT_SPEECH_PROXY_URL` | Overrides the default Microsoft Speech API request base URL when you route requests through a proxy | |
| `MICROSOFT_SPEECH_ALLOW_ORIGINS` | Allowed origins, as a single origin or a comma-separated list | |
| `EDDGE_API_TOKEN` | The API token of the Edge Speech Service | `6A5AA1D4EAFF4E9FB37E23D68491D6F4` |
| `EDDGE_PROXY_URL` | Overrides the default Edge Speech WSS request base URL when you route requests through a proxy | `wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1` |

<div align="right">

[![][back-to-top]](#readme-top)
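As a brief illustration of how the allow-origins variable is consumed, here is a minimal TypeScript sketch; the parsing mirrors the `split(',')` call in `api/microsoft-speech.ts` further down in this commit, and the example value is hypothetical:

```ts
// MICROSOFT_SPEECH_ALLOW_ORIGINS accepts a single origin or a comma-separated list;
// the API route in this commit splits it into an array before checking request origins.
const allowOrigins: string[] | undefined =
  process.env.MICROSOFT_SPEECH_ALLOW_ORIGINS?.split(',');

// Hypothetical value:
//   MICROSOFT_SPEECH_ALLOW_ORIGINS="https://example.com,https://app.example.com"
//   -> ['https://example.com', 'https://app.example.com']
```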
19 changes: 0 additions & 19 deletions api/index.ts

This file was deleted.

31 changes: 31 additions & 0 deletions api/microsoft-speech.ts
@@ -0,0 +1,31 @@
import cors from '../lib/cors';

export const config = {
  runtime: 'edge',
};

const API =
  'https://southeastasia.api.speech.microsoft.com/accfreetrial/texttospeech/acc/v3.0-beta1/vcg/speak';

const MICROSOFT_SPEECH_ALLOW_ORIGINS =
  process.env?.MICROSOFT_SPEECH_ALLOW_ORIGINS?.split(',') || undefined;

export default async (req: Request) => {
  if (req.method !== 'POST') return new Response('Method Not Allowed', { status: 405 });

  let origin = '*';

  if (MICROSOFT_SPEECH_ALLOW_ORIGINS) {
    const reqOrigin = req.headers.get('origin');
    if (reqOrigin && MICROSOFT_SPEECH_ALLOW_ORIGINS.includes(reqOrigin)) {
      origin = reqOrigin;
    } else {
      return new Response('Origin Not Allowed', { status: 403 });
    }
  }

  const res = await fetch(API, { body: req.body, headers: req.headers, method: 'POST' });
  const newResponse = new Response(res.body, res);

  return cors(req, newResponse, { methods: ['POST'], origin });
};
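For context, a hedged sketch of how a browser client could call this proxy route; the JSON body mirrors the payload built in `fetchMicrosoftSpeech` later in this commit, and the `/api/microsoft-speech` path matches the new default in `src/const/api.ts`:

```ts
// Minimal client-side call to the proxy route above; the JSON body mirrors the
// payload built in fetchMicrosoftSpeech later in this commit.
const speakViaProxy = async (ssml: string): Promise<ArrayBuffer> => {
  const res = await fetch('/api/microsoft-speech', {
    body: JSON.stringify({
      offsetInPlainText: 0,
      ssml,
      ttsAudioFormat: 'audio-24khz-160kbitrate-mono-mp3',
    }),
    headers: { 'content-type': 'application/json' },
    method: 'POST',
  });
  if (!res.ok) throw new Error('Microsoft Speech proxy request failed');
  return res.arrayBuffer();
};
```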
42 changes: 0 additions & 42 deletions lib/fetchMicrosoftSpeech.ts

This file was deleted.

35 changes: 0 additions & 35 deletions lib/genSSML.ts

This file was deleted.

7 changes: 6 additions & 1 deletion src/const/api.ts
@@ -1,10 +1,15 @@
import urlJoin from 'url-join';

export const MICROSOFT_SPEECH_PROXY_URL = process.env.MICROSOFT_SPEECH_PROXY_URL || '';
export const MICROSOFT_SPEECH_PROXY_URL =
  process.env.MICROSOFT_SPEECH_PROXY_URL || '/api/microsoft-speech';
export const AZURE_SPEECH_KEY = process.env.AZURE_SPEECH_KEY || '';
export const AZURE_SPEECH_REGION = process.env.AZURE_SPEECH_REGION || '';
export const OPENAI_API_KEY = process.env.OPENAI_API_KEY || '';
export const OPENAI_PROXY_URL = process.env.OPENAI_PROXY_URL || 'https://api.openai.com/v1';
export const OPENAI_TTS_URL = (api?: string) => urlJoin(api || OPENAI_PROXY_URL, 'audio/speech');
export const OPENAI_STT_URL = (api: string) =>
  urlJoin(api || OPENAI_PROXY_URL, 'audio/transcriptions');
export const EDDGE_PROXY_URL =
  process.env.EDDGE_PROXY_URL ||
  'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1';
export const EDDGE_API_TOKEN = process.env.EDDGE_API_TOKEN || '6A5AA1D4EAFF4E9FB37E23D68491D6F4';
10 changes: 5 additions & 5 deletions src/index.ts
@@ -1,8 +1,8 @@
export { fetchAzureSpeech } from './services/fetchAzureSpeech';
export { fetchEdgeSpeech } from './services/fetchEdgeSpeech';
export { fetchMicrosoftSpeech } from './services/fetchMicrosoftSpeech';
export { fetchOpenaiSTT } from './services/fetchOpenaiSTT';
export { fetchOpenaiTTS } from './services/fetchOpenaiTTS';
export { type AzureSpeechOptions, fetchAzureSpeech } from './services/fetchAzureSpeech';
export { type EdgeSpeechOptions, fetchEdgeSpeech } from './services/fetchEdgeSpeech';
export { fetchMicrosoftSpeech, type MicrosoftSpeechOptions } from './services/fetchMicrosoftSpeech';
export { fetchOpenaiSTT, type OpenaiSttOptions } from './services/fetchOpenaiSTT';
export { fetchOpenaiTTS, type OpenaiTtsOptions } from './services/fetchOpenaiTTS';
export { useAzureSpeech } from './useAzureSpeech';
export { useEdgeSpeech } from './useEdgeSpeech';
export { useMicrosoftSpeech } from './useMicrosoftSpeech';
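With the options interfaces re-exported from the entry point, callers can type their configuration directly; a short illustrative sketch, where the package name `@lobehub/tts` is an assumption based on the repository name:

```ts
// Hypothetical consumer imports (the package name @lobehub/tts is assumed from
// the repository name); each options interface is re-exported next to its fetcher.
import {
  type AzureSpeechOptions,
  type OpenaiSttOptions,
  fetchAzureSpeech,
  fetchOpenaiSTT,
} from '@lobehub/tts';
```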
3 changes: 1 addition & 2 deletions src/services/fetchAzureSpeech.ts
@@ -9,8 +9,7 @@ import {
} from 'microsoft-cognitiveservices-speech-sdk';

import { AZURE_SPEECH_KEY, AZURE_SPEECH_REGION } from '@/const/api';

import { type SsmlOptions, genSSML } from '../utils/genSSML';
import { type SsmlOptions, genSSML } from '@/utils/genSSML';

export interface AzureSpeechOptions extends SsmlOptions {
  api: {
22 changes: 13 additions & 9 deletions src/services/fetchEdgeSpeech.ts
@@ -1,14 +1,18 @@
import qs from 'query-string';
import { v4 as uuidv4 } from 'uuid';

import { type SsmlOptions, genSSML } from '../utils/genSSML';
import { genSendContent } from '../utils/genSendContent';
import { getHeadersAndData } from '../utils/getHeadersAndData';
import { EDDGE_API_TOKEN, EDDGE_PROXY_URL } from '@/const/api';
import { type SsmlOptions, genSSML } from '@/utils/genSSML';
import { genSendContent } from '@/utils/genSendContent';
import { getHeadersAndData } from '@/utils/getHeadersAndData';

const API = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1';
const TOKEN = '6A5AA1D4EAFF4E9FB37E23D68491D6F4';

export const fetchEdgeSpeech = async (text: string, options: SsmlOptions) => {
export interface EdgeSpeechOptions extends Pick<SsmlOptions, 'name'> {
  api: {
    key: string;
    proxy: string;
  };
}
export const fetchEdgeSpeech = async (text: string, { api, ...options }: EdgeSpeechOptions) => {
  const connectId = uuidv4().replaceAll('-', '');
  const date = new Date().toString();
  const audioContext = new AudioContext();
@@ -18,9 +22,9 @@ export const fetchEdgeSpeech = async (text: string, options: SsmlOptions) => {
    qs.stringifyUrl({
      query: {
        ConnectionId: connectId,
        TrustedClientToken: TOKEN,
        TrustedClientToken: api.key || EDDGE_API_TOKEN,
      },
      url: API,
      url: api.proxy || EDDGE_PROXY_URL,
    }),
  );
  ws.binaryType = 'arraybuffer';
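A minimal usage sketch of the new `api` field; the values are placeholders, and empty strings fall back to the `EDDGE_API_TOKEN` / `EDDGE_PROXY_URL` constants from `src/const/api.ts`:

```ts
// fetchEdgeSpeech now takes an explicit api block; empty values fall back to
// the EDDGE_API_TOKEN / EDDGE_PROXY_URL constants from src/const/api.ts.
import { type EdgeSpeechOptions, fetchEdgeSpeech } from '@lobehub/tts'; // package name assumed

const options: EdgeSpeechOptions = {
  api: {
    key: '', // falls back to EDDGE_API_TOKEN
    proxy: '', // falls back to EDDGE_PROXY_URL
  },
  name: 'zh-CN-XiaoxiaoNeural', // placeholder voice name
};

// The return value is not shown in this hunk, so it is left untyped here.
const speak = (text: string) => fetchEdgeSpeech(text, options);
```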
45 changes: 36 additions & 9 deletions src/services/fetchMicrosoftSpeech.ts
@@ -1,8 +1,8 @@
import qs from 'query-string';
import { v4 as uuidv4 } from 'uuid';

import { MICROSOFT_SPEECH_PROXY_URL } from '@/const/api';

import { type SsmlOptions } from '../utils/genSSML';
import { type SsmlOptions } from '@/utils/genSSML';
import { genSSML } from '@/utils/genSSML';

export interface MicrosoftSpeechOptions extends SsmlOptions {
  api?: string;
@@ -12,12 +12,39 @@ export const fetchMicrosoftSpeech = async (
  text: string,
  { api, ...options }: MicrosoftSpeechOptions,
): Promise<AudioBufferSourceNode> => {
  const response: Response = await fetch(
    qs.stringifyUrl({
      query: { text, ...options },
      url: api || MICROSOFT_SPEECH_PROXY_URL,
    }),
  );
  const data = JSON.stringify({
    offsetInPlainText: 0,
    properties: {
      SpeakTriggerSource: 'AccTuningPagePlayButton',
    },
    ssml: genSSML(text, options),
    ttsAudioFormat: 'audio-24khz-160kbitrate-mono-mp3',
  });

  const DEFAULT_HEADERS = new Headers({
    'accept': '*/*',
    'accept-language': 'zh-CN,zh;q=0.9',
    'authority': 'southeastasia.api.speech.microsoft.com',
    'content-type': 'application/json',
    'customvoiceconnectionid': uuidv4(),
    'origin': 'https://speech.microsoft.com',
    'sec-ch-ua': '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent':
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
  });

  const response: Response = await fetch(api || MICROSOFT_SPEECH_PROXY_URL, {
    body: data,
    headers: DEFAULT_HEADERS,
    method: 'POST',
    // @ts-ignore
    responseType: 'arraybuffer',
  });

  if (!response.ok) {
    throw new Error('Network response was not ok');
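Since the function still resolves to an `AudioBufferSourceNode`, playback on the caller side stays the same; a brief sketch with placeholder values (the package name and the optionality of the remaining SSML fields are assumptions):

```ts
// fetchMicrosoftSpeech now POSTs SSML to the proxy route, but still resolves to
// an AudioBufferSourceNode, so playback on the caller side is unchanged.
import { fetchMicrosoftSpeech } from '@lobehub/tts'; // package name assumed

const play = async () => {
  const source = await fetchMicrosoftSpeech('你好，世界', {
    name: 'zh-CN-XiaoxiaoNeural', // placeholder voice; other SSML options assumed optional
  });
  source.start();
};
```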
4 changes: 2 additions & 2 deletions src/services/fetchOpenaiSTT.ts
@@ -2,7 +2,7 @@ import { v4 as uuidv4 } from 'uuid';

import { OPENAI_API_KEY, OPENAI_STT_URL } from '@/const/api';

export interface OpenaiTtsOptions {
export interface OpenaiSttOptions {
  api: {
    key: string;
    proxy: string;
@@ -13,7 +13,7 @@ export interface OpenaiTtsOptions {
// Transcribe speech audio to text
export const fetchOpenaiSTT = async (
  speech: Blob,
  { api, model = 'whisper-1' }: OpenaiTtsOptions,
  { api, model = 'whisper-1' }: OpenaiSttOptions,
): Promise<string> => {
  const key = api.key || OPENAI_API_KEY;
  const url = OPENAI_STT_URL(api.proxy);
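The rename to `OpenaiSttOptions` only changes the type name; a short usage sketch with a placeholder key (the package name is assumed, and the proxy fallback follows `OPENAI_STT_URL` in `src/const/api.ts`):

```ts
// fetchOpenaiSTT transcribes an audio Blob and resolves to the transcript text.
import { type OpenaiSttOptions, fetchOpenaiSTT } from '@lobehub/tts'; // package name assumed

const sttOptions: OpenaiSttOptions = {
  api: { key: 'sk-placeholder', proxy: '' }, // an empty proxy falls back to OPENAI_PROXY_URL
};

const transcribe = (speech: Blob): Promise<string> => fetchOpenaiSTT(speech, sttOptions);
```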
5 changes: 2 additions & 3 deletions src/services/fetchOpenaiTTS.ts
@@ -1,10 +1,9 @@
import { OPENAI_API_KEY, OPENAI_TTS_URL } from '@/const/api';

import { type SsmlOptions } from '../utils/genSSML';
import { type SsmlOptions } from '@/utils/genSSML';

export type OpenaiVoice = 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';

export interface OpenaiTtsOptions extends SsmlOptions {
export interface OpenaiTtsOptions extends Pick<SsmlOptions, 'name'> {
  api: {
    key: string;
    proxy: string;
11 changes: 7 additions & 4 deletions src/useAzureSpeech/index.ts
@@ -1,17 +1,20 @@
import { useState } from 'react';
import useSWR from 'swr';

import { AzureSpeechOptions, fetchAzureSpeech } from '../services/fetchAzureSpeech';
import { AzureSpeechOptions, fetchAzureSpeech } from '@/services/fetchAzureSpeech';

export const useAzureSpeech = (defaultText: string, options: AzureSpeechOptions) => {
export const useAzureSpeech = (
  defaultText: string,
  { api, name, style, pitch, rate }: AzureSpeechOptions,
) => {
  const [data, setDate] = useState<AudioBufferSourceNode>();
  const [text, setText] = useState<string>(defaultText);
  const [shouldFetch, setShouldFetch] = useState<boolean>(false);
  const [isPlaying, setIsPlaying] = useState<boolean>(false);

  const { isLoading } = useSWR(
    shouldFetch ? [options.name, text].join('-') : null,
    () => fetchAzureSpeech(text, options),
    shouldFetch ? [name, text].join('-') : null,
    () => fetchAzureSpeech(text, { api, name, pitch, rate, style }),
    {
      onError: () => setShouldFetch(false),
      onSuccess: (audioBufferSource) => {
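A hedged sketch of the updated hook signature in a component; the `api` fields and voice name are placeholders, and the hook's return shape is not shown in this hunk:

```ts
// Hypothetical component using the updated hook signature; the api value and
// voice name are placeholders.
import { useAzureSpeech } from '@lobehub/tts'; // package name assumed

const AzureDemo = () => {
  const tts = useAzureSpeech('你好，世界', {
    api: { key: 'azure-key-placeholder', region: 'eastus' }, // assumed api fields
    name: 'zh-CN-XiaoxiaoNeural',
  });
  // `tts` exposes loading/playback state and controls, as in the Edge demo below.
  return null;
};
```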
21 changes: 20 additions & 1 deletion src/useEdgeSpeech/demos/index.tsx
@@ -8,6 +8,21 @@ const defaultText = '这是一段使用 Edge Speech 的语音演示';

export default () => {
  const store = useCreateStore();

  const api: any = useControls(
    {
      key: {
        label: 'EDDGE_API_TOKEN',
        value: '',
      },
      proxy: {
        label: 'EDDGE_PROXY_URL',
        value: '',
      },
    },
    { store },
  );

  const options: any = useControls(
    {
      name: {
@@ -17,7 +32,11 @@ export default () => {
    },
    { store },
  );
  const { setText, isLoading, isPlaying, start, stop } = useEdgeSpeech(defaultText, options);

  const { setText, isLoading, isPlaying, start, stop } = useEdgeSpeech(defaultText, {
    api,
    ...options,
  });
  return (
    <StoryBook levaStore={store}>
      <Flexbox gap={8}>