✨ feat: Add new section to README.md and create new API file
- Add a new section to the README.md file
- Create a new file in the API directory
- Implement functions and interfaces for fetching and processing speech data from various APIs
- Modify functions related to speech synthesis
- Update the utterance object based on provided options
- Handle dependencies for the useEffect hook
- Update the voiceList state when the voices change

These changes introduce new features and improve speech data fetching, processing, and synthesis.
canisminor1990 committed Nov 7, 2023
1 parent 58770b5 commit 630b586
Showing 19 changed files with 161 additions and 183 deletions.
16 changes: 16 additions & 0 deletions README.md
@@ -33,6 +33,7 @@ A high-quality & reliable TTS React Hooks library
- [Compile with Next.js](#compile-with-nextjs)
- [🛳 Self Hosting](#-self-hosting)
- [Deploy to Vercel](#deploy-to-vercel)
- [Environment Variable](#environment-variable)
- [⌨️ Local Development](#️-local-development)
- [🤝 Contributing](#-contributing)
- [🔗 More Products](#-more-products)
@@ -81,6 +82,21 @@ Click the button below to deploy your private plugins' gateway.

[![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https%3A%2F%2Fgithub.com%2Flobehub%2Flobe-tts&project-name=lobe-tts&repository-name=lobe-tts)

### Environment Variable

This project provides additional configuration items that can be set with environment variables:

| Environment Variable | Description | Example |
| --- | --- | --- |
| `OPENAI_API_KEY` | The API key obtained from your OpenAI account page | `sk-xxxxxx...xxxxxx` |
| `OPENAI_PROXY_URL` | Overrides the default OpenAI API request base URL when you route requests through a proxy. Defaults to `https://api.openai.com/v1` | `https://api.chatanywhere.cn/v1` |
| `AZURE_SPEECH_KEY` | The API key of the Azure Speech Service | |
| `AZURE_SPEECH_REGION` | The region of the Azure Speech Service | |
| `MICROSOFT_SPEECH_PROXY_URL` | Overrides the default Microsoft Speech API request base URL when you route requests through a proxy | |
| `MICROSOFT_SPEECH_ALLOW_ORIGINS` | Allowed origins, as a single origin or a comma-separated list | |
| `EDDGE_API_TOKEN` | The API token of the Edge Speech Service | `6A5AA1D4EAFF4E9FB37E23D68491D6F4` |
| `EDDGE_PROXY_URL` | Overrides the default Edge Speech WSS request base URL when you route requests through a proxy | `wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1` |

<div align="right">

[![][back-to-top]](#readme-top)
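As a brief illustration of how the allow-origins variable is consumed, here is a minimal TypeScript sketch; the parsing mirrors the `split(',')` call in `api/microsoft-speech.ts` further down in this commit, and the example value is hypothetical:

```ts
// MICROSOFT_SPEECH_ALLOW_ORIGINS accepts a single origin or a comma-separated list;
// the API route in this commit splits it into an array before checking request origins.
const allowOrigins: string[] | undefined =
  process.env.MICROSOFT_SPEECH_ALLOW_ORIGINS?.split(',');

// Hypothetical value:
//   MICROSOFT_SPEECH_ALLOW_ORIGINS="https://example.com,https://app.example.com"
//   -> ['https://example.com', 'https://app.example.com']
```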
19 changes: 0 additions & 19 deletions api/index.ts

This file was deleted.

31 changes: 31 additions & 0 deletions api/microsoft-speech.ts
@@ -0,0 +1,31 @@
import cors from '../lib/cors';

export const config = {
  runtime: 'edge',
};

const API =
  'https://southeastasia.api.speech.microsoft.com/accfreetrial/texttospeech/acc/v3.0-beta1/vcg/speak';

const MICROSOFT_SPEECH_ALLOW_ORIGINS =
  process.env?.MICROSOFT_SPEECH_ALLOW_ORIGINS?.split(',') || undefined;

export default async (req: Request) => {
  if (req.method !== 'POST') return new Response('Method Not Allowed', { status: 405 });

  let origin = '*';

  if (MICROSOFT_SPEECH_ALLOW_ORIGINS) {
    const reqOrigin = req.headers.get('origin');
    if (reqOrigin && MICROSOFT_SPEECH_ALLOW_ORIGINS.includes(reqOrigin)) {
      origin = reqOrigin;
    } else {
      return new Response('Origin Not Allowed', { status: 403 });
    }
  }

  const res = await fetch(API, { body: req.body, headers: req.headers, method: 'POST' });
  const newResponse = new Response(res.body, res);

  return cors(req, newResponse, { methods: ['POST'], origin });
};
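For context, a hedged sketch of how a browser client could call this proxy route; the JSON body mirrors the payload built in `fetchMicrosoftSpeech` later in this commit, and the `/api/microsoft-speech` path matches the new default in `src/const/api.ts`:

```ts
// Minimal client-side call to the proxy route above; the JSON body mirrors the
// payload built in fetchMicrosoftSpeech later in this commit.
const speakViaProxy = async (ssml: string): Promise<ArrayBuffer> => {
  const res = await fetch('/api/microsoft-speech', {
    body: JSON.stringify({
      offsetInPlainText: 0,
      ssml,
      ttsAudioFormat: 'audio-24khz-160kbitrate-mono-mp3',
    }),
    headers: { 'content-type': 'application/json' },
    method: 'POST',
  });
  if (!res.ok) throw new Error('Microsoft Speech proxy request failed');
  return res.arrayBuffer();
};
```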
42 changes: 0 additions & 42 deletions lib/fetchMicrosoftSpeech.ts

This file was deleted.

35 changes: 0 additions & 35 deletions lib/genSSML.ts

This file was deleted.

7 changes: 6 additions & 1 deletion src/const/api.ts
@@ -1,10 +1,15 @@
import urlJoin from 'url-join';

export const MICROSOFT_SPEECH_PROXY_URL = process.env.MICROSOFT_SPEECH_PROXY_URL || '';
export const MICROSOFT_SPEECH_PROXY_URL =
  process.env.MICROSOFT_SPEECH_PROXY_URL || '/api/microsoft-speech';
export const AZURE_SPEECH_KEY = process.env.AZURE_SPEECH_KEY || '';
export const AZURE_SPEECH_REGION = process.env.AZURE_SPEECH_REGION || '';
export const OPENAI_API_KEY = process.env.OPENAI_API_KEY || '';
export const OPENAI_PROXY_URL = process.env.OPENAI_PROXY_URL || 'https://api.openai.com/v1';
export const OPENAI_TTS_URL = (api?: string) => urlJoin(api || OPENAI_PROXY_URL, 'audio/speech');
export const OPENAI_STT_URL = (api: string) =>
  urlJoin(api || OPENAI_PROXY_URL, 'audio/transcriptions');
export const EDDGE_PROXY_URL =
  process.env.EDDGE_PROXY_URL ||
  'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1';
export const EDDGE_API_TOKEN = process.env.EDDGE_API_TOKEN || '6A5AA1D4EAFF4E9FB37E23D68491D6F4';
10 changes: 5 additions & 5 deletions src/index.ts
@@ -1,8 +1,8 @@
export { fetchAzureSpeech } from './services/fetchAzureSpeech';
export { fetchEdgeSpeech } from './services/fetchEdgeSpeech';
export { fetchMicrosoftSpeech } from './services/fetchMicrosoftSpeech';
export { fetchOpenaiSTT } from './services/fetchOpenaiSTT';
export { fetchOpenaiTTS } from './services/fetchOpenaiTTS';
export { type AzureSpeechOptions, fetchAzureSpeech } from './services/fetchAzureSpeech';
export { type EdgeSpeechOptions, fetchEdgeSpeech } from './services/fetchEdgeSpeech';
export { fetchMicrosoftSpeech, type MicrosoftSpeechOptions } from './services/fetchMicrosoftSpeech';
export { fetchOpenaiSTT, type OpenaiSttOptions } from './services/fetchOpenaiSTT';
export { fetchOpenaiTTS, type OpenaiTtsOptions } from './services/fetchOpenaiTTS';
export { useAzureSpeech } from './useAzureSpeech';
export { useEdgeSpeech } from './useEdgeSpeech';
export { useMicrosoftSpeech } from './useMicrosoftSpeech';
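With the options interfaces re-exported from the entry point, callers can type their configuration directly; a short illustrative sketch, where the package name `@lobehub/tts` is an assumption based on the repository name:

```ts
// Hypothetical consumer imports (the package name @lobehub/tts is assumed from
// the repository name); each options interface is re-exported next to its fetcher.
import {
  type AzureSpeechOptions,
  type OpenaiSttOptions,
  fetchAzureSpeech,
  fetchOpenaiSTT,
} from '@lobehub/tts';
```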
3 changes: 1 addition & 2 deletions src/services/fetchAzureSpeech.ts
@@ -9,8 +9,7 @@ import {
} from 'microsoft-cognitiveservices-speech-sdk';

import { AZURE_SPEECH_KEY, AZURE_SPEECH_REGION } from '@/const/api';

import { type SsmlOptions, genSSML } from '../utils/genSSML';
import { type SsmlOptions, genSSML } from '@/utils/genSSML';

export interface AzureSpeechOptions extends SsmlOptions {
  api: {
22 changes: 13 additions & 9 deletions src/services/fetchEdgeSpeech.ts
@@ -1,14 +1,18 @@
import qs from 'query-string';
import { v4 as uuidv4 } from 'uuid';

import { type SsmlOptions, genSSML } from '../utils/genSSML';
import { genSendContent } from '../utils/genSendContent';
import { getHeadersAndData } from '../utils/getHeadersAndData';
import { EDDGE_API_TOKEN, EDDGE_PROXY_URL } from '@/const/api';
import { type SsmlOptions, genSSML } from '@/utils/genSSML';
import { genSendContent } from '@/utils/genSendContent';
import { getHeadersAndData } from '@/utils/getHeadersAndData';

const API = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1';
const TOKEN = '6A5AA1D4EAFF4E9FB37E23D68491D6F4';

export const fetchEdgeSpeech = async (text: string, options: SsmlOptions) => {
export interface EdgeSpeechOptions extends Pick<SsmlOptions, 'name'> {
  api: {
    key: string;
    proxy: string;
  };
}
export const fetchEdgeSpeech = async (text: string, { api, ...options }: EdgeSpeechOptions) => {
  const connectId = uuidv4().replaceAll('-', '');
  const date = new Date().toString();
  const audioContext = new AudioContext();
@@ -18,9 +22,9 @@ export const fetchEdgeSpeech = async (text: string, options: SsmlOptions) => {
    qs.stringifyUrl({
      query: {
        ConnectionId: connectId,
        TrustedClientToken: TOKEN,
        TrustedClientToken: api.key || EDDGE_API_TOKEN,
      },
      url: API,
      url: api.proxy || EDDGE_PROXY_URL,
    }),
  );
  ws.binaryType = 'arraybuffer';
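A minimal usage sketch of the new `api` field; the values are placeholders, and empty strings fall back to the `EDDGE_API_TOKEN` / `EDDGE_PROXY_URL` constants from `src/const/api.ts`:

```ts
// fetchEdgeSpeech now takes an explicit api block; empty values fall back to
// the EDDGE_API_TOKEN / EDDGE_PROXY_URL constants from src/const/api.ts.
import { type EdgeSpeechOptions, fetchEdgeSpeech } from '@lobehub/tts'; // package name assumed

const options: EdgeSpeechOptions = {
  api: {
    key: '', // falls back to EDDGE_API_TOKEN
    proxy: '', // falls back to EDDGE_PROXY_URL
  },
  name: 'zh-CN-XiaoxiaoNeural', // placeholder voice name
};

// The return value is not shown in this hunk, so it is left untyped here.
const speak = (text: string) => fetchEdgeSpeech(text, options);
```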
45 changes: 36 additions & 9 deletions src/services/fetchMicrosoftSpeech.ts
@@ -1,8 +1,8 @@
import qs from 'query-string';
import { v4 as uuidv4 } from 'uuid';

import { MICROSOFT_SPEECH_PROXY_URL } from '@/const/api';

import { type SsmlOptions } from '../utils/genSSML';
import { type SsmlOptions } from '@/utils/genSSML';
import { genSSML } from '@/utils/genSSML';

export interface MicrosoftSpeechOptions extends SsmlOptions {
  api?: string;
@@ -12,12 +12,39 @@ export const fetchMicrosoftSpeech = async (
  text: string,
  { api, ...options }: MicrosoftSpeechOptions,
): Promise<AudioBufferSourceNode> => {
  const response: Response = await fetch(
    qs.stringifyUrl({
      query: { text, ...options },
      url: api || MICROSOFT_SPEECH_PROXY_URL,
    }),
  );
  const data = JSON.stringify({
    offsetInPlainText: 0,
    properties: {
      SpeakTriggerSource: 'AccTuningPagePlayButton',
    },
    ssml: genSSML(text, options),
    ttsAudioFormat: 'audio-24khz-160kbitrate-mono-mp3',
  });

  const DEFAULT_HEADERS = new Headers({
    'accept': '*/*',
    'accept-language': 'zh-CN,zh;q=0.9',
    'authority': 'southeastasia.api.speech.microsoft.com',
    'content-type': 'application/json',
    'customvoiceconnectionid': uuidv4(),
    'origin': 'https://speech.microsoft.com',
    'sec-ch-ua': '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent':
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
  });

  const response: Response = await fetch(api || MICROSOFT_SPEECH_PROXY_URL, {
    body: data,
    headers: DEFAULT_HEADERS,
    method: 'POST',
    // @ts-ignore
    responseType: 'arraybuffer',
  });

  if (!response.ok) {
    throw new Error('Network response was not ok');
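Since the function still resolves to an `AudioBufferSourceNode`, playback on the caller side stays the same; a brief sketch with placeholder values (the package name and the optionality of the remaining SSML fields are assumptions):

```ts
// fetchMicrosoftSpeech now POSTs SSML to the proxy route, but still resolves to
// an AudioBufferSourceNode, so playback on the caller side is unchanged.
import { fetchMicrosoftSpeech } from '@lobehub/tts'; // package name assumed

const play = async () => {
  const source = await fetchMicrosoftSpeech('你好，世界', {
    name: 'zh-CN-XiaoxiaoNeural', // placeholder voice; other SSML options assumed optional
  });
  source.start();
};
```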
4 changes: 2 additions & 2 deletions src/services/fetchOpenaiSTT.ts
@@ -2,7 +2,7 @@ import { v4 as uuidv4 } from 'uuid';

import { OPENAI_API_KEY, OPENAI_STT_URL } from '@/const/api';

export interface OpenaiTtsOptions {
export interface OpenaiSttOptions {
  api: {
    key: string;
    proxy: string;
@@ -13,7 +13,7 @@ export interface OpenaiTtsOptions {
// Transcribe speech audio to text
export const fetchOpenaiSTT = async (
  speech: Blob,
  { api, model = 'whisper-1' }: OpenaiTtsOptions,
  { api, model = 'whisper-1' }: OpenaiSttOptions,
): Promise<string> => {
  const key = api.key || OPENAI_API_KEY;
  const url = OPENAI_STT_URL(api.proxy);
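The rename to `OpenaiSttOptions` only changes the type name; a short usage sketch with a placeholder key (the package name is assumed, and the proxy fallback follows `OPENAI_STT_URL` in `src/const/api.ts`):

```ts
// fetchOpenaiSTT transcribes an audio Blob and resolves to the transcript text.
import { type OpenaiSttOptions, fetchOpenaiSTT } from '@lobehub/tts'; // package name assumed

const sttOptions: OpenaiSttOptions = {
  api: { key: 'sk-placeholder', proxy: '' }, // an empty proxy falls back to OPENAI_PROXY_URL
};

const transcribe = (speech: Blob): Promise<string> => fetchOpenaiSTT(speech, sttOptions);
```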
5 changes: 2 additions & 3 deletions src/services/fetchOpenaiTTS.ts
@@ -1,10 +1,9 @@
import { OPENAI_API_KEY, OPENAI_TTS_URL } from '@/const/api';

import { type SsmlOptions } from '../utils/genSSML';
import { type SsmlOptions } from '@/utils/genSSML';

export type OpenaiVoice = 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';

export interface OpenaiTtsOptions extends SsmlOptions {
export interface OpenaiTtsOptions extends Pick<SsmlOptions, 'name'> {
  api: {
    key: string;
    proxy: string;
11 changes: 7 additions & 4 deletions src/useAzureSpeech/index.ts
@@ -1,17 +1,20 @@
import { useState } from 'react';
import useSWR from 'swr';

import { AzureSpeechOptions, fetchAzureSpeech } from '../services/fetchAzureSpeech';
import { AzureSpeechOptions, fetchAzureSpeech } from '@/services/fetchAzureSpeech';

export const useAzureSpeech = (defaultText: string, options: AzureSpeechOptions) => {
export const useAzureSpeech = (
  defaultText: string,
  { api, name, style, pitch, rate }: AzureSpeechOptions,
) => {
  const [data, setDate] = useState<AudioBufferSourceNode>();
  const [text, setText] = useState<string>(defaultText);
  const [shouldFetch, setShouldFetch] = useState<boolean>(false);
  const [isPlaying, setIsPlaying] = useState<boolean>(false);

  const { isLoading } = useSWR(
    shouldFetch ? [options.name, text].join('-') : null,
    () => fetchAzureSpeech(text, options),
    shouldFetch ? [name, text].join('-') : null,
    () => fetchAzureSpeech(text, { api, name, pitch, rate, style }),
    {
      onError: () => setShouldFetch(false),
      onSuccess: (audioBufferSource) => {
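A hedged sketch of the updated hook signature in a component; the `api` fields and voice name are placeholders, and the hook's return shape is not shown in this hunk:

```ts
// Hypothetical component using the updated hook signature; the api value and
// voice name are placeholders.
import { useAzureSpeech } from '@lobehub/tts'; // package name assumed

const AzureDemo = () => {
  const tts = useAzureSpeech('你好，世界', {
    api: { key: 'azure-key-placeholder', region: 'eastus' }, // assumed api fields
    name: 'zh-CN-XiaoxiaoNeural',
  });
  // `tts` exposes loading/playback state and controls, as in the Edge demo below.
  return null;
};
```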
21 changes: 20 additions & 1 deletion src/useEdgeSpeech/demos/index.tsx
@@ -8,6 +8,21 @@ const defaultText = '这是一段使用 Edge Speech 的语音演示';

export default () => {
  const store = useCreateStore();

  const api: any = useControls(
    {
      key: {
        label: 'EDDGE_API_TOKEN',
        value: '',
      },
      proxy: {
        label: 'EDDGE_PROXY_URL',
        value: '',
      },
    },
    { store },
  );

  const options: any = useControls(
    {
      name: {
@@ -17,7 +32,11 @@ export default () => {
    },
    { store },
  );
  const { setText, isLoading, isPlaying, start, stop } = useEdgeSpeech(defaultText, options);

  const { setText, isLoading, isPlaying, start, stop } = useEdgeSpeech(defaultText, {
    api,
    ...options,
  });
  return (
    <StoryBook levaStore={store}>
      <Flexbox gap={8}>