Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions modules/tool/packages/whisper/config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import { defineTool } from '@tool/type';
import { FlowNodeInputTypeEnum, WorkflowIOValueTypeEnum } from '@tool/type/fastgpt';
import { ToolTypeEnum } from '@tool/type/tool';

// Model identifiers offered in the `model` dropdown; each id doubles as its label.
const transcriptionModelIds = [
  'whisper-1',
  'gpt-4o-transcribe',
  'gpt-4o-mini-transcribe',
  'gpt-4o-transcribe-diarize'
];

/**
 * Static definition of the Whisper speech-to-text tool.
 *
 * Declares the localized name/description, the secret inputs (`baseUrl`,
 * `apiKey`), and a single version `0.1.0` that takes a `model` and a `file`
 * string and produces a `text` string.
 */
export default defineTool({
  name: {
    'zh-CN': 'Whisper 语音转文字',
    en: 'Whisper Speech-to-Text'
  },
  type: ToolTypeEnum.multimodal,
  description: {
    'zh-CN': '使用 OpenAI Whisper 模型将音频文件转换为文字,支持多种音频格式和多语言识别',
    en: 'Convert audio files to text using OpenAI Whisper model, supporting multiple audio formats and multilingual recognition'
  },
  courseUrl: 'https://platform.openai.com/docs/pricing',
  icon: 'common/openai',
  toolDescription:
    'Convert audio files to text using OpenAI Whisper speech recognition API. Supports multiple audio formats and languages.',

  // Credentials / endpoint settings; only the API key is mandatory.
  secretInputConfig: [
    {
      key: 'baseUrl',
      label: 'BaseUrl',
      inputType: 'input',
      description: '默认为:https://api.openai.com/v1',
      defaultValue: 'https://api.openai.com/v1'
    },
    {
      key: 'apiKey',
      label: 'API Key',
      required: true,
      inputType: 'secret'
    }
  ],

  versionList: [
    {
      value: '0.1.0',
      description: 'Default version',
      inputs: [
        // Which transcription model to call; selectable or wired from another node.
        {
          key: 'model',
          label: '模型',
          toolDescription: 'Whisper model to use for transcription',
          renderTypeList: [FlowNodeInputTypeEnum.select, FlowNodeInputTypeEnum.reference],
          valueType: WorkflowIOValueTypeEnum.string,
          required: true,
          defaultValue: 'whisper-1',
          list: transcriptionModelIds.map((id) => ({ label: id, value: id }))
        },
        // The audio payload, passed as a string (URL or base64 per the description below).
        {
          key: 'file',
          label: '音频文件',
          toolDescription:
            '音频文件,支持 URL 或 base64 格式。URL 格式如:https://example.com/audio.mp3,base64 格式如:data:audio/mp3;base64,xxx...',
          renderTypeList: [FlowNodeInputTypeEnum.textarea, FlowNodeInputTypeEnum.reference],
          valueType: WorkflowIOValueTypeEnum.string,
          required: true,
          placeholder: '输入音频文件 URL 或 base64 数据'
        }
      ],
      outputs: [
        // The transcribed text.
        {
          valueType: WorkflowIOValueTypeEnum.string,
          key: 'text',
          label: '文本'
        }
      ]
    }
  ]
});
10 changes: 10 additions & 0 deletions modules/tool/packages/whisper/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import config from './config';
import { InputType, OutputType, tool as toolCb } from './src';
import { exportTool } from '@tool/utils/tool';

// Combine the runtime callback, the input/output schemas and the static
// config into the package's default export via `exportTool`.
const whisperTool = exportTool({
  toolCb,
  InputType,
  OutputType,
  config
});

export default whisperTool;
17 changes: 17 additions & 0 deletions modules/tool/packages/whisper/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"name": "@fastgpt-plugins/tool-whisper",
"module": "index.ts",
"type": "module",
"scripts": {
"build": "bun ../../../../scripts/build.ts"
},
"devDependencies": {
"@types/bun": "^1.2.2"
},
"peerDependencies": {
"typescript": "^5.0.0"
},
"dependencies": {
"zod": "^3.24.3"
}
}
68 changes: 68 additions & 0 deletions modules/tool/packages/whisper/src/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import { z } from 'zod';
import { POST, GET } from '@tool/utils/request';

// Shared schema for the string fields that must be present and non-empty.
const requiredText = z.string().nonempty();

/**
 * Runtime input schema of the tool.
 *
 * - `baseUrl`: optional; falls back to `https://api.openai.com/v1` when omitted.
 * - `apiKey`, `file`, `model`: required, non-empty strings.
 */
export const InputType = z.object({
  baseUrl: z.string().optional().default('https://api.openai.com/v1'),
  apiKey: requiredText,
  file: requiredText,
  model: requiredText
});

/** Runtime output schema of the tool: a single `text` string. */
export const OutputType = z.object({ text: z.string() });

/** Extension (and MIME type) used when the real audio format cannot be determined. */
const FALLBACK_AUDIO_EXTENSION = 'm4a';
const FALLBACK_AUDIO_MIME = 'audio/m4a';

/** Maps MIME subtypes and URL path extensions (lower-case) to the upload file extension. */
const AUDIO_EXTENSIONS = new Map<string, string>([
  ['mp3', 'mp3'],
  ['mpeg', 'mp3'],
  ['mpga', 'mpga'],
  ['mp4', 'mp4'],
  ['m4a', 'm4a'],
  ['x-m4a', 'm4a'],
  ['wav', 'wav'],
  ['x-wav', 'wav'],
  ['wave', 'wav'],
  ['webm', 'webm'],
  ['ogg', 'ogg'],
  ['oga', 'ogg'],
  ['flac', 'flac'],
  ['x-flac', 'flac']
]);

/**
 * `data:audio/<subtype>[;param=value...];base64,<payload>`.
 * Group 1 is the MIME type, group 2 the base64 payload (may contain whitespace).
 */
const AUDIO_DATA_URL = /^data:(audio\/[^;,\s]+)(?:;[^;,]+)*;base64,([\s\S]+)$/i;

/** Resolves a MIME subtype or path extension to a known extension, if any. */
function knownAudioExtension(hint: string | undefined): string | undefined {
  return hint ? AUDIO_EXTENSIONS.get(hint.toLowerCase()) : undefined;
}

/** Returns the subtype of a MIME type, e.g. `audio/webm;codecs=opus` -> `webm`. */
function mimeSubtype(mimeType: string): string | undefined {
  return mimeType.split(';')[0].split('/')[1]?.trim();
}

/** Picks an upload file name for a downloaded URL: path extension first, then MIME type. */
function fileNameForUrl(url: string, mimeType: string): string {
  let pathExtension: string | undefined;
  try {
    pathExtension = /\.([A-Za-z0-9]+)$/.exec(new URL(url).pathname)?.[1];
  } catch {
    // Unparsable URL: fall through to the MIME type / fallback extension.
  }
  const extension =
    knownAudioExtension(pathExtension) ??
    knownAudioExtension(mimeSubtype(mimeType)) ??
    FALLBACK_AUDIO_EXTENSION;
  return `audio.${extension}`;
}

/**
 * Decodes a base64 payload into bytes.
 * Whitespace (spaces, line breaks) is ignored; returns `undefined` when the
 * payload is empty or is not valid base64.
 */
function decodeBase64(payload: string): Uint8Array | undefined {
  const compact = payload.replace(/\s+/g, '');
  if (!/^[A-Za-z0-9+/]+={0,2}$/.test(compact)) {
    return undefined;
  }
  try {
    return Uint8Array.from(atob(compact), (c) => c.charCodeAt(0));
  } catch {
    // atob throws on malformed input (e.g. impossible length).
    return undefined;
  }
}

/**
 * Converts the tool's `file` input into a `File` that can be put in a form.
 *
 * Accepted forms:
 * - an `http(s)://` URL, which is downloaded as a blob;
 * - an `audio/*` data URL with base64 payload (extra MIME parameters allowed);
 * - a bare base64 string.
 *
 * The file name extension and MIME type follow the detected format where it
 * is known (URL path extension, blob type or data-URL MIME type) and fall
 * back to `audio.m4a` / `audio/m4a` otherwise.
 *
 * @param file URL, data URL or base64 string; surrounding whitespace is ignored.
 * @returns The audio content wrapped in a `File`.
 * @throws Rejects with a string message when the input is not a URL or valid base64.
 */
async function inputToFile(file: string): Promise<File> {
  const input = file.trim();

  if (/^https?:\/\//i.test(input)) {
    const { data } = await GET(input, { responseType: 'blob' });
    const mimeType = data.type || FALLBACK_AUDIO_MIME;
    return new File([data], fileNameForUrl(input, mimeType), { type: mimeType });
  }

  // base64 with a "data:" prefix
  if (/^data:/i.test(input)) {
    const match = AUDIO_DATA_URL.exec(input);
    const bytes = match ? decodeBase64(match[2]) : undefined;
    if (!match || !bytes) {
      return Promise.reject('Invalid base64 format. Please provide a valid base64 data.');
    }
    const mimeType = match[1].toLowerCase();
    const extension = knownAudioExtension(mimeSubtype(mimeType)) ?? FALLBACK_AUDIO_EXTENSION;
    return new File([bytes], `audio.${extension}`, { type: mimeType });
  }

  // bare base64 string: the format is unknown, so the fallback name/type is used
  const bytes = decodeBase64(input);
  if (bytes) {
    return new File([bytes], `audio.${FALLBACK_AUDIO_EXTENSION}`, { type: FALLBACK_AUDIO_MIME });
  }

  return Promise.reject('Invalid file format. Please provide a URL or base64 data.');
}

/**
 * Builds the transcription endpoint URL from the configured base URL.
 * Trailing slashes are dropped and a missing/blank base URL falls back to the
 * public OpenAI endpoint, so the result never contains `//audio` or a
 * relative path.
 */
function transcriptionEndpoint(baseUrl: string | undefined): string {
  const root = (baseUrl ?? '').trim().replace(/\/+$/, '') || 'https://api.openai.com/v1';
  return `${root}/audio/transcriptions`;
}

/**
 * Transcribes an audio input by posting it to `<baseUrl>/audio/transcriptions`.
 *
 * @param baseUrl API root; trailing slashes are tolerated.
 * @param apiKey Sent as a `Bearer` token in the `Authorization` header.
 * @param file Audio input string, converted to a `File` by `inputToFile`.
 * @param model Model identifier sent in the `model` form field.
 * @returns The `text` field of the response (may be an empty string).
 * @throws Rejects with a string message when the audio is empty or the
 *   response carries no `text` string.
 */
export async function tool({
  baseUrl,
  apiKey,
  file,
  model
}: z.infer<typeof InputType>): Promise<z.infer<typeof OutputType>> {
  // Convert file input to File object
  const audioFile = await inputToFile(file);
  if (audioFile.size === 0) {
    return Promise.reject('Audio file is empty');
  }

  const formData = new FormData();
  formData.append('file', audioFile);
  formData.append('model', model);
  if (model.includes('diarize')) {
    // Per the OpenAI API reference, diarization models require
    // `chunking_strategy` for inputs longer than 30 seconds.
    formData.append('chunking_strategy', 'auto');
  }

  const { data } = await POST(transcriptionEndpoint(baseUrl), formData, {
    headers: {
      Authorization: `Bearer ${apiKey}`
    }
  });

  // An empty string is a valid transcription (e.g. silent audio); only a
  // missing or non-string `text` is treated as a failure.
  const text = data?.text;
  if (typeof text !== 'string') {
    return Promise.reject('No transcription text found in response');
  }

  return { text };
}
8 changes: 8 additions & 0 deletions modules/tool/packages/whisper/test/index.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import { expect, test } from 'vitest';
import tool from '..';

// Smoke test: the package's default export exposes `name`, `description`
// and the `cb` callback. Registered with an explicit title, following the
// `test(name, fn)` signature.
test('whisper tool exposes name, description and callback', () => {
  expect(tool.name).toBeDefined();
  expect(tool.description).toBeDefined();
  expect(tool.cb).toBeDefined();
});
1 change: 1 addition & 0 deletions modules/tool/type/fastgpt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ export const InputConfigSchema = z.object({
description: z.string().optional(),
required: z.boolean().optional(),
inputType: z.enum(['input', 'numberInput', 'secret', 'switch', 'select']),
defaultValue: z.any().optional(),

// select
list: z
Expand Down