labring · c121914yu · Oct 16, 2025 · Oct 16, 2025 · Oct 16, 2025
diff --git a/modules/tool/packages/whisper/config.ts b/modules/tool/packages/whisper/config.ts
@@ -0,0 +1,74 @@
+import { defineTool } from '@tool/type';
+import { FlowNodeInputTypeEnum, WorkflowIOValueTypeEnum } from '@tool/type/fastgpt';
+import { ToolTypeEnum } from '@tool/type/tool';
+
+// Model ids offered by the `model` select below; each option's label equals its value.
+const whisperModelIds = [
+  'whisper-1',
+  'gpt-4o-transcribe',
+  'gpt-4o-mini-transcribe',
+  'gpt-4o-transcribe-diarize'
+];
+
+// Static manifest of the Whisper tool: display metadata, secret inputs and the
+// input/output contract of each published version.
+export default defineTool({
+  name: {
+    'zh-CN': 'Whisper 语音转文字',
+    en: 'Whisper Speech-to-Text'
+  },
+  type: ToolTypeEnum.multimodal,
+  description: {
+    'zh-CN': '使用 OpenAI Whisper 模型将音频文件转换为文字，支持多种音频格式和多语言识别',
+    en: 'Convert audio files to text using OpenAI Whisper model, supporting multiple audio formats and multilingual recognition'
+  },
+  courseUrl: 'https://platform.openai.com/docs/pricing',
+  icon: 'common/openai',
+  toolDescription:
+    'Convert audio files to text using OpenAI Whisper speech recognition API. Supports multiple audio formats and languages.',
+  // Secrets configured once per tool; both keys are read by the handler's input schema.
+  secretInputConfig: [
+    {
+      key: 'baseUrl',
+      label: 'BaseUrl',
+      inputType: 'input',
+      description: '默认为：https://api.openai.com/v1',
+      defaultValue: 'https://api.openai.com/v1'
+    },
+    {
+      key: 'apiKey',
+      label: 'API Key',
+      required: true,
+      inputType: 'secret'
+    }
+  ],
+  versionList: [
+    {
+      value: '0.1.0',
+      description: 'Default version',
+      inputs: [
+        {
+          key: 'model',
+          label: '模型',
+          toolDescription: 'Whisper model to use for transcription',
+          renderTypeList: [FlowNodeInputTypeEnum.select, FlowNodeInputTypeEnum.reference],
+          valueType: WorkflowIOValueTypeEnum.string,
+          required: true,
+          defaultValue: 'whisper-1',
+          list: whisperModelIds.map((id) => ({ label: id, value: id }))
+        },
+        {
+          // Audio is passed as text: either a URL or base64 data.
+          key: 'file',
+          label: '音频文件',
+          toolDescription:
+            '音频文件，支持 URL 或 base64 格式。URL 格式如：https://example.com/audio.mp3，base64 格式如：data:audio/mp3;base64,xxx...',
+          renderTypeList: [FlowNodeInputTypeEnum.textarea, FlowNodeInputTypeEnum.reference],
+          valueType: WorkflowIOValueTypeEnum.string,
+          required: true,
+          placeholder: '输入音频文件 URL 或 base64 数据'
+        }
+      ],
+      outputs: [
+        {
+          valueType: WorkflowIOValueTypeEnum.string,
+          key: 'text',
+          label: '文本'
+        }
+      ]
+    }
+  ]
+});
diff --git a/modules/tool/packages/whisper/index.ts b/modules/tool/packages/whisper/index.ts
@@ -0,0 +1,10 @@
+import config from './config';
+import { InputType, OutputType, tool as toolCb } from './src';
+import { exportTool } from '@tool/utils/tool';
+
+// Package entry point: bundles the handler, its zod input/output schemas and the
+// static manifest into the object returned by `exportTool`.
+const whisperTool = exportTool({ toolCb, InputType, OutputType, config });
+
+export default whisperTool;
diff --git a/modules/tool/packages/whisper/package.json b/modules/tool/packages/whisper/package.json
@@ -0,0 +1,17 @@
+{
+  "name": "@fastgpt-plugins/tool-whisper",
+  "module": "index.ts",
+  "type": "module",
+  "scripts": {
+    "build": "bun ../../../../scripts/build.ts"
+  },
+  "devDependencies": {
+    "@types/bun": "^1.2.2"
+  },
+  "peerDependencies": {
+    "typescript": "^5.0.0"
+  },
+  "dependencies": {
+    "zod": "^3.24.3"
+  }
+}
diff --git a/modules/tool/packages/whisper/src/index.ts b/modules/tool/packages/whisper/src/index.ts
@@ -0,0 +1,68 @@
+import { z } from 'zod';
+import { POST, GET } from '@tool/utils/request';
+
+/**
+ * Validated input of the tool: the two secret values (`baseUrl`, `apiKey`) plus the
+ * workflow inputs (`file`, `model`).
+ */
+export const InputType = z.object({
+  // The default only applies when the value is absent (`undefined`).
+  baseUrl: z.string().optional().default('https://api.openai.com/v1'),
+  // `.min(1)` is the non-deprecated spelling of `z.string().nonempty()`; same validation.
+  apiKey: z.string().min(1),
+  // URL, data URL or bare base64 string.
+  file: z.string().min(1),
+  model: z.string().min(1)
+});
+
+/** Result handed back to the workflow: the transcribed text. */
+export const OutputType = z.object({
+  text: z.string()
+});
+
+// File extension to use for a given MIME type (parameters stripped, lower-cased).
+const EXTENSION_BY_MIME = new Map<string, string>([
+  ['audio/flac', 'flac'],
+  ['audio/x-flac', 'flac'],
+  ['audio/m4a', 'm4a'],
+  ['audio/x-m4a', 'm4a'],
+  ['audio/mp4', 'm4a'],
+  ['audio/mp3', 'mp3'],
+  ['audio/mpeg', 'mp3'],
+  ['audio/mpga', 'mpga'],
+  ['audio/ogg', 'ogg'],
+  ['audio/wav', 'wav'],
+  ['audio/wave', 'wav'],
+  ['audio/x-wav', 'wav'],
+  ['audio/webm', 'webm'],
+  ['video/webm', 'webm'],
+  ['video/mp4', 'mp4'],
+  ['video/mpeg', 'mpeg']
+]);
+
+// Extensions accepted when they are read from a URL path.
+const KNOWN_AUDIO_EXTENSIONS = new Set(EXTENSION_BY_MIME.values());
+
+// Name/type used when the real format cannot be determined.
+const FALLBACK_EXTENSION = 'm4a';
+const FALLBACK_MIME = 'audio/m4a';
+
+// `data:<audio|video mime>[;param...];base64,<payload>`; group 1 = MIME type, group 2 = payload.
+const DATA_URL_PATTERN = /^data:((?:audio|video)\/[^;,]+)(?:;[^;,]*)*;base64,([\s\S]+)$/i;
+
+// Look up the file extension for a MIME type; undefined when the type is missing or unknown.
+function extensionForMime(mime: unknown): string | undefined {
+  if (typeof mime !== 'string') return undefined;
+  const essence = mime.split(';', 1)[0]?.trim().toLowerCase() ?? '';
+  return EXTENSION_BY_MIME.get(essence);
+}
+
+// Read a known audio extension from the path of a URL; undefined when there is none.
+function extensionFromUrl(url: string): string | undefined {
+  try {
+    const ext = /\.([A-Za-z0-9]+)$/.exec(new URL(url).pathname)?.[1]?.toLowerCase();
+    return ext !== undefined && KNOWN_AUDIO_EXTENSIONS.has(ext) ? ext : undefined;
+  } catch {
+    return undefined; // not parseable as a URL
+  }
+}
+
+// Decode base64 (whitespace tolerated) into bytes; undefined when the text is not valid base64.
+function decodeBase64(payload: string) {
+  try {
+    const binary = atob(payload.replace(/\s+/g, ''));
+    return Uint8Array.from(binary, (ch) => ch.charCodeAt(0));
+  } catch {
+    return undefined;
+  }
+}
+
+/**
+ * Convert the `file` input into a `File` ready for a multipart upload.
+ *
+ * Accepted forms (surrounding whitespace is ignored):
+ * - `http://` / `https://` URL: downloaded with `GET`;
+ * - data URL with an `audio/*` or `video/*` MIME type, optional parameters and a base64 payload;
+ * - bare base64 text, possibly wrapped over several lines.
+ *
+ * The file name extension and MIME type follow the declared format when it is known
+ * and fall back to `audio.m4a` / `audio/m4a` otherwise.
+ * NOTE(review): the transcription endpoint is presumed to use the file name to pick the
+ * container format - confirm against the API reference.
+ *
+ * @param file URL, data URL or base64 string.
+ * @returns The audio content as a `File`.
+ * @throws Rejects with a string message when the input is neither a URL nor decodable base64.
+ */
+async function inputToFile(file: string): Promise<File> {
+  const input = file.trim();
+
+  if (/^https?:\/\//i.test(input)) {
+    const { data } = await GET(input, { responseType: 'blob' });
+    // Prefer the type reported for the download; fall back to the URL's own extension.
+    const ext = extensionForMime(data.type) ?? extensionFromUrl(input) ?? FALLBACK_EXTENSION;
+    return new File([data], `audio.${ext}`, { type: data.type || FALLBACK_MIME });
+  }
+
+  if (input.startsWith('data:')) {
+    const parsed = DATA_URL_PATTERN.exec(input);
+    const bytes = parsed ? decodeBase64(parsed[2] ?? '') : undefined;
+    if (!parsed || !bytes) {
+      return Promise.reject('Invalid base64 format. Please provide a valid base64 data.');
+    }
+    const mime = (parsed[1] ?? FALLBACK_MIME).toLowerCase();
+    const ext = extensionForMime(mime) ?? FALLBACK_EXTENSION;
+    return new File([bytes], `audio.${ext}`, { type: mime });
+  }
+
+  // Bare base64: no format information is available, so the fallback name/type is used.
+  const compact = input.replace(/\s+/g, '');
+  if (/^[A-Za-z0-9+/=]+$/.test(compact)) {
+    const bytes = decodeBase64(compact);
+    if (!bytes) {
+      return Promise.reject('Invalid base64 format. Please provide a valid base64 data.');
+    }
+    return new File([bytes], `audio.${FALLBACK_EXTENSION}`, { type: FALLBACK_MIME });
+  }
+
+  return Promise.reject('Invalid file format. Please provide a URL or base64 data.');
+}
+
+/**
+ * Transcribe an audio input by posting it to `<baseUrl>/audio/transcriptions`.
+ *
+ * @param baseUrl API root; blank values fall back to `https://api.openai.com/v1` and
+ *   trailing slashes are ignored.
+ * @param apiKey Sent as a `Bearer` token in the `Authorization` header.
+ * @param file Audio as URL, data URL or base64 string (see `inputToFile`).
+ * @param model Model id forwarded in the `model` form field.
+ * @returns The `text` field of the response.
+ * @throws Rejects with a string message when the audio is empty, the input cannot be
+ *   converted to a file, or the response carries no `text` string.
+ */
+export async function tool({
+  baseUrl,
+  apiKey,
+  file,
+  model
+}: z.infer<typeof InputType>): Promise<z.infer<typeof OutputType>> {
+  const audioFile = await inputToFile(file);
+  if (audioFile.size === 0) {
+    return Promise.reject('Audio file is empty');
+  }
+
+  // The schema default only covers `undefined`, so a blank string would otherwise give a
+  // relative URL; trailing slashes are dropped so the join below never produces `//`.
+  const apiRoot = (baseUrl.trim() || 'https://api.openai.com/v1').replace(/\/+$/, '');
+
+  const formData = new FormData();
+  formData.append('file', audioFile);
+  formData.append('model', model);
+
+  const { data } = await POST(`${apiRoot}/audio/transcriptions`, formData, {
+    headers: {
+      Authorization: `Bearer ${apiKey}`
+    }
+  });
+
+  // An empty string is a valid transcript (e.g. silent audio); only a missing or
+  // non-string field counts as "no text".
+  const text = data?.text;
+  if (typeof text !== 'string') {
+    return Promise.reject('No transcription text found in response');
+  }
+
+  return { text };
+}
diff --git a/modules/tool/packages/whisper/test/index.test.ts b/modules/tool/packages/whisper/test/index.test.ts
@@ -0,0 +1,8 @@
+import { expect, test } from 'vitest';
+import tool from '..';
+
+// Smoke test for the package entry point. `test` takes the title first and the
+// test function second, so the assertions are registered as the test body.
+test('whisper tool exposes name, description and cb', () => {
+  expect(tool.name).toBeDefined();
+  expect(tool.description).toBeDefined();
+  expect(tool.cb).toBeDefined();
+});
diff --git a/modules/tool/type/fastgpt.ts b/modules/tool/type/fastgpt.ts
@@ -84,6 +84,7 @@ export const InputConfigSchema = z.object({
   description: z.string().optional(),
   required: z.boolean().optional(),
   inputType: z.enum(['input', 'numberInput', 'secret', 'switch', 'select']),
+  defaultValue: z.any().optional(),
 
   // select
   list: z