-
Notifications
You must be signed in to change notification settings - Fork 2.1k
/
openai_whisper_audio.ts
54 lines (48 loc) · 1.45 KB
/
openai_whisper_audio.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import { type ClientOptions, OpenAIClient, toFile } from "@langchain/openai";
import { Document } from "@langchain/core/documents";
import { BufferLoader } from "./buffer.js";
import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
// Emit a one-time deprecation notice at module load: this entrypoint moved to
// @langchain/community in the 0.2.0 reorganization. The /* #__PURE__ */
// annotation lets bundlers tree-shake the call when the module's exports are
// unused.
/* #__PURE__ */ logVersion020MigrationWarning({
  oldEntrypointName: "document_loaders/fs/openai_whisper_audio",
  newPackageName: "@langchain/community",
});
// OpenAI transcription model used for all requests issued by this loader.
const MODEL_NAME = "whisper-1";
/**
* @example
* ```typescript
* const loader = new OpenAIWhisperAudio(
* "./src/document_loaders/example_data/test.mp3",
* );
* const docs = await loader.load();
* console.log(docs);
* ```
*/
export class OpenAIWhisperAudio extends BufferLoader {
private readonly openAIClient: OpenAIClient;
constructor(
filePathOrBlob: string | Blob,
fields?: {
clientOptions?: ClientOptions;
}
) {
super(filePathOrBlob);
this.openAIClient = new OpenAIClient(fields?.clientOptions);
}
protected async parse(
raw: Buffer,
metadata: Record<string, string>
): Promise<Document[]> {
const fileName =
metadata.source === "blob" ? metadata.blobType : metadata.source;
const transcriptionResponse =
await this.openAIClient.audio.transcriptions.create({
file: await toFile(raw, fileName),
model: MODEL_NAME,
});
const document = new Document({
pageContent: transcriptionResponse.text,
metadata,
});
return [document];
}
}