-
Notifications
You must be signed in to change notification settings - Fork 313
/
type.ts
120 lines (114 loc) · 1.88 KB
/
type.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import { fs, path } from "@llamaindex/env";
import type { Document } from "../Node.js";
/**
* A reader imports data into Document objects.
*/
export interface BaseReader {
/**
* Loads data and resolves to the resulting documents.
* The argument list is deliberately open (`unknown[]`); implementations
* declare the concrete inputs they need (e.g. a file path).
*/
loadData(...args: unknown[]): Promise<Document[]>;
}
/**
 * Base class for readers that turn a file on disk into Document objects.
 * Subclasses supply the content parsing; this class handles reading the file
 * and tagging the resulting documents.
 */
export abstract class FileReader implements BaseReader {
  /**
   * Converts the raw content of a file into documents.
   * Implementations deal with the content only; ids and file metadata are
   * filled in afterwards by `loadData`.
   *
   * @param fileContent - The file content as returned by `fs.readFile`.
   * @returns The documents extracted from the content.
   */
  abstract loadDataAsContent(fileContent: Buffer): Promise<Document[]>;

  /**
   * Reads the file at `filePath`, hands its content to `loadDataAsContent`,
   * and tags every resulting document via `addMetaData`.
   *
   * @param filePath - Path of the file to read.
   * @returns The parsed documents, updated in place with id and metadata.
   */
  async loadData(filePath: string): Promise<Document[]> {
    const rawContent = await fs.readFile(filePath);
    const documents = await this.loadDataAsContent(rawContent);

    const tagDocument = FileReader.addMetaData(filePath);
    documents.forEach(tagDocument);

    return documents;
  }

  /**
   * Creates a callback, suitable for `Array.prototype.forEach`, that tags a
   * document with an id and with the file's resolved path and base name.
   *
   * @param filePath - Path of the file the documents were read from.
   * @returns A function that mutates the given document; the index is the
   * document's zero-based position and becomes a one-based id suffix.
   */
  static addMetaData(filePath: string) {
    return (target: Document, position: number): void => {
      // loadDataAsContent is only responsible for the content, so the id is
      // generated here: "<filePath>_<1-based position>".
      const ordinal = position + 1;
      target.id_ = `${filePath}_${ordinal}`;

      target.metadata["file_path"] = path.resolve(filePath);
      target.metadata["file_name"] = path.basename(filePath);
    };
  }
}
// For LlamaParseReader.ts
/**
* Result format selector: exactly one of "text", "markdown" or "json".
* NOTE(review): presumably the output format requested from LlamaParseReader —
* confirm against LlamaParseReader.ts.
*/
export type ResultType = "text" | "markdown" | "json";
/**
* Union of the language codes listed below; a value of this type must be
* exactly one of these string literals.
* NOTE(review): presumably the languages accepted by LlamaParseReader —
* confirm against LlamaParseReader.ts before adding or removing codes.
*/
export type Language =
| "abq"
| "ady"
| "af"
| "ang"
| "ar"
| "as"
| "ava"
| "az"
| "be"
| "bg"
| "bh"
| "bho"
| "bn"
| "bs"
| "ch_sim"
| "ch_tra"
| "che"
| "cs"
| "cy"
| "da"
| "dar"
| "de"
| "en"
| "es"
| "et"
| "fa"
| "fr"
| "ga"
| "gom"
| "hi"
| "hr"
| "hu"
| "id"
| "inh"
| "is"
| "it"
| "ja"
| "kbd"
| "kn"
| "ko"
| "ku"
| "la"
| "lbe"
| "lez"
| "lt"
| "lv"
| "mah"
| "mai"
| "mi"
| "mn"
| "mr"
| "ms"
| "mt"
| "ne"
| "new"
| "nl"
| "no"
| "oc"
| "pi"
| "pl"
| "pt"
| "ro"
| "ru"
| "rs_cyrillic"
| "rs_latin"
| "sck"
| "sk"
| "sl"
| "sq"
| "sv"
| "sw"
| "ta"
| "tab"
| "te"
| "th"
| "tjk"
| "tl"
| "tr"
| "ug"
| "uk"
| "ur"
| "uz"
| "vi";