-
Notifications
You must be signed in to change notification settings - Fork 313
/
types.ts
77 lines (68 loc) · 2.18 KB
/
types.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import type { BaseNode } from "../Node.js";
import { MetadataMode, TextNode } from "../Node.js";
import type { TransformComponent } from "../ingestion/types.js";
import { defaultNodeTextTemplate } from "./prompts.js";
/**
 * Abstract base class for all metadata extractors.
 *
 * Subclasses implement {@link BaseExtractor.extract}, which produces one
 * metadata record per input node. {@link BaseExtractor.processNodes} merges
 * those records into the nodes' existing metadata and, optionally, extends the
 * nodes' excluded-key lists and rewrites the text template of `TextNode`s.
 */
export abstract class BaseExtractor implements TransformComponent {
  // NOTE(review): isTextNodeOnly, showProgress, metadataMode and numWorkers are
  // not read anywhere in this class; presumably they are consumed by
  // subclasses — confirm against the concrete extractors.
  isTextNodeOnly: boolean = true;
  showProgress: boolean = true;
  metadataMode: MetadataMode = MetadataMode.ALL;
  /** When true, `processNodes` does not replace the text template of `TextNode`s. */
  disableTemplateRewrite: boolean = false;
  /**
   * When true, `processNodes` works directly on the array passed by the
   * caller; otherwise it works on a shallow copy of that array.
   */
  inPlace: boolean = true;
  numWorkers: number = 4;

  /**
   * Extracts metadata for the given nodes.
   *
   * @param nodes Nodes to extract metadata from.
   * @returns One metadata record per node; the record at index `i` is merged
   * into `nodes[i].metadata` by `processNodes`.
   */
  abstract extract(nodes: BaseNode[]): Promise<Record<string, any>[]>;

  /**
   * `TransformComponent` entry point; delegates to `processNodes`.
   *
   * @param nodes Nodes to extract metadata from.
   * @param options Optional bag; only `excludedEmbedMetadataKeys` and
   * `excludedLlmMetadataKeys` are read from it.
   * @returns The processed nodes.
   */
  async transform(nodes: BaseNode[], options?: any): Promise<BaseNode[]> {
    return this.processNodes(
      nodes,
      options?.excludedEmbedMetadataKeys,
      options?.excludedLlmMetadataKeys,
    );
  }

  /**
   * Runs `extract` on the nodes and merges the extracted metadata into them.
   *
   * @param nodes Nodes to extract metadata from.
   * @param excludedEmbedMetadataKeys Metadata keys to exclude from the embedding;
   * appended to each node's `excludedEmbedMetadataKeys`.
   * @param excludedLlmMetadataKeys Metadata keys to exclude from the LLM;
   * appended to each node's `excludedLlmMetadataKeys`.
   * @returns The processed nodes. This is the input array itself when
   * `inPlace` is true, otherwise a new array.
   */
  async processNodes(
    nodes: BaseNode[],
    excludedEmbedMetadataKeys: string[] | undefined = undefined,
    excludedLlmMetadataKeys: string[] | undefined = undefined,
  ): Promise<BaseNode[]> {
    // NOTE(review): slice() is a shallow copy, so even with inPlace === false
    // the node objects are shared with the caller and are mutated below. A
    // true copy needs a node-cloning API that is not visible from this file.
    const newNodes: BaseNode[] = this.inPlace ? nodes : nodes.slice();

    const curMetadataList = await this.extract(newNodes);

    // First pass: merge extracted metadata; extracted keys win on conflict.
    // Kept separate from the second pass so that every merge has happened
    // before any node is replaced by a rewritten TextNode.
    for (const [idx, node] of newNodes.entries()) {
      node.metadata = {
        ...node.metadata,
        ...curMetadataList[idx],
      };
    }

    // Second pass: extend the excluded-key lists and rewrite text templates.
    for (const [idx, node] of newNodes.entries()) {
      if (excludedEmbedMetadataKeys) {
        // Assign the combined array back: concat()/spread never mutate the
        // original list, so discarding the result would drop the new keys.
        node.excludedEmbedMetadataKeys = [
          ...node.excludedEmbedMetadataKeys,
          ...excludedEmbedMetadataKeys,
        ];
      }
      if (excludedLlmMetadataKeys) {
        node.excludedLlmMetadataKeys = [
          ...node.excludedLlmMetadataKeys,
          ...excludedLlmMetadataKeys,
        ];
      }
      if (!this.disableTemplateRewrite && node instanceof TextNode) {
        // Replace the node with a new TextNode built from its own enumerable
        // properties, overriding only the text template.
        newNodes[idx] = new TextNode({
          ...node,
          textTemplate: defaultNodeTextTemplate(),
        });
      }
    }

    return newNodes;
  }
}