-
Notifications
You must be signed in to change notification settings - Fork 2.2k
/
index.ts
90 lines (85 loc) · 2.87 KB
/
index.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import type { DocumentInterface } from "@langchain/core/documents";
import { BaseDocumentTransformer } from "@langchain/core/documents";
import { Callbacks } from "@langchain/core/callbacks/manager";
/**
 * Base Document Compression class. All compressors should extend this class.
 */
export abstract class BaseDocumentCompressor {
  /**
   * Abstract method that must be implemented by any class that extends
   * `BaseDocumentCompressor`. It receives the documents to compress together
   * with the query they are being compressed for.
   * @param documents An array of `Document` objects to be compressed.
   * @param query A query string.
   * @param callbacks Optional callbacks supplied by the caller; how they are
   * used is left to the implementing class.
   * @returns A Promise that resolves with an array of compressed `Document` objects.
   */
  abstract compressDocuments(
    documents: DocumentInterface[],
    query: string,
    callbacks?: Callbacks
  ): Promise<DocumentInterface[]>;

  /**
   * Structural type guard: reports whether `x` exposes a callable
   * `compressDocuments` member. No `instanceof` check is performed, so any
   * object with such a method is accepted.
   * @param x The value to inspect; `null` and `undefined` are allowed.
   * @returns `true` only when `x.compressDocuments` is a function.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  static isBaseDocumentCompressor(x: any): x is BaseDocumentCompressor {
    // Require a function rather than "anything but undefined": a `null` or
    // otherwise non-callable member must not be narrowed to a compressor,
    // because callers invoke `compressDocuments` right after this check.
    return typeof x?.compressDocuments === "function";
  }
}
/**
 * Document compressor that chains several stages together. Each stage is
 * either a document transformer or another document compressor; the stages
 * run one after another in the order given, and the output of one stage is
 * the input of the next.
 * @example
 * ```typescript
 * const compressorPipeline = new DocumentCompressorPipeline({
 *   transformers: [
 *     new RecursiveCharacterTextSplitter({
 *       chunkSize: 200,
 *       chunkOverlap: 0,
 *     }),
 *     new EmbeddingsFilter({
 *       embeddings: new OpenAIEmbeddings(),
 *       similarityThreshold: 0.8,
 *       k: 5,
 *     }),
 *   ],
 * });
 * const retriever = new ContextualCompressionRetriever({
 *   baseCompressor: compressorPipeline,
 *   baseRetriever: new TavilySearchAPIRetriever({
 *     includeRawContent: true,
 *   }),
 * });
 * const retrievedDocs = await retriever.getRelevantDocuments(
 *   "What did the speaker say about Justice Breyer in the 2022 State of the Union?",
 * );
 * console.log({ retrievedDocs });
 * ```
 */
export class DocumentCompressorPipeline extends BaseDocumentCompressor {
  /** Ordered stages applied by `compressDocuments`. */
  transformers: (BaseDocumentTransformer | BaseDocumentCompressor)[];

  /**
   * @param fields Configuration object whose `transformers` array lists the
   * pipeline stages in execution order.
   */
  constructor(fields: {
    transformers: (BaseDocumentTransformer | BaseDocumentCompressor)[];
  }) {
    super();
    const { transformers } = fields;
    this.transformers = transformers;
  }

  /**
   * Runs the documents through every stage in sequence.
   * @param documents The documents fed into the first stage.
   * @param query The query string, passed on to compressor stages only.
   * @param callbacks Optional callbacks, passed on to compressor stages only.
   * @returns A Promise resolving to the output of the last stage, or to the
   * input `documents` when the pipeline has no stages.
   */
  async compressDocuments(
    documents: DocumentInterface[],
    query: string,
    callbacks?: Callbacks
  ): Promise<DocumentInterface[]> {
    let current = documents;
    for (const stage of this.transformers) {
      // Plain transformers know nothing about the query or callbacks.
      if (!BaseDocumentCompressor.isBaseDocumentCompressor(stage)) {
        current = await stage.transformDocuments(current);
        continue;
      }
      current = await stage.compressDocuments(current, query, callbacks);
    }
    return current;
  }
}