-
Notifications
You must be signed in to change notification settings - Fork 2.1k
/
multi_vector.ts
61 lines (51 loc) · 1.81 KB
/
multi_vector.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import { BaseStoreInterface } from "../schema/storage.js";
import { Document } from "../document.js";
import { BaseRetriever, BaseRetrieverInput } from "../schema/retriever.js";
import { VectorStore } from "../vectorstores/base.js";
/**
* Arguments for the MultiVectorRetriever class.
*
* Only `vectorstore` and `docstore` are required; the remaining fields tune
* how search hits are mapped to stored documents and how many are returned.
*/
export interface MultiVectorRetrieverInput extends BaseRetrieverInput {
/** Vector store the retriever runs `similaritySearch` against. */
vectorstore: VectorStore;
/**
* Key-value store the retriever reads the returned documents from (via
* `mget`), keyed by the ids collected from the search hits' metadata.
*/
docstore: BaseStoreInterface<string, Document>;
/**
* Metadata key on each search hit whose value is the id to look up in
* `docstore`. The retriever falls back to "doc_id" when this is omitted.
*/
idKey?: string;
/**
* Forwarded as the second argument to `vectorstore.similaritySearch`;
* presumably the number of hits to request - confirm against the vector
* store's signature. Passed through as `undefined` when omitted.
*/
childK?: number;
/**
* Upper bound on the number of documents returned: the fetched documents
* are truncated with `slice(0, parentK)`. No truncation when omitted.
*/
parentK?: number;
}
/**
 * A retriever that retrieves documents from a vector store and a document
 * store. It uses the vector store to find relevant documents based on a
 * query, reads an id from each hit's metadata (under `idKey`), and then
 * retrieves the full documents for those ids from the document store.
 */
export class MultiVectorRetriever extends BaseRetriever {
  static lc_name() {
    return "MultiVectorRetriever";
  }

  lc_namespace = ["langchain", "retrievers", "multi_vector"];

  /** Vector store queried with `similaritySearch`. */
  public vectorstore: VectorStore;

  /** Store the returned documents are fetched from, keyed by id. */
  public docstore: BaseStoreInterface<string, Document>;

  /** Metadata key holding the docstore id on each search hit. */
  protected idKey: string;

  /** Second argument forwarded to `similaritySearch`; may be undefined. */
  protected childK?: number;

  /** Maximum number of documents returned; no limit when undefined. */
  protected parentK?: number;

  /**
   * @param args Stores to read from plus optional tuning fields. `idKey`
   * defaults to "doc_id"; `childK` and `parentK` are stored as given.
   */
  constructor(args: MultiVectorRetrieverInput) {
    super(args);
    this.vectorstore = args.vectorstore;
    this.docstore = args.docstore;
    this.idKey = args.idKey ?? "doc_id";
    this.childK = args.childK;
    this.parentK = args.parentK;
  }

  /**
   * Searches the vector store for `query`, collects the distinct ids found
   * under `idKey` in the hits' metadata, and fetches those ids from the
   * document store.
   *
   * @param query Text passed to the vector store's similarity search.
   * @returns The documents found in the document store, in the order their
   * ids first appeared among the search hits, truncated to `parentK`
   * entries when `parentK` is set. Ids the store has no entry for are
   * dropped.
   */
  async _getRelevantDocuments(query: string): Promise<Document[]> {
    const subDocs = await this.vectorstore.similaritySearch(query, this.childK);

    // A Set iterates in insertion order, so ids keep the ranking of the
    // first hit that referenced them while duplicates are dropped without
    // rescanning an array for every hit.
    const ids = new Set<string>();
    for (const doc of subDocs) {
      const id = doc.metadata[this.idKey];
      // Hits without a (truthy) id cannot be mapped to a stored document.
      if (id) {
        ids.add(id);
      }
    }

    const docs = await this.docstore.mget(Array.from(ids));

    // The type predicate lets the compiler verify the narrowing instead of
    // relying on an unchecked `as Document[]` assertion.
    return docs
      .filter((doc): doc is Document => doc !== undefined)
      .slice(0, this.parentK);
  }
}