-
Notifications
You must be signed in to change notification settings - Fork 2.2k
/
multi_vector.ts
88 lines (78 loc) · 2.67 KB
/
multi_vector.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import {
BaseRetriever,
type BaseRetrieverInput,
} from "@langchain/core/retrievers";
import type { VectorStoreInterface } from "@langchain/core/vectorstores";
import { Document } from "@langchain/core/documents";
import { BaseStore, type BaseStoreInterface } from "@langchain/core/stores";
import { createDocumentStoreFromByteStore } from "../storage/encoder_backed.js";
/**
* Arguments for the MultiVectorRetriever class.
*
* At least one of `byteStore` or `docstore` must be supplied; the
* MultiVectorRetriever constructor throws otherwise. When both are given,
* `byteStore` takes precedence and `docstore` is ignored.
*/
export interface MultiVectorRetrieverInput extends BaseRetrieverInput {
/** Vector store that is similarity-searched with the incoming query. */
vectorstore: VectorStoreInterface;
/** @deprecated Prefer `byteStore`. */
docstore?: BaseStoreInterface<string, Document>;
/** Byte store that the retriever wraps into a document store via `createDocumentStoreFromByteStore`. */
byteStore?: BaseStore<string, Uint8Array>;
/** Metadata key on each search hit whose value is the id looked up in the document store. Defaults to "doc_id". */
idKey?: string;
/** Passed as the second argument to the vector store's `similaritySearch` (the number of hits to request). */
childK?: number;
/** Upper bound on the number of documents returned by the retriever; when omitted, every document found is returned. */
parentK?: number;
}
/**
* A retriever that retrieves documents from a vector store and a document
* store. It uses the vector store to find relevant documents based on a
* query, and then retrieves the full documents from the document store.
* @example
* ```typescript
* const retriever = new MultiVectorRetriever({
* vectorstore: new FaissStore(),
* byteStore: new InMemoryStore<Uint8Array>(),
* idKey: "doc_id",
* childK: 20,
* parentK: 5,
* });
*
* const retrieverResult = await retriever.getRelevantDocuments("justice breyer");
* console.log(retrieverResult[0].pageContent.length);
* ```
*/
export class MultiVectorRetriever extends BaseRetriever {
  static lc_name() {
    return "MultiVectorRetriever";
  }

  lc_namespace = ["langchain", "retrievers", "multi_vector"];

  /** Vector store that is similarity-searched with the incoming query. */
  public vectorstore: VectorStoreInterface;

  /** Store from which the final documents are fetched by id. */
  public docstore: BaseStoreInterface<string, Document>;

  /** Metadata key on each search hit that holds the id to look up in `docstore`. */
  protected idKey: string;

  /** Second argument forwarded to `vectorstore.similaritySearch`. */
  protected childK?: number;

  /** Upper bound on the number of documents returned; unbounded when undefined. */
  protected parentK?: number;

  /**
   * @param args Retriever configuration. `byteStore` wins over `docstore`
   *   when both are present; `idKey` falls back to "doc_id".
   * @throws Error when neither `byteStore` nor `docstore` is provided.
   */
  constructor(args: MultiVectorRetrieverInput) {
    super(args);
    this.vectorstore = args.vectorstore;
    if (args.byteStore) {
      this.docstore = createDocumentStoreFromByteStore(args.byteStore);
    } else if (args.docstore) {
      this.docstore = args.docstore;
    } else {
      throw new Error(
        "byteStore and docstore are undefined. Please provide at least one."
      );
    }
    this.idKey = args.idKey ?? "doc_id";
    this.childK = args.childK;
    this.parentK = args.parentK;
  }

  /**
   * Searches the vector store with `query`, collects the distinct ids found
   * under `idKey` in the hits' metadata (in first-seen order), and fetches
   * the matching documents from the document store.
   *
   * @param query Text to run the similarity search with.
   * @returns The documents found in the store, in hit order, capped at
   *   `parentK` entries when `parentK` is set. Ids with no stored document
   *   are skipped.
   */
  async _getRelevantDocuments(query: string): Promise<Document[]> {
    const subDocs = await this.vectorstore.similaritySearch(query, this.childK);

    // A Set keeps insertion order and de-duplicates in O(1) per hit,
    // replacing a linear `includes` scan for every search result.
    const uniqueIds = new Set<string>();
    for (const subDoc of subDocs) {
      const id = subDoc.metadata[this.idKey];
      // Hits without a (truthy) id cannot be mapped to a stored document.
      if (id) {
        uniqueIds.add(id);
      }
    }

    // Nothing to look up: skip the store round-trip entirely. This also
    // avoids handing an empty key list to backends that may reject one.
    if (uniqueIds.size === 0) {
      return [];
    }

    const docs = await this.docstore.mget(Array.from(uniqueIds));
    return docs
      // Type guard instead of a cast; also drops `null` in case a store
      // reports missing entries that way rather than with `undefined`.
      .filter((doc): doc is Document => doc != null)
      .slice(0, this.parentK);
  }
}