-
Notifications
You must be signed in to change notification settings - Fork 2.1k
/
common.ts
162 lines (145 loc) · 5.62 KB
/
common.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import type { CloseVectorSaveableVectorStore } from "closevector-common";
import type { EmbeddingsInterface } from "@langchain/core/embeddings";
import { Document } from "@langchain/core/documents";
import { SaveableVectorStore } from "@langchain/core/vectorstores";
/**
 * Optional key/secret pair that can be handed to a CloseVector store.
 * Both fields may be omitted.
 * NOTE(review): presumably used to authenticate uploads/downloads against
 * the CloseVector CDN — confirm against the concrete subclasses.
 */
type CloseVectorCredentials = {
key?: string;
secret?: string;
};
/**
* The closevector package is largely based on hnswlib.ts in the current folder, with the following differences:
* 1. It uses a modified version of hnswlib-node to ensure the generated index can be loaded by closevector_web.ts.
* 2. It adds features to upload and download the index to/from the CDN provided by CloseVector.
*
* For more information, check out https://closevector-docs.getmegaportal.com/
*/
/**
 * Abstract base for vector stores backed by a CloseVector HNSW
 * (Hierarchical Navigable Small World) implementation.
 *
 * The class extends SaveableVectorStore and forwards document/vector
 * insertion, similarity search, saving and deletion to the wrapped
 * implementation held in `instance`. Concrete subclasses are expected to
 * assign `instance` and to implement `saveToCloud`.
 */
export abstract class CloseVector<
  CloseVectorHNSWImplementation extends CloseVectorSaveableVectorStore
> extends SaveableVectorStore {
  declare FilterType: (doc: Document) => boolean;

  _instance?: CloseVectorHNSWImplementation;

  // credentials will not be saved to disk
  credentials?: CloseVectorCredentials;

  _vectorstoreType(): string {
    return "closevector";
  }

  constructor(
    embeddings: EmbeddingsInterface,
    args: {
      space: "l2" | "ip" | "cosine";
      numDimensions?: number;
      maxElements?: number;
    },
    credentials?: CloseVectorCredentials
  ) {
    super(embeddings, args);
    this.credentials = credentials;
  }

  /**
   * The wrapped CloseVector implementation.
   * @throws Error when no implementation has been assigned yet.
   */
  public get instance(): CloseVectorHNSWImplementation {
    const current = this._instance;
    if (current) {
      return current;
    }
    throw new Error(
      "Vector store not initialised yet. Try calling `addTexts` first."
    );
  }

  protected set instance(instance: CloseVectorHNSWImplementation) {
    this._instance = instance;
  }

  /**
   * Adds documents to the store by delegating to the wrapped
   * implementation's `addDocuments`.
   * @param documents The documents to be added to the vector store.
   * @returns A Promise that resolves once the wrapped implementation has finished.
   */
  async addDocuments(documents: Document[]): Promise<void> {
    const store = this.instance;
    await store.addDocuments(documents);
  }

  /**
   * Implemented by subclasses.
   * NOTE(review): presumably uploads the store to the CloseVector CDN —
   * confirm against the concrete implementations.
   */
  abstract saveToCloud(_options: Record<string, unknown>): Promise<void>;

  /**
   * Saves the vector store under the given directory by delegating to the
   * wrapped implementation's `save`.
   * @param directory The directory to which to save the vector store. In CloseVector, IndexedDB is used to mock the file system, so this parameter can be treated as a key to the stored contents.
   * @returns A Promise that resolves when the vector store has been saved.
   */
  async save(directory: string): Promise<void> {
    const store = this.instance;
    await store.save(directory);
  }

  /**
   * Adds pre-computed vectors together with their documents by delegating
   * to the wrapped implementation's `addVectors`.
   * @param vectors The vectors to be added to the vector store.
   * @param documents The documents corresponding to the vectors.
   * @returns A Promise that resolves when the vectors and documents have been added.
   */
  async addVectors(vectors: number[][], documents: Document[]) {
    const store = this.instance;
    await store.addVectors(vectors, documents);
  }

  /**
   * Runs a similarity search for a query vector against the wrapped
   * implementation and returns up to `k` documents with their scores.
   * @param query The query vector.
   * @param k The number of most similar documents to return.
   * @param filter An optional predicate used to filter candidate documents.
   * @returns A Promise that resolves to an array of `[document, score]` tuples, where the score is `1 - value` for the value reported by the wrapped implementation.
   */
  async similaritySearchVectorWithScore(
    query: number[],
    k: number,
    filter?: this["FilterType"]
  ) {
    // Adapt the caller's filter to the plain-object shape the wrapped
    // implementation passes to its filter callback.
    let predicate:
      | ((candidate: {
          pageContent: string;
          metadata: Record<string, unknown>;
        }) => boolean)
      | undefined;
    if (filter) {
      const accept = filter;
      predicate = (candidate) =>
        accept({
          pageContent: candidate.pageContent,
          metadata: candidate.metadata || {},
        }) || false;
    }

    const hits = await this.instance.similaritySearchVectorWithScore(
      query,
      k,
      predicate
    );

    // Re-wrap every hit as a Document and report the score as `1 - rawScore`.
    const scored: [Document<Record<string, unknown>>, number][] = hits.map(
      ([rawDoc, rawScore]) => [
        new Document({
          pageContent: rawDoc.pageContent,
          metadata: rawDoc.metadata || {},
        }),
        1 - rawScore,
      ]
    );
    return scored;
  }

  /**
   * Deletes the vector store from a directory by delegating to the wrapped
   * implementation's `delete`. According to the original documentation this
   * removes the hnswlib.index, docstore.json and args.json files.
   * @param params An object with a directory property that specifies the directory from which to delete the vector store.
   * @returns A Promise that resolves with whatever the wrapped implementation's `delete` resolves to.
   */
  async delete(params: { directory: string }) {
    const outcome = await this.instance.delete(params);
    return outcome;
  }

  /**
   * Builds one Document per text.
   * @param texts The page contents of the documents to create.
   * @param metadatas Either one metadata object shared by every document, or an array whose i-th entry is used for the i-th text.
   * @returns The created documents, in the same order as `texts`.
   */
  static textsToDocuments(texts: string[], metadatas: object[] | object) {
    const result: Document[] = [];
    for (const [position, text] of texts.entries()) {
      const metadata = Array.isArray(metadatas)
        ? metadatas[position]
        : metadatas;
      const doc = new Document({
        pageContent: text,
        metadata,
      });
      result.push(doc);
    }
    return result;
  }
}