-
Notifications
You must be signed in to change notification settings - Fork 2.2k
/
cloudflare_vectorize.ts
237 lines (214 loc) · 7.38 KB
/
cloudflare_vectorize.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
import * as uuid from "uuid";
import {
VectorizeIndex,
VectorizeVectorMetadata,
} from "@cloudflare/workers-types";
import type { EmbeddingsInterface } from "@langchain/core/embeddings";
import { VectorStore } from "@langchain/core/vectorstores";
import { Document } from "@langchain/core/documents";
import {
AsyncCaller,
type AsyncCallerParams,
} from "@langchain/core/utils/async_caller";
import { chunkArray } from "@langchain/core/utils/chunk_array";
/**
 * @deprecated Install and import from "@langchain/cloudflare" instead.
 *
 * Constructor arguments for CloudflareVectorizeStore. Besides the fields
 * declared here, any AsyncCallerParams fields are forwarded to the
 * AsyncCaller that the store creates in its constructor.
 */
export interface VectorizeLibArgs extends AsyncCallerParams {
/** Vectorize index binding. The store constructor throws if it is falsy. */
index: VectorizeIndex;
/**
 * Metadata key under which each document's page content is stored.
 * The store falls back to "text" when this is omitted.
 */
textKey?: string;
}
/**
 * @deprecated Install and import from "@langchain/cloudflare" instead.
 *
 * Type that defines the parameters for the delete operation in the
 * CloudflareVectorizeStore class. It carries only the ids of the vectors
 * to delete.
 */
export type VectorizeDeleteParams = {
/** Ids of the vectors to delete from the index. */
ids: string[];
};
/**
* @deprecated Install and import from "@langchain/cloudflare" instead.
*
* Class that extends the VectorStore class and provides methods to
* interact with the Cloudflare Vectorize vector database.
*/
export class CloudflareVectorizeStore extends VectorStore {
  /** Metadata key under which each document's page content is stored. */
  textKey: string;

  /** Declared on the class but neither read nor written by any method here. */
  namespace?: string;

  /** Vectorize index binding that every operation is issued against. */
  index: VectorizeIndex;

  /** Caller through which upsert and delete batches are dispatched. */
  caller: AsyncCaller;

  /**
   * @returns The identifier string for this vector store implementation.
   */
  _vectorstoreType(): string {
    return "cloudflare_vectorize";
  }

  /**
   * Creates a store bound to a Vectorize index.
   * @param embeddings Embeddings used to embed documents before insertion.
   * @param args Index binding, optional text key, and AsyncCaller options.
   * @throws Error if `args.index` is missing.
   */
  constructor(embeddings: EmbeddingsInterface, args: VectorizeLibArgs) {
    super(embeddings, args);
    this.embeddings = embeddings;
    const { index, textKey, ...asyncCallerArgs } = args;
    if (!index) {
      throw new Error(
        "Must supply a Vectorize index binding, eg { index: env.VECTORIZE }"
      );
    }
    this.index = index;
    this.textKey = textKey ?? "text";
    // Defaults come first so that caller-supplied AsyncCaller options win.
    this.caller = new AsyncCaller({
      maxConcurrency: 6,
      maxRetries: 0,
      ...asyncCallerArgs,
    });
  }

  /**
   * Method that adds documents to the Vectorize database.
   * @param documents Array of documents to add.
   * @param options Optional ids for the documents.
   * @returns Promise that resolves with the ids of the added documents.
   */
  async addDocuments(
    documents: Document[],
    options?: { ids?: string[] } | string[]
  ) {
    const texts = documents.map(({ pageContent }) => pageContent);
    return this.addVectors(
      await this.embeddings.embedDocuments(texts),
      documents,
      options
    );
  }

  /**
   * Method that adds vectors to the Vectorize database.
   *
   * Each vector is stored with its document's metadata plus the page content
   * under `textKey`. Vectors are upserted in batches of 500.
   * @param vectors Array of vectors to add.
   * @param documents Array of documents associated with the vectors, matched
   * to `vectors` by position.
   * @param options Optional ids for the vectors, either as an array or as
   * `{ ids }`. Random UUIDs are generated when omitted.
   * @returns Promise that resolves with the ids of the added vectors.
   * @throws Error if there are fewer documents, or fewer supplied ids, than
   * vectors.
   */
  async addVectors(
    vectors: number[][],
    documents: Document[],
    options?: { ids?: string[] } | string[]
  ) {
    // Fail early with a clear message instead of a TypeError on
    // `documents[idx].metadata` further down.
    if (documents.length < vectors.length) {
      throw new Error(
        `Cannot add ${vectors.length} vectors with only ${documents.length} documents: every vector needs a matching document.`
      );
    }
    const ids = Array.isArray(options) ? options : options?.ids;
    // A short ids array would otherwise send `id: undefined` to the index.
    if (ids != null && ids.length < vectors.length) {
      throw new Error(
        `Cannot add ${vectors.length} vectors with only ${ids.length} ids: every vector needs an id.`
      );
    }
    const documentIds = ids ?? documents.map(() => uuid.v4());
    const vectorizeVectors = vectors.map((values, idx) => {
      const metadata: Record<string, VectorizeVectorMetadata> = {
        ...documents[idx].metadata,
        [this.textKey]: documents[idx].pageContent,
      };
      return {
        id: documentIds[idx],
        metadata,
        values,
      };
    });
    // Stick to a limit of 500 vectors per upsert request
    const chunkSize = 500;
    const chunkedVectors = chunkArray(vectorizeVectors, chunkSize);
    const batchRequests = chunkedVectors.map((chunk) =>
      this.caller.call(async () => this.index.upsert(chunk))
    );
    await Promise.all(batchRequests);
    return documentIds;
  }

  /**
   * Method that deletes vectors from the Vectorize database.
   * Ids are deleted in batches of 1000.
   * @param params Parameters for the delete operation.
   * @returns Promise that resolves when the delete operation is complete.
   */
  async delete(params: VectorizeDeleteParams): Promise<void> {
    const batchSize = 1000;
    const batchedIds = chunkArray(params.ids, batchSize);
    const batchRequests = batchedIds.map((batchIds) =>
      this.caller.call(async () => this.index.deleteByIds(batchIds))
    );
    await Promise.all(batchRequests);
  }

  /**
   * Method that performs a similarity search in the Vectorize database and
   * returns the results along with their scores.
   *
   * The value stored under `textKey` becomes the document's page content and
   * the remaining metadata entries become its metadata.
   * @param query Query vector for the similarity search.
   * @param k Number of top results to return.
   * @returns Promise that resolves with an array of documents and their scores.
   */
  async similaritySearchVectorWithScore(
    query: number[],
    k: number
  ): Promise<[Document, number][]> {
    const results = await this.index.query(query, {
      returnVectors: true,
      topK: k,
    });
    const result: [Document, number][] = [];
    if (results.matches) {
      for (const res of results.matches) {
        const { [this.textKey]: pageContent, ...metadata } =
          res.vector?.metadata ?? {};
        result.push([
          new Document({
            metadata,
            // A match may lack metadata or the text key; always pass a real
            // string rather than casting a possibly-undefined value.
            pageContent: pageContent == null ? "" : String(pageContent),
          }),
          res.score,
        ]);
      }
    }
    return result;
  }

  /**
   * Static method that creates a new instance of the CloudflareVectorizeStore class
   * from texts.
   * @param texts Array of texts to add to the Vectorize database.
   * @param metadatas Metadata associated with the texts: either one entry per
   * text (matched by position) or a single object applied to every text.
   * @param embeddings Embeddings to use for the texts.
   * @param dbConfig Configuration for the Vectorize database.
   * @returns Promise that resolves with a new instance of the CloudflareVectorizeStore class.
   */
  static async fromTexts(
    texts: string[],
    metadatas:
      | Record<string, VectorizeVectorMetadata>[]
      | Record<string, VectorizeVectorMetadata>,
    embeddings: EmbeddingsInterface,
    dbConfig: VectorizeLibArgs
  ): Promise<CloudflareVectorizeStore> {
    const docs: Document[] = [];
    for (let i = 0; i < texts.length; i += 1) {
      const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;
      const newDoc = new Document({
        pageContent: texts[i],
        metadata,
      });
      docs.push(newDoc);
    }
    return CloudflareVectorizeStore.fromDocuments(docs, embeddings, dbConfig);
  }

  /**
   * Static method that creates a new instance of the CloudflareVectorizeStore class
   * from documents.
   * @param docs Array of documents to add to the Vectorize database.
   * @param embeddings Embeddings to use for the documents.
   * @param dbConfig Configuration for the Vectorize database.
   * @returns Promise that resolves with a new instance of the CloudflareVectorizeStore class.
   */
  static async fromDocuments(
    docs: Document[],
    embeddings: EmbeddingsInterface,
    dbConfig: VectorizeLibArgs
  ): Promise<CloudflareVectorizeStore> {
    const instance = new this(embeddings, dbConfig);
    await instance.addDocuments(docs);
    return instance;
  }

  /**
   * Static method that creates a new instance of the CloudflareVectorizeStore class
   * from an existing index. Nothing is written to the index.
   * @param embeddings Embeddings to use for the documents.
   * @param dbConfig Configuration for the Vectorize database.
   * @returns Promise that resolves with a new instance of the CloudflareVectorizeStore class.
   */
  static async fromExistingIndex(
    embeddings: EmbeddingsInterface,
    dbConfig: VectorizeLibArgs
  ): Promise<CloudflareVectorizeStore> {
    const instance = new this(embeddings, dbConfig);
    return instance;
  }
}