-
Notifications
You must be signed in to change notification settings - Fork 2.2k
/
zep.ts
427 lines (389 loc) · 13.9 KB
/
zep.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
import {
DocumentCollection,
IDocument,
NotFoundError,
ZepClient,
} from "@getzep/zep-js";
import {
MaxMarginalRelevanceSearchOptions,
VectorStore,
} from "@langchain/core/vectorstores";
import type { EmbeddingsInterface } from "@langchain/core/embeddings";
import { Document } from "@langchain/core/documents";
import { Callbacks } from "@langchain/core/callbacks/manager";
import { maximalMarginalRelevance } from "@langchain/core/utils/math";
import { FakeEmbeddings } from "../utils/testing.js";
/**
 * Arguments describing a ZepVectorStore backed by an already-resolved Zep
 * document collection.
 *
 * NOTE(review): this interface is not referenced elsewhere in the visible
 * code; the ZepVectorStore constructor takes an IZepConfig instead. Confirm
 * whether external callers still rely on it before changing or removing it.
 */
export interface IZepArgs {
/** The Zep document collection. */
collection: DocumentCollection;
}
/**
 * Configuration options for a ZepVectorStore instance.
 *
 * The connection fields are used to build the Zep client; the remaining
 * fields identify the collection and, when that collection does not exist
 * yet, describe the collection to create.
 */
export interface IZepConfig {
/** Zep API URL; passed as the first argument to `ZepClient.init`. */
apiUrl: string;
/** Optional API key; passed as the second argument to `ZepClient.init`. */
apiKey?: string;
/** Name of the collection to look up, and the name used if it has to be created. */
collectionName: string;
/** Forwarded to `addCollection` when a new collection is created. */
description?: string;
/** Forwarded to `addCollection` when a new collection is created. */
metadata?: Record<string, never>;
/**
 * Required in order to create a collection that does not exist yet; without
 * it, collection creation throws.
 */
embeddingDimensions?: number;
/**
 * NOTE(review): not read anywhere in the visible code. When a collection is
 * created, ZepVectorStore passes its own internal auto-embed flag, which is
 * enabled only when FakeEmbeddings is supplied as the embeddings instance.
 */
isAutoEmbedded?: boolean;
}
/**
 * Parameters accepted by `ZepVectorStore.delete`.
 */
export interface IZepDeleteParams {
/** UUIDs of the Zep documents to delete; each one is deleted individually. */
uuids: string[];
}
/**
 * ZepVectorStore is a VectorStore implementation that uses the Zep long-term memory store as a backend.
 *
 * If the collection does not exist, it will be created automatically, provided
 * `embeddingDimensions` is supplied in the configuration.
 *
 * Collection initialization is started by the constructor and runs
 * asynchronously; every method that touches the collection awaits
 * `initPromise` before using it.
 *
 * When the embeddings instance is a `FakeEmbeddings`, the store runs in
 * "auto-embed" mode: no vectors are computed locally and text is sent to Zep
 * as-is for both indexing and querying.
 *
 * Requires `zep-js` to be installed:
 * ```bash
 * npm install @getzep/zep-js
 * ```
 *
 * @property {ZepClient} client - The ZepClient instance used to interact with Zep's API.
 * @property {Promise<void>} initPromise - A promise that resolves when the collection is initialized.
 * @property {DocumentCollection} collection - The Zep document collection.
 */
export class ZepVectorStore extends VectorStore {
  public client: ZepClient;

  public collection: DocumentCollection;

  private initPromise: Promise<void>;

  private autoEmbed = false;

  /**
   * Creates the store and kicks off asynchronous collection initialization.
   *
   * @param {EmbeddingsInterface} embeddings - Embedding model. Passing a `FakeEmbeddings` instance enables auto-embed mode.
   * @param {IZepConfig} args - The configuration object for the Zep API.
   */
  constructor(embeddings: EmbeddingsInterface, args: IZepConfig) {
    super(embeddings, args);
    this.embeddings = embeddings;

    // eslint-disable-next-line no-instanceof/no-instanceof
    if (this.embeddings instanceof FakeEmbeddings) {
      this.autoEmbed = true;
    }

    // Initialization failures are logged here and re-thrown so that whoever
    // awaits `initPromise` later still observes the original error.
    this.initPromise = this.initCollection(args).catch((err) => {
      console.error("Error initializing collection:", err);
      throw err;
    });
  }

  /**
   * Initializes the document collection. If the collection does not exist, it creates a new one.
   *
   * @param {IZepConfig} args - The configuration object for the Zep API.
   * @throws If FakeEmbeddings is used with a collection that is not
   * auto-embedded, or if looking up the collection fails for any reason other
   * than the collection not being found.
   */
  private async initCollection(args: IZepConfig) {
    this.client = await ZepClient.init(args.apiUrl, args.apiKey);
    try {
      this.collection = await this.client.document.getCollection(
        args.collectionName
      );

      // If the Embedding passed in is fake, but the collection is not auto embedded, throw an error
      if (!this.collection.is_auto_embedded && this.autoEmbed) {
        throw new Error(`You can't pass in FakeEmbeddings when collection ${args.collectionName}
is not set to auto-embed.`);
      }
    } catch (err) {
      // Only a "not found" error means the collection should be created.
      // The name check covers NotFoundError instances that fail `instanceof`.
      const isNotFound =
        // eslint-disable-next-line no-instanceof/no-instanceof
        err instanceof Error &&
        // eslint-disable-next-line no-instanceof/no-instanceof
        (err instanceof NotFoundError || err.name === "NotFoundError");

      if (!isNotFound) {
        // Everything else (including thrown values that are not Error
        // instances) must propagate; swallowing it would leave
        // `this.collection` unset while initialization appears to succeed.
        throw err;
      }
      await this.createCollection(args);
    }
  }

  /**
   * Creates a new document collection.
   *
   * @param {IZepConfig} args - The configuration object for the Zep API.
   * @throws If `embeddingDimensions` is not provided.
   */
  private async createCollection(args: IZepConfig) {
    if (!args.embeddingDimensions) {
      throw new Error(`Collection ${args.collectionName} not found.
You can create a new Collection by providing embeddingDimensions.`);
    }

    this.collection = await this.client.document.addCollection({
      name: args.collectionName,
      description: args.description,
      metadata: args.metadata,
      embeddingDimensions: args.embeddingDimensions,
      isAutoEmbedded: this.autoEmbed,
    });

    console.info("Created new collection:", args.collectionName);
  }

  /**
   * Adds vectors and corresponding documents to the collection.
   *
   * In auto-embed mode `vectors` may be empty, in which case documents are
   * sent without an embedding.
   *
   * @param {number[][]} vectors - The vectors to add.
   * @param {Document[]} documents - The corresponding documents to add.
   * @returns {Promise<string[]>} - A promise that resolves with the UUIDs of the added documents.
   * @throws If auto-embed is off and `vectors` is empty or differs in length from `documents`.
   */
  async addVectors(
    vectors: number[][],
    documents: Document[]
  ): Promise<string[]> {
    if (!this.autoEmbed && vectors.length === 0) {
      throw new Error(`Vectors must be provided if autoEmbed is false`);
    }
    if (!this.autoEmbed && vectors.length !== documents.length) {
      throw new Error(`Vectors and documents must have the same length`);
    }

    const docs: Array<IDocument> = [];
    for (let i = 0; i < documents.length; i += 1) {
      const doc: IDocument = {
        content: documents[i].pageContent,
        metadata: documents[i].metadata,
        embedding: vectors.length > 0 ? vectors[i] : undefined,
      };
      docs.push(doc);
    }

    // Wait for collection to be initialized
    await this.initPromise;
    return await this.collection.addDocuments(docs);
  }

  /**
   * Adds documents to the collection. Unless the store is in auto-embed mode,
   * the documents are first embedded into vectors using the provided
   * embedding model.
   *
   * @param {Document[]} documents - The documents to add.
   * @returns {Promise<string[]>} - A promise that resolves with the UUIDs of the added documents.
   */
  async addDocuments(documents: Document[]): Promise<string[]> {
    const texts = documents.map(({ pageContent }) => pageContent);
    let vectors: number[][] = [];
    if (!this.autoEmbed) {
      vectors = await this.embeddings.embedDocuments(texts);
    }
    return this.addVectors(vectors, documents);
  }

  /**
   * @returns {string} The identifier of this vector store type, `"zep"`.
   */
  _vectorstoreType(): string {
    return "zep";
  }

  /**
   * Deletes documents from the collection, one UUID at a time and in order.
   *
   * @param {IZepDeleteParams} params - The list of Zep document UUIDs to delete.
   * @returns {Promise<void>}
   */
  async delete(params: IZepDeleteParams): Promise<void> {
    // Wait for collection to be initialized
    await this.initPromise;
    for (const uuid of params.uuids) {
      await this.collection.deleteDocument(uuid);
    }
  }

  /**
   * Performs a similarity search in the collection and returns the results with their scores.
   *
   * @param {number[]} query - The query vector.
   * @param {number} k - The number of results to return.
   * @param {Record<string, unknown>} filter - The filter to apply to the search. Zep only supports Record<string, unknown> as filter.
   * @returns {Promise<[Document, number][]>} - A promise that resolves with the search results and their scores.
   */
  async similaritySearchVectorWithScore(
    query: number[],
    k: number,
    filter?: Record<string, unknown> | undefined
  ): Promise<[Document, number][]> {
    await this.initPromise;
    const results = await this.collection.search(
      {
        embedding: new Float32Array(query),
        metadata: assignMetadata(filter),
      },
      k
    );
    return zepDocsToDocumentsAndScore(results);
  }

  /**
   * Performs a text-based similarity search: the raw query string is sent to
   * the collection search instead of a locally computed embedding.
   *
   * @param {string} query - The query text.
   * @param {number} k - The number of results to return.
   * @param {Record<string, unknown>} filter - Optional metadata filter.
   * @returns {Promise<[Document, number][]>} - A promise that resolves with the search results and their scores.
   */
  async _similaritySearchWithScore(
    query: string,
    k: number,
    filter?: Record<string, unknown> | undefined
  ): Promise<[Document, number][]> {
    await this.initPromise;
    const results = await this.collection.search(
      {
        text: query,
        metadata: assignMetadata(filter),
      },
      k
    );
    return zepDocsToDocumentsAndScore(results);
  }

  /**
   * Performs a similarity search for a text query and returns the results
   * with their scores. In auto-embed mode the text is sent to Zep directly;
   * otherwise it is embedded locally and searched by vector.
   *
   * @param {string} query - The query string to search for.
   * @param {number} [k=4] - The number of results to return. Defaults to 4.
   * @param {Record<string, unknown> | undefined} [filter=undefined] - Optional metadata filter.
   * @param [_callbacks=undefined] - Optional callbacks. Currently not implemented.
   * @returns {Promise<[Document, number][]>} - A promise that resolves with the search results and their scores.
   */
  async similaritySearchWithScore(
    query: string,
    k = 4,
    filter: Record<string, unknown> | undefined = undefined,
    _callbacks = undefined // implement passing to embedQuery later
  ): Promise<[Document, number][]> {
    if (this.autoEmbed) {
      return this._similaritySearchWithScore(query, k, filter);
    } else {
      return this.similaritySearchVectorWithScore(
        await this.embeddings.embedQuery(query),
        k,
        filter
      );
    }
  }

  /**
   * Performs a similarity search on the Zep collection.
   *
   * @param {string} query - The query string to search for.
   * @param {number} [k=4] - The number of results to return. Defaults to 4.
   * @param {this["FilterType"] | undefined} [filter=undefined] - An optional set of JSONPath filters to apply to the search.
   * @param {Callbacks | undefined} [_callbacks=undefined] - Optional callbacks. Currently not implemented.
   * @returns {Promise<Document[]>} - A promise that resolves to an array of Documents that are similar to the query.
   *
   * @async
   */
  async similaritySearch(
    query: string,
    k = 4,
    filter: this["FilterType"] | undefined = undefined,
    _callbacks: Callbacks | undefined = undefined // implement passing to embedQuery later
  ): Promise<Document[]> {
    await this.initPromise;

    let results: [Document, number][];
    if (this.autoEmbed) {
      const zepResults = await this.collection.search(
        { text: query, metadata: assignMetadata(filter) },
        k
      );
      results = zepDocsToDocumentsAndScore(zepResults);
    } else {
      results = await this.similaritySearchVectorWithScore(
        await this.embeddings.embedQuery(query),
        k,
        assignMetadata(filter)
      );
    }

    // Callers of this method only want the documents, not the scores.
    return results.map((result) => result[0]);
  }

  /**
   * Return documents selected using the maximal marginal relevance.
   * Maximal marginal relevance optimizes for similarity to the query AND diversity
   * among selected documents.
   *
   * @param {string} query - Text to look up documents similar to.
   * @param options
   * @param {number} options.k - Number of documents to return.
   * @param {number} options.fetchK=20 - Number of documents to fetch before passing to the MMR algorithm.
   * @param {number} options.lambda=0.5 - Number between 0 and 1 that determines the degree of diversity among the results,
   *                 where 0 corresponds to maximum diversity and 1 to minimum diversity.
   * @param {Record<string, any>} options.filter - Optional Zep JSONPath query to pre-filter on document metadata field
   *
   * @returns {Promise<Document[]>} - List of documents selected by maximal marginal relevance.
   */
  async maxMarginalRelevanceSearch(
    query: string,
    options: MaxMarginalRelevanceSearchOptions<this["FilterType"]>
  ): Promise<Document[]> {
    const { k, fetchK = 20, lambda = 0.5, filter } = options;

    // Wait for collection to be initialized; without this, a search issued
    // right after construction would dereference an unset collection.
    await this.initPromise;

    let queryEmbedding: number[];
    let zepResults: IDocument[];

    if (!this.autoEmbed) {
      queryEmbedding = await this.embeddings.embedQuery(query);
      zepResults = await this.collection.search(
        {
          embedding: new Float32Array(queryEmbedding),
          metadata: assignMetadata(filter),
        },
        fetchK
      );
    } else {
      // In auto-embed mode there is no local embedding of the query, so the
      // query vector is taken from the search response instead.
      let queryEmbeddingArray: Float32Array;
      [zepResults, queryEmbeddingArray] =
        await this.collection.searchReturnQueryVector(
          { text: query, metadata: assignMetadata(filter) },
          fetchK
        );
      queryEmbedding = Array.from(queryEmbeddingArray);
    }

    const results = zepDocsToDocumentsAndScore(zepResults);

    // Results without an embedding contribute an empty vector.
    const embeddingList = zepResults.map((doc) =>
      Array.from(doc.embedding ? doc.embedding : [])
    );

    const mmrIndexes = maximalMarginalRelevance(
      queryEmbedding,
      embeddingList,
      lambda,
      k
    );

    // Indexes equal to -1 are skipped; they do not refer to a result.
    return mmrIndexes.filter((idx) => idx !== -1).map((idx) => results[idx][0]);
  }

  /**
   * Creates a new ZepVectorStore instance from an array of texts. Each text is converted into a Document and added to the collection.
   *
   * @param {string[]} texts - The texts to convert into Documents.
   * @param {object[] | object} metadatas - The metadata to associate with each Document. If an array is provided, each element is associated with the corresponding Document. If an object is provided, it is associated with all Documents.
   * @param {Embeddings} embeddings - The embeddings to use for vectorizing the texts.
   * @param {IZepConfig} zepConfig - The configuration object for the Zep API.
   * @returns {Promise<ZepVectorStore>} - A promise that resolves with the new ZepVectorStore instance.
   */
  static async fromTexts(
    texts: string[],
    metadatas: object[] | object,
    embeddings: EmbeddingsInterface,
    zepConfig: IZepConfig
  ): Promise<ZepVectorStore> {
    const docs: Document[] = [];
    for (let i = 0; i < texts.length; i += 1) {
      const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;
      const newDoc = new Document({
        pageContent: texts[i],
        metadata,
      });
      docs.push(newDoc);
    }
    return ZepVectorStore.fromDocuments(docs, embeddings, zepConfig);
  }

  /**
   * Creates a new ZepVectorStore instance from an array of Documents. Each Document is added to a Zep collection.
   *
   * @param {Document[]} docs - The Documents to add.
   * @param {Embeddings} embeddings - The embeddings to use for vectorizing the Document contents.
   * @param {IZepConfig} zepConfig - The configuration object for the Zep API.
   * @returns {Promise<ZepVectorStore>} - A promise that resolves with the new ZepVectorStore instance.
   */
  static async fromDocuments(
    docs: Document[],
    embeddings: EmbeddingsInterface,
    zepConfig: IZepConfig
  ): Promise<ZepVectorStore> {
    const instance = new this(embeddings, zepConfig);
    // Wait for collection to be initialized
    await instance.initPromise;
    await instance.addDocuments(docs);
    return instance;
  }
}
/**
 * Converts Zep documents into `[Document, score]` tuples.
 *
 * Each Zep document's `content` becomes the `pageContent` and its `metadata`
 * is carried over unchanged. A missing (or otherwise falsy) score is reported
 * as 0.
 *
 * @param {IDocument[]} results - Documents returned by a Zep collection search.
 * @returns {[Document, number][]} One tuple per input document, in the same order.
 */
function zepDocsToDocumentsAndScore(
  results: IDocument[]
): [Document, number][] {
  const toScoredDocument = ({
    content,
    metadata,
    score,
  }: IDocument): [Document, number] => {
    const document = new Document({ pageContent: content, metadata });
    return [document, score || 0];
  };

  return results.map(toScoredDocument);
}
/**
 * Normalizes a search filter into the metadata object handed to Zep.
 *
 * - `undefined` is passed through silently.
 * - Any non-null object is returned as-is (same reference).
 * - Anything else (for example a string) is rejected: a warning is logged
 *   and `undefined` is returned, so the search runs unfiltered.
 *
 * @param value - The filter supplied by the caller.
 * @returns The filter as a record, or `undefined` when no usable filter was given.
 */
function assignMetadata(
  value: string | Record<string, unknown> | object | undefined
): Record<string, unknown> | undefined {
  // No filter at all is a valid input and must not trigger the warning.
  if (value === undefined) {
    return undefined;
  }

  // `typeof null` is "object", so null has to be excluded explicitly.
  if (value !== null && typeof value === "object") {
    return value as Record<string, unknown>;
  }

  console.warn("Metadata filters must be an object, Record, or undefined.");
  return undefined;
}