-
Notifications
You must be signed in to change notification settings - Fork 2.2k
/
memory.ts
192 lines (174 loc) Β· 6.89 KB
/
memory.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
import { similarity as ml_distance_similarity } from "ml-distance";
import { VectorStore } from "@langchain/core/vectorstores";
import type { EmbeddingsInterface } from "@langchain/core/embeddings";
import { Document } from "@langchain/core/documents";
/**
* Interface representing a vector in memory. It includes the content
* (text), the corresponding embedding (vector), and any associated
* metadata.
*/
interface MemoryVector {
content: string;
embedding: number[];
// eslint-disable-next-line @typescript-eslint/no-explicit-any
metadata: Record<string, any>;
}
/**
* Interface for the arguments that can be passed to the
* `MemoryVectorStore` constructor. It includes an optional `similarity`
* function.
*/
export interface MemoryVectorStoreArgs {
similarity?: typeof ml_distance_similarity.cosine;
}
/**
* Class that extends `VectorStore` to store vectors in memory. Provides
* methods for adding documents, performing similarity searches, and
* creating instances from texts, documents, or an existing index.
*/
export class MemoryVectorStore extends VectorStore {
declare FilterType: (doc: Document) => boolean;
memoryVectors: MemoryVector[] = [];
similarity: typeof ml_distance_similarity.cosine;
_vectorstoreType(): string {
return "memory";
}
constructor(
embeddings: EmbeddingsInterface,
{ similarity, ...rest }: MemoryVectorStoreArgs = {}
) {
super(embeddings, rest);
this.similarity = similarity ?? ml_distance_similarity.cosine;
}
/**
* Method to add documents to the memory vector store. It extracts the
* text from each document, generates embeddings for them, and adds the
* resulting vectors to the store.
* @param documents Array of `Document` instances to be added to the store.
* @returns Promise that resolves when all documents have been added.
*/
async addDocuments(documents: Document[]): Promise<void> {
const texts = documents.map(({ pageContent }) => pageContent);
return this.addVectors(
await this.embeddings.embedDocuments(texts),
documents
);
}
/**
* Method to add vectors to the memory vector store. It creates
* `MemoryVector` instances for each vector and document pair and adds
* them to the store.
* @param vectors Array of vectors to be added to the store.
* @param documents Array of `Document` instances corresponding to the vectors.
* @returns Promise that resolves when all vectors have been added.
*/
async addVectors(vectors: number[][], documents: Document[]): Promise<void> {
const memoryVectors = vectors.map((embedding, idx) => ({
content: documents[idx].pageContent,
embedding,
metadata: documents[idx].metadata,
}));
this.memoryVectors = this.memoryVectors.concat(memoryVectors);
}
/**
* Method to perform a similarity search in the memory vector store. It
* calculates the similarity between the query vector and each vector in
* the store, sorts the results by similarity, and returns the top `k`
* results along with their scores.
* @param query Query vector to compare against the vectors in the store.
* @param k Number of top results to return.
* @param filter Optional filter function to apply to the vectors before performing the search.
* @returns Promise that resolves with an array of tuples, each containing a `Document` and its similarity score.
*/
async similaritySearchVectorWithScore(
query: number[],
k: number,
filter?: this["FilterType"]
): Promise<[Document, number][]> {
const filterFunction = (memoryVector: MemoryVector) => {
if (!filter) {
return true;
}
const doc = new Document({
metadata: memoryVector.metadata,
pageContent: memoryVector.content,
});
return filter(doc);
};
const filteredMemoryVectors = this.memoryVectors.filter(filterFunction);
const searches = filteredMemoryVectors
.map((vector, index) => ({
similarity: this.similarity(query, vector.embedding),
index,
}))
.sort((a, b) => (a.similarity > b.similarity ? -1 : 0))
.slice(0, k);
const result: [Document, number][] = searches.map((search) => [
new Document({
metadata: filteredMemoryVectors[search.index].metadata,
pageContent: filteredMemoryVectors[search.index].content,
}),
search.similarity,
]);
return result;
}
/**
* Static method to create a `MemoryVectorStore` instance from an array of
* texts. It creates a `Document` for each text and metadata pair, and
* adds them to the store.
* @param texts Array of texts to be added to the store.
* @param metadatas Array or single object of metadata corresponding to the texts.
* @param embeddings `Embeddings` instance used to generate embeddings for the texts.
* @param dbConfig Optional `MemoryVectorStoreArgs` to configure the `MemoryVectorStore` instance.
* @returns Promise that resolves with a new `MemoryVectorStore` instance.
*/
static async fromTexts(
texts: string[],
metadatas: object[] | object,
embeddings: EmbeddingsInterface,
dbConfig?: MemoryVectorStoreArgs
): Promise<MemoryVectorStore> {
const docs: Document[] = [];
for (let i = 0; i < texts.length; i += 1) {
const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;
const newDoc = new Document({
pageContent: texts[i],
metadata,
});
docs.push(newDoc);
}
return MemoryVectorStore.fromDocuments(docs, embeddings, dbConfig);
}
/**
* Static method to create a `MemoryVectorStore` instance from an array of
* `Document` instances. It adds the documents to the store.
* @param docs Array of `Document` instances to be added to the store.
* @param embeddings `Embeddings` instance used to generate embeddings for the documents.
* @param dbConfig Optional `MemoryVectorStoreArgs` to configure the `MemoryVectorStore` instance.
* @returns Promise that resolves with a new `MemoryVectorStore` instance.
*/
static async fromDocuments(
docs: Document[],
embeddings: EmbeddingsInterface,
dbConfig?: MemoryVectorStoreArgs
): Promise<MemoryVectorStore> {
const instance = new this(embeddings, dbConfig);
await instance.addDocuments(docs);
return instance;
}
/**
* Static method to create a `MemoryVectorStore` instance from an existing
* index. It creates a new `MemoryVectorStore` instance without adding any
* documents or vectors.
* @param embeddings `Embeddings` instance used to generate embeddings for the documents.
* @param dbConfig Optional `MemoryVectorStoreArgs` to configure the `MemoryVectorStore` instance.
* @returns Promise that resolves with a new `MemoryVectorStore` instance.
*/
static async fromExistingIndex(
embeddings: EmbeddingsInterface,
dbConfig?: MemoryVectorStoreArgs
): Promise<MemoryVectorStore> {
const instance = new this(embeddings, dbConfig);
return instance;
}
}