-
Notifications
You must be signed in to change notification settings - Fork 295
/
PineconeVectorStore.ts
229 lines (202 loc) · 6.4 KB
/
PineconeVectorStore.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
import {
VectorStoreBase,
type ExactMatchFilter,
type IEmbedModel,
type MetadataFilters,
type VectorStoreNoEmbedModel,
type VectorStoreQuery,
type VectorStoreQueryResult,
} from "./types.js";
import { getEnv } from "@llamaindex/env";
import type {
FetchResponse,
Index,
ScoredPineconeRecord,
} from "@pinecone-database/pinecone";
import { type Pinecone } from "@pinecone-database/pinecone";
import type { BaseNode, Metadata } from "../../Node.js";
import { metadataDictToNode, nodeToMetadata } from "./utils.js";
/**
 * Optional construction settings for PineconeVectorStore.
 * Every field may be omitted; the constructor then falls back to an
 * environment variable and finally to a built-in default.
 */
type PineconeParams = IEmbedModel & {
  /** Metadata key consulted for a record's text when reading query results. Defaults to "text". */
  textKey?: string;
  /** Namespace selected on the index. Defaults to PINECONE_NAMESPACE, then "". */
  namespace?: string;
  /** Number of records sent per upsert call. Defaults to PINECONE_CHUNK_SIZE, then 100. */
  chunkSize?: number;
  /** Name of the Pinecone index to use. Defaults to PINECONE_INDEX_NAME, then "llama". */
  indexName?: string;
};
/**
 * Provides support for writing and querying vector data in Pinecone.
 *
 * The Pinecone client is imported and instantiated lazily on first use, so
 * constructing this class performs no network or module-loading work.
 */
export class PineconeVectorStore
  extends VectorStoreBase
  implements VectorStoreNoEmbedModel
{
  /** Fallback batch size used when no valid chunk size is configured. */
  private static readonly DEFAULT_CHUNK_SIZE = 100;

  storesText: boolean = true;

  /*
    FROM @pinecone-database/pinecone:
      PINECONE_API_KEY="your_api_key"
      PINECONE_ENVIRONMENT="your_environment"
    Our addition:
      PINECONE_INDEX_NAME="llama"
      PINECONE_NAMESPACE=""
      PINECONE_CHUNK_SIZE=100
  */
  db?: Pinecone;
  indexName: string;
  namespace: string;
  chunkSize: number;
  textKey: string;

  /**
   * @param params Optional overrides. Each missing value falls back to its
   *   environment variable (see the comment above) and then to a default.
   */
  constructor(params?: PineconeParams) {
    super(params?.embedModel);
    this.indexName =
      params?.indexName ?? getEnv("PINECONE_INDEX_NAME") ?? "llama";
    this.namespace = params?.namespace ?? getEnv("PINECONE_NAMESPACE") ?? "";

    // A non-numeric or non-positive PINECONE_CHUNK_SIZE would make the batching
    // loop in add() either skip all data (NaN) or never terminate (0), so an
    // unusable environment value is replaced by the default.
    const envChunkSize = Number.parseInt(
      getEnv("PINECONE_CHUNK_SIZE") ?? "",
      10,
    );
    this.chunkSize =
      params?.chunkSize ??
      (envChunkSize > 0
        ? envChunkSize
        : PineconeVectorStore.DEFAULT_CHUNK_SIZE);

    this.textKey = params?.textKey ?? "text";
  }

  /**
   * Lazily imports the Pinecone package and creates the client once,
   * caching it on `this.db` for subsequent calls.
   */
  private async getDb(): Promise<Pinecone> {
    if (!this.db) {
      const { Pinecone } = await import("@pinecone-database/pinecone");
      // No arguments are passed; presumably the client reads its credentials
      // from the environment variables listed above — confirm against the
      // Pinecone client documentation.
      this.db = new Pinecone();
    }
    return this.db;
  }

  /**
   * Returns the (lazily created) Pinecone client.
   * NOTE: This does not check for, or create, the named index.
   * @returns Promise resolving to the Pinecone client; rejects if the client
   *   cannot be imported or constructed.
   */
  client() {
    return this.getDb();
  }

  /**
   * Returns a handle to the configured index, scoped to the configured namespace.
   */
  async index() {
    const db: Pinecone = await this.getDb();
    return db.index(this.indexName).namespace(this.namespace);
  }

  /**
   * Delete all records for the current index.
   * NOTE: This operation is not supported by Pinecone for "Starter" (free) indexes.
   * @returns The result of the delete query.
   */
  async clearIndex() {
    const idx = await this.index();
    return await idx.deleteAll();
  }

  /**
   * Adds vector record(s) to the index, upserting them in batches of `chunkSize`.
   * @TODO Does not create or insert sparse vectors.
   * @param embeddingResults The Nodes to be inserted, optionally including metadata tuples.
   * @returns Always resolves to an empty list; the upserted IDs are not returned.
   * @throws Error (as a rejected Promise) as soon as one batch fails to upsert;
   *   earlier batches have already been written at that point.
   */
  async add(embeddingResults: BaseNode<Metadata>[]): Promise<string[]> {
    if (embeddingResults.length === 0) {
      return [];
    }

    const idx: Index = await this.index();
    // Arrow function keeps `this` bound so subclass overrides of nodeToRecord
    // can safely access instance state.
    const records = embeddingResults.map((node) => this.nodeToRecord(node));

    // chunkSize is a public mutable field, so guard again here: an invalid
    // step would otherwise loop forever or silently upsert nothing.
    const step =
      Number.isFinite(this.chunkSize) && this.chunkSize >= 1
        ? Math.floor(this.chunkSize)
        : PineconeVectorStore.DEFAULT_CHUNK_SIZE;

    for (let start = 0; start < records.length; start += step) {
      const chunk = records.slice(start, start + step);
      const saved = await this.saveChunk(idx, chunk);
      if (!saved) {
        throw new Error(
          `PineconeVectorStore: failed to upsert records ${start}..${
            start + chunk.length - 1
          } into index "${this.indexName}"`,
        );
      }
    }
    return [];
  }

  /**
   * Upserts one batch of records.
   * @param idx Index handle to write to.
   * @param chunk Records to upsert in a single call.
   * @returns true on success; false if the upsert threw (the error is logged).
   */
  protected async saveChunk(idx: Index, chunk: any) {
    try {
      await idx.upsert(chunk);
      return true;
    } catch (err) {
      const msg = `${err}`;
      console.log(msg, err);
      return false;
    }
  }

  /**
   * Deletes a single record from the index by id.
   * NOTE(review): the value is passed to Pinecone as a record id, so only a
   * record whose id equals `refDocId` is removed — confirm this matches the
   * intended ref-doc semantics of the VectorStore interface.
   * @param refDocId Unique identifier for the record to delete.
   * @param deleteKwargs Required by VectorStore interface. Currently ignored.
   * @returns Promise that resolves if the delete query did not throw an error.
   */
  async delete(refDocId: string, deleteKwargs?: any): Promise<void> {
    const idx = await this.index();
    return idx.deleteOne(refDocId);
  }

  /**
   * Query the vector store for the closest matching data to the query embeddings
   * @TODO QUERY TYPES
   * @param query The VectorStoreQuery to be used
   * @param _options Required by VectorStore interface. Currently ignored.
   * @returns Nodes, similarities and ids, all in Pinecone's match order so that
   *   position i in each array refers to the same record.
   */
  async query(
    query: VectorStoreQuery,
    _options?: any,
  ): Promise<VectorStoreQueryResult> {
    const filter = this.toPineconeFilter(query.filters);

    const defaultOptions: any = {
      vector: query.queryEmbedding,
      topK: query.similarityTopK,
      includeValues: true,
      includeMetadata: true,
      filter: filter,
    };

    const idx = await this.index();
    const results = await idx.query(defaultOptions);
    const matches = results.matches ?? [];

    // Nothing matched: skip the follow-up fetch, which has no ids to look up.
    if (matches.length === 0) {
      return { nodes: [], similarities: [], ids: [] };
    }

    const records: FetchResponse<any> = await idx.fetch(
      matches.map((match) => match.id),
    );
    const fetched = records.records;

    // Build nodes by walking the matches (not the fetched map) so that nodes,
    // similarities and ids stay aligned. If a record is absent from the fetch
    // response, fall back to the match itself, which was requested with values
    // and metadata included.
    const nodes = matches.map((match) => {
      const row: ScoredPineconeRecord<any> = fetched[match.id] ?? match;
      return metadataDictToNode(row.metadata, {
        fallback: {
          id: row.id,
          text: this.textFromResultRow(row),
          metadata: this.metaWithoutText(row.metadata),
          embedding: row.values,
        },
      });
    });

    return {
      nodes: nodes,
      // `??` rather than `||`: a score of exactly 0 is a real score and must
      // not be replaced by the 999 "missing score" placeholder.
      similarities: matches.map((match) => match.score ?? 999),
      ids: matches.map((match) => match.id),
    };
  }

  /**
   * Required by VectorStore interface. Currently a no-op.
   * @param persistPath Ignored.
   * @returns Resolved Promise.
   */
  persist(persistPath: string): Promise<void> {
    return Promise.resolve();
  }

  /**
   * Flattens exact-match filters into a plain `{ key: value }` object.
   * @param stdFilters Filters to convert.
   * @returns The filter object, or undefined when no filters were supplied.
   */
  toPineconeFilter(stdFilters?: MetadataFilters) {
    return stdFilters?.filters?.reduce((carry: any, item: ExactMatchFilter) => {
      carry[item.key] = item.value;
      return carry;
    }, {});
  }

  /**
   * Reads the text stored under `textKey` in a record's metadata.
   * @returns The stored text, or "" when the metadata or the key is absent.
   */
  textFromResultRow(row: ScoredPineconeRecord<Metadata>): string {
    return row.metadata?.[this.textKey] ?? "";
  }

  /**
   * Returns a shallow copy of the metadata with the `textKey` entry removed.
   * @param meta Record metadata; a nullish value yields an empty object.
   */
  metaWithoutText(meta?: Metadata): any {
    const source: Metadata = meta ?? {};
    return Object.keys(source)
      .filter((key) => key !== this.textKey)
      .reduce((acc: any, key: string) => {
        acc[key] = source[key];
        return acc;
      }, {});
  }

  /**
   * Converts a node into the record shape passed to the index upsert call.
   * An empty node id is mapped to null.
   */
  nodeToRecord(node: BaseNode<Metadata>) {
    const id: any = node.id_.length ? node.id_ : null;
    return {
      id: id,
      values: node.getEmbedding(),
      metadata: nodeToMetadata(node),
    };
  }
}