-
Notifications
You must be signed in to change notification settings - Fork 2.2k
/
lancedb.ts
152 lines (139 loc) · 4.65 KB
/
lancedb.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import { Table } from "vectordb";
import type { EmbeddingsInterface } from "@langchain/core/embeddings";
import { VectorStore } from "@langchain/core/vectorstores";
import { Document } from "@langchain/core/documents";
/**
 * Constructor arguments for the LanceDB vector store: the table to operate
 * on and an optional name for the column that holds document text.
 */
export type LanceDBArgs = {
/** Table that rows are added to and that similarity searches run against. */
table: Table;
/**
 * Name of the column storing each document's page content. The LanceDB
 * class falls back to "text" when this is omitted or empty.
 */
textKey?: string;
};
/**
 * Vector store backed by a LanceDB table.
 *
 * Every stored row holds the embedding under `vector`, the document text
 * under the configured text column, and one additional column per document
 * metadata key.
 */
export class LanceDB extends VectorStore {
  private readonly table: Table;

  private readonly textKey: string;

  /**
   * @param embeddings Embeddings implementation used to embed documents.
   * @param args The table to operate on and, optionally, the name of the
   *   text column.
   */
  constructor(embeddings: EmbeddingsInterface, args: LanceDBArgs) {
    super(embeddings, args);
    this.table = args.table;
    this.embeddings = embeddings;
    // `||` (not `??`) so that an empty string also falls back to the default.
    this.textKey = args.textKey || "text";
  }

  /**
   * Embeds the page content of each document and stores the results.
   * @param documents The documents to be added.
   * @returns A Promise that resolves when the documents have been added.
   */
  async addDocuments(documents: Document[]): Promise<void> {
    const texts = documents.map(({ pageContent }) => pageContent);
    return this.addVectors(
      await this.embeddings.embedDocuments(texts),
      documents
    );
  }

  /**
   * Identifies this vector store implementation.
   * @returns The string "lancedb".
   */
  _vectorstoreType(): string {
    return "lancedb";
  }

  /**
   * Adds vectors and their corresponding documents to the table.
   *
   * Metadata keys become columns of the stored row. A metadata key that
   * collides with a reserved column (`vector` or the text column) is
   * skipped, so it can never overwrite the embedding or the page content.
   * @param vectors The vectors to be added.
   * @param documents The corresponding documents, in the same order.
   * @returns A Promise that resolves when the rows have been added. Resolves
   *   immediately, without touching the table, when `vectors` is empty.
   * @throws Error if `vectors` is non-empty and its length differs from
   *   that of `documents`.
   */
  async addVectors(vectors: number[][], documents: Document[]): Promise<void> {
    if (vectors.length === 0) {
      return;
    }
    if (vectors.length !== documents.length) {
      throw new Error(`Vectors and documents must have the same length`);
    }

    const reservedKeys = new Set<string>(["vector", this.textKey]);
    const data = documents.map((document, i) => {
      // Reserved columns are written first so the column order stays
      // vector, text, then metadata.
      const record: Record<string, unknown> = {
        vector: vectors[i],
        [this.textKey]: document.pageContent,
      };
      for (const [metaKey, metaValue] of Object.entries(document.metadata)) {
        if (!reservedKeys.has(metaKey)) {
          record[metaKey] = metaValue;
        }
      }
      return record;
    });

    await this.table.add(data);
  }

  /**
   * Performs a similarity search on the vectors in the table and returns
   * the matching documents with the score reported for each row.
   *
   * The score is read from the row's `score` field, falling back to
   * `_distance` when `score` is absent. Every field other than `vector`,
   * `score` and the text column is returned as document metadata.
   * @param query The query vector.
   * @param k The maximum number of results to return.
   * @returns A Promise that resolves with an array of tuples, each containing a Document and its score.
   */
  async similaritySearchVectorWithScore(
    query: number[],
    k: number
  ): Promise<[Document, number][]> {
    const results = await this.table.search(query).limit(k).execute();

    return results.map((item): [Document, number] => {
      const metadata: Record<string, unknown> = {};
      for (const [key, value] of Object.entries(item)) {
        if (key !== "vector" && key !== "score" && key !== this.textKey) {
          metadata[key] = value;
        }
      }

      const document = new Document({
        pageContent: item[this.textKey] as string,
        metadata,
      });
      // Prefer `score` (original behavior); use `_distance` only when the
      // row carries no `score` field.
      const score = (item.score ?? item._distance) as number;
      return [document, score];
    });
  }

  /**
   * Creates a new instance of LanceDB from texts.
   * @param texts The texts to be converted into documents.
   * @param metadatas Either one metadata object per text (matched by index)
   *   or a single metadata object shared by every text.
   * @param embeddings The embeddings used to embed the texts.
   * @param dbConfig The configuration for the LanceDB instance.
   * @returns A Promise that resolves with a new instance of LanceDB.
   */
  static async fromTexts(
    texts: string[],
    metadatas: object[] | object,
    embeddings: EmbeddingsInterface,
    dbConfig: LanceDBArgs
  ): Promise<LanceDB> {
    const docs: Document[] = texts.map(
      (pageContent, i) =>
        new Document({
          pageContent,
          metadata: Array.isArray(metadatas) ? metadatas[i] : metadatas,
        })
    );
    return LanceDB.fromDocuments(docs, embeddings, dbConfig);
  }

  /**
   * Creates a new instance of LanceDB and adds the given documents to it.
   * @param docs The documents to be added to the table.
   * @param embeddings The embeddings used to embed the documents.
   * @param dbConfig The configuration for the LanceDB instance.
   * @returns A Promise that resolves with the populated instance.
   */
  static async fromDocuments(
    docs: Document[],
    embeddings: EmbeddingsInterface,
    dbConfig: LanceDBArgs
  ): Promise<LanceDB> {
    const instance = new this(embeddings, dbConfig);
    await instance.addDocuments(docs);
    return instance;
  }
}