Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

UPDATED: Added support for Azure #6

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,13 @@ Here is a basic example of how to use the VectorStorage class:
import { VectorStorage } from "vector-storage";

// Create an instance of VectorStorage
const vectorStore = new VectorStorage({ openAIApiKey: "your-openai-api-key" });
const vectorStore = new VectorStorage({
azureProxy: "your-azure-endpoint-that-manages-your-api",
azureEndpoint: "your-azure-endpoint",
azureApiKey: "your-azure-api-key",
openAIEndpoint: "https://api.openai.com/v1/embeddings",
openAIApiKey: "your-openai-api-key"
});

// Add a text document to the store
await vectorStore.addText("The quick brown fox jumps over the lazy dog.", {
Expand Down
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
"build": "tsc",
"bump": "npm version patch",
"package": "npm run lint && npm run build && npm run bump && npm publish",
"lint": "eslint --ext .ts src"
"lint": "eslint --ext .ts src",
"prepare": "npm run build"
},
"author": "Nitai Aharoni",
"license": "MIT",
Expand Down
106 changes: 77 additions & 29 deletions src/VectorStorage.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { ICreateEmbeddingResponse } from './types/ICreateEmbeddingResponse';
import { IDBPDatabase, openDB } from 'idb';
import { IVSDocument, IVSSimilaritySearchItem } from './types/IVSDocument';
import { IVSFilterOptions } from './types/IVSFilterOptions';
import { IVSOptions } from './types/IVSOptions';
import { IVSSimilaritySearchParams } from './types/IVSSimilaritySearchParams';
import { constants } from './common/constants';
Expand All @@ -12,22 +13,30 @@ export class VectorStorage<T> {
private readonly maxSizeInMB: number;
private readonly debounceTime: number;
private readonly openaiModel: string;
private readonly openaiEndpoint?: string;
private readonly openaiApiKey?: string;
private readonly azureEndpoint?: string;
private readonly azureApiKey?: string;
private readonly azureProxy?: string;
private readonly embedTextsFn: (texts: string[]) => Promise<number[][]>;

constructor(options: IVSOptions = {}) {
this.maxSizeInMB = options.maxSizeInMB ?? constants.DEFAULT_MAX_SIZE_IN_MB;
this.debounceTime = options.debounceTime ?? constants.DEFAULT_DEBOUNCE_TIME;
this.openaiModel = options.openaiModel ?? constants.DEFAULT_OPENAI_MODEL;
this.embedTextsFn = options.embedTextsFn ?? this.embedTexts; // Use the custom function if provided, else use the default one
this.openaiEndpoint = options.openAIEndpoint;
this.openaiApiKey = options.openAIApiKey;
if (!this.openaiApiKey && !options.embedTextsFn) {
console.error('VectorStorage: pass as an option either an OpenAI API key or a custom embedTextsFn function.');
this.azureEndpoint = options.azureEndpoint;
this.azureApiKey = options.azureApiKey;
this.azureProxy = options.azureProxy;
if (!this.azureProxy && !this.azureApiKey && !this.openaiApiKey && !options.embedTextsFn) {
console.error('VectorStorage: pass as an option either an api key or a custom embedTextsFn function.');
} else {
this.loadFromIndexDbStorage();
}
}

public async addText(text: string, metadata: T): Promise<IVSDocument<T>> {
// Create a document from the text and metadata
const doc: IVSDocument<T> = {
Expand All @@ -40,7 +49,7 @@ export class VectorStorage<T> {
const docs = await this.addDocuments([doc]);
return docs[0];
}

public async addTexts(texts: string[], metadatas: T[]): Promise<Array<IVSDocument<T>>> {
if (texts.length !== metadatas.length) {
throw new Error('The lengths of texts and metadata arrays must match.');
Expand All @@ -54,7 +63,7 @@ export class VectorStorage<T> {
}));
return await this.addDocuments(docs);
}

public async similaritySearch(params: IVSSimilaritySearchParams): Promise<{
similarItems: Array<IVSSimilaritySearchItem<T>>;
query: { text: string; embedding: number[] };
Expand Down Expand Up @@ -83,6 +92,19 @@ export class VectorStorage<T> {
};
}

/**
 * Empties the in-memory document list and then saves the emptied list
 * through saveToIndexDbStorage.
 */
public async clearAll(): Promise<void> {
  // Truncate in place so the same array instance is kept, exactly as the
  // element-by-element removal did.
  this.documents.length = 0;
  await this.saveToIndexDbStorage();
}

/**
 * Replaces the in-memory document list with whatever filterDocuments
 * returns for the given filter options, then saves the result through
 * saveToIndexDbStorage.
 *
 * @param filterOptions - Criteria handed unchanged to filterDocuments.
 */
public async retainMatching(filterOptions: IVSFilterOptions): Promise<void> {
  this.documents = filterDocuments(this.documents, filterOptions);
  await this.saveToIndexDbStorage();
}

private async initDB(): Promise<IDBPDatabase<any>> {
return await openDB<any>('VectorStorageDatabase', undefined, {
upgrade(db) {
Expand All @@ -99,7 +121,7 @@ export class VectorStorage<T> {
},
});
}

private async addDocuments(documents: Array<IVSDocument<T>>): Promise<Array<IVSDocument<T>>> {
// filter out already existing documents
const newDocuments = documents.filter((doc) => !this.documents.some((d) => d.text === doc.text));
Expand All @@ -120,37 +142,63 @@ export class VectorStorage<T> {
await this.saveToIndexDbStorage();
return newDocuments;
}

private async embedTexts(texts: string[]): Promise<number[][]> {
const response = await fetch(constants.OPENAI_API_URL, {
body: JSON.stringify({
input: texts,
model: this.openaiModel,
}),
headers: {
Authorization: `Bearer ${this.openaiApiKey}`,
'Content-Type': 'application/json',
},
method: 'POST',
});

let response;
if (this.azureProxy) {
response = await fetch(this.azureProxy, {
body: JSON.stringify({
model: this.openaiModel,
texts,
}),
headers: {
'Content-Type': 'application/json',
},
method: 'POST',
});
} else if (this.azureEndpoint && this.azureApiKey) {
response = await fetch(this.azureEndpoint, {
body: JSON.stringify({
input: texts,
model: this.openaiModel,
}),
headers: {
'Content-Type': 'application/json',
'api-key': this.azureApiKey,
},
method: 'POST',
});
} else {
response = await fetch(this.openaiEndpoint ?? constants.OPENAI_API_URL, {
body: JSON.stringify({
input: texts,
model: this.openaiModel,
}),
headers: {
Authorization: `Bearer ${this.openaiApiKey}`,
'Content-Type': 'application/json',
},
method: 'POST',
});
}

if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`);
}

const responseData = (await response.json()) as ICreateEmbeddingResponse;
return responseData.data.map((data) => data.embedding);
}

/**
 * Embeds a single string by sending it as a one-element batch to the
 * configured embedTextsFn and returning the first vector of the result.
 *
 * @param query - The text to embed.
 * @returns The first embedding produced for the one-element batch.
 */
private async embedText(query: string): Promise<number[]> {
  const batchEmbeddings = await this.embedTextsFn([query]);
  return batchEmbeddings[0];
}

/**
 * Computes the Euclidean length of a vector: the square root of the sum
 * of its squared components.
 *
 * @param embedding - The vector components.
 * @returns The square root of the summed squares (0 for an empty array).
 */
private calculateMagnitude(embedding: number[]): number {
  let sumOfSquares = 0;
  // Accumulate left to right so the floating-point summation order is unchanged.
  for (const component of embedding) {
    sumOfSquares = sumOfSquares + component * component;
  }
  return Math.sqrt(sumOfSquares);
}

private calculateSimilarityScores(filteredDocuments: Array<IVSDocument<T>>, queryVector: number[], queryMagnitude: number): Array<[IVSDocument<T>, number]> {
return filteredDocuments.map((doc) => {
const dotProduct = doc.vector!.reduce((sum, val, i) => sum + val * queryVector[i], 0);
Expand All @@ -159,21 +207,21 @@ export class VectorStorage<T> {
return [doc, score];
});
}

/**
 * Increments the `hits` counter of every document in `results`, mutating
 * the documents in place. A missing counter is treated as 0.
 *
 * @param results - Documents whose hit counters should be bumped by one.
 */
private updateHitCounters(results: Array<IVSDocument<T>>): void {
  for (const matchedDoc of results) {
    const previousHits = matchedDoc.hits ?? 0;
    matchedDoc.hits = previousHits + 1;
  }
}

/**
 * Populates the in-memory document list from the database.
 *
 * Opens the database through initDB if no handle is cached yet, assigns
 * the result of getAll('documents') to the document list, and then calls
 * removeDocsLRU.
 */
private async loadFromIndexDbStorage(): Promise<void> {
  // Reuse the cached handle when present; otherwise open one and cache it.
  const database = this.db ? this.db : await this.initDB();
  this.db = database;

  this.documents = await database.getAll('documents');
  this.removeDocsLRU();
}

private async saveToIndexDbStorage(): Promise<void> {
if (!this.db) {
this.db = await this.initDB();
Expand All @@ -190,12 +238,12 @@ export class VectorStorage<T> {
console.error('Failed to save to IndexedDB:', error.message);
}
}

private removeDocsLRU(): void {
if (getObjectSizeInMB(this.documents) > this.maxSizeInMB) {
// Sort documents by hit counter (ascending) and then by timestamp (ascending)
this.documents.sort((a, b) => (a.hits ?? 0) - (b.hits ?? 0) || a.timestamp - b.timestamp);

// Remove documents until the size is below the limit
while (getObjectSizeInMB(this.documents) > this.maxSizeInMB) {
this.documents.shift();
Expand Down
8 changes: 5 additions & 3 deletions src/common/helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ export function filterDocuments(documents: Array<IVSDocument<any>>, filterOption
function matchesCriteria(document: IVSDocument<any>, criteria: IVSFilterCriteria): boolean {
if (criteria.metadata) {
for (const key in criteria.metadata) {
if (document.metadata[key] !== criteria.metadata[key]) {
return false;
}
if (Array.isArray(criteria.metadata[key])) {
if (!criteria.metadata[key].includes(document.metadata[key])) {
return false;
}
} else if (document.metadata[key] !== criteria.metadata[key]) return false;
}
}
if (criteria.text) {
Expand Down
4 changes: 4 additions & 0 deletions src/types/IVSOptions.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
export interface IVSOptions {
azureProxy?: string; // Azure endpoint which manages your api key
azureEndpoint?: string; // Azure endpoint
azureApiKey?: string; // Azure api key
openAIEndpoint?: string; // The OpenAI embeddings endpoint URL. Falls back to the built-in default URL when omitted.
openAIApiKey?: string; // The OpenAI API key used for generating embeddings.
maxSizeInMB?: number; // The maximum size of the storage in megabytes. Defaults to 4.8. Cannot exceed 5.
debounceTime?: number; // The debounce time in milliseconds for saving to local storage. Defaults to 0.
Expand Down