feat: add pinecone support to create llama #555

Merged · 7 commits · Feb 22, 2024
5 changes: 5 additions & 0 deletions .changeset/happy-monkeys-cross.md
@@ -0,0 +1,5 @@
---
"llamaindex": patch
---

fix: update pinecone vector store
5 changes: 5 additions & 0 deletions .changeset/pink-tools-look.md
@@ -0,0 +1,5 @@
---
"create-llama": patch
---

feat: add pinecone support to create llama
2 changes: 1 addition & 1 deletion packages/core/package.json
@@ -8,7 +8,7 @@
"@datastax/astra-db-ts": "^0.1.4",
"@mistralai/mistralai": "^0.0.10",
"@notionhq/client": "^2.2.14",
"@pinecone-database/pinecone": "^1.1.3",
"@pinecone-database/pinecone": "^2.0.1",
"@qdrant/js-client-rest": "^1.7.0",
"@xenova/transformers": "^2.15.0",
"assemblyai": "^4.2.2",
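The bump from ^1.1.3 to ^2.0.1 crosses a major version of the Pinecone TypeScript client. As a rough sketch of the v2-era surface this PR builds on (the record contents are placeholders, not taken from this diff):

```ts
// Minimal sketch, assuming the @pinecone-database/pinecone v2.x client API.
import { Pinecone } from "@pinecone-database/pinecone";

const pc = new Pinecone({ apiKey: process.env.PINECONE_API_KEY! });
const index = pc.index(process.env.PINECONE_INDEX_NAME!);

// Records carry an id, an embedding vector, and a flat metadata dict --
// the same shape nodeToRecord produces in the vector store change below.
await index.upsert([
  { id: "node-1", values: [0.1, 0.2, 0.3], metadata: { text: "hello" } },
]);
```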
25 changes: 14 additions & 11 deletions packages/core/src/storage/vectorStore/PineconeVectorStore.ts
@@ -6,7 +6,7 @@ import {
VectorStoreQueryResult,
} from "./types";

-import { BaseNode, Document, Metadata, MetadataMode } from "../../Node";
+import { BaseNode, Metadata } from "../../Node";
import { GenericFileSystem } from "../FileSystem";

import {
@@ -15,6 +15,7 @@ import {
Pinecone,
ScoredPineconeRecord,
} from "@pinecone-database/pinecone";
+import { metadataDictToNode, nodeToMetadata } from "./utils";

type PineconeParams = {
indexName?: string;
@@ -155,12 +156,18 @@ export class PineconeVectorStore implements VectorStore {
const rows = Object.values(records.records);

const nodes = rows.map((row) => {
-      return new Document({
-        id_: row.id,
-        text: this.textFromResultRow(row),
-        metadata: this.metaWithoutText(row.metadata),
-        embedding: row.values,
+      const metadata = this.metaWithoutText(row.metadata);
+      const text = this.textFromResultRow(row);
+      const node = metadataDictToNode(metadata, {
+        fallback: {
+          id: row.id,
+          text,
+          metadata,
+          embedding: row.values,
+        },
+      });
+      node.setContent(text);
+      return node;
});

const ret = {
@@ -207,14 +214,10 @@

nodeToRecord(node: BaseNode<Metadata>) {
let id: any = node.id_.length ? node.id_ : null;
-    let meta: any = node.metadata || {};
-    meta.create_date = new Date();
-    meta.text = node.getContent(MetadataMode.EMBED);

return {
id: id,
values: node.getEmbedding(),
-      metadata: meta,
+      metadata: nodeToMetadata(node),
};
}
}
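The net effect of this change: instead of hand-building Document objects from query rows, the store now serializes node content and type into the Pinecone metadata on write (nodeToMetadata) and reconstructs the node on read (metadataDictToNode), with a fallback for records written before this scheme. A rough round-trip sketch under those assumptions:

```ts
// Sketch of the assumed round-trip; the helpers are the ones imported from
// "./utils" in the diff above, and the fallback shape is copied from it.
import { metadataDictToNode, nodeToMetadata } from "./utils";
import { Document } from "llamaindex";

const doc = new Document({ text: "hello world", metadata: { source: "a.txt" } });

// Write path: fold the node (content, type, metadata) into a flat dict for Pinecone.
const meta = nodeToMetadata(doc);

// Read path: rebuild a node from stored metadata; fall back to the raw record
// fields when the serialized node payload is absent (e.g. legacy records).
const node = metadataDictToNode(meta, {
  fallback: { id: doc.id_, text: "hello world", metadata: meta, embedding: [] },
});
```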
2 changes: 1 addition & 1 deletion packages/create-llama/helpers/types.ts
@@ -4,7 +4,7 @@ export type TemplateType = "simple" | "streaming" | "community" | "llamapack";
export type TemplateFramework = "nextjs" | "express" | "fastapi";
export type TemplateEngine = "simple" | "context";
export type TemplateUI = "html" | "shadcn";
-export type TemplateVectorDB = "none" | "mongo" | "pg";
+export type TemplateVectorDB = "none" | "mongo" | "pg" | "pinecone";
export type TemplatePostInstallAction = "none" | "dependencies" | "runApp";
export type TemplateDataSource = {
type: TemplateDataSourceType;
1 change: 1 addition & 0 deletions packages/create-llama/questions.ts
@@ -89,6 +89,7 @@ const getVectorDbChoices = (framework: TemplateFramework) => {
},
{ title: "MongoDB", value: "mongo" },
{ title: "PostgreSQL", value: "pg" },
{ title: "Pinecone", value: "pinecone" },
];

const vectordbLang = framework === "fastapi" ? "python" : "typescript";
@@ -0,0 +1,3 @@
DATA_DIR = "data" # directory containing the documents to index
CHUNK_SIZE = 512
CHUNK_OVERLAP = 20
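These constants are mirrored in the TypeScript template below (shared.mts uses the same 512/20 pair). For intuition, a sketch of how a size/overlap sliding window tiles a token stream; the real splitter also respects sentence boundaries, so this is illustrative arithmetic only:

```ts
// Illustrative only: chunk start offsets for CHUNK_SIZE=512, CHUNK_OVERLAP=20.
const CHUNK_SIZE = 512;
const CHUNK_OVERLAP = 20;

function chunkStarts(totalTokens: number): number[] {
  const step = CHUNK_SIZE - CHUNK_OVERLAP; // 492 new tokens per chunk
  const starts: number[] = [];
  for (let s = 0; s < totalTokens; s += step) starts.push(s);
  return starts;
}

console.log(chunkStarts(1200)); // [ 0, 492, 984 ]
```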
@@ -0,0 +1,14 @@
from llama_index import ServiceContext

from app.context import create_base_context
from app.engine.constants import CHUNK_SIZE, CHUNK_OVERLAP


def create_service_context():
base = create_base_context()
return ServiceContext.from_defaults(
llm=base.llm,
embed_model=base.embed_model,
chunk_size=CHUNK_SIZE,
chunk_overlap=CHUNK_OVERLAP,
)
@@ -0,0 +1,45 @@
from dotenv import load_dotenv

load_dotenv()
import os
import logging
from llama_index.vector_stores import PineconeVectorStore

from app.engine.constants import DATA_DIR
from app.engine.context import create_service_context
from app.engine.loader import get_documents


from llama_index import (
SimpleDirectoryReader,
VectorStoreIndex,
StorageContext,
)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()


def generate_datasource(service_context):
logger.info("Creating new index")
# load the documents and create the index
documents = get_documents()
store = PineconeVectorStore(
api_key=os.environ["PINECONE_API_KEY"],
index_name=os.environ["PINECONE_INDEX_NAME"],
environment=os.environ["PINECONE_ENVIRONMENT"],
)
storage_context = StorageContext.from_defaults(vector_store=store)
VectorStoreIndex.from_documents(
documents,
service_context=service_context,
storage_context=storage_context,
show_progress=True, # this will show you a progress bar as the embeddings are created
)
logger.info(
f"Successfully created embeddings and save to your Pinecone index {os.environ['PINECONE_INDEX_NAME']}"
)


if __name__ == "__main__":
generate_datasource(create_service_context())
@@ -0,0 +1,23 @@
import logging
import os

from llama_index import (
VectorStoreIndex,
)
from llama_index.vector_stores import PineconeVectorStore

from app.engine.context import create_service_context


def get_index():
service_context = create_service_context()
logger = logging.getLogger("uvicorn")
logger.info("Connecting to index from Pinecone...")
store = PineconeVectorStore(
api_key=os.environ["PINECONE_API_KEY"],
index_name=os.environ["PINECONE_INDEX_NAME"],
environment=os.environ["PINECONE_ENVIRONMENT"],
)
index = VectorStoreIndex.from_vector_store(store, service_context)
logger.info("Finished connecting to index from Pinecone.")
return index
@@ -0,0 +1,35 @@
/* eslint-disable turbo/no-undeclared-env-vars */
import * as dotenv from "dotenv";
import {
PineconeVectorStore,
SimpleDirectoryReader,
VectorStoreIndex,
storageContextFromDefaults,
} from "llamaindex";
import { STORAGE_DIR, checkRequiredEnvVars } from "./shared.mjs";

dotenv.config();

async function loadAndIndex() {
// load documents from the local data directory and convert them into LlamaIndex Document objects
const documents = await new SimpleDirectoryReader().loadData({
directoryPath: STORAGE_DIR,
});

// create vector store
const vectorStore = new PineconeVectorStore();

// create an index from all the Documents and store it in Pinecone
console.log("Start creating embeddings...");
const storageContext = await storageContextFromDefaults({ vectorStore });
await VectorStoreIndex.fromDocuments(documents, { storageContext });
console.log(
"Successfully created embeddings and save to your Pinecone index.",
);
}

(async () => {
checkRequiredEnvVars();
await loadAndIndex();
console.log("Finished generating storage.");
})();
@@ -0,0 +1,29 @@
/* eslint-disable turbo/no-undeclared-env-vars */
import {
ContextChatEngine,
LLM,
PineconeVectorStore,
VectorStoreIndex,
serviceContextFromDefaults,
} from "llamaindex";
import { CHUNK_OVERLAP, CHUNK_SIZE, checkRequiredEnvVars } from "./shared.mjs";

async function getDataSource(llm: LLM) {
checkRequiredEnvVars();
const serviceContext = serviceContextFromDefaults({
llm,
chunkSize: CHUNK_SIZE,
chunkOverlap: CHUNK_OVERLAP,
});
const store = new PineconeVectorStore();
return await VectorStoreIndex.fromVectorStore(store, serviceContext);
}

export async function createChatEngine(llm: LLM) {
const index = await getDataSource(llm);
const retriever = index.asRetriever({ similarityTopK: 5 });
return new ContextChatEngine({
chatModel: llm,
retriever,
});
}
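A hypothetical caller of createChatEngine; the OpenAI model name and the exact chat-call shape are assumptions, since the ChatEngine API shifted across llamaindex versions of this era:

```ts
import { OpenAI } from "llamaindex";

// createChatEngine is the export defined above; the import path is illustrative.
import { createChatEngine } from "./chat";

const llm = new OpenAI({ model: "gpt-3.5-turbo" });
const chatEngine = await createChatEngine(llm);

// Retrieves the top-5 matching chunks from Pinecone (similarityTopK: 5 above)
// and answers using them as context.
const response = await chatEngine.chat({ message: "What do the indexed documents cover?" });
console.log(response.toString());
```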
@@ -0,0 +1,22 @@
export const STORAGE_DIR = "./data";
export const CHUNK_SIZE = 512;
export const CHUNK_OVERLAP = 20;

const REQUIRED_ENV_VARS = ["PINECONE_ENVIRONMENT", "PINECONE_API_KEY"];

export function checkRequiredEnvVars() {
const missingEnvVars = REQUIRED_ENV_VARS.filter((envVar) => {
return !process.env[envVar];
});

if (missingEnvVars.length > 0) {
console.log(
`The following environment variables are required but missing: ${missingEnvVars.join(
", ",
)}`,
);
throw new Error(
`Missing environment variables: ${missingEnvVars.join(", ")}`,
);
}
}
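One observation: REQUIRED_ENV_VARS here checks only the environment and API key, while the Python template also reads PINECONE_INDEX_NAME. If the TypeScript PineconeVectorStore resolves its index name from the environment the same way (an assumption, since its constructor is called with no arguments above), a stricter check would add it:

```ts
// Sketch only: extends the check with the index-name variable the Python
// template requires; whether the TS store needs it here is an assumption.
const REQUIRED_ENV_VARS = [
  "PINECONE_ENVIRONMENT",
  "PINECONE_API_KEY",
  "PINECONE_INDEX_NAME",
];
```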
65 changes: 14 additions & 51 deletions pnpm-lock.yaml
