In [1]:
import os
from dotenv import load_dotenv

# load env vars
# load_dotenv()
# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# load llama libraries 
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, Settings
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding
import chromadb
from llama_index.core.node_parser import (SentenceSplitter)

# Define the embedding model used for indexing and querying, and register it
# as the global default.
# NOTE(review): load_dotenv() is commented out above, so the OpenAI API key is
# presumably expected to already be present in the process environment — confirm.
embed_model = OpenAIEmbedding(model_name="text-embedding-3-large")
Settings.embed_model = embed_model

# Chunking configuration. The same values are applied to the global Settings
# and to the explicit splitter below: a splitter handed to the index through
# `transformations=[...]` uses its own chunk_size/chunk_overlap, so a bare
# SentenceSplitter() would fall back to the library defaults and the values
# configured here would be silently ignored.
CHUNK_SIZE = 512
CHUNK_OVERLAP = 64
Settings.chunk_size = CHUNK_SIZE
Settings.chunk_overlap = CHUNK_OVERLAP

# Load the documents found in ./data.
documents = SimpleDirectoryReader("./data").load_data()

splitter = SentenceSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Open (or create) the on-disk Chroma database and the FAQ collection.
db = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db.get_or_create_collection("FAQ_collection")
# To rebuild the collection from scratch, drop it and re-run this cell:
# db.delete_collection(name="FAQ_collection")

# Wrap the collection so LlamaIndex can use it as its vector store.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# The collection is persisted on disk, so ingesting unconditionally would
# re-embed and insert another copy of every chunk each time this cell is run.
# Only ingest when the collection is still empty; otherwise reuse the vectors
# that are already stored. (Changes to ./data are NOT picked up automatically:
# delete the collection first to re-ingest.)
if chroma_collection.count() == 0:
    faq_index = VectorStoreIndex.from_documents(
        documents,
        storage_context=storage_context,
        embed_model=embed_model,
        transformations=[splitter],
    )
else:
    faq_index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)

# Display the index as the cell result.
faq_index



<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x14e3bc4f0>

In [3]:
# Re-open the Chroma database persisted under ./chroma_db and build a
# LlamaIndex index directly on top of the FAQ collection.
db2 = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db2.get_or_create_collection("FAQ_collection")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# The same embedding model is passed so queries are embedded with
# `embed_model` as well.
vector_store_llamindex = VectorStoreIndex.from_vector_store(
    vector_store,
    embed_model=embed_model,
)

# Display a retriever built from the index as the cell result.
vector_store_llamindex.as_retriever()


<llama_index.core.indices.vector_store.retrievers.retriever.VectorIndexRetriever at 0x10c950220>

In [4]:
# Build a query engine with default settings from the reloaded index, ask it
# a question, and print the textual answer.
query_engine = vector_store_llamindex.as_query_engine()
response = query_engine.query("who is sam saltis")
print(response)

Sam Saltis is not a relevant figure in the provided context.
