<a href="https://colab.research.google.com/github/khajum/ai-playground/blob/main/rag/rag-application-101/ChromaVectorStore.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install chromadb

In [None]:
from typing import Self
import os
import chromadb

class ChromaVectorStore:
  """Persistent ChromaDB-backed store for document embeddings."""

  def __init__(self, collection_name="pdf_documents", persist_directory="./data/vector_store"):
    """
    Record the store settings and open the underlying collection.

    Args:
      collection_name: Name of the ChromaDB collection to use
      persist_directory: Directory in which ChromaDB keeps its data
    """
    self.persist_directory = persist_directory
    self.collection_name = collection_name
    self._initialize_store()

  def _initialize_store(self):
    """
    Create the persistent ChromaDB client and get or create the collection.

    Sets `self.client` and `self.collection`. Any exception raised along the
    way is reported on stdout and then re-raised unchanged.
    """
    try:
      # The storage directory must exist before the client is pointed at it
      storage_path = self.persist_directory
      os.makedirs(storage_path, exist_ok=True)
      self.client = chromadb.PersistentClient(path=storage_path)

      # Reuse the collection when it already exists, otherwise create it
      collection_metadata = {"description":"PDF document embeddings for RAG"}
      self.collection = self.client.get_or_create_collection(
          name=self.collection_name,
          metadata=collection_metadata,
      )

      ready_message = f"ChromaDB Vector Store initialized with collection: '{self.collection_name}' at path: '{self.persist_directory}'"
      print(ready_message)

      # Counting stays inside the try so a failure here is reported the same way
      count_message = f"Existing documents in the collection: {self.collection.count()}"
      print(count_message)

    except Exception as e:
      print(f"Error initializing ChromaDB vector store db: {e}")
      raise