In [1]:
import chromadb
# from logger import logger
from chromadb.config import Settings

class ChromaDBConnection:
    """Process-wide singleton wrapping a persistent ChromaDB client.

    The first successful ``ChromaDBConnection(path)`` call builds a
    ``chromadb.PersistentClient`` for ``path`` and caches the wrapper on the
    class. Every later call returns that same cached wrapper; the ``path``
    passed to later calls is ignored.
    """

    # Cached singleton; stays None until a client has been built successfully.
    _instance = None

    def __new__(cls, path):
        """Return the shared connection, creating it on first use.

        Args:
            path: Location handed to ``chromadb.PersistentClient``. Only the
                value from the first successful call is used.

        Returns:
            The single ``ChromaDBConnection`` instance, with ``client`` set.

        Raises:
            Whatever ``chromadb.PersistentClient`` raises; in that case nothing
            is cached and the next call retries from scratch.
        """
        if cls._instance is None:
            # logger.info("Creating new ChromaDB connection...")
            # Build on a local and publish only once the client exists.
            # Caching the instance before this point would leave a client-less
            # object behind if PersistentClient raised, and every later call
            # would hand back that broken instance.
            instance = super().__new__(cls)
            instance.client = chromadb.PersistentClient(
                path=path,
                settings=Settings(anonymized_telemetry=False),
            )
            cls._instance = instance
        return cls._instance

    def get_collection(self, name):
        """Fetch the collection called ``name``, creating it if it does not exist.

        The collection is requested with metadata ``{"hnsw:space": "cosine"}``.

        Args:
            name: Collection name passed to ``get_or_create_collection``.

        Returns:
            The collection object returned by the client.
        """
        # logger.info(f"Accessing collection: {name}")
        collection = self.client.get_or_create_collection(
            name=name,
            metadata={"hnsw:space": "cosine"},
        )
        # logger.info(f"Collection size: {collection.count()}")
        return collection

In [2]:
# Open (or reuse) the shared connection for the 'TestDB' path, then fetch the
# collection used for this smoke test.
connection = ChromaDBConnection(path='TestDB')
collection = connection.get_collection(name='collection_test')

In [3]:
# Sanity check: show the current size of the collection fetched above.
collection.count()

0

In [4]:
# from ChromaDBConnection import ChromaDBConnection
# from logger import logger
# from config import CONNECTION_NAME

# NOTE: this single name is used for two different things in this notebook:
# the path passed to Vectorstore(...) and the collection name looked up inside
# Vectorstore.__init__.
CONNECTION_NAME = "TestDB"

class Vectorstore:
    """Thin convenience wrapper around a single ChromaDB collection.

    Attributes:
        client: The ``ChromaDBConnection`` wrapper (not the raw chromadb client).
        collection: The collection every method below operates on.
    """

    def __init__(self, path, collection_name=None):
        """Connect to the store at ``path`` and open one collection.

        Args:
            path: Passed to ``ChromaDBConnection``.
            collection_name: Name of the collection to open. Defaults to
                ``CONNECTION_NAME`` when omitted, as before.
        """
        self.client = ChromaDBConnection(path)
        if collection_name is None:
            collection_name = CONNECTION_NAME
        self.collection = self.client.get_collection(collection_name)

    def add_to_vectorstore(self, ids, vector, metadata, documents):
        """Upsert entries into the collection.

        Args:
            ids: Passed to ``upsert`` as ``ids``.
            vector: Passed to ``upsert`` as ``embeddings``.
            metadata: Passed to ``upsert`` as ``metadatas``.
            documents: Passed to ``upsert`` as ``documents``.
        """
        self.collection.upsert(
            ids=ids,
            embeddings=vector,
            metadatas=metadata,
            documents=documents,
        )
        # logger.info(f"Added vector to collection: {metadata}")

    def search_vectorstore(self, vector, k):
        """Query the collection with an embedding.

        Args:
            vector: Query embedding(s), passed as ``query_embeddings``.
            k: Number of results to request (``n_results``).

        Returns:
            Whatever ``collection.query`` returns.
        """
        results = self.collection.query(query_embeddings=vector, n_results=k)
        # logger.info(f"Found {len(results)} results")
        return results

    def delete_from_vectorstore(self, metadata):
        """Delete entries from the collection.

        Args:
            metadata: Either a dict, used as a ``where`` metadata filter, or
                anything else (for example an id or a list of ids), which is
                forwarded as ``ids`` exactly as before.
        """
        # The first positional parameter of Collection.delete is ``ids``, so a
        # metadata dict passed positionally was never applied as a filter.
        if isinstance(metadata, dict):
            self.collection.delete(where=metadata)
        else:
            self.collection.delete(ids=metadata)
        # logger.info(f"Deleted vector from collection: {metadata}")

In [5]:
# Shared store used by the cells below; CONNECTION_NAME is passed as the path.
store = Vectorstore(path=CONNECTION_NAME)

In [6]:
from langchain_ollama import ChatOllama, OllamaEmbeddings
# Model name handed to ChatOllama in get_llm().
LLM_MODEL = 'llama3.2'
# Model name handed to OllamaEmbeddings in get_embeddings().
EMBEDDING_MODEL = 'nomic-embed-text'
def get_llm():
    """Build and return a ``ChatOllama`` chat model configured with ``LLM_MODEL``."""
    # logger.info("Initializing LLM...")
    return ChatOllama(model=LLM_MODEL)

def query_llama(prompt):
    """Send ``prompt`` to the model from ``get_llm()`` and return the reply's ``content``.

    Args:
        prompt: Input forwarded unchanged to the model's ``invoke``.

    Returns:
        The ``content`` attribute of the response object.
    """
    # logger.info("Querying LLM...")
    return get_llm().invoke(prompt).content

def get_embeddings():
    """Build and return an ``OllamaEmbeddings`` instance configured with ``EMBEDDING_MODEL``."""
    # logger.info("Loading embedding model...")
    return OllamaEmbeddings(model=EMBEDDING_MODEL)

In [7]:
from langchain_ollama import OllamaEmbeddings
text = ['Hi I am Praneet', 'I am a student', 'I am a developer']

embedding_model = get_embeddings()

# Derive one id per document from its 1-based position instead of hard-coding
# ['1', '2', '3'], so the ids cannot fall out of step with `text` when the
# list is edited.
ids = [str(position) for position in range(1, len(text) + 1)]
# Each document's metadata is a one-entry dict keyed by the document text itself.
metadata = [{t: t} for t in text]
vector_embeddings = [embedding_model.embed_query(t) for t in text]
print(len(metadata))
store.add_to_vectorstore(ids=ids, vector=vector_embeddings, metadata=metadata, documents=text)

3


In [63]:
# Embed a free-text query and ask the store for its 2 closest matches.
vector = embedding_model.embed_query('Praneet developer')
store.search_vectorstore(vector=vector, k=2)

{'ids': [['1', '3']],
 'embeddings': None,
 'documents': [['Hi I am Praneet', 'I am a developer']],
 'uris': None,
 'data': None,
 'metadatas': [[{'Hi I am Praneet': 'Hi I am Praneet'},
   {'I am a developer': 'I am a developer'}]],
 'distances': [[0.21941244946106198, 0.35755051853415787]],
 'included': [<IncludeEnum.distances: 'distances'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}