# ChromaDB


### Save embeddings to ChromaDB (local)

In [None]:
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
import sys

def embd_vectordb(
    filepath,
    persist_directory="./chroma_db",
    model_name="all-MiniLM-L6-v2",
):
    """Embed a PDF and store the vectors in a local Chroma database.

    Args:
        filepath: Path of the PDF to load and split.
        persist_directory: Directory passed to Chroma for persistence.
            Defaults to "./chroma_db", the value that used to be hard-coded.
        model_name: Sentence-transformers model used to embed the chunks.
            Defaults to "all-MiniLM-L6-v2", the value that used to be
            hard-coded.

    Returns:
        The Chroma store built from the document, or None when loading the
        PDF or building the store raises (the error is printed).
    """
    # Constructed outside the try block, so a model that cannot be loaded
    # propagates to the caller instead of being reported as None.
    embedding = SentenceTransformerEmbeddings(model_name=model_name)

    try:
        # Load the PDF and split it into documents.
        docs = PyPDFLoader(filepath).load_and_split()
        db = Chroma.from_documents(
            docs, embedding, persist_directory=persist_directory
        )
    except Exception as e:
        # Best-effort contract kept from the original: report and return None.
        print(f"Error creating vector database: {e}")
        return None

    print("Vector database created and persisted.")
    return db

def load_vectordb(persist_directory="./chroma_db", model_name="all-MiniLM-L6-v2"):
    """Open a Chroma store from a persistence directory.

    Args:
        persist_directory: Directory passed to Chroma. Defaults to
            "./chroma_db", the value that used to be hard-coded.
        model_name: Sentence-transformers model used as the store's embedding
            function. Defaults to "all-MiniLM-L6-v2", the value that used to
            be hard-coded.

    Returns:
        The Chroma store, or None when constructing it raises (the error is
        printed).
    """
    # Constructed outside the try block, so a model that cannot be loaded
    # propagates to the caller instead of being reported as None.
    embedding = SentenceTransformerEmbeddings(model_name=model_name)

    try:
        # NOTE(review): Chroma may open an empty store rather than raise when
        # the directory does not exist yet -- confirm before relying on None
        # as the "nothing saved" signal.
        return Chroma(
            persist_directory=persist_directory, embedding_function=embedding
        )
    except Exception as e:
        print(f"Error loading vector database: {e}")
        return None

# When run as the main module, build the vector store from knowledge/health.pdf.
if __name__ == "__main__":
    embd_vectordb("knowledge/health.pdf")


### Get response using ChromaDB

In [33]:
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_chroma import Chroma
from functools import lru_cache

# Cached LLM initialization
@lru_cache()
def get_llm():
    """Return the ChatGroq client for "Llama3-8b-8192".

    lru_cache memoises the result, so the client is constructed on the first
    call and the same object is handed back afterwards.
    """
    groq_chat = ChatGroq(model_name="Llama3-8b-8192")
    return groq_chat

# Cached vector database initialization
@lru_cache()
def get_vectordb(persist_directory="./test_db"):
    """Return a cached Chroma store opened on `persist_directory`.

    Args:
        persist_directory: Directory passed to Chroma. Defaults to
            "./test_db", the value that used to be hard-coded.

    Returns:
        A Chroma store using "all-MiniLM-L6-v2" embeddings. lru_cache keeps
        one store per distinct `persist_directory` argument.

    NOTE(review): the default "./test_db" differs from the "./chroma_db"
    directory that embd_vectordb writes to in this file -- confirm which
    store is meant to be queried.
    """
    embedding = SentenceTransformerEmbeddings(
        model_name="all-MiniLM-L6-v2",
        # Pin the model to the CPU; change this if a GPU should be used.
        model_kwargs={"device": "cpu"},
    )
    return Chroma(persist_directory=persist_directory, embedding_function=embedding)

def generate_response(query: str):
    """Answer `query` with a RetrievalQA chain over the cached Chroma store.

    The chain retrieves the top 3 documents from get_vectordb(), fills them
    into the prompt below together with the question, and sends the prompt to
    the model returned by get_llm().

    Args:
        query: The user's message.

    Returns:
        The "result" entry of the chain output (the model's answer).

    Raises:
        RuntimeError: If building or running the chain fails. The original
            exception is attached as ``__cause__``.
    """
    template = """You are a healthcare e-commerce assistant that provides factual, direct answers based solely on the provided context. 

    IMPORTANT: Do not add greetings, introductions, or closing questions when responding to direct queries. Only respond with relevant information from the context.

    RULES:
    - If the user's message is a greeting (like "hi", "hello", "hey","how are u" etc.) or contains only small talk, respond with a friendly greeting
    - Answer directly without adding "Hi there" or "I'm happy to help" introductions
    - Do not ask follow-up questions like "Do you have any other questions?"
    - Only acknowledge greetings if the user's message is purely a greeting with no question
    - Use simple, patient-friendly language while being factual
    - Only use information found in the context
    - Say "I don't have enough information to answer that" if the context doesn't contain relevant information

    Context:
    {context}
    
    Patient's Question:
    {question}
    """

    QA_CHAIN_PROMPT = PromptTemplate(
        input_variables=["context", "question"],
        template=template
    )

    try:
        llm = get_llm()
        vectordb = get_vectordb()

        qa_chain = RetrievalQA.from_chain_type(
            llm,
            retriever=vectordb.as_retriever(search_kwargs={"k": 3}),  # Limit to top 3 results
            return_source_documents=True,
            chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
        )

        result = qa_chain.invoke(query)
        return result["result"]
    except Exception as e:
        # RuntimeError is still caught by callers using `except Exception`;
        # `from e` keeps the underlying failure attached for debugging.
        raise RuntimeError(f"Error generating response: {e}") from e



In [34]:
# Ask a sample question through the Chroma-backed chain; the answer is shown as the cell output.
generate_response("What is the most common cause of a sore throat?")

'The most common cause of a sore throat is viral pharyngitis, which is caused by a viral infection such as the common cold or the flu.'

In [4]:
# Same sample question asked in another run; the answer is shown as the cell output.
generate_response("What is the most common cause of a sore throat?")

'The most common cause of a sore throat is usually a viral infection, such as a cold or flu.'

## Qdrant 

### Save embeddings to Qdrant (Qdrant Cloud)

In [None]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
from langchain_community.embeddings import SentenceTransformerEmbeddings

import os
import qdrant_client
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_groq import ChatGroq
from langchain_core.documents import Document
from uuid import uuid4

from dotenv import load_dotenv
load_dotenv()

# The API key comes from the environment (load_dotenv() has run above).
qdrant_api_key = os.getenv("QDRANT_API_KEY")

client = QdrantClient(
    url="https://b5e3e25c-0644-477b-8c25-76b8e3c4fb7a.us-east-1-0.aws.cloud.qdrant.io:6333",
    api_key=qdrant_api_key,
)

embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
# For local experiments without the cloud cluster: client = QdrantClient(":memory:")

# Create the collection only when it is missing, so this cell can be re-run
# without create_collection failing on an existing "rag" collection.
if not client.collection_exists("rag"):
    client.create_collection(
        collection_name="rag",
        # size has to equal the embedding model's vector length
        # (384 is expected for all-MiniLM-L6-v2 -- confirm if the model changes).
        vectors_config=VectorParams(size=384, distance=Distance.COSINE),
    )

vector_store = QdrantVectorStore(
    client=client,
    collection_name="rag",
    embedding=embeddings,
)

# Load the PDF, split it into chunks, and store each chunk under a random id.
# NOTE(review): ids are new on every run, so ingesting the same PDF again
# presumably stores its chunks a second time -- confirm before re-running.
chunks = PyPDFLoader("C:/Users/mayur/Desktop/FRACSNET/knowledge/health.pdf").load_and_split()
uuids = [str(uuid4()) for _ in chunks]
vector_store.add_documents(documents=chunks, ids=uuids)

# Smoke-test retrieval with a sample question.
query = "What is the most common cause of a sore throat?"
results = vector_store.similarity_search(query, k=2)

print("\n🔍 Search Results:")
for res in results:
    print(f"📄 {res.page_content} [{res.metadata}]")



### Get response using Qdrant

In [3]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
from langchain_community.embeddings import SentenceTransformerEmbeddings

import os
import qdrant_client
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_groq import ChatGroq
from langchain_core.documents import Document
from uuid import uuid4

from dotenv import load_dotenv
load_dotenv()

from functools import lru_cache

# Cached LLM initialization
@lru_cache()
def get_llm():
    """Return the ChatGroq client for "Llama3-8b-8192".

    lru_cache memoises the result, so the client is constructed on the first
    call and the same object is handed back afterwards.
    """
    groq_chat = ChatGroq(model_name="Llama3-8b-8192")
    return groq_chat

# Embedding model used by the vector store below.
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# The API key comes from the environment (load_dotenv() has run above).
qdrant_api_key = os.getenv("QDRANT_API_KEY")

# Client for the Qdrant Cloud cluster at the URL below.
client = QdrantClient(
    url="https://b5e3e25c-0644-477b-8c25-76b8e3c4fb7a.us-east-1-0.aws.cloud.qdrant.io:6333", 
    api_key=qdrant_api_key,
)

# LangChain wrapper over the "rag" collection; nothing in this cell creates
# the collection, so it is assumed to exist already.
vector_store = QdrantVectorStore(
    client=client,
    collection_name="rag",
    embedding=embeddings,
)

def response_generator(vectordb, query, llm):
    """Answer `query` from the two most similar documents in `vectordb`.

    Args:
        vectordb: Object exposing ``similarity_search(query, k=...)`` whose
            results carry a ``page_content`` attribute.
        query: The user's question.
        llm: Object exposing ``invoke(prompt)`` that returns a message with a
            ``content`` attribute.

    Returns:
        The ``content`` of the model's reply.
    """
    documents = vectordb.similarity_search(query, k=2)

    # Pass only the passage text to the model. Interpolating the raw result
    # list put its repr (object wrappers and metadata) into the prompt.
    context = "\n\n".join(doc.page_content for doc in documents)

    prompt = f"""
    You are an intelligent assistant designed to provide accurate and concise answers based on the context provided. 
    Follow these rules strictly:
    1. Use ONLY the information provided in the context to answer the question.
    2. If the context does not contain enough information to answer the question, say "I don't know."
    3. Do not make up or assume any information outside of the context.
    4. Keep your answer concise and to the point (maximum 3 sentences).

    Context:
    {context}

    Question:
    {query}

    Helpful Answer:
    """
    ans = llm.invoke(prompt)
    return ans.content

  embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")





In [5]:
# Ask the sample question against the Qdrant-backed store; the reply is shown as the cell output.
llm = get_llm()
response_generator(vector_store, "What is the most common cause of a sore throat?", llm)

"I don't know. The provided context does not contain information about the most common cause of a sore throat."

In [25]:
# Inspect what the store actually retrieves for the sample question (top 2 hits).
query = "What is the most common cause of a sore throat?"
results = vector_store.similarity_search(query, k=2)

print("\n🔍 Search Results:")
for res in results:
    # Print each hit's text followed by its metadata.
    print(f"📄 {res.page_content} [{res.metadata}]")



🔍 Search Results:
📄 Natural D-Hist® includes quercetin , known for its ability to stabilize mast cells, thereby diminishing the release of histamine, the compound known to cause hypersensitivity reactions. Stinging 
nettle promotes immune system balance, specifically in the mucosal passages. Bromelain  is a proteolytic enzyme that aids in the breakdown of large protein complexes, including antigenic 
compounds, and has been shown to enhance the absorption of quercetin. Bromelain has also been shown to reduce circulating allergenic protein complexes associated with hyperimmune 
sensitivity and seasonal discomfort. N-acetyl cysteine (NAC)  is an amino acid precursor to one of the most important antioxidants in the body, glutathione. NAC helps reduce the viscosity 
of mucus, promoting optimal mucus clearance and healthy mucosal passages. Vitamin C has many immune-boosting properties, but is distinctively beneficial for individuals with seasonal 
discomfort because of its ability to deact

In [26]:
def response_generator(vectordb, query, llm):
    """Answer `query` from the two most similar documents in `vectordb`.

    Args:
        vectordb: Object exposing ``similarity_search(query, k=...)`` whose
            results carry a ``page_content`` attribute.
        query: The user's question.
        llm: Object exposing ``invoke(prompt)`` that returns a message with a
            ``content`` attribute.

    Returns:
        The ``content`` of the model's reply.
    """
    documents = vectordb.similarity_search(query, k=2)

    # Pass only the passage text to the model. Interpolating the raw result
    # list put its repr (object wrappers and metadata) into the prompt.
    context = "\n\n".join(doc.page_content for doc in documents)

    prompt = f"""
    You are an intelligent assistant designed to provide accurate and concise answers based on the context provided. 
    Follow these rules strictly:
    1. Use ONLY the information provided in the context to answer the question.
    2. If the context does not contain enough information to answer the question, say "I don't know."
    3. Do not make up or assume any information outside of the context.
    4. Keep your answer concise and to the point (maximum 3 sentences).

    Context:
    {context}

    Question:
    {query}

    Helpful Answer:
    """
    ans = llm.invoke(prompt)
    return ans.content

In [27]:
# Fetch the cached chat model for the cells that follow.
llm = get_llm()

In [None]:
# Ask the sample question again with the re-defined response_generator; the reply is shown as the cell output.
llm = get_llm()
response_generator(vector_store, "What is the most common cause of a sore throat?", llm)

"I don't know. The provided context does not contain information about the most common cause of a sore throat."