In [10]:
import chromadb
from chromadb.utils import embedding_functions

# Query-time embedder. It has to match the model that produced the stored
# vectors, otherwise the distances returned below are meaningless.
embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="dunzhang/stella_en_1.5B_v5",
)

# Open the on-disk Chroma store and fetch the already-populated collection.
chroma_client = chromadb.PersistentClient(path="./chromadb")
collection = chroma_client.get_collection(
    name="guidlines_embeddings",
    embedding_function=embedding_function,
)

# Issue one text query; the collection embeds it with `embedding_function`.
query = "What is the importance of protein?"  # Replace with your actual query
results = collection.query(query_texts=[query], n_results=3)

# Each result field holds one list per query text; only one query was sent,
# so index 0 selects its hits.
top_documents = results['documents'][0]
top_metadatas = results['metadatas'][0]
top_distances = results['distances'][0]

# Report every hit: page, score and a 200-character preview.
# NOTE(review): `1 - distance` is only a similarity if the collection was
# created with cosine distance -- TODO confirm the collection's metric.
for i, (document, metadata, distance) in enumerate(
    zip(top_documents, top_metadatas, top_distances)
):
    print(f"\nResult {i+1}")
    print(f"Page: {metadata['page_number']}")
    print(f"Similarity Score: {1 - distance:.4f}")  # Converting distance to similarity
    print(f"Text: {document[:200]}...")  # Show first 200 characters


Result 1
Page: 45
Similarity Score: 0.4953
Text: Meats, poultry, and seafood are not included. Seafood, which includes fish and shellfish, is a protein foods subgroup that provides beneficial fatty acids (e.g., eicosapentaenoic acid [EPA] and docosa...

Result 2
Page: 157
Similarity Score: 0.4935
Text: Nuts should be unsalted. Beans, Peas, Lentils: Can be considered part of the protein foods group as well as the vegetable group, but should be counted in one group only....

Result 3
Page: 140
Similarity Score: 0.4917
Text: About 50 percent of women and 30 percent of men 71 and older fall short of protein foods recommendations. The majority of protein in the Healthy U.S.-Style Dietary Pattern is accounted for in the prot...


In [4]:
import chromadb
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# SentenceTransformer instances keyed by (model name, device). Loading a
# 1.5B-parameter checkpoint takes a long time, so it is done once per kernel
# instead of once per query.
_EMBEDDING_MODELS = {}


def _get_embedding_model(model_name: str, device: str):
    """Return the cached SentenceTransformer for ``(model_name, device)``, loading it on first use."""
    cache_key = (model_name, device)
    if cache_key not in _EMBEDDING_MODELS:
        _EMBEDDING_MODELS[cache_key] = SentenceTransformer(model_name, device=device)
    return _EMBEDDING_MODELS[cache_key]


def query_documents(
    query: str,
    collection_name: str = "guidlines_embeddings",
    n_results: int = 3,
    embedding_model_name: str = "dunzhang/stella_en_1.5B_v5",
    device: str = "cpu",
    db_path: str = "./chromadb",
):
    """Embed ``query`` and return the nearest stored chunks from a Chroma collection.

    Args:
        query: Free-text question to search for.
        collection_name: Name of the existing Chroma collection to search.
        n_results: Number of nearest chunks to return.
        embedding_model_name: SentenceTransformer checkpoint used to embed the
            query. It should be the model the collection was populated with.
        device: Device string passed to SentenceTransformer.
        db_path: Directory of the persistent Chroma database.

    Returns:
        The object returned by ``collection.query`` for this single query.
    """
    # Embed the query ourselves; the collection is opened without an
    # embedding function, so it is searched by vector rather than by text.
    embedding_model = _get_embedding_model(embedding_model_name, device)
    query_embedding = embedding_model.encode(query).tolist()

    # Connect to Chroma
    client = chromadb.PersistentClient(path=db_path)
    collection = client.get_collection(name=collection_name)

    # Query the collection
    return collection.query(
        query_embeddings=[query_embedding],
        n_results=n_results,
    )

# (tokenizer, model) pairs keyed by model name, so repeated calls in the same
# kernel reuse the loaded weights instead of re-reading them.
_GENERATION_MODELS = {}


def _get_generation_model(model_name: str):
    """Return the cached ``(tokenizer, model)`` pair for ``model_name``, loading it on first use."""
    if model_name not in _GENERATION_MODELS:
        # NOTE(review): trust_remote_code=True lets the checkpoint's repository
        # run its own Python code on load; only use it with trusted model names.
        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        # device_map="auto" already decides where the weights live, so the
        # model is not moved again with .to() afterwards.
        model = AutoModelForCausalLM.from_pretrained(
            model_name, device_map="auto", trust_remote_code=True
        )
        model.eval()
        _GENERATION_MODELS[model_name] = (tokenizer, model)
    return _GENERATION_MODELS[model_name]


def generate_response(
    query: str,
    context: list[str],
    model_name: str = "meta-llama/Llama-3.2-1B-Instruct",
    max_length: int = 512
) -> str:
    """Answer ``query`` with a causal language model, grounded in ``context``.

    Args:
        query: The user's question.
        context: Retrieved text chunks; they are joined with blank lines and
            placed in the prompt under "Context:".
        model_name: Hugging Face checkpoint to load for generation.
        max_length: Maximum number of NEW tokens to generate (forwarded as
            ``max_new_tokens``; the prompt length does not count against it).

    Returns:
        The generated answer text with surrounding whitespace removed.
    """
    # Construct a friendly, professional prompt
    system_prompt = """You are a helpful and professional assistant. Always:
    - Provide accurate information based solely on the given context
    - Use professional, respectful, and inclusive language
    - If the context doesn't contain enough information, honestly say "I don't have enough information to answer that question"
    - Keep responses clear and concise
    """

    formatted_context = "\n\n".join(context)

    prompt = f"""{system_prompt}

Context:
{formatted_context}

Question: {query}
Answer: """

    tokenizer, model = _get_generation_model(model_name)

    # Put the inputs on whatever device the model was actually placed on.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_length,
        do_sample=True,
        temperature=0.1,  # Lower temperature for more focused responses
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id
    )

    # The returned sequence starts with the prompt tokens. Slice them off
    # rather than splitting the decoded text on "Answer:", which breaks
    # whenever the context or the question contains that marker.
    prompt_length = inputs["input_ids"].shape[-1]
    completion_ids = outputs[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

In [8]:
# Retrieve the chunks most relevant to the example question.
results = query_documents(
    query="What is a healthy breakfast for a 30 year old woman?",
)

In [10]:
# Results are grouped per query; index 0 holds the chunks for the single
# question asked above. They become the grounding context for the answer.
context = results["documents"][0]
response = generate_response(
    query="What is a healthy breakfast for a 30 year old woman?",
    context=context,
)
print(response)

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

300-400 calories, with a balance of protein, healthy fats, and complex carbohydrates.
