In [1]:
import chromadb

In [4]:
# Default Chroma client. NOTE(review): with no settings passed this is expected
# to be the in-memory client, so stored data would not outlive the kernel —
# confirm against the installed chromadb version.
chroma_client = chromadb.Client()

In [5]:
# get_or_create_collection keeps this cell re-runnable in a live kernel:
# create_collection raises once a collection named "poem" already exists
# on the client, which breaks a second execution of the cell.
collection = chroma_client.get_or_create_collection(name="poem")


In [6]:
# Seed the collection with two toy documents. No embeddings are supplied,
# so Chroma embeds the text itself.
# upsert (rather than add) keeps the cell idempotent: re-running it with the
# same ids overwrites the stored records instead of the write being rejected
# or ignored, so edits to the text below actually reach the collection.
collection.upsert(
    ids=["id1", "id2"],
    documents=[
        "This is a document about pineapple",
        "This is a document about oranges",
    ],
)

In [7]:
# Nearest-neighbour lookup driven by raw text rather than a precomputed vector.
results = collection.query(
    n_results=2,  # how many results to return
    query_texts=["This is a query document about hawaii"],  # Chroma will embed this for you
)
print(results)

{'ids': [['id1', 'id2']], 'embeddings': None, 'documents': [['This is a document about pineapple', 'This is a document about oranges']], 'uris': None, 'included': ['metadatas', 'documents', 'distances'], 'data': None, 'metadatas': [[None, None]], 'distances': [[1.0404009819030762, 1.2430799007415771]]}


In [9]:

from sentence_transformers import SentenceTransformer
import ollama

# Step 1: Define 10 sample documents
documents = [
    "The capital of France is Paris.",
    "Python is a popular programming language for AI research.",
    "The Great Wall of China is visible from space.",
    "Water boils at 100 degrees Celsius.",
    "The Pacific Ocean is the largest ocean on Earth.",
    "Mount Everest is the tallest mountain in the world.",
    "Shakespeare wrote Romeo and Juliet.",
    "The human heart has four chambers.",
    "Photosynthesis is how plants make food using sunlight.",
    "Albert Einstein developed the theory of relativity."
]

# Position-based ids ("doc_0", "doc_1", ...) — stable across re-runs as long as
# the order of `documents` is unchanged.
doc_ids = [f"doc_{i}" for i in range(len(documents))]

# Step 2: Embedding model
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Step 3: Insert docs into Chroma
# NOTE(review): this writes into the same `collection` that already holds the
# two fruit demo documents added earlier, so those remain retrieval candidates
# for the RAG queries below. Consider a dedicated collection if that is unwanted.
embeddings = embedder.encode(documents).tolist()

# upsert instead of add: the ids are deterministic, so re-running this cell
# overwrites the existing records rather than having the write rejected or
# silently ignored — edits to `documents` take effect on the next run.
collection.upsert(
    ids=doc_ids,
    documents=documents,
    embeddings=embeddings,
)

print("Inserted documents into Chroma ✅")

# ----------- RAG Pipeline -----------

def rag_query(query, top_k=3, model="gemma3:4b"):
    """Answer ``query`` with a retrieve-then-generate pipeline.

    The query is embedded with the module-level ``embedder``, the ``top_k``
    nearest records are fetched from the module-level Chroma ``collection``,
    and their text is handed to an Ollama chat model as context.

    Args:
        query: Question text to answer.
        top_k: Number of records to request from the collection.
        model: Ollama model tag to chat with. Defaults to the tag that was
            previously hard-coded, so existing calls behave the same.

    Returns:
        The ``content`` field of the model's reply message.
    """
    # Step 4: Embed query (encode takes a batch, hence the one-element list)
    query_emb = embedder.encode([query]).tolist()

    # Step 5: Retrieve from Chroma
    results = collection.query(
        query_embeddings=query_emb,
        n_results=top_k
    )
    # One result list per query embedding; only one query was sent, so take [0].
    # Records stored without document text would otherwise make the join below
    # raise TypeError, so empty/None entries are skipped.
    retrieved_docs = [doc for doc in results["documents"][0] if doc]

    # Step 6: Construct prompt for LLM
    context = "\n".join(retrieved_docs)
    # Assembled from explicit pieces so the exact prompt text (including the
    # trailing space after the first sentence) cannot be lost to an editor's
    # whitespace trimming.
    prompt = (
        "You are a helpful assistant. \n"
        "Use the following context to answer the question.\n"
        "\n"
        "Context:\n"
        f"{context}\n"
        "\n"
        f"Question: {query}\n"
        "Answer:"
    )

    # Step 7: Call the chat model via Ollama
    response = ollama.chat(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )

    return response["message"]["content"]

# Example: run the pipeline on two sample questions, printing each answer.
for question in (
    "Who wrote Romeo and Juliet?",
    "What is the tallest mountain?",
):
    print(rag_query(question))



Inserted documents into Chroma ✅
Shakespeare.
Mount Everest is the tallest mountain in the world.
