<a href="https://colab.research.google.com/github/rajkiran15/colab-hands-on/blob/main/rag_pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from groq import Groq

# ---- 1. init embedding (local, free) ----
# Name of the sentence-transformers checkpoint used for every embedding below.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
embedder = SentenceTransformer(EMBEDDING_MODEL_NAME)

def embed(text):
    """Return the embedding of *text* from the module-level ``embedder``.

    The input is forwarded unchanged to ``embedder.encode`` and its result
    is returned as-is.
    """
    vector = embedder.encode(text)
    return vector

# ---- 2. sample documents (pretend chunks) ----
# Tiny in-memory corpus; each string stands in for one retrievable chunk.
documents = [
    "LLM stands for Large Language Model.",
    "LLMs are neural networks trained on text to generate responses.",
    "RAG retrieves relevant text chunks, then LLM generates answers using that context.",
    "Cosine similarity finds the most relevant document chunks for a query.",
    "Prime Minister of India is Raj Kiran.",
]

# Stack one embedding per document, row i corresponding to documents[i].
doc_embeddings = np.vstack(list(map(embed, documents)))
print(f"doc_embeddings: {doc_embeddings}")

# ---- 3. retriever top-k ----
def retrieve(query, k=2):
    """Return the ``k`` documents most similar to *query*.

    The query is embedded with ``embed``, compared against every row of
    ``doc_embeddings`` with cosine similarity, and the matching entries of
    ``documents`` are returned in descending order of similarity.

    Args:
        query: Text to search for.
        k: Maximum number of documents to return. Values larger than the
            corpus simply return every document.

    Returns:
        A list of document strings, best match first.

    Raises:
        ValueError: If ``k`` is negative.
    """
    # A negative k would be read by the slice below as "drop the last |k|
    # results", silently returning the wrong documents. None is left alone:
    # the slice then returns every document.
    if k is not None and k < 0:
        raise ValueError(f"k must be non-negative, got {k}")

    # cosine_similarity expects 2-D inputs, so make the query a single row.
    q_vec = embed(query).reshape(1, -1)
    print(f"q_vec: {q_vec}")
    # Row 0 holds the similarity of the query to each document.
    sims = cosine_similarity(q_vec, doc_embeddings)[0]
    print(f"sims: {sims}")
    # argsort is ascending; reverse it so the best match comes first.
    top_idx = sims.argsort()[::-1][:k]
    print(f"top_idx: {top_idx}")
    return [documents[i] for i in top_idx]

# ---- 4. generator with real LLM (Groq llama 70B) ----
# Do not hard-code the key in the notebook: with no api_key argument the Groq
# client reads it from the GROQ_API_KEY environment variable, so set that
# before running this cell.
groq_client = Groq()

def rag_answer(question, k=2):
    """Answer *question* with the LLM, grounded in retrieved documents.

    The top ``k`` documents from ``retrieve`` are joined with newlines into a
    context block, placed in a prompt that tells the model to answer only
    from that context, and sent to Groq as a single user message.

    Args:
        question: The user's question.
        k: Number of documents to retrieve as context.

    Returns:
        The content of the first choice in the chat completion response.
    """
    context = "\n".join(retrieve(question, k))
    print(f"context: {context}")

    # Build the prompt from unindented lines so that source-code indentation
    # does not leak into the text sent to the model and every context line
    # is aligned the same way.
    prompt = (
        "Answer using only context below.\n"
        "If not present, say 'I don't know'.\n"
        "\n"
        "Context:\n"
        f"{context}\n"
        "\n"
        f"Question: {question}\n"
        "Answer in one line:\n"
    )

    chat = groq_client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3,
        max_tokens=100,
    )
    return chat.choices[0].message.content

# ---- Demo ----
# Show the retrieved chunks first, then the model's answer to the same question.
q = "Who is the prime minister of India?"
used_context = retrieve(q)
print("Used context:", used_context)
answer = rag_answer(q)
print("Answer:", answer)


doc_embeddings: [[ 0.04154063 -0.10107709  0.00187925 ... -0.01314523 -0.03162565
   0.00113997]
 [-0.00629217 -0.08012642  0.02022319 ...  0.06822985 -0.02015463
  -0.0443715 ]
 [-0.05207714  0.04141601 -0.00823354 ...  0.0232152   0.02978858
   0.02350594]
 [-0.02408327  0.01232633 -0.04885484 ...  0.02700846  0.08153643
   0.06153347]
 [-0.0278332   0.01828905 -0.00164464 ... -0.02353774  0.0041954
  -0.00866061]]
q_vec: [[ 1.89726837e-02  2.93299407e-02 -1.78657416e-02  1.80592518e-02
  -7.82864355e-03 -1.06616337e-02  1.11110426e-01  4.75727282e-02
  -1.33271739e-02  2.11975407e-02 -3.93488146e-02 -3.01005598e-02
   1.04149070e-03  8.76416173e-03  3.16209160e-02  4.46070880e-02
   2.32314877e-02  2.64463779e-02  2.09059566e-02 -8.59333128e-02
   1.40612097e-02  4.82150055e-02  3.12742754e-03 -5.48108183e-02
   1.65378843e-02 -1.44661148e-03  1.79683752e-02 -4.99177948e-02
  -3.15187015e-02 -4.07110006e-02  4.70174663e-02 -3.33348289e-02
  -1.69962682e-02  2.31388807e-02 -1.3497842