# Setup & Load Resources

In [1]:
import os
import chromadb
from chromadb.utils import embedding_functions
from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv

# 1. Load Environment Variables
# Pulls settings from a local .env file so no credentials are hard-coded here.
load_dotenv()

# 2. Setup Gemini (The Brain)
# NOTE(review): presumably the client takes its API key from the environment
# loaded above — confirm the variable name expected by the installed version.
llm = ChatGoogleGenerativeAI(model="gemini-flash-latest", temperature=0)

# 3. Setup Vector DB (The Memory)
# Must match the path and model used in Notebook 02
PERSIST_DIRECTORY = "../data/processed/chroma_db"

# Fail fast with an actionable message. The path is relative to the kernel's
# working directory; opening a persistent client on a path that does not exist
# would start a brand-new empty store, and the collection lookup below would
# then fail with a much less obvious "collection not found" style error.
if not os.path.isdir(PERSIST_DIRECTORY):
    raise FileNotFoundError(
        f"Chroma store not found at '{os.path.abspath(PERSIST_DIRECTORY)}'. "
        "Run Notebook 02 first, or start this notebook from its own folder."
    )

# The query-time embedding model has to be the one the stored vectors were
# built with, otherwise similarity search compares incompatible vectors.
embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="all-MiniLM-L6-v2"
)

client = chromadb.PersistentClient(path=PERSIST_DIRECTORY)
# get_collection (rather than creating one) is used on purpose: this notebook
# only reads the collection that was populated earlier.
collection = client.get_collection(
    name="customer_feedback", embedding_function=embedding_func
)

print("RAG Pipeline loaded successfully!")

  from .autonotebook import tqdm as notebook_tqdm
Loading weights: 100%|███████████████████████████████████████████████████████████████████████████████████████████| 103/103 [00:00<00:00, 417.91it/s, Materializing param=pooler.dense.weight]
BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


RAG Pipeline loaded successfully!


# The Retrieval Function

In [2]:
def retrieve_context(query_text, n_results=3):
    """
    Searches the Vector DB for the most relevant documents.

    Args:
        query_text: Natural-language text to search the collection with.
        n_results: Maximum number of documents to retrieve (default 3).

    Returns:
        A single string with the retrieved documents joined by a
        "---" separator line, or an empty string when nothing usable
        was returned.
    """
    results = collection.query(query_texts=[query_text], n_results=n_results)

    # Chroma returns a list of lists: one inner list per query text. Only one
    # query text is sent, so only the first inner list matters. The outer
    # value may be missing/None or empty, which is treated as "no documents".
    doc_batches = results.get("documents") or [[]]

    # Skip None/empty entries so the join below cannot raise TypeError and the
    # context does not contain blank sections.
    docs = [doc for doc in doc_batches[0] if doc]

    # Join them into a single string with separators
    return "\n\n---\n\n".join(docs)


# Test the retrieval function alone
# Running retrieval without the LLM makes it possible to tell search problems
# apart from generation problems before the two are combined.
test_context = retrieve_context("internet issues")
print(f"Retrieved Context:\n{test_context}")

Retrieved Context:
The fiber internet cuts out every time it rains.

---

My 5G signal is very weak in the city center.

---

The technician was rude and didn't fix the router.


# The Generation Function (The RAG)

In [3]:
def query_rag(question, n_results=3):
    """
    Full RAG Pipeline: Retrieve -> Augment -> Generate

    Args:
        question: The user's question; used both as the retrieval query and
            inside the prompt.
        n_results: How many documents to retrieve as context (default 3).

    Returns:
        The model's answer as a string.
    """
    # 1. Retrieve relevant context from DB
    context = retrieve_context(question, n_results=n_results)

    # 2. Construct the Prompt
    # We instruct the model to ONLY use the provided context.
    # The text below is sent verbatim, indentation included.
    prompt = f"""
    You are a Customer Experience Analyst. 
    Analyze the customer feedback provided in the 'Context' section below to answer the user's question.
    
    If the answer is not in the context, strictly state: "I don't have enough information to answer that."
    
    Context:
    {context}
    
    Question: 
    {question}
    
    Answer:
    """

    # 3. Generate response with Gemini
    response = llm.invoke(prompt)
    content = response.content
    if isinstance(content, str):
        return content

    # Message content may also arrive as a list of blocks (plain strings or
    # dicts). Keep only the textual parts so callers always receive a string.
    text_parts = []
    for block in content:
        if isinstance(block, str):
            text_parts.append(block)
        elif isinstance(block, dict) and block.get("type") == "text":
            text_parts.append(block.get("text", ""))
    return "".join(text_parts)


# Confirmation only: this cell defines query_rag and does not call the model.
print("RAG Function defined.")

RAG Function defined.


# Final Testing

In [4]:
# Two probes for the finished pipeline:
#   q1 - something that IS in the database
#   q2 - something that is NOT in the database (hallucination check)
q1 = "What are the main complaints regarding internet connectivity?"
q2 = "What do customers think about the price of the sports channel package?"

divider = "\n" + "=" * 50 + "\n"
for position, user_question in enumerate((q1, q2)):
    # Print the divider between cases only, never before the first one.
    if position:
        print(divider)
    print(f"User: {user_question}")
    print(f"AI: {query_rag(user_question)}")

User: What are the main complaints regarding internet connectivity?
AI: The main complaints regarding internet connectivity are:

1.  Fiber internet experiences outages (cuts out) whenever it rains.
2.  The 5G signal is very weak in the city center.


User: What do customers think about the price of the sports channel package?
AI: I don't have enough information to answer that.
