## Cell 1 (install):


In [None]:
!pip install -r ../requirements.txt

print("[LOG] Requirements installed.")




[notice] A new release of pip is available: 24.2 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


## Cell 2 (load env safely):

In [None]:
import getpass
import os

from dotenv import load_dotenv

# Pull settings from a local .env file (if any) into the process environment.
load_dotenv()

print("[LOG] .env loaded.")

# One entry per setting: (variable name, hidden-input prompt, optional hint
# printed before the prompt). A value that is already set and non-empty is
# kept; anything else is requested interactively.
_REQUESTED_VARS = (
    (
        "QDRANT_URL",
        "QDRANT_URL: ",
        "QDRANT_URL not found in .env. You can paste it now (input won't be saved):",
    ),
    ("QDRANT_API_KEY", "QDRANT_API_KEY: ", None),
    (
        "GROQ_API_KEY",
        "GROQ_API_KEY (optional): ",
        "If you want to use Groq API (recommended for better generation) paste key now; or leave blank to use local fallback.",
    ),
)

for _var, _prompt, _hint in _REQUESTED_VARS:
    if os.getenv(_var):
        continue
    if _hint is not None:
        print(_hint)
    # getpass reads the value without echoing it into the notebook output.
    os.environ[_var] = getpass.getpass(_prompt)

# Report only whether each setting is present, never the value itself.
print("[LOG] QDRANT_URL present:", bool(os.getenv("QDRANT_URL")))
print("[LOG] QDRANT_API_KEY present:", bool(os.getenv("QDRANT_API_KEY")))
print("[LOG] GROQ_API_KEY present:", bool(os.getenv("GROQ_API_KEY")))

## Cell 3 (index data):


In [None]:
# run index (this will use Qdrant Cloud by default)

import pandas as pd

df = pd.read_csv("../data/rag_optimized_5000.csv")

print(f"[LOG] Loaded {len(df)} rows from CSV.")

!python ../src/load_data.py --csv ../data/rag_optimized_5000.csv --max 1000

# remove --max to index all

print("[LOG] Indexing script executed.")

## Cell 4 (inference example):

In [None]:
import sys
from pathlib import Path

# The other cells reach project files through "../" (requirements.txt, data/,
# src/load_data.py), which places the "src" package in the parent of the
# working directory. Make that directory importable so "from src.rag import
# RAG" resolves. Appending (not prepending) leaves resolution unchanged when
# the package is already importable, and the membership check keeps re-runs
# of this cell from adding duplicates.
_repo_root = str(Path("..").resolve())
if _repo_root not in sys.path:
    sys.path.append(_repo_root)

from src.rag import RAG

rag = RAG(use_qdrant=True)  # set False for local DB

query = "How do I reset my password?"
print("[LOG] Query:", query)

# Show what the retriever returns for the query before generating an answer.
retrieved = rag.retrieve(query, top_k=5)

print("[LOG] Retrieved context:")
for i, doc in enumerate(retrieved, start=1):
    # Print a 120-item preview of each hit (characters, assuming each hit is
    # a string -- TODO confirm against RAG.retrieve).
    print(f"Doc {i}:", doc[:120], "...")

print("Answer:\n")

# answer() is called with the same query and top_k as the retrieve() preview.
answer = rag.answer(query, top_k=5)
print(answer)

## Cell 5 (local fallback generation demo):

In [None]:
# Demo of the pipeline without Qdrant. RAG is the class imported in the
# inference cell above.
rag_local = RAG(use_qdrant=False)  # local in-memory DB

# index a few docs quickly
from src.embeddings import Embedder

embedder = Embedder()

sample_docs = [
    "How to reset password: go to portal -> forgot password -> follow instructions.",
    "Library hours are 9am-9pm Monday-Friday, 10am-6pm on weekends.",
]
print(f"[LOG] Sample docs: {len(sample_docs)}")

embs = embedder.embed(sample_docs)

from src.db_local import InMemoryDB

# Fill a fresh in-memory store with the sample docs and their embeddings,
# then attach it to the pipeline in place of whatever db it was built with.
db = InMemoryDB()
db.upsert(sample_docs, embs)
rag_local.db = db
print("[LOG] Local DB indexed.")

# The same question is used for the raw search preview and the final answer.
_question = "When is the library open?"
_question_vec = embedder.embed([_question])[0]
retrieved_local = rag_local.db.search(_question_vec, top_k=2)

print("[LOG] Retrieved local context:")
for _rank, _hit in enumerate(retrieved_local, start=1):
    print(f"Doc {_rank}:", _hit)

# use_groq_if_available=False is passed so the demo does not go through Groq.
_reply = rag_local.answer(_question, top_k=2, use_groq_if_available=False)
print(_reply)