In [1]:
import os
import chromadb
from chromadb.config import Settings
from sentence_transformers import SentenceTransformer

In [2]:
# Re-open the existing Chroma index
INDEX_DIR = "../data/index/meditations"

client = chromadb.PersistentClient(path=INDEX_DIR)
collection = client.get_collection(name="meditations")

# Same embedding model you used when building the index
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")



In [3]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

model_name = "google/flan-t5-base"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

llm = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer
)

In [4]:
# --- RAG Helper Functions ---

def retrieve_context(query: str, collection, embedder, k: int = 4):
    """Retrieve top-k most relevant chunks using Chroma."""
    query_emb = embedder.encode([query]).tolist()
    results = collection.query(
        query_embeddings=query_emb,
        n_results=k
    )
    docs = results["documents"][0]
    metadatas = results["metadatas"][0]
    return docs, metadatas


def build_prompt(query: str, retrieved_docs: list[str]) -> str:
    """Build a clean, meditation-friendly prompt using retrieved context."""
    context = "\n\n---\n\n".join(retrieved_docs)
    prompt = f"""
Using the reference texts below, write a calm, secular guided meditation that answers the userâ€™s request.

Reference texts:
{context}

User request:
{query}

Now write a clear, compassionate response. 
Speak directly to the listener in second person ("you").
Do NOT mention the reference texts or describe your process.
"""
    return prompt.strip()


def generate_with_rag(
    query: str,
    collection,
    embedder,
    llm,
    k: int = 4,
    max_new_tokens: int = 300
):
    """Retrieve relevant text, build a prompt, and generate an LLM response."""
    
    # 1. Retrieve
    docs, metas = retrieve_context(query, collection, embedder, k=k)

    # 2. Build prompt
    prompt = build_prompt(query, docs)

    # 3. Generate
    output = llm(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )[0]["generated_text"]

    return output, docs, metas


In [5]:
query = "Create a short grounding meditation for anxiety before bed."
response, docs_used, metas_used = generate_with_rag(query, collection, embedder, llm)

print(response)

Token indices sequence length is longer than the specified maximum sequence length for this model (722 > 512). Running this sequence through the model will result in indexing errors


A quick, quiet meditation. :]
