In [None]:
!pip install chromadb

Collecting chromadb
  Downloading chromadb-1.3.5-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.2 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.3.0-py3-none-any.whl.metadata (5.6 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.2-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl.metadata (8.7 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.23.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.38.0-py3-none-any.whl.metadata (2.4 kB)
Collecting pypika>=0.48.9 (from chromadb)
  Downloading PyPika-0.48.9.tar.gz (67 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m

In [None]:
import os
import chromadb
from chromadb.config import Settings
from sentence_transformers import SentenceTransformer
import pandas as pd

In [None]:
# Colab-only step: ask the user to upload the CSV of pre-chunked texts.
# `uploaded` holds whatever files.upload() returns for the selected files.
from google.colab import files
uploaded = files.upload()

# Load the chunk table from the hard-coded Colab path.
# NOTE(review): assumes the upload was saved under /content (presumably the
# notebook's working directory in Colab) — confirm before running elsewhere.
chunks_df = pd.read_csv("/content/meditation_chunks.csv")
chunks_df.head()

Saving meditation_chunks.csv to meditation_chunks.csv


Unnamed: 0,chunk_id,source_file,chunk_index,text,word_count
0,cureus_cleaned.txt_chunk_0,cureus_cleaned.txt,0,This article discusses the power of meditation...,150
1,cureus_cleaned.txt_chunk_1,cureus_cleaned.txt,1,yield more beneficial clinical outcomes. Every...,150
2,cureus_cleaned.txt_chunk_2,cureus_cleaned.txt,2,that constant reinforcement of happy thoughts ...,150
3,cureus_cleaned.txt_chunk_3,cureus_cleaned.txt,3,energy and possibility is there remains a myst...,150
4,cureus_cleaned.txt_chunk_4,cureus_cleaned.txt,4,benefits at the genetic or immunological level...,150


In [None]:
# Directory where the Chroma index is stored on disk (relative to the
# notebook's working directory); created if it does not exist yet.
INDEX_DIR = "../data/index/meditations"
os.makedirs(INDEX_DIR, exist_ok=True)

# Persistent client rooted at INDEX_DIR.
client = chromadb.PersistentClient(path=INDEX_DIR)

# Open the "meditations" collection, creating it on first use.
collection = client.get_or_create_collection(name="meditations")

# Sentence-embedding model used for both the chunks and the queries below.
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Number of chunks embedded and written to the collection per iteration.
BATCH_SIZE = 64

# Parallel lists built from the chunk table: position i in each list
# describes the same row of chunks_df.
ids = chunks_df["chunk_id"].tolist()
documents = chunks_df["text"].tolist()
metadatas = chunks_df[["source_file", "chunk_index", "word_count"]].to_dict(orient="records")

n_documents = len(documents)

for start in range(0, n_documents, BATCH_SIZE):
    # Clamp the upper bound so the last (possibly shorter) batch is reported
    # with its real end index instead of overshooting the number of chunks.
    end = min(start + BATCH_SIZE, n_documents)
    batch_docs = documents[start:end]
    batch_ids = ids[start:end]
    batch_metadatas = metadatas[start:end]

    # Embeddings for this batch, converted to plain lists for the collection.
    batch_embeddings = embedder.encode(batch_docs).tolist()

    collection.add(
        ids=batch_ids,
        documents=batch_docs,
        metadatas=batch_metadatas,
        embeddings=batch_embeddings
    )

    print(f"Added chunks {start}–{end}")


Added chunks 0–64
Added chunks 64–128
Added chunks 128–192


In [None]:
# Retrieve top-k chunks
def retrieve_context(query: str, collection, embedder, k: int = 4):
    """Look up the stored chunks that best match ``query``.

    The query is embedded with ``embedder.encode`` and the resulting vector
    is handed to ``collection.query``.

    Args:
        query: Free-text request to search with.
        collection: Object exposing ``query(query_embeddings=..., n_results=...)``
            (the Chroma collection in this notebook).
        embedder: Object exposing ``encode(list_of_texts)`` whose result
            supports ``.tolist()``.
        k: Number of results to request.

    Returns:
        A ``(docs, metadatas)`` tuple: the first entry of the result's
        ``"documents"`` field and the first entry of its ``"metadatas"`` field.
    """
    query_vectors = embedder.encode([query]).tolist()
    hits = collection.query(query_embeddings=query_vectors, n_results=k)
    # Index 0 selects the results belonging to the one query that was sent.
    return hits["documents"][0], hits["metadatas"][0]


# Prompt builder tuned for big chat-style models
def build_prompt(query: str, retrieved_docs: list[str]) -> str:
    """Assemble the instruction prompt for a guided-meditation request.

    Args:
        query: The user's request, inserted verbatim under "User request:".
        retrieved_docs: Reference passages; they are joined with a ``---``
            divider and inserted under "Reference texts:".

    Returns:
        The filled-in prompt with leading and trailing whitespace removed.
    """
    divider = "\n\n---\n\n"
    reference_block = divider.join(retrieved_docs)
    return f"""You are a calm, secular meditation teacher.
You write detailed, compassionate guided meditations grounded in the reference texts.

Reference texts:
{reference_block}

User request:
{query}

Write a clear, step-by-step guided meditation in the second person ("you"),
about 400–600 words, with:
1) Settling the body
2) Anchoring attention (e.g., breath or body)
3) Working with thoughts/emotions
4) A gentle closing.

Do not mention the reference texts or describe your process.
Speak as if you are guiding the listener right now.
""".strip()


# Main RAG generation function
def generate_with_rag(
    query: str,
    collection,
    embedder,
    llm,
    k: int = 4,
    max_new_tokens: int = 600,
    return_full_text: bool = False,
):
    """Retrieve context for ``query``, build the prompt and generate a reply.

    Args:
        query: The user's request.
        collection: Vector collection passed through to ``retrieve_context``.
        embedder: Embedding model passed through to ``retrieve_context``.
        llm: Callable text-generation pipeline. It is called with the prompt
            plus sampling keyword arguments and must return a list whose first
            item is a dict with a ``"generated_text"`` key.
        k: Number of chunks to retrieve.
        max_new_tokens: Upper bound on the number of generated tokens.
        return_full_text: Forwarded to the pipeline. The default ``False``
            asks for the completion only; a text-generation pipeline otherwise
            returns the prompt followed by the completion, which made the
            "response" repeat the instructions and every reference text.
            Pass ``True`` to get the prompt-prefixed text back.

    Returns:
        A ``(output, docs, metas)`` tuple: the generated text, the retrieved
        documents, and their metadata.
    """
    docs, metas = retrieve_context(query, collection, embedder, k=k)
    prompt = build_prompt(query, docs)

    generations = llm(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        return_full_text=return_full_text,
    )
    # The pipeline returns one entry per generated sequence; take the first.
    output = generations[0]["generated_text"]

    return output, docs, metas


In [None]:
!pip install -q transformers accelerate bitsandbytes

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.4/59.4 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[?25h

Model 1: Mistral 7B Instruct

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Hugging Face model id of the instruction-tuned model used for generation.
model_name = "mistralai/Mistral-7B-Instruct-v0.2"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",      # automatic device placement (intended to use the GPU)
    dtype=torch.float16     # half precision saves VRAM; `torch_dtype` is deprecated in favour of `dtype`
)

# Text-generation pipeline wrapping the loaded model and tokenizer; this is
# the `llm` callable handed to generate_with_rag.
llm = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer
)


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

Device set to use cuda:0


In [None]:
# Sample request used to exercise the whole pipeline end to end.
query = "Create a grounding meditation for the night before an exam that gently acknowledges the specific worries that tend to arise (fear of forgetting information, worry about performing poorly, racing thoughts about the future) and helps me settle my mind and body so I can rest."
# Retrieve 4 chunks, build the prompt and generate up to 600 new tokens;
# the retrieved documents and their metadata are kept for inspection.
response, docs_used, metas_used = generate_with_rag(
    query, collection, embedder, llm, k=4, max_new_tokens=600
)

# Show the text returned by the model.
print(response)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


You are a calm, secular meditation teacher.
You write detailed, compassionate guided meditations grounded in the reference texts.

Reference texts:
Week Two: STOP Meditation So you can decide for yourself if you would like your eyes open or your eyes closed. We'll begin with a few moments of coming into the present moment. Just notice your body, the fact that you're breathing, or hearing, anything that brings you into this present moment. And I'll have you take a slightly deeper inhale and a slightly deeper exhale. And as you take that exhale, see if you can soften through your body, relaxing a bit more into the chair if that's possible. So what I'd like you to do now in your awareness is recall a stressful situation, something that's recent in the last 48 to 72 hours. Please don't pick the most stressful situation. So pick something that has a little bit of charge. It's a little stressful. So take a moment. Now, in your awareness, I want you to recall

---

Week Two: Breath as Anchor 