In [20]:
!pip install chromadb sentence-transformers transformers accelerate




In [21]:
from pathlib import Path

# Create the data directory with pathlib instead of shelling out to
# `mkdir -p`, which is POSIX-specific; exist_ok makes the cell re-runnable.
Path("data").mkdir(parents=True, exist_ok=True)

# Seed the knowledge base with a tiny sample policy document.
# The file is written with the platform default text encoding, the same
# default used when reading text files back without an explicit encoding.
with open("data/company_policy.txt", "w") as f:
    f.write("""
Our refund policy allows customers to request a refund within 30 days of purchase.
Refunds are processed within 5–7 business days.
Subscriptions can be canceled anytime, but refunds are only available within 7 days.
""")


In [22]:
from pathlib import Path

def load_documents(folder="data", encoding=None):
    """Read every ``*.txt`` file directly inside ``folder``.

    Args:
        folder: Directory to scan (non-recursive). Defaults to ``"data"``.
        encoding: Text encoding forwarded to ``Path.read_text``. ``None``
            (the default) keeps the platform default, i.e. the previous
            behaviour.

    Returns:
        A list with the full text of each matching file, ordered by path.
    """
    # Path.glob yields entries in a filesystem-dependent order; sorting makes
    # the result (and any positional ids derived from it) reproducible.
    paths = sorted(Path(folder).glob("*.txt"))
    return [path.read_text(encoding=encoding) for path in paths]

def chunk_text(text, chunk_size=300, overlap=50):
    """Split ``text`` into fixed-size character windows that overlap.

    Consecutive chunks start ``chunk_size - overlap`` characters apart, so
    each chunk repeats the last ``overlap`` characters of the previous one.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be > 0.
        overlap: Number of characters shared between neighbouring chunks;
            must be smaller than ``chunk_size``.

    Returns:
        A list of substrings covering ``text`` in order. Empty text yields
        an empty list.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is not
            smaller than ``chunk_size`` (the window would never advance).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")

    step = chunk_size - overlap
    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        # Once a window reaches the end of the text, any further window would
        # only repeat characters already contained in this chunk.
        if end >= len(text):
            break
        start += step
    return chunks


In [26]:
from sentence_transformers import SentenceTransformer

# Shared sentence-embedding model, loaded once and reused by every call below.
embedder = SentenceTransformer("all-MiniLM-L6-v2")


def embed_texts(texts):
    """Embed a batch of strings with the shared ``embedder``.

    Args:
        texts: The strings to encode, passed unchanged to ``embedder.encode``.

    Returns:
        The encoder output converted to plain Python lists via ``.tolist()``.
    """
    vectors = embedder.encode(texts)
    return vectors.tolist()


In [27]:
import chromadb

# Module-level vector-store handles shared by the indexing and retrieval cells.
# NOTE(review): chromadb.Client() is called with no arguments — presumably an
# in-memory, non-persistent client; confirm against the chromadb docs.
chroma_client = chromadb.Client()
# Fetch the "knowledge_base" collection, creating it on first use (per the
# method name), so re-running this cell keeps working with the same collection.
collection = chroma_client.get_or_create_collection(name="knowledge_base")

def index_documents():
    """Chunk, embed and store every document in the knowledge-base collection.

    Loads the documents with ``load_documents``, splits each one with
    ``chunk_text``, embeds the chunks with ``embed_texts`` and writes them to
    the module-level ``collection`` under ids ``chunk_0``, ``chunk_1``, ...

    The write uses ``upsert`` so re-running the cell replaces the entries for
    ids that already exist instead of re-adding them.

    Returns:
        None. Prints the number of chunks indexed.
    """
    chunks = [chunk for doc in load_documents() for chunk in chunk_text(doc)]

    if not chunks:
        # Nothing to embed or store; skip the vector-store call entirely
        # rather than handing it empty lists.
        print("Indexed 0 chunks")
        return

    collection.upsert(
        documents=chunks,
        embeddings=embed_texts(chunks),
        ids=[f"chunk_{i}" for i in range(len(chunks))],
    )

    print(f"Indexed {len(chunks)} chunks")

index_documents()


Indexed 1 chunks


In [28]:
def retrieve_chunks(query, k=3):
    """Return the stored chunks most similar to ``query``.

    Args:
        query: The natural-language question to search for.
        k: Maximum number of chunks to return. Defaults to 3.

    Returns:
        A list of at most ``k`` chunk strings taken from the first (and only)
        query's ``"documents"`` result. Returns an empty list when the
        collection holds no chunks or ``k`` is not positive.
    """
    # Never request more neighbours than the collection actually contains
    # (this notebook indexes a single chunk while k defaults to 3).
    n_results = min(k, collection.count())
    if n_results <= 0:
        return []

    query_embedding = embed_texts([query])[0]

    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=n_results,
    )

    # One query embedding was submitted, so take the first result list.
    return results["documents"][0]


In [29]:
from transformers import pipeline

# Shared text2text-generation pipeline used to answer questions; built once
# at module level so the model is not reloaded on every call.
# NOTE(review): max_length=256 is passed at construction time — presumably it
# caps the length of the generated output; confirm against the transformers docs.
generator = pipeline(
    "text2text-generation",
    model="google/flan-t5-small",
    max_length=256
)


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Device set to use cpu


In [30]:
def build_prompt(context_chunks, question):
    """Assemble the instruction prompt sent to the generator.

    Args:
        context_chunks: Retrieved text chunks; joined with a blank line
            between them to form the context section.
        question: The user's question, placed after the context.

    Returns:
        The prompt string: a leading newline, the two instruction lines,
        the ``Context:`` section, the ``Question:`` section and a trailing
        newline.
    """
    prompt_lines = (
        "",  # the prompt starts with a newline
        "Answer the question using ONLY the context below.",
        'If the answer is not present, say "I don\'t know".',
        "",
        "Context:",
        "\n\n".join(context_chunks),
        "",
        "Question:",
        f"{question}",
        "",  # ...and ends with one
    )
    return "\n".join(prompt_lines)


In [31]:
def generate_answer(prompt):
    """Run ``prompt`` through the shared ``generator`` and return its text.

    Args:
        prompt: The full prompt string to send to the generator.

    Returns:
        The ``"generated_text"`` field of the first result.
    """
    results = generator(prompt)
    first_result = results[0]
    return first_result["generated_text"]


In [32]:
def rag_query(question):
    """Answer ``question`` end to end: retrieve, build the prompt, generate.

    Args:
        question: The user's natural-language question.

    Returns:
        The generated answer text.
    """
    return generate_answer(build_prompt(retrieve_chunks(question), question))


In [33]:
# Smoke test: a broad question about the indexed policy document.
print(rag_query("What is the refund policy?"))


We allow customers to request a refund within 30 days of purchase. Refunds are processed within 5–7 business days. Subscriptions can be canceled anytime, but refunds are only available within 7 days.


In [40]:
# Smoke test: a narrower question. The policy text mentions two windows
# (30 days after purchase, 7 days for subscriptions), so the question is
# ambiguous; the recorded answer below reflects only the subscription clause.
print(rag_query("how long are refunds available?"))


within 7 days
