<a href="https://colab.research.google.com/github/mahieshwar-budati/RAG-basic/blob/main/RAG_Basic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q \
sentence-transformers==2.7.0 \
langchain \
langchain-community \
langchain-text-splitters \
chromadb \
pypdf
!pip install transformers==4.41.2

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.0/52.0 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m171.5/171.5 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m66.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.5/21.5 MB[0m [31m46.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m331.0/331.0 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m278.2/278.2 kB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m54.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m54.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# Load PDFs
import os

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFLoader

# Path of the single PDF to index (despite the variable name, this is a file,
# not a folder).
folder_path = "/content/be.pdf"

# Fail early with an actionable message. Without this guard the loader raises
# "ValueError: File path ... is not a valid file or url", which does not tell
# the user what to do about it.
if not os.path.isfile(folder_path):
    raise FileNotFoundError(
        f"{folder_path} was not found. Upload the PDF to the runtime "
        "(e.g. the Colab Files panel) or point folder_path at an existing "
        "file before running this cell."
    )

loader = PyPDFLoader(folder_path)
pages = loader.load()

# One string per loaded page, in the order the loader returned them.
all_text = [page.page_content for page in pages]
print(all_text)
print("PDFs loaded")

# Chunking: 800-character chunks with a 100-character overlap between
# neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=100
)

full_text = "\n".join(all_text)
chunks = splitter.split_text(full_text)
print("Chunks created:", len(chunks))

# Nothing to index means retrieval can never return context, so stop here
# with a clear message instead of building an empty store.
# NOTE(review): presumably this happens for scanned / image-only PDFs with no
# extractable text layer -- confirm against the actual input.
if not chunks:
    raise ValueError(
        f"No text could be extracted from {folder_path}; nothing to index."
    )

#  Embedding Model
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
#  Store in ChromaDB (persisted under /content/chroma_db)
db = Chroma.from_texts(
    chunks,
    embedding_model,
    persist_directory="/content/chroma_db"
)

print("Vector database ready!")

ValueError: File path /content/be.pdf is not a valid file or url

In [None]:
# ---------- Install if needed ----------
# pip install transformers torch sentencepiece

# ---------- Imports ----------
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

# ---------- Load FLAN-T5 ----------
# Checkpoint used for answer generation; tokenizer and model must come from
# the same checkpoint name.
model_name = "google/flan-t5-base"

tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
# (The original line read `if .is_available()`, which is a syntax error.)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

print("FLAN-T5 loaded on:", device)
# ---------- Query Loop ----------
# Interactive loop: read a question, retrieve the 3 most similar chunks from
# `db`, build a prompt around them, and print the model's answer.
# Relies on `db`, `tokenizer`, `model` and `device` defined by earlier cells.
print("\nRAG Chatbot Ready! Type 'exit' to quit.\n")

while True:
    try:
        query = input("Ask question: ").strip()

        if query.lower() == "exit":
            print("Goodbye!")
            break

        if not query:
            print("Please enter a valid question.")
            continue

        # ---------- Retrieve context ----------
        results = db.similarity_search(query, k=3)

        if not results:
            print("\nNo relevant documents found.\n")
            continue

        # Limit context size for model safety: keep at most the first 500
        # characters of each retrieved chunk.
        context_chunks = [r.page_content[:500] for r in results]
        context = "\n".join(context_chunks)

        # ---------- Prompt ----------
        prompt = f"""
Use the provided context to answer the question clearly.
If the answer is not in the context, say you don't know.

Context:
{context}

Question:
{query}

Answer:
"""

        # ---------- Tokenize ----------
        # Truncate to 1024 tokens and move the tensors to the model's device.
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=1024
        ).to(device)

        # ---------- Generate ----------
        # Deterministic (greedy) decoding. The sampling knobs `temperature`
        # and `top_p` were dropped: they contradict do_sample=False, and the
        # old `top_p=0.5-0.9` was arithmetic that evaluates to -0.4, not a
        # range.
        outputs = model.generate(
            **inputs,
            max_new_tokens=300,  # cap on generated tokens (not words)
            do_sample=False,
            repetition_penalty=1.1
        )

        answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

        print("\nAnswer:\n", answer)
        print("-" * 60)

    except (KeyboardInterrupt, EOFError):
        # EOFError is raised by input() when stdin is closed; treating it
        # like Ctrl-C avoids the generic handler below retrying forever.
        print("\nSession ended.")
        break

    except Exception as e:
        # Top-level boundary of the chat loop: report the problem and keep
        # the session alive for the next question.
        print("\nError:", e)
        continue


Prompt text
      ↓
Tokenizer converts text → tokens
      ↓
Tokens converted to tensors
      ↓
Limit tokens to 1024
      ↓
Move tensors to GPU/CPU
      ↓
Ready for model


Model generates tokens
        ↓
Tokens decoded to text
        ↓
Answer printed
        ↓
Wait for next question
