In [None]:
# Setup Environment and Install Packages
import os
os.makedirs('rag_chatbot', exist_ok=True)
os.makedirs('rag_chatbot/docs', exist_ok=True)
os.chdir('/content/rag_chatbot')

!pip install -q google-generativeai langchain langchain-google-genai chromadb PyMuPDF langchain-community sentence-transformers
print("✅ Setup complete!")

✅ Setup complete!


In [None]:
# Configure API and Imports
from google.colab import userdata
import google.generativeai as genai
from langchain.vectorstores import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA

# Read the Gemini key from the Colab secret named 'MY_GEMINI_KEY', then make it
# available both through the GOOGLE_API_KEY environment variable and to the
# google.generativeai client.
API_KEY = userdata.get('MY_GEMINI_KEY')
os.environ.update(GOOGLE_API_KEY=API_KEY)
genai.configure(api_key=API_KEY)
print("✅ API configured!")

✅ API configured!


In [None]:
# Upload Documents
from google.colab import files
import shutil

uploaded = files.upload()

# Only PDFs are kept: each one is moved into docs/ and confirmed by name.
for pdf_name in (name for name in uploaded if name.endswith('.pdf')):
    shutil.move(pdf_name, f'docs/{pdf_name}')
    print(f"✅ {pdf_name}")

# Count every PDF now present in docs/, not just the ones uploaded this run.
pdf_count = sum(1 for entry in os.listdir('docs') if entry.endswith('.pdf'))
print(f"📚 {pdf_count} PDFs ready")

Saving NIPS-2017-attention-is-all-you-need-Paper.pdf to NIPS-2017-attention-is-all-you-need-Paper.pdf
✅ NIPS-2017-attention-is-all-you-need-Paper.pdf
📚 2 PDFs ready


In [None]:
# Load and Process Documents
# Collect the PDF paths first, then load each file and pool the results.
pdf_paths = [f"docs/{name}" for name in os.listdir("docs") if name.endswith(".pdf")]

documents = []
for pdf_path in pdf_paths:
    documents += PyMuPDFLoader(pdf_path).load()

# Split the loaded documents into 1000-character chunks with a 200-character overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents)

print(f"📄 {len(documents)} pages → {len(chunks)} chunks")

📄 23 pages → 76 chunks


In [None]:
# Create Vector Database
# Builds the embedding model, indexes `chunks` in a Chroma store, and creates
# the `retriever` and `llm` objects that the chat functions read as globals.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
# NOTE(review): persist_directory='db' is a relative path, so it presumably
# resolves against the current working directory. If the store is reloaded from
# disk, re-running this cell may add the same chunks again — confirm.
vectordb = Chroma.from_documents(chunks, embeddings, persist_directory='db')
# No search arguments are passed, so the retriever uses the library defaults.
retriever = vectordb.as_retriever()
# Chat model used for both the direct and the document-backed answers.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.2)
print("🗄️ Vector database ready!")

🗄️ Vector database ready!


In [None]:
# Define Chat Functions
def ask_llm_only(query):
    """Ask the global ``llm`` directly, without any document retrieval.

    The model is told to reply 'UNSURE' when it is not confident.

    Args:
        query: The user's question as text.

    Returns:
        The reply text, or ``None`` when the reply contains "unsure" or
        "don't know" (case-insensitive), which signals that the caller should
        fall back to another source.
    """
    reply = llm.invoke(
        f"Answer only if confident. If unsure, say 'UNSURE'.\n\nQuestion: {query}"
    )

    # Responses without a `content` attribute are used via their str() form.
    try:
        text = reply.content
    except AttributeError:
        text = str(reply)

    lowered = text.lower()
    uncertainty_markers = ("unsure", "don't know")
    if any(marker in lowered for marker in uncertainty_markers):
        return None
    return text

def ask_with_docs(query):
    """Answer ``query`` through a retrieval QA chain and return the answer text.

    A new ``RetrievalQA`` chain is assembled on every call from the global
    ``llm`` and ``retriever``. Source documents are requested from the chain,
    but only the ``"result"`` entry of its output is returned.

    Args:
        query: The user's question as text.

    Returns:
        The value stored under ``"result"`` in the chain's response.
    """
    chain_options = {
        "llm": llm,
        "retriever": retriever,
        "return_source_documents": True,
    }
    rag_chain = RetrievalQA.from_chain_type(**chain_options)
    return rag_chain.invoke({"query": query})["result"]



In [None]:
# Start Chatbot
def chat():
    """Run an interactive question/answer loop on stdin/stdout.

    Each query is first sent to ``ask_llm_only``. When that returns a falsy
    value, the query is retried through ``ask_with_docs``. Answers are printed
    with a ``[DIRECT]`` or ``[RAG]`` tag showing which path produced them.

    The loop ends when the user types ``exit`` (any letter case), or when the
    prompt hits end-of-input or is interrupted (for example by stopping the
    cell). Blank input is ignored rather than sent to the model.

    Returns:
        None.
    """
    print("\n💬 RAG Chatbot Started! (type 'exit' to stop)")
    print("🔍 Watch for [DIRECT] vs [RAG] indicators!\n")

    while True:
        try:
            query = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # A closed input stream or an interrupted cell should end the chat
            # cleanly instead of surfacing a traceback.
            print("\n👋 Goodbye!")
            break

        if not query:
            # Nothing was asked; do not spend an API call on an empty prompt.
            continue

        if query.lower() == 'exit':
            print("👋 Goodbye!")
            break

        # Try LLM first
        response = ask_llm_only(query)

        if response:
            print(f"\n[DIRECT] 🧠 AI: {response}\n")
        else:
            # None or empty text: fall back to the document-backed chain.
            print("🔍 LLM unsure → Searching documents...")
            response = ask_with_docs(query)
            print(f"\n[RAG] 📚 AI: {response}\n")
# Launch the interactive chat loop; this cell keeps running until the user types 'exit'.
chat()


💬 RAG Chatbot Started! (type 'exit' to stop)
🔍 Watch for [DIRECT] vs [RAG] indicators!


[DIRECT] 🧠 AI: Hey


[DIRECT] 🧠 AI: An attention mechanism is a technique in deep learning that allows a model to focus on different parts of its input when processing it.  Instead of processing the entire input equally, it assigns weights to different parts, emphasizing the most relevant information for the current task.  This allows the model to handle long sequences of data more effectively and capture complex relationships between different parts of the input.


[DIRECT] 🧠 AI: RNNs (Recurrent Neural Networks) process sequential data, maintaining a hidden state that carries information from previous inputs.  CNNs (Convolutional Neural Networks) process grid-like data (like images) using convolutional filters to detect features.  The key difference lies in their architecture and the type of data they excel at processing.

🔍 LLM unsure → Searching documents...

[RAG] 📚 AI: The Transformer (big) m