In [2]:
# Setup Environment and Install Packages
import os
os.makedirs('rag_chatbot', exist_ok=True)
os.makedirs('rag_chatbot/docs', exist_ok=True)
os.chdir('/content/rag_chatbot')

!pip install -q google-generativeai langchain langchain-google-genai chromadb PyMuPDF langchain-community sentence-transformers
print("✅ Setup complete!")

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.8/19.8 MB[0m [31m63.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.1/24.1 MB[0m [31m57.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m50.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.2/284.2 kB[0m [31m16.5 MB/s[0m eta [36m0:00:00

In [3]:
# Configure API and Imports
from google.colab import userdata
import google.generativeai as genai
from langchain.vectorstores import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA

# Get API key and configure
API_KEY = userdata.get('MY_GEMINI_KEY')
os.environ["GOOGLE_API_KEY"] = API_KEY
genai.configure(api_key=API_KEY)
print("✅ API configured!")

✅ API configured!


In [11]:
# Upload Documents
from google.colab import files
import shutil

uploaded = files.upload()

for filename in uploaded.keys():
    if filename.endswith('.pdf'):
        shutil.move(filename, f'docs/{filename}')
        print(f"✅ {filename}")

print(f"📚 {len([f for f in os.listdir('docs') if f.endswith('.pdf')])} PDFs ready")

Saving NIPS-2017-attention-is-all-you-need-Paper.pdf to NIPS-2017-attention-is-all-you-need-Paper.pdf
✅ NIPS-2017-attention-is-all-you-need-Paper.pdf
📚 2 PDFs ready


In [12]:
#Load and Process Documents
documents = []
for filename in os.listdir("docs"):
    if filename.endswith(".pdf"):
        loader = PyMuPDFLoader(f"docs/{filename}")
        documents.extend(loader.load())

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents)

print(f"📄 {len(documents)} pages → {len(chunks)} chunks")

📄 23 pages → 76 chunks


In [13]:
#  Create Vector Database
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vectordb = Chroma.from_documents(chunks, embeddings, persist_directory='db')
retriever = vectordb.as_retriever()
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.2)
print("🗄️ Vector database ready!")

🗄️ Vector database ready!


In [14]:
# Define Chat Functions
def ask_llm_only(query):
    prompt = f"Answer only if confident. If unsure, say 'UNSURE'.\n\nQuestion: {query}"
    response = llm.invoke(prompt)
    content = response.content if hasattr(response, "content") else str(response)

    if "unsure" in content.lower() or "don't know" in content.lower():
        return None
    return content

def ask_with_docs(query):
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True)
    response = qa_chain.invoke({"query": query})
    return response["result"]



In [None]:
# Start Chatbot
def chat():
    print("\n💬 RAG Chatbot Started! (type 'exit' to stop)")
    print("🔍 Watch for [DIRECT] vs [RAG] indicators!\n")

    while True:
        query = input("You: ").strip()

        if query.lower() == 'exit':
            print("👋 Goodbye!")
            break

        # Try LLM first
        response = ask_llm_only(query)

        if response:
            print(f"\n[DIRECT] 🧠 AI: {response}\n")
        else:
            print("🔍 LLM unsure → Searching documents...")
            response = ask_with_docs(query)
            print(f"\n[RAG] 📚 AI: {response}\n")

# Start the enhanced chatbot
chat()


💬 RAG Chatbot Started! (type 'exit' to stop)
🔍 Watch for [DIRECT] vs [RAG] indicators!

You: "What is attention mechanism in general?

[DIRECT] 🧠 AI: Attention mechanisms are a family of techniques that allow a neural network to focus on different parts of its input when processing it.  Instead of processing the entire input equally, attention assigns weights to different parts, emphasizing the most relevant information for the current task.  This allows the network to effectively handle long sequences or complex inputs by selectively focusing on the most important elements.

You: What are transformers in machine learning?

[DIRECT] 🧠 AI: Transformers are a type of neural network architecture primarily known for their effectiveness in processing sequential data, such as text and time series.  They utilize a mechanism called "self-attention" to weigh the importance of different parts of the input sequence when generating an output.  This allows them to capture long-range dependencies w

In [None]:
# Set up Gemini
import google.generativeai as genai
import os
print("API key configured successfully!")


os.environ["GOOGLE_API_KEY"] = MY_GEMINI_KEY
genai.configure(api_key=MY_GEMINI_KEY)

API key configured successfully!


In [None]:
# Check what's in your documents directory
!ls -la documents/

total 2164
drwxr-xr-x 2 root root    4096 Aug 20 02:31 .
drwxr-xr-x 4 root root    4096 Aug 20 02:31 ..
-rw-r--r-- 1 root root 2206753 Aug 20 02:31 3-17_NERVOUS_HANDOUT.pdf


In [None]:
from google.colab import files
import shutil

print("Upload your PDF files:")
uploaded = files.upload()

# Move uploaded files to the documents directory
for filename in uploaded.keys():
    if filename.endswith('.pdf'):
        shutil.move(filename, f'documents/{filename}')
        print(f"✅ Moved {filename} to documents/")
    else:
        print(f"⚠️ {filename} is not a PDF file, skipping...")

# Check what's now in documents
print("\nDocuments directory now contains:")
!ls -la documents/

Upload your PDF files:


Saving 3-17_NERVOUS_HANDOUT.pdf to 3-17_NERVOUS_HANDOUT.pdf
✅ Moved 3-17_NERVOUS_HANDOUT.pdf to documents/

Documents directory now contains:
total 2164
drwxr-xr-x 2 root root    4096 Aug 20 02:42 .
drwxr-xr-x 4 root root    4096 Aug 20 02:42 ..
-rw-r--r-- 1 root root 2206753 Aug 20 02:42 3-17_NERVOUS_HANDOUT.pdf


In [None]:
import os
from langchain.vectorstores import Chroma
from langchain.document_loaders import AsyncChromiumLoader #directlyt import the data from an URL
from langchain.document_transformers import Html2TextTransformer #converts html to text data
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from dotenv import load_dotenv

In [None]:
document = []
for filename in os.listdir("documents"):
    if filename.lower().endswith(".pdf"):
        loader = PyMuPDFLoader(os.path.join("documents", filename))
        document.extend(loader.load())

print(f"Total pages loaded: {len(document)}")

Total pages loaded: 12


In [None]:
# Split text into smaller chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
text_chunks = text_splitter.split_documents(document)

len(text_chunks)

33

In [None]:
# Use in embeddings
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embedding = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=MY_GEMINI_KEY  # Use your variable name here
)

In [None]:
# Create vector database
persist_directory = 'db'
vectordb = Chroma.from_documents(documents=text_chunks,
                                 embedding=embedding,
                                 persist_directory=persist_directory)

In [None]:
retriever = vectordb.as_retriever()


In [None]:
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.2)


In [None]:
### Function to Ask LLM First
def ask_llm_first(query):
    """Ask the LLM first. If it confidently answers, return it. Otherwise, proceed to VectorDB."""

    system_prompt = (
        "You are an AI assistant. Answer only if you are confident about your response. "
        "If you do not know the answer, simply respond with: 'I don't know.' "
        "Do not guess or make assumptions.\n\n"
        f"User Question: {query}"
    )

    response = llm.invoke(system_prompt)  # Query the LLM

    # Extract text content from AIMessage object
    if hasattr(response, "content"):
        response_text = response.content  # Correct way to get the response text
    else:
        response_text = str(response)  # Fallback in case 'content' is missing

    # Define phrases that indicate uncertainty
    vague_responses = [
        "i don't know", "i am not sure", "i need more information", "please provide more details",
        "i cannot determine", "i am unable to", "i do not have enough context",
        "i need more context", "i do not have enough information", "i require additional details"
    ]

     # Debugging: Print each phrase check
    for phrase in vague_responses:
        if phrase in response_text.lower():
            print(f"✅ Match found: '{phrase}' is in LLM response!")
            print("⚠️ LLM is unsure. Fetching data from VectorDB...")
            return False  # Indicate retrieval is needed

    print("🤖 LLM confidently answered without RAG.")
    return response_text  # Return LLM's answer

In [None]:
### Function to Perform RAG Retrieval
def ask_with_rag(query):
    """Retrieve documents from VectorDB and generate an answer using LLM with context."""
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True
    )
    response = qa_chain.invoke({"query": query})
    print("\n🤖 AI Agent's Response (with RAG):")
    return response["result"]


In [None]:
def start_conversation():
    """Start an interactive chat session with the LLM and VectorDB."""
    print("\n💬 Start chatting with the AI! (Type 'exit' to stop)\n")

    while True:
        # Get user input dynamically
        user_query = input("👤 You: ").strip()

        # Exit condition
        if user_query.lower() == "exit":
            print("👋 Goodbye!")
            break

        # Step 1: Ask LLM first
        llm_response = ask_llm_first(user_query)

        # Step 2: If LLM is unsure (False), use VectorDB retrieval
        if llm_response is False:
            final_answer = ask_with_rag(user_query)
            print(f"\n🤖 AI: {final_answer}\n")
        else:
            print(f"\n🤖 AI: {llm_response}\n")

In [None]:
start_conversation()



💬 Start chatting with the AI! (Type 'exit' to stop)

👤 You: what drugs affect the nervous system
🤖 LLM confidently answered without RAG.

🤖 AI: Many drugs affect the nervous system.  This is a very broad category.  To give a useful answer, I need more specifics.  What kind of effect are you interested in? (e.g., stimulants, depressants, analgesics, etc.)

👤 You: stimulants please
✅ Match found: 'i don't know' is in LLM response!
⚠️ LLM is unsure. Fetching data from VectorDB...

🤖 AI Agent's Response (with RAG):

🤖 AI: Based on the provided text, caffeine and nicotine are stimulants.  Caffeine stimulates the central nervous system by suppressing melatonin and promoting adrenalin. Nicotine also stimulates the central nervous system, causing an increased release of neurotransmitters associated with pleasure.

👤 You: what about anticonvulsants
✅ Match found: 'i don't know' is in LLM response!
⚠️ LLM is unsure. Fetching data from VectorDB...

🤖 AI Agent's Response (with RAG):

🤖 AI: I'm so

KeyboardInterrupt: Interrupted by user