In [1]:

!pip install langchain langchain-community langchain-openai
!pip install chromadb
!pip install pypdf
!pip install sentence-transformers
!pip install openai

Collecting langchain-community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-openai
  Downloading langchain_openai-0.3.27-py3-none-any.whl.metadata (2.3 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<

In [2]:

import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
import tempfile

In [4]:
import getpass
os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key:")

Enter your OpenAI API key:··········


In [5]:
# Cell 4: Document Processing Functions
def load_pdf(pdf_path):
    """
    Load PDF and extract text
    """
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()
    print(f"Loaded {len(documents)} pages from PDF")
    return documents

def split_documents(documents, chunk_size=1000, chunk_overlap=200):
    """
    Split documents into smaller chunks for better retrieval
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    splits = text_splitter.split_documents(documents)
    print(f"Split into {len(splits)} chunks")
    return splits

In [6]:
# Cell 5: Initialize Embeddings
def setup_embeddings():
    """
    Initialize embedding model - using free HuggingFace model
    """
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    print("Embeddings model loaded successfully!")
    return embeddings

# Initialize embeddings
embeddings = setup_embeddings()

  embeddings = HuggingFaceEmbeddings(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Embeddings model loaded successfully!


In [7]:
# Vector Database Functions
def create_vector_store(documents, embeddings):
    """
    Create vector database from document chunks
    """
    vectorstore = Chroma.from_documents(
        documents=documents,
        embedding=embeddings,
        persist_directory="./chroma_db"  # Save to disk
    )
    print(f"Created vector store with {len(documents)} documents")
    return vectorstore

def load_existing_vector_store(embeddings):
    """
    Load existing vector database if it exists
    """
    vectorstore = Chroma(
        persist_directory="./chroma_db",
        embedding_function=embeddings
    )
    return vectorstore

In [8]:
#  RAG Chain Setup
def create_rag_chain(vectorstore):
    """
    Create the conversational RAG chain
    """
    # Create retriever from vector store
    retriever = vectorstore.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 3}  # Retrieve top 3 most similar chunks
    )

    # Initialize LLM
    llm = OpenAI(temperature=0.7)

    # Create memory for conversation history
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
        output_key="answer"
    )

    # Create conversational chain
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        memory=memory,
        return_source_documents=True,
        verbose=True
    )

    print("RAG chain created successfully!")
    return qa_chain

In [9]:
# PDF Upload and Processing Function
def process_pdf_file(uploaded_file):
    """
    Process uploaded PDF file and create vector store
    """
    try:
        # Save uploaded file temporarily
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
            tmp_file.write(uploaded_file.read())
            tmp_file_path = tmp_file.name

        print("Step 1: Loading PDF...")
        documents = load_pdf(tmp_file_path)

        print("Step 2: Splitting documents...")
        splits = split_documents(documents)

        print("Step 3: Creating vector store...")
        vectorstore = create_vector_store(splits, embeddings)

        print("Step 4: Creating RAG chain...")
        qa_chain = create_rag_chain(vectorstore)

        # Clean up temporary file
        os.unlink(tmp_file_path)

        print("PDF processed successfully! You can now ask questions.")
        return qa_chain, vectorstore

    except Exception as e:
        print(f"Error processing PDF: {str(e)}")
        return None, None

In [10]:
# Chat Interface Function
def chat_with_pdf(qa_chain, question):
    """
    Ask questions about the PDF content
    """
    try:
        print(f"Question: {question}")
        print("Searching relevant content...")

        # Get response from RAG chain
        response = qa_chain({"question": question})

        print(f"Answer: {response['answer']}")

        # Show source documents if available
        if 'source_documents' in response and response['source_documents']:
            print("\n Sources used:")
            for i, doc in enumerate(response['source_documents']):
                print(f"   Source {i+1}: Page {doc.metadata.get('page', 'Unknown')}")
                print(f"   Content preview: {doc.page_content[:200]}...")
                print()

        return response

    except Exception as e:
        print(f"Error during chat: {str(e)}")
        return None

In [11]:
# File Upload Helper for Colab
from google.colab import files

def upload_and_process_pdf():
    """
    Upload PDF file in Colab and process it
    """
    print("Please upload your PDF file...")
    uploaded = files.upload()

    if uploaded:
        # Get the first uploaded file
        filename = list(uploaded.keys())[0]
        file_content = uploaded[filename]

        print(f"Processing {filename}...")

        # Create a file-like object
        import io
        file_obj = io.BytesIO(file_content)

        # Process the PDF
        qa_chain, vectorstore = process_pdf_file(file_obj)

        return qa_chain, vectorstore
    else:
        print("No file uploaded")
        return None, None

In [12]:
# Cell 11: Initialize the System
print("RAG PDF Chatbot System Ready!")
print("=" * 50)

# Global variables to store our system
qa_chain = None
vectorstore = None

print(" System initialized. Ready to process PDF!")

🚀 RAG PDF Chatbot System Ready!
✅ System initialized. Ready to process PDF!


In [13]:
# Cell 12: Upload and Process PDF
print("Upload and Process Your PDF")
print("=" * 40)

# Upload and process PDF
qa_chain, vectorstore = upload_and_process_pdf()

if qa_chain:
    print("Success! Your PDF is now ready for questions.")
    print("You can now ask questions about the content in the next cell.")
else:
    print("Failed to process PDF. Please try again.")

📤 Upload and Process Your PDF
📁 Please upload your PDF file...


Saving BI Data Dictionary.pdf to BI Data Dictionary.pdf
📄 Processing BI Data Dictionary.pdf...
Step 1: Loading PDF...
Loaded 27 pages from PDF
Step 2: Splitting documents...
Split into 67 chunks
Step 3: Creating vector store...
Created vector store with 67 documents
Step 4: Creating RAG chain...


  llm = OpenAI(temperature=0.7)


RAG chain created successfully!
✅ PDF processed successfully! You can now ask questions.
🎉 Success! Your PDF is now ready for questions.
💡 You can now ask questions about the content in the next cell.


  memory = ConversationBufferMemory(


In [14]:
# Interactive Chat Interface
def start_chat():
    """
    Start interactive chat session
    """
    if qa_chain is None:
        print("Please upload and process a PDF first!")
        return

    print("💬 Chat with your PDF - Type 'quit' to exit")
    print("=" * 45)

    while True:
        question = input("\n Your question: ").strip()

        if question.lower() in ['quit', 'exit', 'q']:
            print("Goodbye!")
            break

        if not question:
            print("Please enter a question.")
            continue

        # Get response
        response = chat_with_pdf(qa_chain, question)

        if response:
            print("-" * 50)

# Start the chat
start_chat()

💬 Chat with your PDF - Type 'quit' to exit

🤔 Your question: what this document is about?
🤔 Question: what this document is about?
🔍 Searching relevant content...


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mUse the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

AML Doc 
Attached Documents attached in member documents 
AML Doc Status As per member information centre screen 
Bad Debt Member status - set in member maintenance  
Branch Name 
The branch in which the member opened their account (based on member 
area) 
Branch Group The “parent” branch grouping. 
Closed Member status - set in member maintenance  
Common Bond As set in details tab in member maintenance screen 
Correspondence 
Address 1 Member’s address marked as correspondence in extended member details  
Correspondence 
Address 2

  response = qa_chain({"question": question})


❌ Error during chat: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

🤔 Your question: Can you summarize the key points?
🤔 Question: Can you summarize the key points?
🔍 Searching relevant content...


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mUse the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

10 
 
Field Notes  
New Member Count 
Count based on previous months member count minus the current 
member count 
New No 
Correspondence Count 
Count based on previous months No Correspondence count minus the 
current No Correspondence co

KeyboardInterrupt: Interrupted by user

In [None]:
##Token limit in api call of OpenAI

In [None]:
##FreeModel- Using local model

In [None]:
!pip install langchain-huggingface

In [17]:
# Setup Free Local LLM
from langchain_huggingface import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

def setup_free_llm():
    """
    Setup free local language model
    """
    print(" Loading free local model (this may take a few minutes first time)...")

    # Use a smaller, efficient model that works well in Colab
    model_name = "microsoft/DialoGPT-medium"

    # Alternative models you can try:
    # "google/flan-t5-base" - Good for Q&A
    # "microsoft/DialoGPT-small" - Smaller, faster

    try:
        # Create pipeline
        pipe = pipeline(
            "text-generation",
            model=model_name,
            tokenizer=model_name,
            max_length=512,
            temperature=0.7,
            do_sample=True,
            device=0 if torch.cuda.is_available() else -1  # Use GPU if available
        )

        # Wrap in LangChain
        llm = HuggingFacePipeline(pipeline=pipe)
        print("Free local model loaded successfully!")
        return llm

    except Exception as e:
        print(f"Error loading model: {e}")
        print("Trying alternative model...")

        # Fallback to even smaller model
        pipe = pipeline(
            "text-generation",
            model="gpt2",
            max_length=256,
            temperature=0.7,
            do_sample=True
        )
        llm = HuggingFacePipeline(pipeline=pipe)
        print("Fallback model loaded!")
        return llm

# Load the free model
free_llm = setup_free_llm()

🔄 Loading free local model (this may take a few minutes first time)...


config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/863M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/863M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

Device set to use cpu


✅ Free local model loaded successfully!


In [18]:
# Modified RAG Chain with Free LLM
def create_free_rag_chain(vectorstore):
    """
    Create RAG chain using free local model
    """
    # Create retriever
    retriever = vectorstore.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 2}  # Reduced to 2 for smaller context
    )

    # Use our free LLM
    llm = free_llm

    # Create memory
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
        output_key="answer"
    )

    # Create chain
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        memory=memory,
        return_source_documents=True,
        verbose=True
    )

    print("✅ Free RAG chain created!")
    return qa_chain

# If you already have a vectorstore from before, recreate the chain
if vectorstore is not None:
    print("🔄 Recreating RAG chain with free model...")
    qa_chain = create_free_rag_chain(vectorstore)
    print("✅ Ready to chat with free model!")
else:
    print("📤 Please upload a PDF first using the upload function")

🔄 Recreating RAG chain with free model...
✅ Free RAG chain created!
✅ Ready to chat with free model!


In [20]:
# Cell 19: Check Current Status
print("🔍 Checking current system status...")
print(f"qa_chain exists: {qa_chain is not None}")
print(f"vectorstore exists: {vectorstore is not None}")
print(f"embeddings exists: {embeddings is not None}")
print(f"free_llm exists: {free_llm is not None}")

🔍 Checking current system status...
qa_chain exists: True
vectorstore exists: True
embeddings exists: True
free_llm exists: True


In [22]:
# Created Simpler QA Function
def simple_qa(question):
    """
    Simple QA function that works with free model
    """
    print(f"🤔 Question: {question}")

    # Get relevant chunks
    retriever = vectorstore.as_retriever(search_kwargs={"k": 2})
    docs = retriever.get_relevant_documents(question)

    if not docs:
        print("❌ No relevant documents found")
        return

    # Combine context
    context = "\n".join([doc.page_content[:300] for doc in docs])

    # Create simple prompt
    prompt = f"""Based on the following context, answer the question:

Context: {context}

Question: {question}

Answer:"""

    print("🤖 Generating answer...")
    try:
        answer = free_llm.predict(prompt)
        print(f"✅ Answer: {answer}")

        # Show sources
        print("\n📄 Sources:")
        for i, doc in enumerate(docs):
            print(f"Source {i+1}: {doc.page_content[:100]}...")

        return answer
    except Exception as e:
        print(f"❌ Error: {str(e)}")
        return None

# Test the simple function
simple_qa("What is this document about?")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


🤔 Question: What is this document about?
🤖 Generating answer...
✅ Answer: Based on the following context, answer the question:

Context: AML Doc 
Attached Documents attached in member documents 
AML Doc Status As per member information centre screen 
Bad Debt Member status - set in member maintenance  
Branch Name 
The branch in which the member opened their account (based on member 
area) 
Branch Group The “parent” branch grouping. 
5 
 
Members 
The Members view contains current member data. The data within the members view is a 
combination of data at replication date and month end.   
 
Field Notes 
Accommodation 
Type As set in indicators tab in member maintenance screen 
Active Address 1 Members address marked as Active in

Question: What is this document about?

Answer:

📄 Sources:
Source 1: AML Doc 
Attached Documents attached in member documents 
AML Doc Status As per member information c...
Source 2: 5 
 
Members 
The Members view contains current member data. The data within

'Based on the following context, answer the question:\n\nContext: AML Doc \nAttached Documents attached in member documents \nAML Doc Status As per member information centre screen \nBad Debt Member status - set in member maintenance  \nBranch Name \nThe branch in which the member opened their account (based on member \narea) \nBranch Group The “parent” branch grouping. \n5 \n \nMembers \nThe Members view contains current member data. The data within the members view is a \ncombination of data at replication date and month end.   \n \nField Notes \nAccommodation \nType As set in indicators tab in member maintenance screen \nActive Address 1 Members address marked as Active in\n\nQuestion: What is this document about?\n\nAnswer:'

In [None]:
# Interactive Chat with Simple Function
def simple_chat():
    """
    Simple chat interface using our working function
    """
    print("💬 Simple PDF Chat (type 'quit' to exit)")
    print("=" * 40)

    while True:
        question = input("\n🤔 Your question: ").strip()

        if question.lower() in ['quit', 'exit', 'q']:
            print("👋 Goodbye!")
            break

        if not question:
            continue

        simple_qa(question)
        print("-" * 40)

# Start simple chat
simple_chat()

💬 Simple PDF Chat (type 'quit' to exit)

🤔 Your question: is there any mention of accomodation or residency?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


🤔 Question: is there any mention of accomodation or residency?
🤖 Generating answer...
✅ Answer: Based on the following context, answer the question:

Context: -> indicators -> marketing consents management 
PPSN As set in details tab in member maintenance screen 
Primary Savings 
Account IBAN IBAN of members primary share account 
Proof of Address Member information > member ID documents 
Proof of Address 
Expiry Date 
Member Information > member ID docum
Guardian Address 
3 Guardians address in extended member details  
Guardian Address 
4 Guardians address in extended member details  
Guardian Date of 
Birth Guardians address in extended member details  
Guardian 
Member Number Guardians’ member number in extended member details  
Guardian Name Gua

Question: is there any mention of accomodation or residency?

Answer:

📄 Sources:
Source 1: -> indicators -> marketing consents management 
PPSN As set in details tab in member maintenance scr...
Source 2: Guardian Address 
3 Guardians ad