In [3]:
# Install the notebook's runtime dependencies into the active kernel's
# environment (`%pip` targets the kernel; `-q` keeps pip's output quiet).
# NOTE(review): no versions are pinned, so a fresh run may resolve different
# releases than the ones this notebook was written against — consider pinning.
%pip install -q \
  llama-index \
  llama-index-llms-openrouter \
  llama-index-embeddings-huggingface \
  llama-index-readers-file \
  llama-index-packs-fusion-retriever \
  sentence-transformers \
  nest-asyncio \
  requests

# Printed unconditionally once the install line returns; it does not itself
# verify that every package was installed successfully.
print("✅ Installation complete")





✅ Installation complete


In [4]:
import os
from getpass import getpass
import nest_asyncio

nest_asyncio.apply()

from llama_index.core import Settings
from llama_index.llms.openrouter import OpenRouter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Obtain the OpenRouter API key. A key already present in the environment is
# reused so the cell can be re-run without prompting again; otherwise ask for
# it interactively (input is hidden like a password).
_openrouter_key = os.environ.get("OPENROUTER_API_KEY", "").strip()
if not _openrouter_key:
    _openrouter_key = getpass("Enter your OpenRouter API key: ").strip()
if not _openrouter_key:
    # Fail here with a clear message instead of later with an opaque
    # authentication error on the first LLM call.
    raise ValueError("OpenRouter API key is required but none was provided.")
os.environ["OPENROUTER_API_KEY"] = _openrouter_key

# Configure the LLM (Meta Llama via OpenRouter)
llm = OpenRouter(
    api_key=os.environ["OPENROUTER_API_KEY"],
    model="meta-llama/llama-3.3-70b-instruct:free",
    max_tokens=512,   # upper bound on the length of each generated answer
    temperature=0.1,  # Low = more precise, less "creative"
    timeout=60,
    # Instructs the model to stay within the retrieved context.
    system_prompt=(
        "You are an expert RAG system that answers ONLY using the provided context. "
        "Never hallucinate. Never guess. If the answer is not in the context, say so. "
        "Provide short, clear, factual responses with 2–4 evidence bullets."
    ),
)

# Configure the embedding model
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

# Register both with LlamaIndex settings so later cells pick them up as the
# defaults without passing them explicitly.
Settings.llm = llm
Settings.embed_model = embed_model

print("✅ AI model and settings are ready to use")


Enter your OpenRouter API key: ··········


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

✅ AI model and settings are ready to use


In [5]:
import os
import re
import requests

def download_pdf_from_drive(drive_url: str, save_path: str, timeout: float = 60):
    """
    Download a PDF from a Google Drive sharing link and save it locally.

    Args:
        drive_url: Sharing link that carries the file ID either as a
            ``/d/<FILE_ID>`` path segment or as an ``id=<FILE_ID>`` query
            parameter.
        save_path: Local path the downloaded bytes are written to.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Raises:
        ValueError: If no file ID can be extracted from ``drive_url``, or if
            the downloaded content does not look like a PDF.
        requests.HTTPError: If the server answers with an error status.
    """
    # Try pattern: /d/<FILE_ID>
    match = re.search(r"/d/([A-Za-z0-9_-]+)", drive_url)
    if match is None:
        # Try pattern: ?id=<FILE_ID> (or &id=...). Anchoring on the separator
        # keeps parameters that merely end in "id" (e.g. "ouid=") from being
        # mistaken for the file ID.
        match = re.search(r"(?:^|[?&])id=([A-Za-z0-9_-]+)", drive_url)
    if match is None:
        raise ValueError("❌ Could not extract file ID from the link.")
    file_id = match.group(1)

    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
    print(f"📥 Downloading PDF (file ID {file_id})...")

    resp = requests.get(download_url, timeout=timeout)
    resp.raise_for_status()

    # A successful status does not guarantee a PDF body: Drive can answer with
    # an HTML page instead (e.g. when the file is not shared publicly or a
    # confirmation step is required). Check for the PDF header before saving
    # so a bad download is reported here rather than as a parsing failure later.
    if b"%PDF-" not in resp.content[:1024]:
        raise ValueError(
            "❌ The downloaded content is not a PDF. Check that the file is a PDF "
            "and is shared with 'Anyone with the link'."
        )

    with open(save_path, "wb") as f:
        f.write(resp.content)

    print(f"✅ PDF downloaded → {save_path}")

# Destination of the download: a fixed file name inside a local data folder.
DATA_DIR = "data"
pdf_path = os.path.join(DATA_DIR, "source.pdf")

# Prompt for the sharing link; whitespace picked up while pasting is dropped.
drive_link = input("📌 Paste your Google Drive PDF link here: ").strip()

# Create the folder if needed (no error when it already exists), then fetch
# the file into it.
os.makedirs(DATA_DIR, exist_ok=True)
download_pdf_from_drive(drive_url=drive_link, save_path=pdf_path)


📌 Paste your Google Drive PDF link here: https://drive.google.com/file/d/1JikYXnoUjNvkJ5tbRgGYnWjuWYdlpFkr/view?usp=sharing
📥 Downloading PDF (file ID 1JikYXnoUjNvkJ5tbRgGYnWjuWYdlpFkr)...
✅ PDF downloaded → data/source.pdf


In [6]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SemanticSplitterNodeParser
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Load the PDF. The reader returns a list of documents (the recorded run
# produced 26 for the sample file).
documents = SimpleDirectoryReader(input_files=[pdf_path]).load_data()
print(f"📄 Loaded {len(documents)} document(s).")

# Reuse the embedding model already registered on `Settings` instead of
# loading a second copy of the same "BAAI/bge-small-en-v1.5" weights. This
# also guarantees that splitting and indexing use the same embeddings.
semantic_embed = Settings.embed_model

# Create a semantic splitter
parser = SemanticSplitterNodeParser(
    buffer_size=3,
    breakpoint_percentile_threshold=95,
    embed_model=semantic_embed,
)

# Generate semantic nodes (chunks)
nodes = parser.get_nodes_from_documents(documents)

# Tag each chunk with where it came from and how it was produced.
for node in nodes:
    node.metadata["source"] = pdf_path
    node.metadata["chunk_type"] = "semantic"

print(f"🔍 Created {len(nodes)} high-quality semantic nodes.")


📄 Loaded 26 document(s).
🔍 Created 26 high-quality semantic nodes.


In [7]:
from llama_index.core.llama_pack import download_llama_pack

# Prefer the pack class shipped in `llama-index-packs-fusion-retriever`, which
# the install cell already puts in the environment. Only if that import is
# unavailable, fall back to fetching the pack at runtime (needs network access
# and writes into ./query_rewriting_pack).
try:
    from llama_index.packs.fusion_retriever import QueryRewritingRetrieverPack
except ImportError:
    QueryRewritingRetrieverPack = download_llama_pack(
        "QueryRewritingRetrieverPack",
        "./query_rewriting_pack",
    )

# Create the advanced retriever using your nodes
query_rewriting_pack = QueryRewritingRetrieverPack(
    nodes,                      # semantic chunks built in the previous cell
    chunk_size=256,
    vector_similarity_top_k=8,
    fusion_similarity_top_k=8,
    # The recorded run printed 5 generated queries for this value, i.e. the
    # count appears to include the original question — TODO confirm.
    num_queries=6,
)

print("🚀 Advanced Query Fusion RAG Engine Ready!")


🚀 Advanced Query Fusion RAG Engine Ready!


In [None]:
def safe_rag_run(question, retries=3, backoff=2.0):
    """
    Run the RAG pipeline with basic retry logic.

    Args:
        question: The user question passed to ``query_rewriting_pack.run``.
        retries: Maximum number of attempts before giving up.
        backoff: Base delay in seconds before the next attempt; it doubles
            after every failure so a rate-limited or briefly unavailable
            endpoint is not hit again immediately. Use 0 to disable waiting.

    Returns:
        The pipeline's response on the first attempt that yields a non-empty
        answer, otherwise a fixed failure message string.
    """
    import time  # local import: this cell has no import section of its own

    for attempt in range(1, retries + 1):
        try:
            resp = query_rewriting_pack.run(question)

            # Treat a missing or whitespace-only answer as a failed attempt.
            if resp is None or str(resp).strip() == "":
                raise ValueError("Empty LLM response.")

            return resp

        except Exception as e:
            print(f"⚠️ Error: {e}")
            # Announce a retry only when one will actually happen; after the
            # last attempt fall through to the failure message instead.
            if attempt < retries:
                print(f"🔁 Retrying (attempt {attempt + 1}/{retries})...")
                time.sleep(backoff * 2 ** (attempt - 1))

    return "❌ Could not generate a valid answer after retries."

print("\nRAG Interactive Mode")
print("Ask any question about your PDF.")
print("Type 'end' to exit.\n")

# Interactive Q&A loop
while True:
    try:
        user_question = input("🟦 Enter your question: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Interrupting the kernel or closing stdin ends the session cleanly
        # instead of surfacing a traceback.
        print("\n👋 Session ended.")
        break

    if user_question.lower() == "end":
        print("\n👋 Session ended.")
        break

    if not user_question:
        # A blank line would otherwise trigger a full retrieval + LLM round
        # trip for a question that has no content.
        print("⚠️ Please type a question, or 'end' to exit.\n")
        continue

    print("\n🔍 Retrieving answer...\n")

    # Run the question through the RAG pipeline
    response = safe_rag_run(user_question)

    print("\n──────────────────────────────────────────────")
    print("❓ QUESTION:")
    print(user_question)
    print("\n🧠 ANSWER:")
    print(response)
    print("──────────────────────────────────────────────\n")



RAG Interactive Mode
Ask any question about your PDF.
Type 'end' to exit.

🟦 Enter your question: what is the pdf about

🔍 Retrieving answer...

Generated queries:
What is the content of the PDF file?
How to view the PDF document online for free?
What are the main topics discussed in the PDF?
Can I download the PDF file for offline reading?
What is the purpose and summary of the PDF document?

──────────────────────────────────────────────
❓ QUESTION:
what is the pdf about

🧠 ANSWER:
The PDF appears to be about:
* Understanding the relationship between thoughts, emotions, and behaviors
* Identifying and challenging negative thought patterns
* Developing coping strategies and techniques for managing emotions
* Improving overall mental well-being and resilience
──────────────────────────────────────────────

