<a href="https://colab.research.google.com/github/thakkars9/Agentic_AI/blob/main/Google_Kaggle_Day1b.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -U pymupdf sentence-transformers faiss-cpu openai

Collecting pymupdf
  Downloading pymupdf-1.26.7-cp310-abi3-manylinux_2_28_x86_64.whl.metadata (3.4 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Collecting openai
  Downloading openai-2.15.0-py3-none-any.whl.metadata (29 kB)
Downloading pymupdf-1.26.7-cp310-abi3-manylinux_2_28_x86_64.whl (24.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.1/24.1 MB[0m [31m96.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.8/23.8 MB[0m [31m83.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading openai-2.15.0-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m64.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pymupdf, faiss-cpu, openai
  Attempting uninstall: openai
  

In [2]:
import os
import fitz  # PyMuPDF
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from openai import OpenAI
from google.colab import userdata

# Initialize OpenRouter Client
client = OpenAI(
  base_url="https://openrouter.ai/api/v1",
  api_key=userdata.get('OPENROUTER_API_KEY'),
)

# 1. READ DOCUMENTS
documents = []
folder_path = "/content/input"

print("Reading documents...")
for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)
    if filename.endswith(".pdf"):
        doc = fitz.open(file_path)
        text = "".join([page.get_text() for page in doc])
        # Split into chunks of ~500 characters for better search accuracy
        chunks = [text[i:i+500] for i in range(0, len(text), 400)]
        documents.extend(chunks)

# 2. CREATE SEARCH INDEX (The "Retrieval" part)
print(f"Indexing {len(documents)} text chunks...")
embedder = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = embedder.encode(documents)

index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(np.array(embeddings).astype('float32'))

# 3. DEFINE RAG FUNCTION
def ask_my_docs(question):
    # Search for top 3 relevant chunks
    query_vector = embedder.encode([question])
    distances, indices = index.search(np.array(query_vector).astype('float32'), k=3)

    # Combine retrieved chunks into context
    context = "\n---\n".join([documents[i] for i in indices[0]])

    # Generate Answer using Gemini 2.0
    response = client.chat.completions.create(
        model="google/gemini-2.0-flash-exp:free",
        messages=[
            {"role": "system", "content": "You are a research assistant. Use the provided context to answer the question accurately. If the answer isn't in the context, say you don't know."},
            {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}"}
        ]
    )
    return response.choices[0].message.content

print("✅ System Ready!")



Reading documents...
Indexing 941 text chunks...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

✅ System Ready!


In [12]:
import openai

def ask_rag_system(question):
    # Retrieve context from your /content/input index
    query_vector = embedder.encode([question])
    distances, indices = index.search(np.array(query_vector).astype('float32'), k=3)
    context = "\n---\n".join([documents[i] for i in indices[0]])

    # Models to try in order of preference
    preferred_models = [
         "google/gemini-2.0-flash-exp:free"
        "meta-llama/llama-3.1-405b-instruct:free",
        "nousresearch/hermes-3-llama-3.1-405b:free",
        "meta-llama/llama-3.3-70b-instruct:free",

    ]

    for model_id in preferred_models:
        try:
            print(f"🤖 Attempting with: {model_id}...")
            response = client.chat.completions.create(
                model=model_id,
                messages=[
                    {"role": "system", "content": "Use the context to answer the question."},
                    {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}"}
                ],
                timeout=45
            )
            return response.choices[0].message.content
        except openai.NotFoundError:
            print(f"❌ {model_id} not found/offline. Trying next...")
            continue
        except Exception as e:
            print(f"⚠️ Error with {model_id}: {e}")
            continue

    return "All models are currently unavailable. Please check your OpenRouter settings."

# Run the test
print(ask_rag_system("What is MCP?"))

🤖 Attempting with: google/gemini-2.0-flash-exp:freemeta-llama/llama-3.1-405b-instruct:free...
⚠️ Error with google/gemini-2.0-flash-exp:freemeta-llama/llama-3.1-405b-instruct:free: Error code: 429 - {'error': {'message': 'Provider returned error', 'code': 429, 'metadata': {'raw': 'google/gemini-2.0-flash-exp:free is temporarily rate-limited upstream. Please retry shortly, or add your own key to accumulate your rate limits: https://openrouter.ai/settings/integrations', 'provider_name': 'Google', 'is_byok': False}}, 'user_id': 'user_38K3Aod572pjtAmowPb88rTIqxy'}
🤖 Attempting with: nousresearch/hermes-3-llama-3.1-405b:free...
MCP stands for Model Command Protocol. It is a tool that allows users to give specific, stateless commands such as fetching weather data or querying a database. MCP is governed by the Linux Foundation and is designed to handle simple, direct tasks rather than complex goals that require autonomous reasoning and planning.
