# 🚀 RAG Pipeline - Kaggle Backend with Ngrok

This notebook sets up a complete RAG backend with FastAPI and exposes it via ngrok.

**Prerequisites:**
1. Enable **Internet** in Kaggle notebook settings
2. Get your ngrok auth token from: https://dashboard.ngrok.com/get-started/your-authtoken

**Run cells in order!**

In [None]:
# CELL 1: Install Dependencies
!pip install fastapi uvicorn pyngrok python-multipart --quiet
!pip install torch transformers faiss-cpu rank_bm25 rouge_score sentence-transformers PyPDF2 --quiet
!pip install scikit-learn psutil nltk pydantic --quiet

print("‚úÖ All dependencies installed!")

In [None]:
# CELL 2: Configure Ngrok
import os

from pyngrok import ngrok, conf

# REPLACE WITH YOUR NGROK TOKEN, or leave the placeholder untouched and
# provide the token through the NGROK_AUTH_TOKEN environment variable instead
# (keeps the secret out of the saved notebook).
NGROK_AUTH_TOKEN = "YOUR_NGROK_TOKEN_HERE"

# An empty value or the untouched "YOUR_..." placeholder means the literal
# above was not edited, so fall back to the environment.
if not NGROK_AUTH_TOKEN or NGROK_AUTH_TOKEN.startswith("YOUR_"):
    NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN", "")

if NGROK_AUTH_TOKEN:
    conf.get_default().auth_token = NGROK_AUTH_TOKEN
    print("‚úÖ Ngrok configured successfully!")
else:
    # Do not claim success with a missing token: opening the tunnel in
    # CELL 6 would otherwise fail later with a less obvious error.
    print("WARNING: ngrok auth token is not set - edit NGROK_AUTH_TOKEN above "
          "or set the NGROK_AUTH_TOKEN environment variable before CELL 6.")
print("üìù Don't have a token? Get one at: https://dashboard.ngrok.com/signup")

In [None]:
# CELL 3: Import Libraries
import re
import time
import numpy as np
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sklearn.metrics.pairwise import cosine_similarity
from rank_bm25 import BM25Okapi
import torch
import io
from sentence_transformers import CrossEncoder, SentenceTransformer, util
import psutil
import nltk
from nltk.tokenize import sent_tokenize
from collections import Counter

# FAISS is an optional dependency: when it cannot be loaded, `faiss` is bound
# to None so later code can test for it instead of hitting a NameError.
try:
    import faiss
except (ImportError, OSError):
    # ImportError: package not installed. OSError: the native library failed
    # to load. Anything else (e.g. KeyboardInterrupt) must not be swallowed.
    print("‚ö†Ô∏è FAISS not available")
    faiss = None

# sent_tokenize needs the Punkt sentence models. Recent NLTK releases look
# them up under 'punkt_tab' while older releases use 'punkt'; fetch both so
# the notebook works with whichever version pip installed.
for _punkt_resource in ('punkt', 'punkt_tab'):
    nltk.download(_punkt_resource, quiet=True)
print("‚úÖ Libraries imported!")

In [None]:
# CELL 4: Paste Your RAG Code Here
# Copy all your chunking functions and OptimizedRAG class from your existing notebook
# For example:

def clean_text(text):
    """Normalize *text* to single-spaced printable ASCII.

    Steps:
      1. Every run of whitespace (including tabs/newlines) becomes one space.
      2. Every character outside printable ASCII (0x20-0x7E) is removed.
      3. Spaces that became adjacent because of step 2 are collapsed again
         and leading/trailing spaces are stripped.

    Args:
        text: Input string.

    Returns:
        The cleaned string; empty if nothing printable remains.
    """
    # Whitespace must be converted to spaces *before* the ASCII filter,
    # otherwise tabs/newlines would be deleted and glue words together.
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r'[^\x20-\x7E]', '', text)
    # Dropping characters can leave "a  b" or a dangling edge space.
    return re.sub(r' {2,}', ' ', text).strip()

def read_pdf_from_bytes(pdf_bytes):
    """Extract per-page text from an in-memory PDF.

    Returns a list with the text of every page that yielded any text, in page
    order. Any failure while parsing is printed and results in an empty list.
    """
    try:
        reader = PdfReader(io.BytesIO(pdf_bytes))
        # Lazily extract each page once; the generator is consumed inside the
        # try block so extraction errors are still handled below.
        page_texts = (page.extract_text() for page in reader.pages)
        return [page_text for page_text in page_texts if page_text]
    except Exception as err:
        print(f"Error: {err}")
        return []

# TODO: Add your chunking methods here
# - chunk_with_overlap
# - Gradient_chunking
# - Gradient_chunking_final
# - etc.
# NOTE: the /process endpoint in CELL 5 passes the chunking method by name
# ("fixed", "Gradient_chunking" or "Gradient_chunking_final").

# TODO: Add your evaluate_chunk_quality function here
# NOTE: CELL 5 calls evaluate_chunk_quality(rag.chunks, request.text) and reads
# these keys from the result: "weighted_score", "avg_coherence",
# "context_preservation", "avg_information_density", "coverage",
# "semantic_coverage".

# TODO: Add your OptimizedRAG class here
# NOTE: CELL 5 constructs it as OptimizedRAG([text], chunking_method=...,
# chunk_size=..., overlap=..., use_bm25=..., use_cosine=..., use_faiss=...,
# rerank_enabled=..., top_k=...) and then uses .retrieve(query),
# .generate_answer(query, retrieved) and the .chunks attribute.

# This message is printed unconditionally; it does not verify that the code
# above was actually pasted.
print("‚úÖ RAG functions loaded! (Make sure you pasted your code above)")

In [None]:
# CELL 5: FastAPI Setup
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List
import uvicorn
import nest_asyncio

# Patch asyncio so an event loop can be started from inside the notebook
# kernel, which already runs one.
nest_asyncio.apply()

app = FastAPI(title="RAG Pipeline API")

# CORS
# Any origin may call the API (the frontend URL is not known in advance).
# Credentials are disabled: wildcard origins must not be combined with
# allow_credentials=True, and this API uses no cookies or auth headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Models
# Client-supplied pipeline options. The /process handler translates each field
# into the corresponding snake_case keyword argument of OptimizedRAG.
class RAGConfig(BaseModel):
    # Forwarded as `chunk_size`.
    chunkSize: int = 500
    # Forwarded as `overlap`.
    overlap: int = 50
    # One of "fixed", "gradient", "gradient_final"; any other value is treated
    # like "gradient" by the /process handler.
    method: str = "gradient"
    # Forwarded as `use_bm25`.
    useBM25: bool = True
    # Forwarded as `use_cosine`.
    useCosine: bool = True
    # Forwarded as `use_faiss`.
    useFaiss: bool = False
    # Forwarded as `rerank_enabled`.
    rerankEnabled: bool = True
    # Forwarded as `top_k`.
    topK: int = 4

# Request body of POST /process.
class ProcessRequest(BaseModel):
    # Full document text; passed to OptimizedRAG as a single-element list.
    text: str
    # Question used for retrieval and answer generation.
    query: str
    # Chunking / retrieval options for this request.
    config: RAGConfig

# One chunk of the processed document as returned to the client.
class ChunkData(BaseModel):
    # Position of the chunk in `rag.chunks` (0-based).
    id: int
    # Chunk text.
    content: str

# Quality and resource metrics reported by POST /process.
class MetricsData(BaseModel):
    # len(rag.chunks)
    num_chunks: int
    # The next fields are copied from the dict returned by
    # evaluate_chunk_quality (defined by the user in CELL 4).
    weighted_score: float
    # Wall-clock time for building the RAG, retrieving and generating,
    # in milliseconds.
    latency: float
    avg_coherence: float
    context_preservation: float
    avg_information_density: float
    coverage: float
    semantic_coverage: float
    # Value returned by psutil.Process().cpu_percent().
    cpu_usage: float
    # Growth of the process RSS during the request in MiB, clamped at 0.
    memory_usage: float

# Response body of POST /process.
class RAGResponse(BaseModel):
    # Generated answer text.
    response: str
    # Every chunk produced for the document, in order.
    chunks: List[ChunkData]
    # Indices into `chunks` of the retrieved chunks (first equal chunk for
    # each retrieved item; items with no equal chunk are omitted).
    retrievedChunks: List[int]
    # Quality and resource metrics for this request.
    metrics: MetricsData

@app.get("/")
async def root():
    # Liveness probe: a fixed payload that lets clients confirm the API is up.
    health_payload = {"message": "RAG Pipeline API", "status": "healthy"}
    return health_payload

@app.post("/upload-document")
async def upload_document(file: UploadFile = File(...)):
    """Extract the text of an uploaded PDF.

    Args:
        file: Multipart upload; its filename must end in ``.pdf``
            (case-insensitive).

    Returns:
        A dict with ``filename``, ``extracted_text`` (page texts joined by a
        single space) and ``message``.

    Raises:
        HTTPException: 400 if the file is not a PDF or no text could be
            extracted; 500 if reading the upload fails unexpectedly.
    """
    # Validate the name before reading the body; the filename may be missing.
    filename = file.filename or ""
    if not filename.lower().endswith('.pdf'):
        raise HTTPException(400, "Only PDF files")

    # Only the I/O is guarded, so the deliberate 400 responses in this
    # function are never re-wrapped as 500 errors.
    try:
        content = await file.read()
        documents = read_pdf_from_bytes(content)
    except Exception as e:
        raise HTTPException(500, str(e)) from e

    if not documents:
        raise HTTPException(400, "Could not extract text")

    return {
        "filename": file.filename,
        "extracted_text": " ".join(documents),
        "message": "Success"
    }

# Public `method` values accepted by the API -> chunking-method identifiers
# passed to OptimizedRAG.
_CHUNKING_METHODS = {
    "fixed": "fixed",
    "gradient": "Gradient_chunking",
    "gradient_final": "Gradient_chunking_final",
}


def _build_rag_kwargs(config):
    """Translate the camelCase request config into OptimizedRAG keyword arguments."""
    return {
        # Unknown method names fall back to gradient chunking.
        "chunking_method": _CHUNKING_METHODS.get(config.method, "Gradient_chunking"),
        "chunk_size": config.chunkSize,
        "overlap": config.overlap,
        "use_bm25": config.useBM25,
        "use_cosine": config.useCosine,
        "use_faiss": config.useFaiss,
        "rerank_enabled": config.rerankEnabled,
        "top_k": config.topK,
    }


def _first_match_indices(chunks, retrieved):
    """Return, for each retrieved item, the index of the first equal chunk (misses are skipped)."""
    indices = []
    for item in retrieved:
        try:
            indices.append(chunks.index(item))
        except ValueError:
            # Retrieved item is not one of the chunks; leave it out.
            continue
    return indices


@app.post("/process", response_model=RAGResponse)
async def process_rag(request: ProcessRequest):
    """Run the RAG pipeline on the supplied text and answer the query.

    Builds an ``OptimizedRAG`` over ``request.text``, retrieves chunks for
    ``request.query``, generates an answer and reports chunk-quality and
    resource metrics. ``OptimizedRAG`` and ``evaluate_chunk_quality`` are
    expected to be defined by the user in CELL 4.

    Args:
        request: Document text, query and pipeline configuration.

    Returns:
        RAGResponse with the answer, all chunks, the indices of the retrieved
        chunks and the metrics.

    Raises:
        HTTPException: 500 with the error text if any pipeline step fails.
    """
    try:
        # perf_counter is monotonic, so latency is immune to clock changes.
        start_time = time.perf_counter()
        process = psutil.Process()
        memory_start = process.memory_info().rss / 1024 / 1024
        # Prime the CPU counter: the next non-blocking call then reports
        # utilisation over the pipeline run itself instead of an idle window
        # measured afterwards.
        process.cpu_percent(interval=None)

        # Initialize RAG
        rag = OptimizedRAG([request.text], **_build_rag_kwargs(request.config))

        # Retrieve and generate
        retrieved = rag.retrieve(request.query)
        response_text = rag.generate_answer(request.query, retrieved)

        # Metrics
        latency = time.perf_counter() - start_time
        cpu_usage = process.cpu_percent(interval=None)
        memory_end = process.memory_info().rss / 1024 / 1024
        # RSS can shrink during the request; never report negative usage.
        memory_usage = max(memory_end - memory_start, 0.0)

        quality = evaluate_chunk_quality(rag.chunks, request.text)

        chunks = list(rag.chunks)
        retrieved_indices = _first_match_indices(chunks, retrieved)

        return RAGResponse(
            response=response_text,
            chunks=[ChunkData(id=i, content=c) for i, c in enumerate(chunks)],
            retrievedChunks=retrieved_indices,
            metrics=MetricsData(
                num_chunks=len(chunks),
                weighted_score=quality["weighted_score"],
                latency=latency * 1000,  # seconds -> milliseconds
                avg_coherence=quality["avg_coherence"],
                context_preservation=quality["context_preservation"],
                avg_information_density=quality["avg_information_density"],
                coverage=quality["coverage"],
                semantic_coverage=quality["semantic_coverage"],
                cpu_usage=cpu_usage,
                memory_usage=memory_usage
            )
        )
    except HTTPException:
        # Preserve deliberate HTTP errors instead of re-wrapping them as 500.
        raise
    except Exception as e:
        raise HTTPException(500, str(e)) from e

print("‚úÖ FastAPI app configured!")

In [None]:
# CELL 6: Start Server with Ngrok
import threading

def run_server():
    # Blocking call: serves `app` on all interfaces, port 8000, until stopped.
    server_options = {"host": "0.0.0.0", "port": 8000, "log_level": "info"}
    uvicorn.run(app, **server_options)

# Start server
# Daemon thread: the server must not keep the kernel alive on shutdown.
server_thread = threading.Thread(target=run_server, daemon=True)
server_thread.start()
# Give uvicorn a moment to bind the port before opening the tunnel.
time.sleep(3)

# Start ngrok
# ngrok.connect() returns a tunnel object; its `public_url` attribute is the
# plain URL string. Formatting the tunnel object itself would embed its repr
# (not a usable URL) in the messages below and in later requests.
tunnel = ngrok.connect(8000)
public_url = tunnel.public_url

print("=" * 80)
print("üöÄ RAG PIPELINE API IS LIVE!")
print("=" * 80)
print(f"üì° Public URL: {public_url}")
print(f"üìù Docs: {public_url}/docs")
print("=" * 80)
print("")
print("‚úÖ Copy this URL and update it in your React frontend!")
print("")
print("Update apiService.js:")
print(f"const API_BASE_URL = '{public_url}';")
print("=" * 80)

In [None]:
# CELL 7: Test API
import requests

# `public_url` may be a plain string or a tunnel object exposing the URL as
# `.public_url`; normalise to the string form before building the request.
_base_url = getattr(public_url, "public_url", public_url)

try:
    # Bound the wait so a dead tunnel cannot hang the cell indefinitely.
    response = requests.get(f"{_base_url}/", timeout=10)
    # Treat 4xx/5xx answers as failures instead of reporting success.
    response.raise_for_status()
    print("‚úÖ API Test Successful!")
    print(f"Response: {response.json()}")
except (requests.RequestException, ValueError) as e:
    # ValueError covers a body that is not valid JSON.
    print(f"‚ùå Test failed: {e}")

In [None]:
# CELL 8: Keep Alive (Keep this running!)
print(f"üîÑ Server running at: {public_url}")
print("üí° Keep this cell running to maintain connection")
print("‚ö†Ô∏è Free ngrok sessions timeout after 2 hours")
print("")

try:
    while True:
        time.sleep(60)
        print(".", end="", flush=True)
except KeyboardInterrupt:
    print("\nüõë Server stopped")
    ngrok.kill()