<a href="https://colab.research.google.com/github/sainiakhil/FastAPI-Powered-RAG-Pipeline-for-Web-and-Document-Data/blob/main/Scrapper_RAG_With_FASTAPI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install fastapi uvicorn bs4 requests transformers pyngrok python-multipart chromadb pydantic PyPDF2 torch sentence-transformers accelerate jinja2 nest-asyncio


In [None]:
import os
import torch
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import requests
from bs4 import BeautifulSoup
from pyngrok import ngrok
import PyPDF2
import chromadb
from transformers import AutoModelForCausalLM, AutoTokenizer
from sentence_transformers import SentenceTransformer
import uvicorn
import nest_asyncio
from fastapi.templating import Jinja2Templates
from fastapi.responses import HTMLResponse
from fastapi import Request
from io import BytesIO

In [None]:
# Configure the ngrok authentication token.
# The token is a credential, so it is read from the environment instead of
# being written into the notebook, where it would be published with the code.
ngrok_auth_token = os.environ.get("NGROK_AUTH_TOKEN")
if not ngrok_auth_token:
    raise RuntimeError(
        "Set the NGROK_AUTH_TOKEN environment variable before opening the ngrok tunnel."
    )
ngrok.set_auth_token(ngrok_auth_token)



In [None]:
# Load the Qwen chat model together with its tokenizer.
# Both are fetched under the same model ID, kept in one place below.
QWEN_MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"

qwen_tokenizer = AutoTokenizer.from_pretrained(QWEN_MODEL_NAME)
qwen_model = AutoModelForCausalLM.from_pretrained(
    QWEN_MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto",
)

In [None]:
# Load the Sentence Transformer used to embed both stored chunks and queries.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
app = FastAPI(title="Content Processing and Chat Service")


# Add CORS middleware.
# Any origin, method and header is accepted. Credentials are disabled because
# wildcard origins must not be combined with credentialed requests, and none
# of the endpoints in this service read cookies or authorization headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize Chroma client for vector storage (persisted under ./chroma_storage)
chroma_client = chromadb.PersistentClient(path="./chroma_storage")
collection = chroma_client.get_or_create_collection(name="content_collection")

# Jinja2 templates are loaded from the local "templates" directory
templates = Jinja2Templates(directory="templates")



class URLProcessRequest(BaseModel):
    """Request body for the ``/process-url`` endpoint."""
    # Address of the web page to fetch and index.
    url: str

class ChatRequest(BaseModel):
    """Request body for the ``/chat`` endpoint."""
    # Question to answer from the stored content.
    query: str
    # Identifier of a processed source; the processing endpoints return one
    # under the "source_id" key.
    source_id: str

def get_embedding(text):
    """Encode ``text`` with the Sentence Transformer and return the result as a list."""
    vector = embedding_model.encode(text)
    return vector.tolist()

def chunk_text(text, max_tokens=500):
    """Split ``text`` into pieces of at most ``max_tokens`` Qwen tokens.

    The text is tokenized once, the token IDs are sliced into consecutive
    windows of ``max_tokens``, and each window is decoded back to a string.
    """
    token_ids = qwen_tokenizer.encode(text)
    return [
        qwen_tokenizer.decode(token_ids[start:start + max_tokens])
        for start in range(0, len(token_ids), max_tokens)
    ]

@app.get("/", response_class=HTMLResponse)
async def serve_ui(request: Request):
    """Render the ``index.html`` template as the landing page."""
    template_context = {"request": request}
    return templates.TemplateResponse("index.html", template_context)

def _make_source_id(prefix, payload):
    """Return a reproducible source ID of the form ``<prefix>_<hex digest>``.

    The built-in ``hash()`` is salted per interpreter process for ``str`` and
    ``bytes``, so IDs built from it change after a restart even though the
    vector store is persistent. A SHA-256 digest of ``payload`` (bytes) gives
    the same ID for the same input every time.
    """
    import hashlib

    digest = hashlib.sha256(payload).hexdigest()[:16]
    return f"{prefix}_{digest}"


def _store_chunks(source_id, text, batch_size=64):
    """Chunk ``text``, embed every chunk and write the chunks to the collection.

    Each chunk is stored with ``{"source_id": source_id}`` metadata so that
    queries can be restricted to one source. ``upsert`` is used so that
    processing the same source again overwrites its chunks instead of
    colliding on existing IDs.

    Returns:
        The number of chunks stored; 0 when ``text`` is empty or whitespace.
    """
    if not text or not text.strip():
        return 0

    text_chunks = chunk_text(text)
    # Write in batches to keep each call to the vector store small.
    for start in range(0, len(text_chunks), batch_size):
        batch = text_chunks[start:start + batch_size]
        collection.upsert(
            embeddings=[get_embedding(chunk) for chunk in batch],
            documents=batch,
            ids=[f"{source_id}_chunk_{start + offset}" for offset in range(len(batch))],
            metadatas=[{"source_id": source_id} for _ in batch],
        )
    return len(text_chunks)


@app.post("/process-url")
async def process_url(request: URLProcessRequest):
    """Fetch a web page, extract its text and index it in the vector store.

    Returns:
        ``{"source_id": ..., "status": "Processed successfully"}``.

    Raises:
        HTTPException: 400 when the page cannot be fetched, contains no text,
            or any other processing step fails.
    """
    try:
        # NOTE(review): the URL comes straight from the client, so this server
        # will fetch whatever address it is given; restrict allowed hosts if
        # the service is exposed beyond trusted users.
        # A timeout keeps an unresponsive host from blocking the request forever.
        response = requests.get(request.url, timeout=30)
        # Fail on 4xx/5xx instead of indexing the error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract main content (you might want to improve this)
        text = soup.get_text()

        # Generate a stable ID for this source from the URL itself
        source_id = _make_source_id("url", request.url.encode("utf-8"))

        if _store_chunks(source_id, text) == 0:
            raise HTTPException(
                status_code=400,
                detail="No text content could be extracted from the URL.",
            )

        return {"source_id": source_id, "status": "Processed successfully"}

    except HTTPException:
        # Keep deliberately raised HTTP errors intact.
        raise
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e)) from e


@app.post("/process-pdf")
async def process_pdf(file: UploadFile = File(...)):
    """Extract the text of an uploaded PDF and index it in the vector store.

    Returns:
        ``{"source_id": ..., "status": "Processed successfully"}``.

    Raises:
        HTTPException: 400 when the PDF cannot be read, contains no
            extractable text, or any other processing step fails.
    """
    try:
        # Read the uploaded file as bytes
        pdf_content = await file.read()

        # PyPDF2 needs a file-like object, so wrap the bytes in BytesIO
        pdf_reader = PyPDF2.PdfReader(BytesIO(pdf_content))

        # Extract text from all pages. Pages without extractable text
        # contribute an empty string, and pages are separated by a newline so
        # words at page boundaries do not run together.
        full_text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)

        # Derive the ID from the file content: stable across restarts, and two
        # different files with the same name do not share an ID.
        source_id = _make_source_id("pdf", pdf_content)

        if _store_chunks(source_id, full_text) == 0:
            raise HTTPException(
                status_code=400,
                detail="No text content could be extracted from the PDF.",
            )

        return {"source_id": source_id, "status": "Processed successfully"}

    except HTTPException:
        # Keep deliberately raised HTTP errors intact.
        raise
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e)) from e


@app.post("/chat")
async def chat_with_content(request: ChatRequest):
    """Answer a question using the chunks stored for one source.

    The two chunks most similar to the query are retrieved from the chunks
    tagged with ``request.source_id`` and passed to the Qwen model as context.

    Returns:
        ``{"response": <decoded model output>, "context_used": <context text>}``.

    Raises:
        HTTPException: 404 when no chunk is stored for the source ID,
            400 when retrieval or generation fails.
    """
    try:
        # Get query embedding
        query_embedding = get_embedding(request.query)

        # Perform similarity search, restricted to the requested source
        results = collection.query(
            query_embeddings=[query_embedding],
            n_results=2,  # Top 2 most relevant chunks
            where={"source_id": request.source_id},
        )

        # 'documents' holds one list per query embedding; with no match it is
        # [[]], which is truthy, so the inner list is what must be checked.
        documents = (results or {}).get('documents') or [[]]
        matched_chunks = documents[0]
        if not matched_chunks:
            raise HTTPException(status_code=404, detail="No relevant context found for the given source ID.")

        # Combine relevant chunks for context
        context = " ".join(matched_chunks)

        # Prepare the prompt for Qwen model
        prompt = f"""Context: {context}

Question: {request.query}

Please provide a helpful and concise answer based on the given context.\n\n
FINAL ANSWER:-->>
 """

        # Tokenize the prompt
        inputs = qwen_tokenizer(prompt, return_tensors="pt").to(qwen_model.device)

        # Generate response
        outputs = qwen_model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.7,
            top_p=0.9
        )

        # Decode the response.
        # NOTE(review): outputs[0] is decoded in full, so the returned text
        # presumably contains the prompt followed by the answer; confirm
        # whether the UI relies on the "FINAL ANSWER:-->>" marker before
        # trimming the prompt here.
        response = qwen_tokenizer.decode(outputs[0], skip_special_tokens=True)

        return {
            "response": response,
            "context_used": context
        }

    except HTTPException:
        # Let the 404 above reach the client instead of turning it into a 400.
        raise
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e)) from e

In [None]:
# Open an ngrok tunnel to local port 8000 and print the public URL it exposes.
ngrok_tunnel = ngrok.connect(8000)
print(f"Public URL: {ngrok_tunnel.public_url}")


Public URL: https://ffde-35-198-250-126.ngrok-free.app


In [None]:
# Patch asyncio before starting the server; presumably needed because the
# notebook kernel already runs an event loop — confirm when running elsewhere.
nest_asyncio.apply()
# Serve the app on port 8000, the same port the ngrok tunnel forwards to.
uvicorn.run(app, port=8000)

INFO:     Started server process [185]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


INFO:     43.224.1.210:0 - "GET / HTTP/1.1" 200 OK
INFO:     43.224.1.210:0 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO:     43.224.1.210:0 - "POST /process-url HTTP/1.1" 200 OK
INFO:     43.224.1.210:0 - "POST /chat HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [185]


------------------------------------------------------------------------------------------------------------------------------------------------
