In [1]:
# Imports 

# Core packages
import os
import tempfile
from dotenv import load_dotenv

# pdf processing
from pdfminer.high_level import extract_text

# vector search
import faiss
import numpy as np

# Text chunking and embedding
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings

# LLM and RAG
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage
from langchain.vectorstores import FAISS

# Interface
import gradio as gr

# Token counting
import tiktoken

# Read variables from the .env file, then fail fast when the OpenAI key
# is still missing or empty.

load_dotenv()
if os.getenv("OPENAI_API_KEY") in (None, ""):
    raise ValueError("OPENAI_API_KEY is not set in the environment variables")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# PDF extraction and chunking

def extract_pdf_text(file_path: str) -> str:
    """Return whatever pdfminer's `extract_text` produces for the PDF at `file_path`."""
    pdf_text = extract_text(file_path)
    return pdf_text

def split_text_to_chunks(text: str, chunk_size=1000, chunk_overlap=200) -> list:
    """Cut `text` into overlapping chunks with LangChain's recursive splitter.

    Parameters:
    - text: the text to split
    - chunk_size: forwarded to the splitter as `chunk_size`
    - chunk_overlap: forwarded to the splitter as `chunk_overlap`

    Returns:
    - The result of the splitter's `split_text(text)` call.
    """
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
        # Separator list handed to the splitter: paragraph break, line break,
        # full stop, single space.
        "separators": ["\n\n", "\n", ".", " "],
    }
    return RecursiveCharacterTextSplitter(**splitter_options).split_text(text)

In [3]:
# Sample run: extract the demo PDF and split it into chunks.
pdf_path = "notes.pdf"
raw_text = extract_pdf_text(pdf_path)
text_chunks = split_text_to_chunks(raw_text)

print(f"Extracted {len(text_chunks)} text chunks from the PDF.")
# Only preview a chunk when at least one exists; indexing an empty list
# would raise IndexError.
if text_chunks:
    print("Sample chunk:\n", text_chunks[0][:200])
else:
    print("No text chunks were produced from the PDF.")

Extracted 1697 text chunks from the PDF.
Sample chunk:
 Site 
Reliability 
Engineering

HOW GOOGLE RUNS PRODUCTION SYSTEMS

Edited by Betsy Beyer, Chris Jones,  
Jennifer Petoff & Niall Richard Murphy

Praise for Site Reliability Engineering

Google’s SR


In [4]:
# Embedding + FAISS Index creation

from langchain_community.vectorstores import FAISS

def create_faiss_index(chunks: list, model_name: str = "text-embedding-3-small") -> FAISS:
    """
    Embed the given text chunks with an OpenAI embedding model and store
    them in a FAISS vector index.

    Parameters:
    - chunks: list of text chunks to embed and index
    - model_name: name of the OpenAI *embedding* model
      (default 'text-embedding-3-small'); chat model names such as
      'gpt-4o-mini' are not embedding models

    Returns:
    - The FAISS vector store built from `chunks`

    Raises:
    - ValueError: if `chunks` is empty
    """
    if not chunks:
        raise ValueError("Cannot build a FAISS index from an empty list of chunks")

    embedding_model = OpenAIEmbeddings(model=model_name)
    # Index the chunks passed in by the caller, not a notebook-level global,
    # so each call indexes exactly the document it was given.
    return FAISS.from_texts(chunks, embedding_model)

In [6]:
# Sample run
# Build the index with the larger embedding model.
# Dimensions: text-embedding-3-small - 1536 vs text-embedding-3-large - 3072.
vectorstore = create_faiss_index(text_chunks, model_name="text-embedding-3-large")


# Save for reuse
vectorstore.save_local("faiss_index_store")


In [7]:
# Retrieve Top-k Chunks and Build RAG Prompt

def retrieve_relevant_chunks(vectorstore, query: str, k: int = 4) -> list:
    """Run a similarity search for `query` (requesting `k` results) and return
    the `page_content` of each document the store gives back."""
    matched_docs = vectorstore.similarity_search(query, k=k)
    chunk_texts = []
    for matched in matched_docs:
        chunk_texts.append(matched.page_content)
    return chunk_texts

def build_rag_prompt(query: str, context_chunks: list) -> str:
    """Assemble the LLM prompt from the retrieved chunks and the user's question.

    The chunks are joined with a `---` divider line and placed under
    "Context:", followed by the question and a trailing "Answer:" cue.
    """
    joined_context = "\n\n---\n\n".join(context_chunks)
    sections = (
        "You are an expert assistant. Use the following context from a document to answer the user's question. If unsure, say so.",
        f"Context:\n{joined_context}",
        f"Question:\n{query}",
        "Answer:",
    )
    # Sections are separated by one blank line each.
    return "\n\n".join(sections)


In [11]:
# Testing
# Retrieval: fetch the top 4 chunks for a sample question
# (adjust k to trade context length against coverage).
query = "Explain error budgets in SRE."
top_chunks = retrieve_relevant_chunks(vectorstore, query=query, k=4)

# Prompt assembly: combine the question with the retrieved chunks
prompt = build_rag_prompt(query=query, context_chunks=top_chunks)

# Preview only the first 1000 characters of the prompt
print(prompt[:1000])


You are an expert assistant. Use the following context from a document to answer the user's question. If unsure, say so.

Context:
Error Budgets
Balance reliability and the pace of innovation with error budgets (see “Motivation for
Error Budgets” on page 33), which define the acceptable level of failure for a service,
over some period; we often use a month. A budget is simply 1 minus a service’s SLO;
for  instance,  a  service  with  a  99.99%  availability  target  has  a  0.01%  “budget”  for
unavailability.  As  long  as  the  service  hasn’t  spent  its  error  budget  for  the  month
through the background rate of errors plus any downtime, the development team is
free (within reason) to launch new features, updates, and so on.

---

Forming Your Error Budget
In order to base these decisions on objective data, the two teams jointly define a quar‐
terly  error  budget  based  on  the  service’s  service  level  objective,  or  SLO  (see  Chap‐
ter  4).  The  error 

In [12]:
# Call OpenAI LLM with the formatted RAG prompt

def get_llm_response(prompt: str, model_name: str = "gpt-4o-mini", temperature: float = 0.2) -> str:
    """
    Sends the RAG prompt to the specified OpenAI Chat model and returns the response.

    Parameters:
    - prompt: full prompt text, sent to the model as a single human message
    - model_name: e.g., "gpt-3.5-turbo", "gpt-4", "gpt-4o"
    - temperature: controls randomness (0.0 = deterministic)

    Returns:
    - Response text from LLM (the `content` of the returned message)
    """
    llm = ChatOpenAI(model=model_name, temperature=temperature)
    # Use `invoke` rather than calling the model object directly:
    # `llm([...])` is the deprecated calling convention in LangChain.
    response = llm.invoke([HumanMessage(content=prompt)])
    return response.content


In [13]:
# Sample usage: send the previously built prompt to gpt-4o and print the reply
rag_response = get_llm_response(
    prompt=prompt,
    model_name="gpt-4o",
)
print("Answer:\n", rag_response)

Answer:
 Error budgets in Site Reliability Engineering (SRE) are a tool used to balance the reliability of a service with the pace of innovation. They define the acceptable level of failure for a service over a certain period, often a month or a quarter. An error budget is calculated as 1 minus the service's Service Level Objective (SLO). For example, if a service has a 99.99% availability target, it has a 0.01% error budget for unavailability.

The error budget provides a clear, objective metric that determines how unreliable the service is allowed to be within a given period. This metric helps remove the politics from negotiations between SREs and product developers when deciding how much risk to allow. As long as the service hasn't exhausted its error budget, the development team is free to launch new features and updates.

The benefits of an error budget include providing a common incentive for both product development and SRE to find the right balance between innovation and reliab

In [14]:
# Gradio Interface for PDF QA Bot

import gradio as gr
from pathlib import Path

# Globals to cache state
# All three are assigned by process_pdf; handle_question reads VECTORSTORE
# and refuses to answer while it is still None.
VECTORSTORE = None  # vector index built from the processed PDF (None until one is processed)
CHUNKS = []  # text chunks of the processed PDF
PDF_NAME = None  # stem (file name without extension) of the processed PDF's path

def process_pdf(file_obj, embedding_model: str = "text-embedding-3-small"):
    """Extracts text, creates chunks, and builds FAISS index from uploaded PDF.

    Parameters:
    - file_obj: the uploaded file; either an object whose `.name` attribute
      holds the file path, or the path itself
    - embedding_model: embedding model name passed to the index builder;
      a blank value falls back to "text-embedding-3-small"

    Returns:
    - A status message string for the UI.

    The cached globals (VECTORSTORE, CHUNKS, PDF_NAME) are replaced together,
    and only after the new index has been built, so a failure part-way
    through leaves the previously processed PDF intact.
    """
    global VECTORSTORE, CHUNKS, PDF_NAME

    if not file_obj:
        return "‚ùó No file provided."

    # Accept both an object carrying the path in `.name` and a bare path.
    file_path = getattr(file_obj, "name", file_obj)
    pdf_name = Path(file_path).stem

    # An emptied textbox arrives as "" (or None); use the default model then.
    model_name = (embedding_model or "").strip() or "text-embedding-3-small"

    text = extract_pdf_text(file_path)
    chunks = split_text_to_chunks(text)
    if not chunks:
        # Nothing to index (e.g. no extractable text); keep the old state.
        return f"‚ùó No extractable text found in: {pdf_name}"

    vectorstore = create_faiss_index(chunks, model_name=model_name)

    # Publish the new state in one step now that every stage succeeded.
    VECTORSTORE, CHUNKS, PDF_NAME = vectorstore, chunks, pdf_name

    return f"‚úÖ Processed {len(CHUNKS)} chunks from: {PDF_NAME}"

def handle_question(question: str, model: str = "gpt-4o"):
    """Handles the user query after PDF is processed.

    Parameters:
    - question: the user's question; surrounding whitespace is ignored
    - model: chat model name forwarded to get_llm_response

    Returns:
    - The LLM's answer, or a guidance message when no PDF has been processed
      yet or the question is blank.
    """
    if VECTORSTORE is None:
        return "‚ùó Please upload and process a PDF first."

    # Don't run retrieval and an LLM call for a blank question.
    cleaned_question = (question or "").strip()
    if not cleaned_question:
        return "‚ùó Please enter a question."

    relevant = retrieve_relevant_chunks(VECTORSTORE, cleaned_question, k=4)
    prompt = build_rag_prompt(cleaned_question, relevant)
    return get_llm_response(prompt, model_name=model)

# Gradio UI
# Layout, top to bottom: title, upload/processing row, status line,
# question row, answer box.
with gr.Blocks() as server:
    gr.Markdown("## üìÑ RAG-based PDF QA Bot (OpenAI + FAISS)")

    with gr.Row():
        pdf_input = gr.File(file_types=[".pdf"], label="Upload PDF")
        embedding_model = gr.Textbox(value="text-embedding-3-small", label="Embedding Model")
        process_btn = gr.Button("üìö Process PDF")

    status = gr.Textbox(interactive=False, label="Status")

    with gr.Row():
        question = gr.Textbox(label="Ask a question")
        model_choice = gr.Dropdown(
            label="LLM Model",
            value="gpt-4o",
            choices=["gpt-4o", "gpt-4", "gpt-3.5-turbo"],
        )
        ask_btn = gr.Button("üîç Get Answer")

    answer_output = gr.Textbox(lines=8, label="Answer")

    # Wire each button to its callback
    process_btn.click(fn=process_pdf, inputs=[pdf_input, embedding_model], outputs=status)
    ask_btn.click(fn=handle_question, inputs=[question, model_choice], outputs=answer_output)

# Launch the app
server.launch()


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


