In [2]:
import os
import glob
import re
import PyPDF2
import numpy as np
import pandas as pd
from typing import Any
from google.genai import types
from google import genai as gclint
import google.generativeai as genai
from sklearn.metrics.pairwise import cosine_similarity
from tenacity import retry, stop_after_attempt, wait_random_exponential
from langchain_text_splitters import RecursiveCharacterTextSplitter
import time
import asyncio
import nest_asyncio
nest_asyncio.apply()

In [3]:
client = gclint.Client(api_key=os.getenv("GOOGLE_API_KEY"))
MODEL_ID = "gemini-2.0-flash" 
text_embedding_model = "text-embedding-004"
documents = glob.glob("server_room/*")

In [4]:
generation_config = {
    "temperature": 1,
    "top_p": 0.95,
    "top_k": 40,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}

model = genai.GenerativeModel(
    model_name="gemini-2.0-flash",
    generation_config=generation_config,
)

In [5]:
async def async_generate_response(prompt):
    chat_session = model.start_chat(history=[])
    response = await asyncio.to_thread(chat_session.send_message, prompt)
    return response.text

In [6]:
@retry(wait=wait_random_exponential(multiplier=1, max=120), stop=stop_after_attempt(4))
def get_embeddings(
    embedding_client: Any, embedding_model: str, text: str, output_dim: int = 768
) -> list[float]:
    try:
        response = embedding_client.models.embed_content(
            model=embedding_model,
            contents=[text],
            config=types.EmbedContentConfig(output_dimensionality=output_dim),
        )
        return [response.embeddings[0].values]
    except Exception as e:
        if "RESOURCE_EXHAUSTED" in str(e):
            return None
        print(f"Error generating embeddings: {str(e)}")
        raise

In [7]:
def build_index(
    document_paths: list[str],
    embedding_client: Any,
    embedding_model: str,
    chunk_size: int = 1500,
    chunk_overlap: int = 200,
    separators: list[str] = [
        "\n\nḤadīth No.", "\n\n", "\n", ". ", ", ", " ", ""],
    output_dim: int = 768,
    sleep_between_embeddings: float = 1.0 # Seconds to wait (adjust as needed)
) -> pd.DataFrame:
    """
    Builds an index from PDF documents using RecursiveCharacterTextSplitter,
    INCLUDING DELAYS to manage API quotas.
    """
    all_chunks_data = []
    print(f"Using chunk_size={chunk_size}, chunk_overlap={chunk_overlap}")
    print(f"Waiting {sleep_between_embeddings} seconds between embedding calls.")

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separators=separators
    )

    total_chunks_processed = 0
    total_embeddings_failed = 0

    for doc_path in document_paths:
        print(f"Processing document: {doc_path}")
        try:
            with open(doc_path, "rb") as file:
                pdf_reader = PyPDF2.PdfReader(file)
                doc_name = os.path.basename(doc_path)

                for page_num in range(len(pdf_reader.pages)):
                    page = pdf_reader.pages[page_num]
                    try:
                         page_text = page.extract_text()
                         if not page_text:
                             continue
                    except Exception as page_err:
                        print(f"  Error extracting text from page {page_num + 1}: {page_err}")
                        continue

                    chunks = text_splitter.split_text(page_text)
                    # print(f"  Page {page_num + 1}: Found {len(chunks)} chunks.") # Less verbose

                    for chunk_num, chunk_text in enumerate(chunks):
                        total_chunks_processed += 1
                        if len(chunk_text.strip()) < 20:
                             continue
                        time.sleep(sleep_between_embeddings)

                        embeddings = get_embeddings(
                            embedding_client, embedding_model, chunk_text, output_dim=output_dim
                        )

                        if embeddings is None:
                            total_embeddings_failed += 1
                            print(
                                f"  Warning: Embedding failed for chunk {chunk_num} on page {page_num + 1} (likely quota)"
                            )
                            # Optionally add a longer sleep after a failure
                            # time.sleep(5)
                            continue # Skip this chunk

                        chunk_info = {
                            "document_name": doc_name,
                            "page_number": page_num + 1,
                            "chunk_number": chunk_num,
                            "chunk_text": chunk_text,
                            "embeddings": embeddings,
                        }
                        all_chunks_data.append(chunk_info)

                        # Optional: Print progress less frequently
                        if total_chunks_processed % 50 == 0:
                            print(f"  Processed {total_chunks_processed} chunks...")


        except Exception as e:
            print(f"Error processing document {doc_path}: {str(e)}")
            continue

    if not all_chunks_data:
        raise ValueError("No chunks were successfully created and embedded. Check API Keys, Quotas, and PDF files.")

    print(f"\nFinished processing.")
    print(f"Total chunks attempted: {total_chunks_processed}")
    print(f"Total chunks successfully embedded: {len(all_chunks_data)}")
    print(f"Total embedding failures (skipped): {total_embeddings_failed}")
    return pd.DataFrame(all_chunks_data)


In [8]:
if os.path.exists("vector_db_mini_vertex.csv"):
    print("vector_db_mini_vertex.csv already exists")
    vector_db_mini_vertex = pd.read_csv("vector_db_mini_vertex.csv")
else:
    print("Building vector_db_mini_vertex.csv...")
    vector_db_mini_vertex = build_index(
        documents,
        embedding_client=client,
        embedding_model=text_embedding_model,
        chunk_size=1500,
        chunk_overlap=200
    )

Building vector_db_mini_vertex.csv...
Using chunk_size=1500, chunk_overlap=200
Waiting 1.0 seconds between embedding calls.
Processing document: server_room/Server_rooms.pdf

Finished processing.
Total chunks attempted: 15
Total chunks successfully embedded: 15
Total embedding failures (skipped): 0


In [9]:
vector_db_mini_vertex.tail(5)

Unnamed: 0,document_name,page_number,chunk_number,chunk_text,embeddings
10,Server_rooms.pdf,6,0,"lines)\n \nto\n \nthe\n \ncentral\n \n""Server""...","[[0.10008587, 0.008953217, -0.0647227, -0.0159..."
11,Server_rooms.pdf,6,1,■\n \nOn\n \nthe\n \nServer\n \nLoad\n \nDispl...,"[[0.10569678, 0.00500749, -0.065157756, 0.0017..."
12,Server_rooms.pdf,7,0,Arrow\n \ntwice\n \nto\n \ndecrease\n \nthe\n ...,"[[0.105651915, 0.014186258, -0.033513512, 0.01..."
13,Server_rooms.pdf,7,1,4.\n \nIncrease\n \nTraffic\n \nVolume\n \n(Ma...,"[[0.07891798, -0.008526407, -0.050584637, 0.00..."
14,Server_rooms.pdf,8,0,5.\n \nFailur e\n \nOutcome\n:\n \n○\n \nThe\n...,"[[0.066506274, 0.013523649, -0.040942088, -0.0..."


In [10]:
def get_relevant_chunks(
    query: str,
    vector_db: pd.DataFrame,
    embedding_client: Any,
    embedding_model: str,
    top_k: int = 3,
) -> str:
    try:
        query_embedding = get_embeddings(embedding_client, embedding_model, query)

        if query_embedding is None:
            return "Could not process query due to quota issues"

        similarities = [
            cosine_similarity(query_embedding, chunk_emb)[0][0]
            for chunk_emb in vector_db["embeddings"]
        ]

        top_indices = np.argsort(similarities)[-top_k:]
        relevant_chunks = vector_db.iloc[top_indices]

        context = []
        for _, row in relevant_chunks.iterrows():
            context.append(
                {
                    "document_name": row["document_name"],
                    "page_number": row["page_number"],
                    "chunk_number": row["chunk_number"],
                    "chunk_text": row["chunk_text"],
                }
            )

        return "\n\n".join(
            [
                f"[Page {chunk['page_number']}, Chunk {chunk['chunk_number']}]: {chunk['chunk_text']}"
                for chunk in context
            ]
        )

    except Exception as e:
        print(f"Error getting relevant chunks: {str(e)}")
        return "Error retrieving relevant chunks"

In [11]:
@retry(wait=wait_random_exponential(multiplier=1, max=120), stop=stop_after_attempt(4))
async def generate_answer(
    query: str, context: str
) -> str:
    """
    Generates an answer to the query based on the provided context using an LLM.
    Includes specific instructions for explaining Hadith.
    """
    try:
        if context in [
            "Could not process query due to quota issues",
            "Error retrieving relevant chunks",
        ]:
            return "Could not generate answer because context retrieval failed or hit quota limits."

        prompt = f"""You are an assistant knowledgeable about Imam Nawawi's 40 Hadith.
            Your task is to answer the user's question based *only* on the provided context below, which is extracted from the book "The 40 Hadith of Al-Imam An-Nawawi".

            Instructions:
            1. Read the context carefully.
            2. If the question asks to explain a specific Hadith (e.g., "explain Hadith No. 1"), synthesize the information from the context that pertains *only* to that specific Hadith number and its explanation/commentary.
            3. Provide a clear and concise explanation based *strictly* on the given text.
            4. Do *not* add any information, commentary, opinions, or interpretations that are not present in the provided context.
            5. If the provided context does *not* contain the information needed to answer the question (e.g., the requested Hadith number is not found in the context), explicitly state that the information is not available in the provided context. Do not attempt to guess or retrieve information from external knowledge.
            6. Structure your answer clearly.

            Context:
            ---
            {context}
            ---

            Question: {query}

            Answer:"""
        response = await async_generate_response(prompt)
        return response

    except types.StopCandidateException as stop_ex:
        print(f"Generation stopped: {stop_ex}")
        return "The generation process was stopped, possibly due to safety settings or other limits."
    except Exception as e:
        if "RESOURCE_EXHAUSTED" in str(e) or (hasattr(e, 'message') and "RESOURCE_EXHAUSTED" in e.message):
            print(f"Quota Error during generation: {e}")
            return "Could not generate answer due to API quota limits."
        elif "response.text" in str(e): # Example check if response structure is bad
            print(f"API Response Error: {e}. The response object might be malformed.")
            return "Error processing the response from the language model."
        else:
            print(f"Error generating answer: {str(e)}")
            return f"An unexpected error occurred while generating the answer: {str(e)}"

In [12]:
def transform_query(query: str) -> str:
    """
    Transforms natural language numbers (first, second, etc.) in a query
    to standard "Hadith No. X" format for better retrieval matching.
    """
    query_lower = query.lower()
    
    replacements = {
        "server_room": "server room",
        "server_room_1": "server room 1",
        "server_room_2": "server room 2",
        "server_room_3": "server room 3",
        "server_room_4": "server room 4",
        "server_room_5": "server room 5",
    }
    transformed_query = query # Start with original
    for key, value in replacements.items():
        if key in query_lower:
            transformed_query = re.sub(r'(?i)' + re.escape(key), value, transformed_query, count=1)
            break

    return transformed_query

In [13]:
async def rag(
    question: str,
    vector_db: pd.DataFrame,
    embedding_client: Any,
    embedding_model: str,
    top_k: int,
) -> tuple[str | None, str | None]: # Return answer and source page/chunk info

    """
    Performs Retrieval-Augmented Generation.
    Transforms the query, retrieves relevant chunks, and generates an answer.
    Includes improved error handling for context retrieval.
    """
    generated_answer = "Error: Processing failed."
    info_source = "N/A"
    relevant_context = None # Initialize context

    try:
        print(f"Original question: {question}")
        transformed_question = transform_query(question)
        print(f"Transformed question for retrieval: {transformed_question}")

        relevant_context = get_relevant_chunks(
            transformed_question, vector_db, embedding_client, embedding_model, top_k=top_k
        )
        print(f"Retrieved context snippet: '{str(relevant_context)[:200]}...'") # Log retrieved context

        if not relevant_context or relevant_context in [
             "Could not process query due to quota issues",
             "Error retrieving relevant chunks"
        ]:
             print(f"Context retrieval failed or returned error: {relevant_context}")
             generated_answer = "Could not generate answer because context retrieval failed (possibly due to quota or no relevant chunks found in the incomplete index)."
             info_source = "Context Retrieval Failed"
             return generated_answer, info_source # Exit early
        try:
            first_line = relevant_context.split('\n\n')[0]
            if ':' in first_line and '[' in first_line and ']' in first_line:
                 info_source = first_line.split(':', 1)[0].strip('[] ') # Gets "Page X, Chunk Y"
            else:
                 info_source = "Retrieved Context (Format Unknown)"
                 print(f"Warning: Could not parse standard source format from context header: '{first_line}'")
        except Exception as context_parse_err:
            print(f"Could not parse source info from context header: {context_parse_err}")
            info_source = "Source Parsing Error"

        generated_answer = await generate_answer(
            question, relevant_context
        )
        if "quota" in generated_answer.lower():
             info_source = "Generation Failed (Quota)"

        return generated_answer, info_source

    except Exception as e:
        print(f"Error processing question '{question}': {str(e)}")
        error_context_info = f"(Context snippet before error: '{str(relevant_context)[:100]}...')" if relevant_context else "(No context retrieved before error)"
        generated_answer = f"Error processing question: {str(e)} {error_context_info}"
        return generated_answer, info_source # Return defaults or error info


In [14]:
question_set = [
    {
        "question": "I am at Server Room 1, what should I do?",
        "answer": "",
    },
    {
        "question": "I am at Server Room 2, how can is somplete this rooms task?",
        "answer": "",
    },
    {
        "question": "I am at Server Room 2, what should I do?",
        "answer": "",
    },
    {
        "question": "I am at Server Room 1, how can i succees here?",
        "answer": "",
    },
    {
        "question": "I am at Server Room 2, how can i succees here?",
        "answer": "",
    },
    {
        "question": "How many users can I afford to add before the budget gets too low for the other settings?",
        "answer": "",
    },
    {
        "question": "What happens to the server load if I increase the request frequency by one level?",
        "answer": "",
    },
    {
        "question": "How can I tell if I'm about to exceed the budget before finalizing my settings?",
        "answer": "",
    },
    {
        "question": "What does the pie chart on the Server Load Display tell me about my current setup?",
        "answer": "",
    },
    {
        "question": "If I accidentally overspend, can I undo user additions to recover budget?",
        "answer": "",
    },
    {
        "question": "How do I match the Request Frequency and Traffic Volume sliders to exactly level 9?",
        "answer": "",
    },
]

In [15]:
for question in question_set:
    print(f"Question: {question['question']}")
    answer, info_source = await rag(
        question["question"],
        vector_db_mini_vertex,
        client,
        text_embedding_model,
        top_k=3
    )
    question["answer"] = answer
    print(f"Answer: {answer}")
    print(f"Source Info: {info_source}")
    print("-" * 80)

Question: I am at Server Room 1, what should I do?
Original question: I am at Server Room 1, what should I do?
Transformed question for retrieval: I am at Server Room 1, what should I do?
Retrieved context snippet: '[Page 4, Chunk 0]: ○
 
Starting
 
Budget:
 
$25
 
○
 
Budget
 
Needed
 
for
 
Win
 
(9,9,9):
 
$23
 
○
 
Budget
 
Management:
 
Setting
 
the
 
IP
 
Distribution
 
value
 
is
 
the
 
main
 
cost.
 
Ex...'
Answer: In Server Room 1, your goal is to successfully overload the server using the three control panels within a 5-minute time limit without exceeding the $25 budget. To win, set the IP Distribution to 9 users (by clicking the Right Arrow exactly 7 times, starting from 2), the Request Frequency to 9, and the Traffic Volume to 9. Then, verify that the budget reads $2, and the Server Load Display turns solid red.

Source Info: Page 4, Chunk 0
--------------------------------------------------------------------------------
Question: I am at Server Room 2, how can is somplet