In [None]:
import asyncio
from os import getenv
from typing import Optional

import google.generativeai as genai
import pandas as pd
from dotenv import load_dotenv
from google.api_core import exceptions, retry
from tqdm import tqdm
from yarl import URL

from src.scraper import Indexer, ScrapConfig, scrap

In [2]:
# Crawl settings: `root` is the page the scrape starts from and `host` is the
# domain name passed alongside it.
# NOTE(review): presumably `host` restricts the crawl to that domain — confirm in src.scraper.
config = ScrapConfig(
    host="eduwiki.innopolis.university",
    root=URL("https://eduwiki.innopolis.university/index.php/Main_Page"),
)

In [None]:
# Top-level `await` relies on the notebook kernel's already-running event loop
# (IPython autoawait); this line is not valid in a plain Python script.
# NOTE(review): `scrap` presumably crawls the site described by `config` and
# returns the search index used by `generate_answer` — confirm in src.scraper.
indexer = await scrap(config)

In [4]:
# Load variables from a .env file (if present) before reading the key.
load_dotenv()

# Fail fast: a missing or empty key would otherwise only surface on the first API call.
GOOGLE_API_KEY = getenv("GOOGLE_API_KEY")
if GOOGLE_API_KEY is None or GOOGLE_API_KEY == "":
    raise ValueError("GOOGLE_API_KEY is not set")

In [5]:
# Register the API key with the google.generativeai client library.
genai.configure(api_key=GOOGLE_API_KEY)
# Single shared model handle; both generate_answer and judge_answer call it.
gemini_model = genai.GenerativeModel("gemini-1.5-flash")

In [27]:
@retry.Retry(predicate=retry.if_exception_type(exceptions.ResourceExhausted))
def generate_answer(query: str, indexer: Indexer, top_k: int = 5) -> str:
    """Answer ``query`` with Gemini, grounded on pages retrieved from ``indexer``.

    The call is retried whenever the API raises ``ResourceExhausted``.

    Args:
        query: The user's question; also used as the search string.
        indexer: Search index; ``indexer.search(query, top_k)`` must return a
            sequence of pages where ``page[0]`` is rendered as the source and
            ``page[1]`` as the page text.
        top_k: Maximum number of pages requested from the index (default 5,
            the value that was previously hard-coded).

    Returns:
        The model's answer text, or a fixed apology string when the index
        returns no pages (in which case the model is not called at all).

    NOTE(review): ``response.text`` can raise ``ValueError`` when the model
    reply contains no text part (e.g. a blocked response); that error is not
    handled here and propagates to the caller.
    """
    pages = indexer.search(query, top_k)

    if not pages:
        return "I'm sorry, I couldn't find any relevant information."

    prompt_header = """You are an Eduwiki intellectual assistant at Innopolis University. Your task is to analyze user queries based on the context provided. The context includes fragments of pages and their sources (URLs). You have to:

1.	Read the highlighted context between CONTEXT START and CONTEXT END.
2.	Find relevant information to answer the user's query using only the pages provided.
3.	Provide a concise and clear answer to the query.
4.	If no relevant information is found in the context, respond: “I'm sorry, I couldn't find any relevant information.”
5.	Cite sources using the format "Source(s): "

CONTEXT START"""

    # One "Source: <source>" + text section per retrieved page, in retrieval order.
    context = "".join(f"\n\nSource: {page[0]}\n\n{page[1]}" for page in pages)

    prompt = f"{prompt_header}{context}\n\nCONTEXT END\n\nQuestion: {query}"

    response = gemini_model.generate_content(prompt)

    return response.text



@retry.Retry(predicate=retry.if_exception_type(exceptions.ResourceExhausted))
def judge_answer(
    question: str, answer: str, source: str, generated: str
) -> Optional[bool]:
    prompt = f"""You are a judge model tasked with evaluating whether the generated answer is correct based on the provided data.

Instructions:
	1.	Review the following data:
	•	Question: "{question}"
	•	Original answer: "{answer}"
	•	Original source: "{source}"
	•	Generated answer: "{generated}"
	2.	Evaluate whether the generated answer:
	•	Matches the accuracy and completeness of the original answer.
	•	Correctly cites the source.

Please provide your evaluation as a boolean value (True/False)."""

    response = gemini_model.generate_content(prompt)

    match response.text.strip():
        case "True":
            return True
        case "False":
            return False
        case _:
            print(question, answer, source, generated, response.text)
            return None

In [None]:
# Benchmark dataset. Columns are read by position further down: the first three
# are taken as (question, reference answer, reference source).
df = pd.read_csv("qa.csv")

# Placeholder column; each row's verdict is written as soon as it is judged.
df["Relevance"] = None

# Plain (index, col0, col1, col2) tuples for every row of the frame.
qa_rows = df.iloc[:, :3].itertuples(index=True, name=None)

for idx, question, answer, source in tqdm(
    qa_rows, total=len(df), desc="Benchmarking"
):
    # Answer with the system under test, then let the judge model grade it.
    generated = generate_answer(question, indexer)
    df.at[idx, "Relevance"] = judge_answer(question, answer, source, generated)

# Persist the graded dataset next to the input file.
df.to_csv("qa_result.csv", index=False)