# Structured Q&A

Source code: https://github.com/mozilla-ai/structured-qa

Docs: https://mozilla-ai.github.io/structured-qa

## Installing dependencies

In [None]:
# Install structured-qa straight from the `5-add-benchmark` branch on GitHub
# (a branch pin, not a tagged release).
%pip install git+https://github.com/mozilla-ai/structured-qa.git@5-add-benchmark

In [None]:
# Download the benchmark CSV (`structured_qa.csv`) from the `5-add-benchmark`
# branch into the current working directory.
!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/5-add-benchmark/benchmark/structured_qa.csv

# Run Benchmark

In [None]:
import os
import google.generativeai as genai

# Set the log level through the environment before loguru is imported in a
# later cell (presumably loguru reads LOGURU_LEVEL at import time — TODO confirm).
os.environ["LOGURU_LEVEL"] = "INFO"
# Prefer a key exported as the GEMINI_API_KEY environment variable so the
# secret never has to be pasted into the notebook. When the variable is unset,
# fall back to the literal placeholder, which must then be replaced by hand.
genai.configure(api_key=os.environ.get("GEMINI_API_KEY", "GEMINI_API_KEY"))

In [None]:
from structured_qa.model_loaders import load_gemini_model

## Function to Process a Single Document

In [None]:
import json
import time

from loguru import logger


def process_document(
    document_file,
    document_data,
    model,
    *,
    questions_per_batch=9,
    cooldown_seconds=60,
):
    """Upload a PDF to Gemini and ask it every question in ``document_data``.

    Args:
        document_file: Path of the PDF to upload with ``genai.upload_file``.
        document_data: Object whose ``iterrows()`` yields ``(index, row)``
            pairs; every row must provide a ``"question"`` entry.
        model: Object exposing ``get_response(messages)`` that returns a JSON
            string containing the keys ``"answer"`` and ``"section"``.
        questions_per_batch: Number of questions sent between two cooldown
            pauses. Must be a positive integer.
        cooldown_seconds: Length of each cooldown pause, in seconds.

    Returns:
        A tuple ``(answers, sections)`` of dicts, both keyed by the row index
        taken from ``document_data``.

    Raises:
        ValueError: If the uploaded file ends up in the ``FAILED`` state.
        json.JSONDecodeError: If a model response is not valid JSON.
        KeyError: If a response lacks the ``"answer"`` or ``"section"`` key.
    """
    logger.info("Uploading file")
    file = genai.upload_file(document_file, mime_type="application/pdf")
    # The upload is processed asynchronously; poll until it leaves PROCESSING.
    while file.state.name == "PROCESSING":
        logger.debug("Waiting for file to be processed.")
        time.sleep(2)
        file = genai.get_file(file.name)
    # Fail fast instead of sending questions about a file the service rejected.
    if file.state.name == "FAILED":
        raise ValueError(f"Processing of uploaded file {document_file} failed")

    logger.info("Predicting")
    answers = {}
    sections = {}
    for n, (index, row) in enumerate(document_data.iterrows()):
        # Pause after every full batch of questions (presumably to stay under
        # the API rate limit); never before the very first question.
        if n > 0 and n % questions_per_batch == 0:
            logger.info(f"Waiting for {cooldown_seconds} seconds")
            time.sleep(cooldown_seconds)
        question = row["question"]
        logger.debug(f"Question: {question}")
        # Single user turn: the uploaded file followed by the question text.
        messages = [
            {
                "role": "user",
                "parts": [
                    file,
                    question,
                ],
            }
        ]
        response = model.get_response(messages)
        logger.debug(response)
        response_json = json.loads(response)
        answers[index] = response_json["answer"]
        sections[index] = response_json["section"]
    return answers, sections

## Function to Download Document

In [None]:
from pathlib import Path
from urllib.request import urlretrieve


def download_document(url, output_file):
    """Download ``url`` to ``output_file`` unless that file already exists.

    The payload is first written to a sibling ``<name>.part`` file and only
    renamed to ``output_file`` once the transfer has finished. An interrupted
    download therefore never leaves a truncated file behind that a later call
    would mistake for a complete one.

    Args:
        url: Address of the document to fetch.
        output_file: Destination path (``str`` or ``Path``).

    Raises:
        Whatever ``urlretrieve`` raises when the transfer fails; the partial
        file is removed before the exception propagates.
    """
    destination = Path(output_file)
    if destination.exists():
        logger.debug(f"File {output_file} already exists")
        return

    partial = destination.with_name(destination.name + ".part")
    try:
        urlretrieve(url, partial)
        partial.replace(destination)
    finally:
        # Only present here if the download or the rename did not complete.
        if partial.exists():
            partial.unlink()
    logger.debug(f"Downloaded {url} to {output_file}")

## Load Model

In [None]:
# System prompt passed to the model loader below. It tells the model to answer
# only from the supplied document and to reply with a JSON object holding two
# keys, "section" and "answer", and gives one worked example per answer type
# (Yes/No, single number, single letter).
# NOTE(review): "a JSON name" and "return the answer one of the following JSON"
# look like typos for "a JSON object" / "as one of the following"; left
# untouched here because changing the prompt text could change model output.
FULL_CONTEXT_PROMPT = """
You are given an input document and a question.
You can only answer the question based on the information in the document.
You will return a JSON name with two keys: "section" and "answer".
In `"section"`, you will return the name of the section where you found the answer.
In `"answer"`, you will return the answer one of the following JSON:
- Yes/No (for boolean questions)
Is the model an LLM?
{
  "section": "1. Introduction",
  "answer": "No"
}
- Single number (for numeric questions)
How many layers does the model have?
{
  "section": "2. Architecture",
  "answer": 12
}
- Single letter (for multiple-choice questions)
What is the activation function used in the model?
-A: ReLU
-B: Sigmoid
-C: Tanh
{
  "section": "2. Architecture",
  "answer": "C"
}
"""

In [None]:
# Build the Gemini model wrapper used by the benchmark loop. The system prompt
# describes the expected JSON answer format, and the generation config asks
# for an "application/json" response MIME type so the reply can be handed to
# json.loads.
model = load_gemini_model(
    "gemini-2.0-flash-exp",
    system_prompt=FULL_CONTEXT_PROMPT,
    generation_config={
        "response_mime_type": "application/json",
    },
)

# Run Benchmark

In [None]:
import pandas as pd


# Read the benchmark questions and add empty columns for the predictions.
logger.info("Loading input data")
data = pd.read_csv("structured_qa.csv")
for column in ("pred_answer", "pred_section"):
    data[column] = [None] * len(data)

# Work through the benchmark one source document at a time: fetch the PDF
# once, then ask all of the questions that belong to it.
for document_link, document_data in data.groupby("document"):
    logger.info(f"Downloading document {document_link}")
    # Local file name: last path component of the link plus a ".pdf" suffix.
    downloaded_document = Path(Path(document_link).name + ".pdf")
    download_document(document_link, downloaded_document)

    answers, sections = process_document(downloaded_document, document_data, model)

    # Copy the predictions back into the full table. Answers are stringified
    # and upper-cased before being stored.
    for index, answer in answers.items():
        data.loc[index, "pred_answer"] = str(answer).upper()
        data.loc[index, "pred_section"] = sections[index]

data.to_csv("results.csv")

# Results

In [None]:
# Reload the saved predictions and display only the rows where the predicted
# answer differs from the expected one.
results = pd.read_csv("results.csv")
results[results["answer"] != results["pred_answer"]]

In [None]:
# Accuracy = fraction of rows whose predicted answer equals the expected one.
# Taking the mean of the boolean match mask is a single vectorized pandas
# operation (the built-in sum() walked the Series element by element), and an
# empty results table yields NaN instead of raising ZeroDivisionError.
accuracy = (results["answer"] == results["pred_answer"]).mean()
accuracy