# Structured Q&A

Source code: https://github.com/mozilla-ai/structured-qa

Docs: https://mozilla-ai.github.io/structured-qa

## Installing dependencies

In [None]:
%pip install --quiet structured-qa

In [None]:
!wget https://raw.githubusercontent.com/mozilla-ai/structured-qa/refs/heads/main/benchmark/structured_qa.csv

# Setup

In [3]:
import os
import google.generativeai as genai
from google.colab.userdata import get, SecretNotFoundError

try:
    genai.configure(api_key=get("GOOGLE_API_KEY"))
except SecretNotFoundError as e:
    raise RuntimeError("Please set the GOOGLE_API_KEY secret to your API key") from e
os.environ["LOGURU_LEVEL"] = "INFO"

In [4]:
from loguru import logger

## Function to Process a single Document

In [5]:
from structured_qa.preprocessing import document_to_sections_dir
from structured_qa.workflow import find_retrieve_answer


FIND_PROMPT = """
You are given two pieces of information:
1. A list of valid section names.
2. A user question.

Your task is to:
- Identify exactly one `section_name` from the provided list that seems related to the user question.
- Return the `section_name` exactly as it appears in the list.
- Do NOT answer the question.
- Do NOT return any additional text, explanation, or formatting.
- Do NOT combine multiple section names into a single response.

Here is the list of valid section names:

```
{SECTIONS}
```

Now, based on the following question, return the single most relevant `section_name` from the list.
"""

ANSWER_WITH_TYPE_PROMPT = """
You are a rigorous assistant answering questions.
You must only answer based on the current information available which is:

```
{CURRENT_INFO}
```

If the current information available not enough to answer the question,
you must return "I need more info" srting and nothing else:

If the current information is enough to answer, you must return one of the following formats:
- YES/NO (for boolean questions)
- Number (for numeric questions)
- Single letter (for multiple-choice questions)
"""


def process_document(document_file, document_data, model):
    sections_dir = Path("sections") / Path(document_file).stem
    if not sections_dir.exists():
        logger.info("Splitting document into sections")
        document_to_sections_dir(document_file, sections_dir)

    logger.info("Predicting")
    answers = {}
    sections = {}
    for index, row in document_data.iterrows():
        question = row["question"]
        logger.info(f"Question: {question}")
        answer, sections_checked = find_retrieve_answer(
            question, model, sections_dir, FIND_PROMPT, ANSWER_WITH_TYPE_PROMPT
        )
        logger.info(f"Answer: {answer}")
        answers[index] = answer
        sections[index] = sections_checked[-1] if sections_checked else None

    return answers, sections

## Load Model

In [6]:
from structured_qa.model_loaders import load_gemini_model

In [7]:
model = load_gemini_model("gemini-2.0-flash-exp", system_prompt=None)

# Run Benchmark

In [None]:
from pathlib import Path
from urllib.request import urlretrieve

import pandas as pd

logger.info("Loading input data")
data = pd.read_csv("structured_qa.csv")
data["pred_answer"] = [None] * len(data)
data["pred_section"] = [None] * len(data)

for document_link, document_data in data.groupby("document"):
    logger.info(f"Downloading document {document_link}")
    downloaded_document = Path(f"{Path(document_link).name}.pdf")
    if not Path(downloaded_document).exists():
        urlretrieve(document_link, downloaded_document)
        logger.info(f"Downloaded {document_link} to {downloaded_document}")
    else:
        logger.info(f"File {downloaded_document} already exists")

    answers, sections = process_document(downloaded_document, document_data, model)

    for index in document_data.index:
        data.loc[index, "pred_answer"] = str(answers[index]).upper()
        data.loc[index, "pred_section"] = sections[index]

data.to_csv("results.csv")

In [9]:
results = pd.read_csv("results.csv")
for index, result in results.iterrows():
    results.loc[index, "pred_answer"] = result["pred_answer"].strip()
    if result["pred_answer"].startswith(
        (f"-{result['answer']}", f"{result['answer']}")
    ):
        results.loc[index, "pred_answer"] = result["answer"]
results.loc[results["answer"] != results["pred_answer"]]

Unnamed: 0.1,Unnamed: 0,document,section,question,answer,pred_answer,pred_section
12,12,https://arxiv.org/pdf/2210.05189,3 Experimental Results,How many layers are in the toy model (y = x^2)?,3,NOT FOUND,Caglar Aytekin AI Lead AAC Technologies
28,28,https://arxiv.org/pdf/2201.11903,3.1 Experimental Setup,How many large language models were evaluated?,5,3,Abstract
32,32,https://arxiv.org/pdf/2201.11903,5 Symbolic Reasoning,Which symbolic reasoning task is used as an ou...,A,YES,5 Symbolic Reasoning
33,33,https://arxiv.org/pdf/2201.11903,3.4 Robustness of Chain of Thought,How many annotators provided independent chain...,3,2,H Appendix: Alternate Annotators for MWP
34,34,https://arxiv.org/pdf/2201.11903,3.2 Results,How many random samples were examined to under...,100,NOT FOUND,Chain-of-Thought Prompting Elicits Reasoning i...
45,45,https://github.com/mozilla-ai/structured-qa/re...,CARD AND TILE EFFECTS,Which type of cards provide coins? -A: Gray -B...,B,NOT FOUND,4
48,48,https://github.com/mozilla-ai/structured-qa/re...,END OF THE GAME,Can the game end in a tie?,YES,NO,OVERVIEW AND GOAL
50,50,https://github.com/mozilla-ai/structured-qa/re...,LOOKOUT PHASE,What is the maximum number of cards a player m...,4,NOT FOUND,1 3 3 5 7 8
51,51,https://github.com/mozilla-ai/structured-qa/re...,LOOKOUT PHASE,Is there a limit to the number of cards a play...,NO,YES,CLEANUP PHASE players with extra Goods if enou...
54,54,https://github.com/mozilla-ai/structured-qa/re...,EXPEDITION PHASE,Can players conquer and pillage the same islan...,NO,YES,EXPEDITION PHASE


In [10]:
accuracy = sum(results["answer"] == results["pred_answer"]) / len(results)
accuracy

0.8640776699029126