For each QCM question, we have ONE response and need to generate 3 other valid (but false) answers from the same category, which we will call distractors

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path
import os
import sys

In [3]:
project_root = Path.cwd().parent
sys.path.insert(0, str(project_root / "src"))

from qcmgen.qcm import generate_payloads, _normalize_answer, build_choices_with_arasaac, build_choices, QuestionType
from qcmgen.nlp import extract_facts

## Strategy with no LLM call

When we're not using a LLM to generate the answers, we extract the relevant information from the *generate_payloads* function. 

For each generated payload, we can generate a question, get the correct answer, and create the choices (with the 3 other distractors) via the *build_choices_with_arasaac* function.

In [4]:
facts = extract_facts("Le chat noir de Victor dort sur le canapé.")

for fact in facts:
    payloads = generate_payloads(fact)
    for payload in payloads:
        # generate question text
        question = payload.template.format(**payload.template_vars) #remplir les variables du template (ex : "Que {verb} {subj} ?".format(verb="voir", subj="Martin") => "Que voir Martin ?") 
        correct = _normalize_answer(payload.correct)
        if payload.qtype in (QuestionType.OBJECT, QuestionType.ADJ_NOUN):
            choices, answer_index = build_choices_with_arasaac(correct, k=3)
        else:
            choices, answer_index = build_choices(correct, payload.pool_name, k=3) 


        print("Question:", question)
        print("Correct answer:", correct)
        print("Choices:", choices[0])

        break 

Question: Quel animal est noir ?
Correct answer: chat
Choices: chaton


However, this logic is not robust and requires a lot of hardcoded rules, so it works well in a controled environment where questions all share the same structure, but doesn't extend well to more general type questions

In [5]:
facts = extract_facts("Jeremie et son frère vont au supermarché acheter des pommes")

for fact in facts:
    payloads = generate_payloads(fact)

    if len(payloads) == 0:
        print("No payloads could be generated for this fact")
        continue

    for payload in payloads:
        # generate question text
        question = payload.template.format(**payload.template_vars) #remplir les variables du template (ex : "Que {verb} {subj} ?".format(verb="voir", subj="Martin") => "Que voir Martin ?") 
        correct = _normalize_answer(payload.correct)
        if payload.qtype in (QuestionType.OBJECT, QuestionType.ADJ_NOUN):
            choices, answer_index = build_choices_with_arasaac(correct, k=3)
        else:
            choices, answer_index = build_choices(correct, payload.pool_name, k=3)        

        print("Question:", question)
        print("Correct answer:", correct)
        print("Choices:", choices[0])

        break

No payloads could be generated for this fact


## Let's make a more robust version

#### We start by setting a list of categories

For each of these categories, we'll call the arasaac api to generate a list of distractors which we will put in the cache, to avoid costly api calls later

In [6]:
CATEGORIES = {
    "animal": [
        "chat", "chien", "cheval", "vache", "mouton", "cochon",
        "lapin", "souris", "oiseau", "poisson", "lion", "éléphant"
    ],
    "food": [
        "pomme", "banane", "poire", "pain", "fromage", "gâteau",
        "riz", "pâtes", "soupe", "chocolat", "yaourt"
    ],
    "drink": [
        "eau", "lait", "jus", "café", "thé", "chocolat chaud"
    ],
    "furniture": [
        "table", "chaise", "lit", "canapé", "armoire", "bureau", "étagère"
    ],
    "clothing": [
        "pantalon", "tee-shirt", "robe", "pull", "manteau",
        "chaussures", "chaussettes", "chapeau"
    ],
    "profession": [
        "médecin", "infirmier", "enseignant", "policier",
        "pompier", "boulanger", "cuisinier", "chauffeur"
    ],
    "sport": [
        "football", "basket", "tennis", "natation",
        "vélo", "course", "judo", "gymnastique"
    ],
    "musical_instrument": [
        "piano", "guitare", "violon", "tambour", "trompette", "flûte"
    ],
    "color": [
        "rouge", "bleu", "vert", "jaune", "rose",
        "noir", "blanc", "gris", "orange"
    ],
    "shape": [
        "rond", "carré", "triangle", "rectangle", "étoile", "cœur"
    ],
    "weather": [
        "soleil", "pluie", "neige", "vent", "nuage", "orage"
    ],
    "emotion": [
        "content", "triste", "en colère", "peur", "surpris", "fatigué"
    ],
    "body_part": [
        "tête", "main", "pied", "bras", "jambe", "œil", "bouche", "nez"
    ],
    "vehicle": [
        "voiture", "vélo", "bus", "camion", "train", "moto", "avion"
    ],
    "place": [
        "maison", "école", "parc", "hôpital", "magasin", "piscine", "rue"
    ],
}


In [15]:
from qcmgen.pictos.resolve import resolve_term_to_picto_strict
from tqdm import tqdm
import json

arasaac_cache_path = project_root / "data" / "arasaac_cache_fr.json"

with open(arasaac_cache_path) as f:
    cached_pictos = json.load(f)

print('Caching pictograms for distractor categories...')
for category in tqdm(CATEGORIES):
    for word in CATEGORIES[category]:
        # check if picto is in cache, if not fetch from arasaac and store in cache
        if not word in cached_pictos:
            resolved_picto = resolve_term_to_picto_strict(word, expected_type=category)


Caching pictograms for distractor categories...


100%|██████████| 15/15 [00:03<00:00,  3.93it/s]


 You can directly run *python scripts/build_cache_from_categories.py* from the root of the repo to achieve the same result 

In [22]:
from dotenv import load_dotenv
from pathlib import Path
import os
import sys

load_dotenv(".env")  # loads from .env in OpenAI directory
api_key = os.getenv("OPENAI_API_KEY")

from openai import OpenAI
client = OpenAI(api_key=api_key)

In [None]:
llm_prompt = open("llm_prompt.txt").read().strip()

sentences = [
    "Papa a bu son café avec du sucre.",
    "Le chat noir dort avec les chatons."
]

resp = client.responses.create(
    model="gpt-4o-mini",
    instructions=llm_prompt,
    input="\n".join(f"- {s}" for s in sentences),
    #response_format={"type": "json"}
)

TypeError: Responses.create() got an unexpected keyword argument 'response_format'

In [41]:
question

{'question': "Qu'est-ce que papa a bu ?",
 'answer': 'Café',
 'category': 'drink'}

In [46]:
raw = resp.output_text

clean = raw.strip()
start = raw.find("[")
end = raw.rfind("]") + 1

clean = raw[start:end]

output_json = json.loads(clean)

from random import sample

for item in output_json:
    for question in item["questions"]:
        print("Question:", question["question"])
        print("Correct answer:", question["answer"])
        print("Question category:", question["category"])
        # sample 3 random distractors from category
        distractor_candidates = [elt for elt in CATEGORIES[question["category"]] if elt != question["answer"]]
        distractors = sample(distractor_candidates, k = 3)
        print("Distractors:", distractors)
        print()

Question: Qu'est-ce que papa a bu ?
Correct answer: Café
Question category: drink
Distractors: ['café', 'jus', 'thé']

Question: Qu'est-ce qu'il y a dans le café ?
Correct answer: Sucre
Question category: food
Distractors: ['soupe', 'riz', 'pomme']

Question: De quelle couleur est le chat ?
Correct answer: Noir
Question category: color
Distractors: ['rouge', 'gris', 'vert']

Question: Avec qui le chat dort-il ?
Correct answer: Les chatons
Question category: animal
Distractors: ['oiseau', 'lion', 'poisson']

