In [1]:
from prompting.datasets.wiki import WikiDataset
# Draw two random entries from the WikiDataset; they are the shared source
# contexts for every prompt variant compared later in the notebook.
dataset = WikiDataset()
samples = [dataset.random() for _ in range(2)]

[32m2024-10-10 18:51:54.686[0m | [1mINFO    [0m | [36mprompting[0m:[36m<module>[0m:[36m17[0m - [1mProject version: 2.9.1[0m


In [3]:
# Show each drawn sample on its own line for a quick sanity check.
for sample in samples:
    print(sample)

title='Inspector of the Navy' topic='Contents' subtopic='The Inspector of the Navy (German: Inspekteur der Marine) is the commander of the Navy of the modern-day German Armed Forces, the Bundeswehr. Since the various bodies responsible for the high command of the German Navy were merged in 2012, the Inspector has been based at the Navy Command at Rostock.[1] Before then, the Inspector was head of the Naval Staff of the Ministry of Defence, based in Bonn. Both the Inspector and his deputy hold the rank of vice admiral (German: Vizeadmiral).[2]\n' content='The Inspector of the Navy (German: Inspekteur der Marine) is the commander of the Navy of the modern-day German Armed Forces, the Bundeswehr. Since the various bodies responsible for the high command of the German Navy were merged in 2012, the Inspector has been based at the Navy Command at Rostock.[1] Before then, the Inspector was head of the Naval Staff of the Ministry of Defence, based in Bonn. Both the Inspector and his deputy hol

In [None]:
# Perform the analysis

import re
from typing import List
from collections import defaultdict
import matplotlib.pyplot as plt


def get_length_index(answers: List[str], correct: int) -> int:
    """Return the rank of the correct answer when the answers are ordered by length.

    Rank 0 means the correct answer is the shortest option, rank 3 the longest.
    Options of equal length keep their original relative order (the sort is
    stable).

    Args:
        answers: The four answer options (any indexable sequence of strings).
        correct: Position of the correct option within ``answers``. Negative
            positions are interpreted as in normal Python indexing.

    Returns:
        The position of the correct option after sorting by string length.

    Raises:
        AssertionError: If ``answers`` does not contain exactly four options.
        IndexError: If ``correct`` is outside the range of ``answers``.
    """
    assert len(answers) == 4
    # Normalise negative positions and reject out-of-range ones, mirroring
    # what plain ``answers[correct]`` indexing would do.
    correct_pos = range(len(answers))[correct]
    # Rank positions, not strings: looking the answer text up by value would
    # return the first match and mis-rank an option whose text is duplicated.
    order = sorted(range(len(answers)), key=lambda pos: len(answers[pos]))
    return order.index(correct_pos)


def plot_results(prompt_name, questions, answers):
    """Plot how often the correct answer lands at each length rank.

    For every question the four answer options are extracted, ranked by string
    length (0 = shortest, 3 = longest), and the rank of the correct option is
    tallied. The tallies are drawn as a bar chart titled ``prompt_name``.

    Args:
        prompt_name: Label used as the plot title.
        questions: Query strings. Each is expected to contain an
            "[Input Question]" header followed by the question text and the
            options "A. " through "D. "; queries that do not match are
            reported and skipped.
        answers: Correct answer letters ("A"-"D"), aligned with ``questions``.

    Raises:
        KeyError: If an entry of ``answers`` is not one of "A", "B", "C", "D".
    """
    answer_map = {
        "A": 0,
        "B": 1,
        "C": 2,
        "D": 3,
    }

    # Convert the answer letters into option indices.
    correct_answers = [answer_map[a] for a in answers]

    # Raw strings: "\[" and "\s" are regex escapes, not Python string escapes.
    question_pattern = re.compile(r"\[Input Question\]\s+(.*)\s+A\. (.*)\s+B\. (.*)\s+C\. (.*)\s+D\. (.*)")
    trim_pattern = re.compile(r"\[Input Question\]\s+([\s\S]*)Answer:.*")

    length_counts = defaultdict(list)

    for question, correct in zip(questions, correct_answers):
        match = question_pattern.search(question)
        if not match:
            print(f"WTF: {question}")
            continue

        # Keep the question/options text without the trailing "Answer:" part;
        # fall back to the full query when that marker is absent instead of
        # failing on a None match.
        trimmed = trim_pattern.search(question)
        trimmed_qa = trimmed.group(1) if trimmed else question

        # Group 0 is the question text; the remaining four are options A-D.
        four_answers = match.groups()[1:]
        length_counts[get_length_index(four_answers, correct)].append((trimmed_qa, correct))

    if not length_counts:
        # Nothing parsed (or nothing was passed in): there is no data to draw.
        print(f"No parsable questions for '{prompt_name}'; nothing to plot.")
        return

    # Always show every rank, including those with zero hits, so that plots
    # for different prompts share the same x-axis.
    x = list(range(len(answer_map)))
    y = [len(length_counts.get(rank, ())) for rank in x]

    fig, ax = plt.subplots()
    ax.bar(x, y)
    ax.set_xticks(x)
    ax.set_xlabel("Index of correct ans by answer length")
    ax.set_ylabel("Frequency")
    ax.set_title(prompt_name)
    plt.show()

In [None]:
# Install a validator-mode Settings object on the settings module before the
# multi-choice task module is imported below.
# NOTE(review): presumably the task module reads `settings.settings` at import
# time, which would make this ordering load-bearing — confirm.
from prompting import settings
import prompting
settings.settings = settings.Settings(mode="validator")
# Rebind the local name `settings` from the module to the Settings instance.
settings = settings.settings
from prompting.tasks.multi_choice import MultiChoiceTask

def create_multi_choice_question(sample, system_prompt, user_prompt):
    """Build one multiple-choice question from ``sample`` using the given prompts.

    A fresh ``MultiChoiceTask`` is created and its ``make_query`` is called
    with the sample and both prompts; the return value of ``make_query`` is
    ignored and the result is read back from the task's attributes.

    Args:
        sample: Source context passed through to ``make_query``.
        system_prompt: System prompt passed through to ``make_query``.
        user_prompt: User prompt passed through to ``make_query``.

    Returns:
        A ``(query, reference)`` tuple taken from the task after the call.
    """
    mc_task = MultiChoiceTask()
    mc_task.make_query(sample, system_prompt, user_prompt)
    query, reference = mc_task.query, mc_task.reference
    return (query, reference)

# User-prompt template that adds an explicit "similar length" instruction.
# It carries the placeholders {source}, {title} and {context}.
# NOTE(review): presumably filled in by MultiChoiceTask.make_query — confirm.
USER_PROMPT_WITH_LENGTH_PROMPT = """\
Create a multiple choice quiz based on the following context source from {source} about {title}. All answers must be of similar length.

[Input Context]
{context}
"""

# System prompt that asks for answers of similar length and gives two worked
# examples (one with short answers, one with long answers).
# NOTE(review): this constant is not referenced by the `prompts` table in this
# notebook; the "length_instructions" variant uses the default system prompt.
# Confirm whether that is intended.
SYSTEM_PROMPT_WITH_LENGTH_PROMPT = """You are a multiple choice quiz-generating expert.
Based on the input context, you must generate the question, exactly 4 possible answers (A, B, C, D), and the correct answer letter.
Ensure that all answers are of similar length.

[Example 1]
{
    "question": "What is the capital of Texas?",
    "A": "Paris",
    "B": "London",
    "C": "Austin",
    "D": "Houston",
    "answer": "C"
}

[Example 2]
{
    "question": "Which of the following best describes the primary driving force behind protein folding?",
    "A": "Covalent bond formation between amino acids",
    "B": "Hydrophobic interactions between nonpolar side chains",
    "C": "Hydrogen bonds between the protein backbone and side chains",
    "D": "Ionic interactions between charged side chains",
    "answer": "B"
}
"""

# System prompt with the same instructions as the two-example variant above
# but four worked examples, mixing short and long answer options.
SYSTEM_PROMPT_MORE_EXAMPLES = """You are a multiple choice quiz-generating expert.
Based on the input context, you must generate the question, exactly 4 possible answers (A, B, C, D), and the correct answer letter.
Ensure that all answers are of similar length.

[Example 1]
{
    "question": "Which of the following is not an element of the redistribution-with-growth policy approach?",
    "A": "minimum wage legislation",
    "B": "land reform",
    "C": "progressive taxation",
    "D": "increased access to education",
    "answer": "A"
}

[Example 2]
{
    "question": "Which of the following best describes the primary driving force behind protein folding?",
    "A": "Covalent bond formation between amino acids",
    "B": "Hydrophobic interactions between nonpolar side chains",
    "C": "Hydrogen bonds between the protein backbone and side chains",
    "D": "Ionic interactions between charged side chains",
    "answer": "B"
}

[Example 3]
{
    "question": "What is the capital of Texas?",
    "A": "Paris",
    "B": "London",
    "C": "Austin",
    "D": "Houston",
    "answer": "C"
}

[Example 4]
{
    "question": "What interior discipline must be adopted to achieve spiritual liberation within Sikhism?",
    "A": "Remembering the Divine Name",
    "B": "Meditating on the sacred hymns",
    "C": "Remembering that death is inevitable",
    "D": "Meditating on the goodness of the created world",
    "answer": "A"
}
"""

# Prompt variants under comparison. Each key is the label later used as the
# plot title; each value is a (system_prompt, user_prompt) pair.
_default_system_prompt = prompting.tasks.multi_choice.QUERY_SYSTEM_PROMPT
_default_user_prompt = prompting.tasks.multi_choice.MultiChoiceTask.QUERY_USER_PROMPT

prompts = {
    "default": (_default_system_prompt, _default_user_prompt),
    "length_instructions": (_default_system_prompt, USER_PROMPT_WITH_LENGTH_PROMPT),
    "more_examples": (SYSTEM_PROMPT_MORE_EXAMPLES, _default_user_prompt),
    "more_examples_and_length": (SYSTEM_PROMPT_MORE_EXAMPLES, USER_PROMPT_WITH_LENGTH_PROMPT),
}

# For every prompt variant, generate one quiz question per sample and plot
# where the correct answer ranks by length among the four options.
for name, (system_prompt, user_prompt) in prompts.items():
    questions = []
    answers = []
    for sample in samples:
        try:
            question, answer = create_multi_choice_question(sample, system_prompt, user_prompt)
        except Exception as e:
            # Best-effort: one failed generation should not abort the whole
            # comparison, but report the cause so failures can be diagnosed.
            print(f"Failed to create sample for {name}: {e!r}")
            continue
        questions.append(question)
        answers.append(answer)

    if not questions:
        # Every sample failed for this variant; there is nothing to plot.
        print(f"No questions generated for {name}; skipping plot.")
        continue

    plot_results(name, questions, answers)
