# Extract Questions and Context
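Now that we have extracted questions and facts (ground_truth), we are able to form a dataset for testing. The dataset is created in the following steps: load the questions and answers, check the labels in each context, create the dataset records, and save them as JSON.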

In [27]:
import json
import logging
import pandas as pd
from utils.config import ROOT_DIR
from utils.evaluation import load_object, load_questions_and_answers

In [28]:
# Configure the root logger to emit DEBUG-and-above records, each prefixed
# with a timestamp and its level name, then grab this module's logger.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)
logger = logging.getLogger(__name__)

## Load Questions and Answers

In [29]:
# Which generated Q&A run to load: the file name is derived from the model
# and dataset identifiers.
model_name = "gpt-3.5-turbo"
dataset = "100_random"
folder = "evaluation/data/"
filename = f"llm_qna_to_context_{model_name}_{dataset}.pkl"

# Accumulator for the test records built further down in the notebook.
data = []

# NOTE(review): the .pkl suffix suggests load_object unpickles this file;
# only load files from a trusted source -- confirm against utils.evaluation.
questions = load_object(folder=folder, filename=filename)

In [30]:
# Log how many examples were loaded as a quick sanity check.
example_count = len(questions.examples)
logger.info(f"Number of examples: {example_count}")

## Check Context
The context should contain the following labels:
- release_year
- title
- plot_length

In [31]:
def check_document(context_data):
    """Report which of the expected labels occur in a context string.

    A label counts as present when the text contains the label name
    immediately followed by a colon and a space (e.g. ``"title: "``).

    Args:
        context_data: The context text to inspect.

    Returns:
        A dict with two lists, ``'labels_found'`` and ``'labels_missing'``,
        each keeping the order release_year, title, plot_length.
    """
    found, missing = [], []
    for name in ("release_year", "title", "plot_length"):
        # Route each label into exactly one of the two result lists.
        bucket = found if f"{name}: " in context_data else missing
        bucket.append(name)
    return {'labels_found': found, 'labels_missing': missing}

## Create Dataset
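If the context contains neither the 'release_year' nor the 'title' label (i.e. at most 'plot_length'), it is appended to the context of the most recently created record, because it is a chunk that belongs to another chunk.
Every record is created with the following structure: `context`, `reference_answers`, `questions`, `answers` and `number_of_qnq`.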

In [32]:
# Build the test records from the loaded examples.
#
# `data` is reset here together with `index` so that re-running this cell
# rebuilds the dataset instead of appending a second copy of every record.
index = 0
data = []
for record in questions.examples:
    query = record.query

    # Classify the query by its prefix: "Q<n>:" is a question, "A<n>:" is a
    # ground truth. A "Q1:" query additionally starts a new record or extends
    # the context of the previous one.
    is_q_o = query.startswith("Q1:")
    is_q = query.startswith(("Q1:", "Q2:", "Q3:"))
    is_gt = query.startswith(("A1:", "A2:", "A3:"))

    if not (is_q or is_gt):
        # Without this guard the target field would be '' and the append
        # below would fail with KeyError: ''.
        logger.warning(f"Skipped query with unknown prefix: {query!r}")
        continue

    if is_q_o:
        # The context is only needed when a record is created or extended.
        context = record.reference_contexts[0]
        check_labels = check_document(context)

        # Neither 'release_year' nor 'title' is present, so this chunk
        # continues the context of the previously created record.
        plot_length_only = all(
            label in check_labels['labels_missing']
            for label in ["release_year", "title"]
        )

        if plot_length_only and data:
            data[-1]['context'] += context
        else:
            if plot_length_only:
                # No earlier record exists to attach this chunk to; start a
                # record from it instead of raising IndexError on data[-1].
                logger.warning("Continuation chunk found before any record; creating a new record from it")
            data.append({
                'context': context,
                'reference_answers': [],
                'questions': [],
                'answers': [],
                'number_of_qnq': 0
            })
            logger.info(f"[{index}] Test-Record was created")
            index += 1  # index of the next record to be created

    if not data:
        # A Q2/Q3/A<n> entry arrived before any "Q1:" created a record.
        logger.warning(f"Skipped query without a preceding record: {query!r}")
        continue

    # index of the most recently created record
    current_record = len(data) - 1

    # questions and ground truths are collected in separate lists
    field = 'questions' if is_q else 'reference_answers'

    # drop the 4-character prefix (e.g. "Q1: ") from the stored text
    q = query[4:]

    data[current_record][field].append(q)
    if field == 'questions':
        data[current_record]['number_of_qnq'] += 1
    logger.info(f"[{current_record}] Test-Record - added {field[:-1]}: {q}")

In [33]:
def save_dataset(file_path, data):
    """Write ``data`` to ``file_path`` as JSON indented by four spaces.

    The payload is serialised in memory before the file is opened. If
    ``data`` contains something JSON cannot encode, the error is raised
    without truncating or partially overwriting an existing file.

    Args:
        file_path: Destination path of the JSON file.
        data: JSON-serialisable object to store.

    Raises:
        TypeError: If ``data`` contains a value that is not JSON-serialisable.
        OSError: If the file cannot be opened for writing.
    """
    payload = json.dumps(data, indent=4)
    # The output is ASCII-only (json escapes non-ASCII by default); the
    # explicit encoding just removes the dependency on the platform default.
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(payload)

In [34]:
# Destination of the JSON dataset, named after the model and dataset in use.
save_path = (
    f"{ROOT_DIR}/evaluation/data/"
    f"llm_qna_to_context_{model_name}_{dataset}.json"
)

In [35]:
# Persist the collected records as JSON.
save_dataset(file_path=save_path, data=data)

In [36]:
# Read the JSON file written above back in through the project helper.
# NOTE(review): the structure returned by load_questions_and_answers is not
# visible in this notebook -- confirm it before relying on its shape.
qa_data = load_questions_and_answers(save_path)

In [37]:
# Show the reloaded data as a table; being the cell's last expression, the
# frame is rendered by the notebook.
qa_frame = pd.DataFrame.from_dict(qa_data)
qa_frame

Now we have, for each of the 100 movies, 3 rows with context, ground_truth and a question:
- context
- ground_truth (facts for 3 questions)
- questions (3 questions)
- answers (0 answers)

We now need to split the questions and ground_truth into different rows.

In the next step, every test in "evaluation/test" will be executed and the results will be written to a SQLite database.

In [38]:
# Total number of questions across all records
total_questions = sum(entry['number_of_qnq'] for entry in data)
print(f"Number of questions: {total_questions}")

In [39]:
# Total number of records in the dataset
record_count = len(data)
print(f"Number of records: {record_count}")