# Generate Questions
For the later evaluation steps we need questions about the given context (a movie plot). For each movie we also generate facts (ground-truth answers), so that every model evaluated later can be measured against them.

In [1]:
import logging
import nest_asyncio
from llama_index.core import PromptTemplate
from llama_index.llms.openai import OpenAI
from llama_index.core.llama_dataset.generator import RagDatasetGenerator
from utils.config import ROOT_DIR, DEBUG, OPENAI_API_KEY
from utils.evaluation import load_documents, load_object, store_object

In [2]:
# Configure the root logger once for the whole notebook: every record is
# emitted at DEBUG level and prefixed with a timestamp and its severity.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)
# Notebook-scoped logger, named after the current module.
logger = logging.getLogger(__name__)

In [3]:
# Patch asyncio so that event loops can be nested. The notebook kernel already
# runs its own loop; presumably the question generator below starts another one
# internally, which would otherwise fail — TODO confirm.
nest_asyncio.apply()

In [4]:
# Identifier of the movie-plot subset; it is interpolated into both the input
# JSON path and the name of the output file.
dataset = "100_random"

In [5]:
# Load the movie plots of the selected subset from the project's data folder.
documents = load_documents(f"{ROOT_DIR}/data/wiki_movie_plots_{dataset}.json")

In [6]:
# OpenAI model used to generate the questions; also becomes part of the
# output filename.
model_name = "gpt-3.5-turbo"

Generate questions for the given context of 100 movies. For each of the 100 movies, 3 questions and facts (ground truth) will be generated. We need an independent model that is not one of the local models under evaluation. Therefore, we are using OpenAI's GPT-3.5-turbo model.

In [25]:
# Build the generator that asks the OpenAI model for question/answer pairs
# about every loaded movie plot.
data_generator = RagDatasetGenerator.from_documents(
    documents=documents,
    llm=OpenAI(
        model=model_name,
        api_key=OPENAI_API_KEY,
        # Temperature 0 keeps the generated questions as repeatable as possible.
        temperature=0,
    ),
    # NOTE(review): an empty list is passed here — confirm whether the installed
    # llama_index version treats it as "no splitting" or falls back to its
    # default transformations.
    transformations=[],
    text_question_template=PromptTemplate(
        template=(
            # Adjacent string literals are concatenated verbatim, so each line
            # around the separators needs its own explicit "\n"; otherwise the
            # instruction, the dashes and the movie plot run into one another.
            "Generate 3 questions and 3 Answers in regards to the given context. The context is below\n"
            "-----------\n"
            "{context_str}\n"
            "-----------"
            "\n No extra text. Only return the 3 questions and 3 answers below. Please elaborate on the answer in a single, context-focused sentence."
            "\n\n"
            "\nQ1: "
            "\nA1: "
            "\nQ2: "
            "\nA2: "
            "\nQ3: "
            "\nA3: "
        )
    ),
    # NOTE(review): the prompt requests 3 question/answer pairs while this is 1 —
    # verify how the installed llama_index version post-processes the response.
    num_questions_per_chunk=1,
    show_progress=DEBUG,
    workers=1
)

In [26]:
# Run the generation. The generator was configured with an OpenAI LLM, so this
# step presumably issues remote API calls and may be slow and billable.
eval_questions = data_generator.generate_questions_from_nodes()

Save the generated Questions and Facts for later evaluation.

In [27]:
# Target directory for the stored evaluation data.
folder = "evaluation/data/"
# The file name records which model and which dataset subset produced it.
filename = f"llm_qna_to_context_{model_name}_{dataset}.pkl"

In [28]:
# Persist the generated questions so that later evaluation steps can reuse them
# without calling the OpenAI API again.
store_object(obj=eval_questions, folder=folder, filename=filename)

In [29]:
# Round-trip check: read the stored object back and display it via to_pandas().
# NOTE(review): the .pkl extension suggests pickle serialization — only load
# files that were produced by this project.
load_object(folder=folder, filename=filename).to_pandas()