In [2]:
from groq import Groq
import json_repair
import os

key_groq = os.environ["GROQ_API_KEY"]
client = Groq(api_key=key_groq)

In [21]:

json_schema = """
  {
    "topic_description": {
      "type": "string",
      "description": "A sentence describing the sub-topic to which the question belongs. That means this sentence should specify in a granular level what specific sub-topic the question belongs to. It should be abstract in a way that other questions could be put in this description too. Use between 5 and 10 words."
    },
    "level": {
      "type": "string",
      "description": "The difficulty level of the question. It should be only one of the following options: 'beginner', 'intermediate', 'advanced'."
    },
    "question": {
      "type": "string",
      "description": "The actual question text. It should be a question of type TRUE or FALSE. It means that the questions should be an assertion that could be answered with TRUE or FALSE."
    },
    "answer_correct": {
      "type": "string",
      "description": "The correct answer to the question. It should be only one of the following options: TRUE or FALSE"
    },
    "explanation": {
      "type": "string",
      "description": "An explanation or solution to the question."
    }
  }
"""

prompt_question_generator = """
              TASK CONTEXT:
              I am studying machine learning and I need to practice some questions on various topics.
              
              TASK DESCRIPTION:
              I will provide you with a list of topics, and I would like you to generate a list of TRUE or FALSE questions.
              These questions should be interesting, creative, challenging and thought-provoking. 
              Each question should be in the form of a statement that could be either TRUE or FALSE.
              Feel free to be imaginative and attempt to confuse the student by blending related concepts or similar words.
              I will provide the topics in the DOMAIN KNOWLEDGE section.
              The questions should pertain to these topics, and you can use this knowledge as a foundation to create questions that delve deeper into the subject matter.
              
              ADDITIONAL TASK DESCRIPTION:
              {additional_task_description}
              
              TASK REQUIREMENTS:
              Please refrain from creating questions that require mathematical calculations, but you may create questions with mathematical formulas.
              You SHOULD use LATEX to write mathematical formulas and code, but you should use the Katex flavor.
              Also you should put $$ in the beggining of the katex code and $$ at the end of the code. This is necessary because the interpreter needs it.
              
              TASK DETAILS:
              You should create {quantity} questions of level {level}.
              
              DOMAIN KNOWLEDGE:
              {domain_knowledge}
              
              FORMAT OUTPUT INSTRUCTIONS:
              It should be formatted in list of JSON objects as described below.
              {json_schema}
          """


In [22]:
parameters = {
    "domain_knowledge": """
            Language Models and Probability
            Entropy
            Cross-Entropy
            Cross-Entropy Loss
            Perplexity
        """,
    "additional_task_description": "Create questions only about the definitions of the concepts, like mixing the definition of one with another, or mixing the use of one with the use of another. I need this to memorize this concepts.",
    "quantity": "1",
    "level": "easy",
}

In [23]:
prompt_question_generator_formatted = prompt_question_generator.format(
        additional_task_description=parameters["additional_task_description"],
        quantity=parameters["quantity"],
        level=parameters["level"],
        domain_knowledge=parameters["domain_knowledge"],
        json_schema=json_schema,
    )

In [24]:
response = client.chat.completions.create(
    model="llama3-8b-8192",
    messages=[{"role": "user", "content": prompt_question_generator_formatted}],
    temperature=1,
    max_tokens=2420,
    top_p=1,
    stream=False,
    response_format={"type": "json_object"},
    stop=None,
)

completion = response.choices[0].message
questions = json_repair.loads(completion.content)

In [25]:
questions

{'topic_description': 'Measuring model uncertainty',
 'level': 'easy',
 'question': 'Perplexity is a measure of model entropy, and thus is inversely proportional to cross-entropy loss.',
 'answer_correct': 'FALSE',
 'explanation': 'Perplexity measures how well a model predicts a dataset, but it is not directly related to model entropy or cross-entropy loss.'}

In [26]:
type(questions)

dict

In [27]:
questions

{'topic_description': 'Measuring model uncertainty',
 'level': 'easy',
 'question': 'Perplexity is a measure of model entropy, and thus is inversely proportional to cross-entropy loss.',
 'answer_correct': 'FALSE',
 'explanation': 'Perplexity measures how well a model predicts a dataset, but it is not directly related to model entropy or cross-entropy loss.'}

In [28]:
if not isinstance(questions, list):
    questions_ = [questions]

In [29]:
type(questions_)

list

In [31]:
for question in questions_:
    print(question)

{'topic_description': 'Measuring model uncertainty', 'level': 'easy', 'question': 'Perplexity is a measure of model entropy, and thus is inversely proportional to cross-entropy loss.', 'answer_correct': 'FALSE', 'explanation': 'Perplexity measures how well a model predicts a dataset, but it is not directly related to model entropy or cross-entropy loss.'}
