In [4]:
import os
import glob
from dotenv import load_dotenv
from pathlib import Path
import gradio as gr
from openai import OpenAI
from IPython.display import display, Markdown
import json
import random

In [139]:
# Setting up

load_dotenv(override=True)
openai_api_key = os.getenv('OPENAI_API_KEY')
if openai_api_key:
    print(f"OpenAI API Key exists and begins {openai_api_key[:8]}")
else:
    print("OpenAI API Key not set")

# MODEL_q_generator = "gpt-5-nano-2025-08-07"
MODEL_q_generator = "o4-mini-2025-04-16"
MODEL_evaluator = "o4-mini-2025-04-16"

openai_client = OpenAI()
google_client = OpenAI(
    api_key=os.getenv("GOOGLE_API_KEY"),
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)

OpenAI API Key exists and begins sk-proj-


In [None]:
system_q_generator_message = """
You are a python quiz generator. Your task is to generate a python code quiz based on several parmeters:
- Difficulty: Easy, Medium, Hard
- Subject: e.g., Python, Data Science, Machine Learning, etc.
- quiz_type: e.g., multiple choice, true/false, open question, Spot the Bug, etc.


### Important
- Ensure all newlines and quotes inside the JSON values are properly escaped so the JSON is valid.
- Do not output markdown code blocks (like ```json). Return raw JSON only.
- Question should be short and concise, such that the answer to is not more than 2-3 sentences.

Note that the user was asked about these previous codes, so please don't repeat them:

"""


In [None]:
CORE_TOPICS = [
    "String Slicing & Indexing",
    "f-strings and Formatting",
    "List Comprehensions",
    "Dictionary Methods (.get, .items)",
    "Set Operations (union, intersection)",
    "Tuple Unpacking",
    "While Loops & Break/Continue",
    "For Loops with enumerate() and zip()",
    "Function *args and **kwargs",
    "Lambda Functions",
    "Type Conversion (int vs str vs float)",
    "Basic File I/O (open/read/write)",
    "The 'in' operator keyword",
    "Boolean Logic (and, or, not)"
]

# DS_TOPICS = [
#     "Pandas: DataFrame Filtering (.loc vs .iloc)",
#     "Pandas: GroupBy and Aggregation",
#     "Pandas: Handling Missing Values (fillna, dropna)",
#     "NumPy: Array Broadcasting",
#     "NumPy: Vectorized Operations",
#     "Matplotlib: Basic Plotting commands",
#     "Datetime Module manipulation",
#     "JSON parsing (json.loads/dumps)",
#     "Random Module (choice, shuffle, randint)",
#     "Regular Expressions (re module)"
# ]
ADVANCED_TOPICS = [
    "Decorators (@wraps)",
    "Generators and the 'yield' keyword",
    "Context Managers ('with' statement)",
    "Class Inheritance & super()",
    "Dunder Methods (__init__, __str__, __len__)",
    "Mutable vs Immutable types (pass-by-reference)",
    "Default Mutable Arguments trap",
    "Global vs Local Scope",
    "Exception Handling (try/except/else/finally)",
    "The 'is' vs '==' operator",
    "Recursion"
]

In [None]:
def system_generator_message(history=None, system_prefix=system_q_generator_message):
    if not history:
        history = []
    system_message = system_prefix + "\n".join(history)

    return system_message 

def get_topics(level):
    if level == "Easy":
        return CORE_TOPICS
    elif level == "Medium":
        return CORE_TOPICS
    elif level == "Hard":
        return ADVANCED_TOPICS + CORE_TOPICS
    else:
        return CORE_TOPICS


In [119]:
json_q_generator_schema = {
    "name": "python_quiz",
    "strict": True,  # Highly recommended for reliability
    "schema": {
        "type": "object",
        "properties": {
            "question": {
                "type": "string",
                "description": "A beautiful markdown text with the question regarding the subject (if code needed, include it as markdown code). If quiz_type is multiple choices, include here the choices with line breaks.",
            },
            "hint": {
                "type": "string",
                "description": "a hint to help the user if he struggles."
            },
            "answer": {
                "type": "string",
                "description": "The correct answer to the question."
            }
        },
        "required": [
            "question", 
            "hint", 
            "answer"],
        "additionalProperties": False  # REQUIRED when strict is True
    }
}


In [114]:
def user_input(subject=None, difficulty=None, quiz_type=None):
    for param in [subject, difficulty, quiz_type]:
        if not param:
            param = f"no specific {param} was given."
            
    return f"""
    Please, generate a unique python code question based on the following parameters:
    - Subject: {subject}
    - Difficulty: {difficulty}
    - quiz type: {quiz_type}

    The question should not be too long, such that an answer to is not more than 2-3 sentences.
    Be creative and unique, don't repeat codes and think about this random seed to encourage you to be creative: {random.random()}

    You can use this bank of topics as a reference to choose the code to be tested (be sure to follow the requested subject):
    {random.choices(get_topics(difficulty), k=4)}
    """

In [None]:
def generate_question(subject=None, difficulty=None, quiz_type=None, history=None, to_stream=False):
    user_prompt = user_input(subject, difficulty, quiz_type)
    
    try:
        messages = [
            {"role": "system", "content": system_generator_message(history)},
            {"role": "user", "content": user_prompt}
        ]
        
        response = openai_client.chat.completions.create(
            model=MODEL_q_generator,
            messages=messages,
            response_format={"type": "json_schema", "json_schema": json_q_generator_schema},
            # presence_penalty=0.6
        )
        
        # Parse the JSON string into a Python Dictionary
        result_content = response.choices[0].message.content
        quest_dict = json.loads(result_content)

        quest_dict['question'] = quest_dict['question'].replace('\\n', '\n')
        # quest_dict['code'] = quest_dict['code'].replace('\\n', '\n')
        return quest_dict
        
    except Exception as e:
        return f"Error: {str(e)}", ""

In [85]:
question = generate_question("Python", "easy", "multiple choice")
display(Markdown(question['question']))

# Python Quiz ‚Äî Sets, "in" and Boolean Logic üåü

Consider the following code:

```python
s1 = {'sun', 'moon', 'star'}
s2 = {'comet', 'star'}

result = ('sun' in s1 and 'comet' in s2) or ('planet' in s1 and not ('star' in s2))
print(result)
```

What does this print?

Choices:
A) True
B) False
C) Raises a KeyError
D) None

In [86]:
question['answer']

'A) True'

# Set Evaluator

In [87]:
system_evaluator_message = """
You are a python quiz evaluator. Your task is to evaluate a the user's answer according to the true answer provided.
You are given the next parameters:
- question: The question asked to the user.
- true_answer: The correct answer to the question.
- user_answer: The user's answer to the question.

## important: if the answer seems like a choice in a multiple choices question, it is enough for the user to specify the number/letter of the choice.
Your response should be a json object containing exactly these keys:
{
    "verdict": "verdict keyword selected from: 'Correct', 'Wrong', 'Partial'"
    "explanation": "A short markdown text with the explanation regarding the user's answer. Use evaluating phrases such as 'Good job!', 'Almost there!', etc. elaborate only if not correct."
}

"""

json_evaluator_schema = {
    "name": "evaluator_json",
    "strict": True,  # Highly recommended for reliability
    "schema": {
        "type": "object",
        "properties": {
            "verdict": {
                "type": "string",
                "description": "verdict keyword selected from: 'Correct', 'Wrong', 'Partial'"
            },
            "explanation": {
                "type": "string",
                "description": "A markdown text with the explanation regarding the user's answer. Use evaluating phrases such as 'Good job!', 'Almost there!', etc."
            }
        },
        "required": ["verdict", "explanation"],
        "additionalProperties": False  # REQUIRED when strict is True
    }
}

In [94]:
def evaluate(question, true_answer, user_answer):
    user_prompt = f""" Please evaluate my answer to the following question:
    - question: {question}
    - true_answer: {true_answer}
    - user_answer: {user_answer}
    """

    try:
        messages = [
            {"role": "system", "content": system_evaluator_message},
            {"role": "user", "content": user_prompt}
        ]
        
        response = openai_client.chat.completions.create(
            model=MODEL_evaluator,
            messages=messages,
            response_format={"type": "json_schema", "json_schema": json_evaluator_schema}
        )
        
        # Parse the JSON string into a Python Dictionary
        result_content = response.choices[0].message.content
        parsed_data = json.loads(result_content)
        
        return parsed_data
        
    except Exception as e:
        return f"Error: {str(e)}", ""

# Some tests

In [93]:
question = generate_question("Python - dicts", "medium", "multiple choice")
display(Markdown(question['question']))

# Dict normalization puzzle üß©

Consider the following Python code:

```python
pairs = [('1', 10), (1, '20'), ('2', 30.0), (3.0, '40')]
extra = {'2', 3, 4}
# build a dict normalizing keys and values
d = {int(float(k)): str(int(float(v))) for k, v in pairs}
# now update with extras (note set semantics and type conversions)
d.update({int(float(k)): f"extra-{int(float(k))}" for k in extra})
print(d)
```

What is printed by this code? Choose one:

A) {1: '10', 2: '30', 3: '40', 4: 'extra-4'}
B) {1: '20', 2: 'extra-2', 3: 'extra-3', 4: 'extra-4'}
C) {1: '20', 2: '30', 3: '40'}
D) {1: '20', 2: '30', 3: 'extra-3', 4: 'extra-4'}

In [95]:
my_answer = "B"
evaluate(question['question'], question['answer'], my_answer)

{'verdict': 'Correct',
 'explanation': 'Good job! Your answer matches the expected output.'}

In [96]:
my_answer = "{1: '20', 2: 'extra-2', 3: 'extra-3', 4: 'extra-4'}"
evaluate(question['question'], question['answer'], my_answer)

{'verdict': 'Correct',
 'explanation': 'Good job! Your answer matches the expected output.'}

In [97]:
my_answer = "A"
evaluate(question['question'], question['answer'], my_answer)

{'verdict': 'Wrong',
 'explanation': "Not quite. The first comprehension actually produces {1: '20', 2: '30', 3: '40'} (because (1, '20') overwrites ('1', 10)), then updating with the extras replaces keys 2 and 3 and adds 4, resulting in {1: '20', 2: 'extra-2', 3: 'extra-3', 4: 'extra-4'}."}

In [98]:
my_answer = "{1: '10', 2: 'extra-2', 3: 'extra-3', 4: 'extra-4'}"
evaluate(question['question'], question['answer'], my_answer)

{'verdict': 'Wrong',
 'explanation': "Wrong. The initial dict comprehension produces key 1 with value '20' (not '10') because the pair (1,'20') overrides ('1',10). After updating with extras, the final dict is {1:'20', 2:'extra-2', 3:'extra-3', 4:'extra-4'}."}

# Set Quiz Class

In [145]:
class quizzer:
    def __init__(self):
        self.history_questions = []
        self.user_score = 0
        self.total_score = 0
        self.current_hint = None
        self.current_question = None
        self.current_answer = None
        self.last_subject = None
        self.last_difficulty = None
        self.last_quiz_type = None

        self.already_answered = False

    def reset_quiz(self):
        self.history_questions = []
        self.user_score = 0
        self.total_score = 0
        self.current_hint = None
        self.current_question = None
        self.current_answer = None
        self.last_subject = None
        self.last_difficulty = None
        self.last_quiz_type = None

        self.already_answered = False
    
    def new_question(self, subject=None, difficulty=None, quiz_type=None):
        self.already_answered = False
        output = generate_question(subject, difficulty, quiz_type, self.history_questions)
        self.current_question = output["question"].replace("\\n", "\n")
        self.history_questions.append(self.current_question)
        self.current_hint = output["hint"]
        self.current_answer = output["answer"]
        return self.current_question
    
    def evaluate_user_answer(self, user_answer):
        if self.already_answered:
            return "This question was already answered, please move to the next question.", ""
        self.already_answered = True
        evaluation = evaluate(self.current_question,self.current_answer, user_answer)
        verdict = evaluation["verdict"]
        explanation = evaluation["explanation"]
        self.total_score += 1

        if verdict == "Correct":
            self.user_score += 1
        elif verdict == "Partial":
            self.user_score += 0.5
        else:
            self.user_score += 0
        return verdict, explanation
    
    def get_current_hint(self):
        return self.current_hint

In [146]:
def create_app():
    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        # State Management: Initialize the class for every unique user session
        quiz_state = gr.State(quizzer())

        gr.Markdown("# üß† AI Quiz Generator")

        with gr.Row():
            with gr.Column(scale=1):
                # Inputs
                inp_subject = gr.Textbox(label="1. Topic/Subject", placeholder="e.g. Python Lists")
                inp_difficulty = gr.Dropdown(choices=["Easy", "Medium", "Hard"], value="Medium", label="2. Difficulty")
                inp_type = gr.Dropdown(choices=["Multiple Answers", "Open Question", "True/False", "Spot the Bug"], value="Open Question", label="3. Question Type")
                
                btn_generate = gr.Button("Generate Question", variant="primary")
                
                # Stats Display
                out_score = gr.Markdown("### Score: 0/0")
                btn_reset = gr.Button("Reset Quiz", variant="stop")

            with gr.Column(scale=2):
                # Question Area
                out_question = gr.Markdown("### Question will appear here...", min_height=190)
                
                # Hint Area
                with gr.Accordion("Need help?", open=False):
                    btn_hint = gr.Button("Get Hint", size="sm")
                    out_hint = gr.Markdown("")

                # Answer Area
                gr.Markdown("---")
                inp_answer = gr.Textbox(label="4. Your Answer", placeholder="Type here...", lines=3)
                btn_submit = gr.Button("Submit Answer")
                
                # Evaluation Area
                out_eval_verdict = gr.Markdown("")
                out_eval_explanation = gr.Markdown("")

        # --- EVENT FUNCTIONS ---
        
        def on_generate(quiz, subject, diff, q_type):
            # Logic wrapper
            q_text = quiz.new_question(subject, diff, q_type)
            print(quiz.history_questions)
            return (
                quiz,                           # Update State
                q_text,                         # Show Question
                "",                             # Clear Hint
                "",                             # Clear Answer box
                "",                             # Clear Verdict
                ""                              # Clear Explanation
            )

        def on_hint(quiz):
            return quiz.get_current_hint()

        def on_submit(quiz, answer):
            verdict, explanation = quiz.evaluate_user_answer(answer)
            score_text = f"### Score: {quiz.user_score}/{quiz.total_score} ({quiz.user_score/quiz.total_score*100}%)"
            
            # Formatting verdict color
            if verdict == "Correct":
                verdict_md = f"### ‚úÖ {verdict}"
            elif verdict == "Answered":
                verdict_md = f"### ‚ö†Ô∏è {verdict}"
            else:
                verdict_md = f"### ‚ùå {verdict}"
            
            return quiz, verdict_md, explanation, score_text

        def on_reset(quiz):
            quiz.reset_quiz()
            return (
                quiz, 
                "### Score: 0/0", 
                "### Question will appear here...", 
                "", 
                "", 
                "", 
                ""
            )

        # --- WIRING BUTTONS ---
        
        btn_generate.click(
            on_generate, 
            inputs=[quiz_state, inp_subject, inp_difficulty, inp_type], 
            outputs=[quiz_state, out_question, out_hint, inp_answer, out_eval_verdict, out_eval_explanation]
        )

        btn_hint.click(
            on_hint,
            inputs=[quiz_state],
            outputs=[out_hint]
        )

        btn_submit.click(
            on_submit,
            inputs=[quiz_state, inp_answer],
            outputs=[quiz_state, out_eval_verdict, out_eval_explanation, out_score]
        )

        btn_reset.click(
            on_reset,
            inputs=[quiz_state],
            outputs=[quiz_state, out_score, out_question, out_hint, inp_answer, out_eval_verdict, out_eval_explanation]
        )

    return demo

if __name__ == "__main__":
    app = create_app()
    app.launch(debug=True)

  with gr.Blocks(theme=gr.themes.Soft()) as demo:


* Running on local URL:  http://127.0.0.1:7875
* To create a public link, set `share=True` in `launch()`.


["# üêº Pandas Indexing Challenge üêæ\n\n```python\nimport pandas as pd\ndf = pd.DataFrame({'A': [5, 15, 25], 'B': [10, 20, 30]}, index=[0, 1, 2])\n```\n\nDescribe the outputs of `df.loc[0:1]` vs `df.iloc[0:1]` and explain why they differ."]
Keyboard interruption in main thread... closing server.
