# AI QuizGen 2025

## Dependencies
Uncomment and run to install

In [None]:
# Uncomment the lines below and run this cell to install the packages.
# !pip install openai
# !pip install xlsxwriter
# !pip install openpyxl
# NOTE(review): the `ffmpeg.input(...).output(...).run()` calls in the
# conversion cell look like the ffmpeg-python API. The separate PyPI package
# named `ffmpeg` presumably installs a module with the same import name, so
# confirm that the next line is really needed alongside ffmpeg-python.
# !pip install ffmpeg
# !pip install ffmpeg-python

## Convert to Mp3
Convert Mp4 to Mp3 if needed

In [None]:
import ffmpeg

def convert_to_mp3(input_file, output_file):
    """Convert a media file to a 128 kbit/s MP3 with ffmpeg.

    Args:
        input_file: Path of the source media file (for example an .mp4).
        output_file: Path of the .mp3 to write. An existing file is overwritten.

    Returns:
        None. The outcome is reported on stdout; an ffmpeg failure is caught
        and printed instead of being raised.
    """
    try:
        # acodec / audio_bitrate are passed so that only the audio stream is
        # configured; vn=None is emitted as a bare "-vn" flag so that any
        # video stream is dropped from the output.
        stream = ffmpeg.input(input_file).output(
            output_file,
            acodec='libmp3lame',
            audio_bitrate='128k',
            vn=None,
        )
        # stderr has to be captured here, otherwise e.stderr is None in the
        # handler below and the error report itself would crash.
        stream.run(overwrite_output=True, capture_stderr=True)
        print(f"Converted {input_file} to {output_file}")
    except ffmpeg.Error as e:
        # Fall back to the exception text if no stderr bytes are available.
        detail = e.stderr.decode(errors="replace") if e.stderr else str(e)
        print(f"Error: {detail}")

## Create Transcript
Provide the filename of the mp3 and the directory in which it's found

In [5]:
from openai import OpenAI
# Module-level API client used by create_transcript_file below. No credentials
# are passed here, so they are presumably read from the environment (e.g. an
# OPENAI_API_KEY variable) — confirm before running.
client = OpenAI()

def create_transcript_file(filename, content_dir, transcripts_dir):
    """Send an MP3 to the Whisper API and save the returned text.

    Reads ``<content_dir>/<filename>.mp3`` and writes the API's plain-text
    response to ``<transcripts_dir>/<filename>.txt``. The transcripts
    directory is created if it does not exist yet.

    NOTE(review): this calls the ``audio.translations`` endpoint rather than
    ``audio.transcriptions``, so non-English speech presumably comes back as
    English text — confirm that this is the intended behaviour.

    Args:
        filename: Base name of the audio file, without extension.
        content_dir: Directory containing ``<filename>.mp3``.
        transcripts_dir: Directory that receives ``<filename>.txt``.

    Raises:
        OSError: If the audio file cannot be opened or the transcript cannot
            be written.
    """
    import os  # local import: only needed to create the output directory

    # The context manager closes the audio handle even if the API call fails.
    with open(f"{content_dir}/{filename}.mp3", "rb") as audio_file:
        # Make sure the destination exists before spending an API call whose
        # result could otherwise not be saved.
        os.makedirs(transcripts_dir, exist_ok=True)
        response = client.audio.translations.create(
            model="whisper-1",
            file=audio_file,
            response_format="text"
        )
    # With response_format="text" the response is written out as-is.
    with open(f"{transcripts_dir}/{filename}.txt", "w") as f:
        f.write(response)

## Generate Questions
Use OpenAI Structured Outputs to generate questions based on a JSON Schema.<br>
https://platform.openai.com/docs/guides/structured-outputs

In [3]:
from pydantic import BaseModel, Field
from typing import Union, List
from openai import OpenAI
# NOTE(review): a module-level client is also created in the transcript cell,
# and generate_quiz below builds its own local client, so this assignment
# looks redundant — confirm before removing it.
client = OpenAI()

class QuestionSchema(BaseModel):
    # Shape of one generated quiz question; instances are the elements of
    # QuestionListSchema.questions. Documented with comments rather than a
    # class docstring because a docstring would presumably be picked up as the
    # schema description sent to the model — confirm before converting.
    #
    # Every field except the two explanations is required (`...`); the
    # per-field `description` strings are part of the schema definition.

    # Label of the source video the question was derived from.
    Group: str = Field(..., description="The video from which the question is derived.")
    # Question-type code: TMC, TTF or TMCMA per the description below.
    Type: str = Field(..., description="The type of question, e.g., TMC (Multiple Choice), TTF (True/False), TMCMA (Multiple Answers).")
    Question: str = Field(..., description="The text of the question.")
    # Stored as a string of answer numbers, e.g. "1" or "1,3"; the numbers
    # presumably refer to Answer1..Answer4 below.
    CorAns: str = Field(..., description="The correct answer(s) represented by the answer number(s), comma-separated if multiple.")
    # Exactly four answer slots are defined, regardless of question type.
    Answer1: str = Field(..., description="First answer choice.")
    Answer2: str = Field(..., description="Second answer choice.")
    Answer3: str = Field(..., description="Third answer choice.")
    Answer4: str = Field(..., description="Fourth answer choice.")
    # Optional feedback texts; both default to None.
    CorrectExplanation: Union[str, None] = Field(None, description="Explanation provided when the correct answer is selected, can be null.")
    IncorrectExplanation: Union[str, None] = Field(None, description="Explanation provided when an incorrect answer is selected, can be null.")

class QuestionListSchema(BaseModel):
    # Top-level response wrapper: generate_quiz passes this class as
    # `response_format`, so the reply is a single object whose `questions`
    # key holds the list of generated questions.
    questions: List[QuestionSchema]

def generate_quiz(filename, transcripts_dir, questions_dir):
    """Generate quiz questions from a transcript and save them as JSON.

    Reads ``<transcripts_dir>/<filename>.txt``, asks the ``gpt-4o`` model for
    three multiple-choice questions constrained to ``QuestionListSchema``, and
    writes the raw JSON reply to ``<questions_dir>/<filename>.json``. The
    questions directory is created if it does not exist yet.

    Args:
        filename: Base name of the transcript, without extension.
        transcripts_dir: Directory containing ``<filename>.txt``.
        questions_dir: Directory that receives ``<filename>.json``.

    Returns:
        None. Progress and any failure during generation or writing are
        reported on stdout rather than raised.

    Raises:
        ValueError: If ``filename`` is empty or the transcript is blank.
        OSError: If the transcript file cannot be read (this happens before
            the error-reporting ``try`` block).
    """
    import os  # local import: only needed to create the output directory

    if not filename:
        raise ValueError("Input data is empty. Please provide valid data for quiz generation.")

    input_file = f"{transcripts_dir}/{filename}.txt"
    output_file = f"{questions_dir}/{filename}.json"
    client = OpenAI()
    with open(input_file, "r") as file:
        data = file.read()

    # A blank transcript gives the model nothing to base questions on, so
    # reject it up front instead of paying for invented content.
    if not data.strip():
        raise ValueError("Input data is empty. Please provide valid data for quiz generation.")

    try:
        print(filename)
        # Make sure the destination exists before spending an API call whose
        # result could otherwise not be saved.
        os.makedirs(questions_dir, exist_ok=True)
        response = client.beta.chat.completions.parse(
            model="gpt-4o",
            messages=[
                {"role": "user", "content": f"Create 3 multiple-choice questions based on the story here: {data}"}
            ],
            response_format=QuestionListSchema,
            # temperature=0.7
        )

        print(response.choices)

        # Validate the reply BEFORE opening the output file: opening with "w"
        # truncates it, so a refusal must not wipe a previously saved quiz.
        message = response.choices[0].message
        quiz_json = message.content
        if quiz_json is None:
            reason = getattr(message, "refusal", None) or "the model returned no content"
            print(f"Quiz generation failed: {reason}")
            return

        # Write output to file
        with open(output_file, "w") as quiz_file:
            quiz_file.write(quiz_json)

        print(f"Quiz successfully generated and saved to: {output_file}")

    except ValueError as ve:
        print(f"Input Error: {ve}")

    except FileNotFoundError:
        print(f"Error: The specified output path '{output_file}' is invalid.")

    except OSError as e:
        print(f"File system error: {e}")

    except Exception as e:
        print(f"An unexpected error occurred: {e}")

## Run All the Things!
Define vars and run functions here!

In [7]:
# Base name of the media file, without extension; each function appends its
# own extension (.mp3, .txt, .json).
filename = "japanese"
# Directory that receives <filename>.txt from create_transcript_file.
transcripts_dir = "transcripts"
# Directory that receives <filename>.json from generate_quiz.
questions_dir = "questions"
# Directory holding the source audio, <filename>.mp3.
content_dir = "content"
# If starting from an mp4, uncomment the next line to convert it first; it
# writes <filename>.mp3 next to the mp4 in content_dir.
# convert_to_mp3(f"{content_dir}/{filename}.mp4", f"{content_dir}/{filename}.mp3")
# Reads <filename>.mp3 from content_dir and writes <filename>.txt into
# transcripts_dir.
create_transcript_file(filename, content_dir, transcripts_dir)
# Reads that transcript and writes <filename>.json into questions_dir.
generate_quiz(filename, transcripts_dir, questions_dir)

japanese
[ParsedChoice[QuestionListSchema](finish_reason='stop', index=0, logprobs=None, message=ParsedChatCompletionMessage[QuestionListSchema](content='{"questions":[{"Group":"Daily Routine","Type":"TMC","Question":"What activity did Naoko do first upon waking up?","CorAns":"1","Answer1":"She washed her face and brushed her teeth.","Answer2":"She drank a cup of water.","Answer3":"She changed her clothes.","Answer4":"She put on makeup.","CorrectExplanation":"Correct! Naoko mentioned the first thing she did after waking up was washing her face and brushing her teeth.","IncorrectExplanation":"Not quite right. Naoko prioritized hygiene by washing her face and brushing her teeth first."},{"Group":"Daily Activities","Type":"TMC","Question":"Which meal did Naoko prepare for lunch?","CorAns":"3","Answer1":"Chikuzen-ni","Answer2":"Leftover rice and miso soup","Answer3":"Tomato sauce, anchovy, and cheese pasta","Answer4":"Half-price sashimi from the supermarket","CorrectExplanation":"Correct! 