In [31]:
import os
import re
from dotenv import load_dotenv
from typing import List, Optional
from dataclasses import dataclass, field
from langchain_openai import OpenAI, ChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage

# Load variables from a local .env file (if any) into the process environment,
# then read the OpenAI key from it. `api_key` is None when the variable is unset.
load_dotenv()
api_key = os.environ.get('OPENAI_API_KEY')

In [25]:
@dataclass
class MCQGenerator:
    """Generate multiple-choice questions (MCQs) about a topic with a chat LLM.

    The generator formats a prompt containing the topic, a complexity level and
    the history of previously asked questions, sends it to the model, and parses
    the reply into a dict with the keys ``question``, ``options``, ``answer`` and
    ``explanation``.

    Attributes:
        api_key: OpenAI API key passed to ``ChatOpenAI``. Excluded from the
            dataclass repr so displaying an instance does not leak the secret.
        model: Name of the chat model to use.
        temperature: Sampling temperature passed to the model.
        max_retries: Maximum number of generation attempts per call to
            ``generate_mcq`` before the placeholder question is returned.
        llm: Chat model client, created in ``__post_init__``.
        prompt: Prompt template, created in ``setup_parser``.
        output_parser: Structured output parser, created in ``setup_parser``.
    """

    api_key: str = field(repr=False)
    model: str = 'gpt-4o-mini'
    temperature: float = 0.3
    max_retries: int = 10
    # The three fields below are built after construction, hence init=False.
    llm: ChatOpenAI = field(init=False)
    prompt: PromptTemplate = field(init=False)
    output_parser: StructuredOutputParser = field(init=False)

    def __post_init__(self):
        """Create the chat model client, then the parser and prompt template."""
        self.llm = ChatOpenAI(api_key=self.api_key, model=self.model, temperature=self.temperature)
        self.setup_parser()

    def setup_parser(self):
        """Build ``self.output_parser`` and ``self.prompt``.

        The parser is created from four response schemas (question, options,
        answer, explanation); its format instructions are injected into the
        prompt template as a partial variable.
        """
        response_schemas = [
            ResponseSchema(name="question", description="The multiple choice question"),
            ResponseSchema(name="options", description="A python list of 4 options for the question. Separated by comma(,)"),
            ResponseSchema(name="answer", description="The correct option (A, B, C, or D)"),
            ResponseSchema(name="explanation", description="Explanation of why the answer is correct")
        ]
        self.output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

        self.prompt = PromptTemplate(
            template=(
                "Generate a unique multiple-choice question about {topic} with an initial complexity level of {complexity}.\n"
                "Question requirements:\n"
                "1. Adjust the difficulty based on the complexity level (1-5, where 1 is very simple and 5 is very complex).\n"
                "2. For lower complexity (1-2), focus on basic facts, definitions, or simple concepts.\n"
                "3. For higher complexity (4-5), include more advanced concepts, require analysis, or combine multiple ideas.\n"
                "4. Review this history of previous questions:\n{history}\n"
                "5. If your generated question is similar to any in the history, INCREASE THE COMPLEXITY LEVEL BY 1 (up to a maximum of 5) and generate a new, more advanced question.\n"
                "6. Repeat step 5 until you generate a question that is significantly different from all questions in the history.\n"
                "7. Ensure the final question explores aspects or applications of the topic not covered in previous questions.\n"
                "8. Make all options plausible, but only one should be correct.\n"
                "{format_instructions}\n"
                "Formatting rules:\n"
                "1. Present the options as a Python list: ['A. option', 'B. option', 'C. option', 'D. option']\n"
                "2. The answer must be a single letter: 'A', 'B', 'C', or 'D'.\n"
                "3. Provide a clear explanation for the correct answer, matching the final complexity level.\n"
                "4. Include the final complexity level (1-5) used to generate the question in your response.\n"
                "5. Strictly adhere to the specified output format.\n"
                "6. Ensure the JSON is valid and can be parsed by a standard JSON parser.\n"
                "\nBefore submitting, double-check that your question is not repeating or closely resembling any in the history."
            ),
            input_variables=['topic', 'complexity', 'history'],
            partial_variables={'format_instructions': self.output_parser.get_format_instructions()}
        )

    @staticmethod
    def _single_to_double_quotes(text: str) -> str:
        """Rewrite every single-quoted span in ``text`` as a double-quoted one."""
        return re.sub(r"'([^']*)'", r'"\1"', text)

    @staticmethod
    def _extract_content(output):
        """Return the message content from the model output.

        Accepts a single ``AIMessage`` or a non-empty list whose first element
        is an ``AIMessage``.

        Raises:
            ValueError: If ``output`` has any other shape.
        """
        if isinstance(output, AIMessage):
            return output.content
        if isinstance(output, list) and len(output) > 0 and isinstance(output[0], AIMessage):
            return output[0].content
        raise ValueError("Unexpected output format from ChatOpenAI")

    def _parse_response(self, content) -> dict:
        """Parse the model reply, retrying once with single quotes converted.

        The second attempt covers replies that use Python-style single quotes
        (as the prompt's own list example does), which are not valid JSON.
        Any error from the second attempt propagates to the caller.
        """
        try:
            return self.output_parser.parse(content)
        except Exception:
            # Not a bare `except:` so KeyboardInterrupt/SystemExit still stop the run.
            return self.output_parser.parse(self._single_to_double_quotes(content))

    def generate_mcq(self, topic: str, complexity: int, history: List[str]) -> dict:
        """Generate one MCQ that is not already present in ``history``.

        Makes up to ``self.max_retries`` attempts. An attempt is rejected when
        the reply cannot be parsed, when ``options`` is not a list of exactly
        four items, or when the question matches a history entry (compared
        case-insensitively, ignoring surrounding whitespace).

        Args:
            topic: Subject of the question.
            complexity: Requested difficulty; the prompt describes a 1-5 scale.
            history: Previously asked questions. ``None`` is treated as empty.

        Returns:
            The parsed MCQ dict (``question``, ``options``, ``answer``,
            ``explanation``), or a placeholder dict with the same keys when
            every attempt failed.
        """
        history = history or []
        # Loop-invariant, so computed once instead of on every retry.
        formatted_history = "\n".join(f"- {q}" for q in history)
        # Normalize both sides of the duplicate check; comparing a lowercased
        # question against raw history entries would almost never match.
        seen_questions = {str(q).strip().lower() for q in history}

        for attempt in range(1, self.max_retries + 1):
            try:
                _input = self.prompt.format(topic=topic, complexity=complexity, history=formatted_history)
                content = self._extract_content(self.llm.invoke(_input))
                result = self._parse_response(content)

                options = result.get('options')
                if not (isinstance(options, list) and len(options) == 4):
                    print("Error: Options are not in the correct format. Retrying...")
                    continue

                if str(result['question']).strip().lower() in seen_questions:
                    print("Error: Generated question duplicates one in the history. Retrying...")
                    continue

                return result

            except Exception as e:
                print(f"Parsing failed with error: {e}. Retrying... ({attempt}/{self.max_retries})")

        print("Failed to generate a valid MCQ after maximum retries. Click next and try again.")
        return {
            'question': 'Could not generate a question. Press Next to try Again.',
            'options': ['Option A', 'Option B', 'Option C', 'Option D'],
            'answer': 'A',
            'explanation': 'No explanation available.'
        }


In [26]:

def format_mcq(mcq_dict):
    """Render an MCQ dict as a printable multi-line string.

    Args:
        mcq_dict: Mapping with the keys ``question``, ``options`` (a sequence
            of option strings), ``answer`` and ``explanation``.

    Returns:
        The question on the first line, followed by the options (one per
        line), the correct answer and the explanation. Every line after the
        first is indented by eight spaces, matching the layout this function
        has always produced.

    Raises:
        KeyError: If one of the four keys is missing.
    """
    indent = " " * 8
    # Iterate rather than index [0]..[3], so fewer than four options no longer
    # raises IndexError and additional options are not silently dropped.
    option_lines = "\n".join(f"{indent}{option}" for option in mcq_dict['options'])

    return (
        f"Question: {mcq_dict['question']}\n"
        "\n"
        f"{indent}Options:\n"
        f"{option_lines}\n"
        "\n"
        f"{indent}Correct Answer: {mcq_dict['answer']}\n"
        "\n"
        f"{indent}Explanation: {mcq_dict['explanation']}"
    )

In [29]:
# Demo: generate and print one question about the chosen topic.
mcq_gen = MCQGenerator(api_key=api_key, temperature=0.7, max_retries=10)
topic = "london"
# The prompt defines complexity on a 1-5 scale, so request the highest level (5)
# instead of an out-of-range value that contradicts the prompt's own rules.
# An empty history means no earlier questions need to be avoided.
mcq = mcq_gen.generate_mcq(topic, 5, [])
print(format_mcq(mcq))

Question: How did the Great Fire of London in 1666 influence urban planning and fire safety regulations in modern London?

        Options:
        A. It led to the establishment of the London Fire Brigade in 1833.
        B. It resulted in the creation of the first building codes to prevent wooden structures.
        C. It caused the complete relocation of the city center to the West End.
        D. It initiated the construction of the River Thames barrier.

        Correct Answer: B

        Explanation: The Great Fire of London in 1666 destroyed a significant portion of the city, leading to the implementation of the first building codes that mandated the use of brick and stone in construction to prevent future fires. This was a pivotal moment in urban planning and fire safety, directly influencing the architectural landscape of modern London and laying the groundwork for future regulations.
