In [142]:
# !pip install langchain langchain-google-genai -qU
# !pip install pymupdf4llm -qU

In [187]:
import os
from getpass import getpass

from langchain.chat_models import init_chat_model

# Credentials must never live in the notebook source: read the key from the
# GOOGLE_API_KEY environment variable and, if it is not set, prompt for it
# without echoing so it does not end up in the saved cell or its output.
# NOTE(review): the key that used to be hardcoded in this cell has been
# exposed and should be revoked/rotated.
google_api_key = os.environ.get("GOOGLE_API_KEY") or getpass("Google API key: ")

# temperature=0 is passed to keep the generated quiz as repeatable as the
# model allows.
llm = init_chat_model(
    "gemini-2.5-flash-preview-04-17",
    model_provider="google_genai",
    temperature=0,
    api_key=google_api_key,
)

In [188]:
from IPython.display import Markdown, display

def md(text):
    """Render ``text`` as Markdown in the notebook's output area.

    Args:
        text: Markdown source to display.
    """
    rendered = Markdown(text)
    display(rendered)

In [189]:
# import pymupdf4llm
# md_text = pymupdf4llm.to_markdown("input.pdf")

In [190]:
# md(md_text)

In [191]:
# Load the source material the quiz is generated from; the encoding is given
# explicitly so the read does not depend on the platform default.
with open("test_content.txt", encoding="utf-8") as content_file:
    content = content_file.read()

In [192]:
from langchain_core.messages import HumanMessage, SystemMessage

# Conversation sent to the model: a system prompt with the quiz-writing rules,
# followed by the raw source content as the human turn.
messages = [
    SystemMessage(
        content=(
            # Adjacent string literals are concatenated verbatim, so every
            # sentence has to end with a space; the first one previously did
            # not, which produced "knowledge.Prepare quiz ..." in the prompt.
            "You are an expert in preparing quiz for the students to test their knowledge. "
            "Prepare quiz only from the given content. "
            "Do not miss out any possible question that can be asked from the given content. "
            "Provide your response in the structured format. "
            "Questions can be in MCQ(Multiple Choice Questions) or MSQ(Multiple Select Questions) format. "
            "MCQs can also have True or False type questions. "
            "Only one option in MCQs should be the right answer. "
            "Whereas in MSQs, multiple answers can be the right ones. "
            "Questions and answers should be generic in nature but should be taken from the given content only. "
            "For example even if the user has not read the given content but read some other related sources, "
            "they should be able to understand and answer the questions. "
            "Do not include phrases such as 'according to the given text...' or 'based on the given content etc.,' "
            "Do not disclose to the user that the question, answer or explanation is taken from the given content. "
        )
    ),
    HumanMessage(content=content),
]

In [193]:
from pydantic import BaseModel, Field
from typing import Optional, Literal, List

class QuestionOption(BaseModel):
    # One selectable answer choice belonging to a question.
    # Documented with `#` comments on purpose: a class docstring would be
    # included in the generated JSON schema and change what the model sees.

    # Short label of the choice.
    option_id: str = Field(
        ...,
        description="Unique identifier for the option. Goes like a, b, c etc.,",
    )
    # Text shown for the choice.
    text: str = Field(
        ...,
        description="Text content of the option.",
    )

class Question(BaseModel):
    # A single quiz item: prompt text, its options, and the answer key.
    # Documented with `#` comments on purpose: a class docstring would be
    # included in the generated JSON schema and change what the model sees.

    question_id: str = Field(
        ...,
        description="Unique identifier for the question starting from 1.",
    )
    text: str = Field(
        ...,
        description="The actual text of the question.",
    )
    # Restricted to the two supported question kinds.
    type: Literal["mcq", "msq"] = Field(
        ...,
        description="Type of the question. Can be 'mcq' or 'msq'.",
    )
    # Optional[...] with Field(...): the value may be None, but the field
    # itself still has to be supplied.
    options: Optional[list[QuestionOption]] = Field(
        ...,
        description="List of possible options.",
    )
    # Answer key expressed as option ids.
    correct_option_ids: list[str] = Field(
        ...,
        description="Correct options' IDs. Single value for MCQ and multiple values from MSQs. Goes like a, b, c etc.,",
    )
    # Answer key expressed as answer strings.
    correct_answer: list[str] = Field(
        ...,
        description="List of correct answer(s). For MCQ it typically has one value; for MSQ, it can have multiple acceptable answers.",
    )
    explanation: Optional[str] = Field(
        ...,
        description="Short explanation for the answer. Do not quote from the given content. Give general explanation. Even if you take it from the given content, do not reveal to the user.",
    )

class Questions(BaseModel):
    # Top-level container: the schema handed to `with_structured_output`,
    # holding the complete list of generated questions.
    questions: list[Question] = Field(
        ...,
        description="List of questions",
    )


In [194]:
# Wrap the chat model with the `Questions` schema so that replies come back as
# structured objects instead of free text (the result is later read through
# its `.questions` attribute).
structured_llm = llm.with_structured_output(Questions)

In [195]:
# Run the model once on the prepared messages (system prompt + source content)
# and keep the structured reply.
ai_msg = structured_llm.invoke(messages)

In [196]:
# Unwrap the list of generated Question objects from the Questions container.
questions = ai_msg.questions

In [197]:
# Sanity check: how many questions the model produced.
len(questions)

23

In [198]:
# Dump every generated question for manual review: prompt text, options,
# answer text(s), answer id(s) and explanation, one per line, followed by
# blank lines as a separator.
for question in questions:
    print(
        question.text,
        question.options,
        question.correct_answer,
        question.correct_option_ids,
        question.explanation,
        "\n",
        sep="\n",
    )

What is the primary goal of database normalization?
[QuestionOption(option_id='a', text='To reduce redundancy and eliminate undesirable characteristics like anomalies'), QuestionOption(option_id='b', text='To combine all data into a single large table'), QuestionOption(option_id='c', text='To increase the complexity of database queries'), QuestionOption(option_id='d', text='To store duplicate data efficiently')]
['To reduce redundancy and eliminate undesirable characteristics like anomalies']
['a']
Normalization is a process of organizing data in a database to reduce redundancy and improve data integrity by eliminating insertion, update, and deletion anomalies.


Normalization involves breaking down large tables into smaller ones.
[QuestionOption(option_id='a', text='True'), QuestionOption(option_id='b', text='False')]
['True']
['a']
Normalization involves breaking down large tables into smaller, well-structured ones and defining relationships between them to reduce redundancy and impr