# This notebook aims to create a function to prepare a CBSE question paper on the fly --> educhain

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1nwpjaV14HJLwrM8U-Xy1RrTCJ6yvKJOT?usp=sharing)

## Installing Libraries

In [1]:
!pip install python-docx langchain langchain-community openai --quiet


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m678.9 kB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m997.8/997.8 kB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m29.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m362.5/362.5 kB[0m [31m17.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.9/318.9 kB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

## Installing Libraries

In [3]:
import os
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain
from pydantic import BaseModel, Field
from typing import List, Union,Optional
from docx import Document
from docx.shared import Pt
from docx.oxml.ns import qn
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from google.colab import userdata

# Set your OpenAI API key
openai_api_key = userdata.get("OPENAI_API_KEY")
# Set up the OpenAI API key
os.environ["OPENAI_API_KEY"] = openai_api_key

## Models.py

In [4]:
class MCQQuestion(BaseModel):
    question: str
    options: List[str]
    correct_answer: str

class AssertionReasonQuestion(BaseModel):
    assertion: str
    reason: str
    correct_answer: str

class ShortAnswerQuestion(BaseModel):
    question: str
    max_marks: int

class LongAnswerQuestion(BaseModel):
    question: str
    max_marks: int

class SourceBasedQuestion(BaseModel):
    source_text: str
    questions: List[ShortAnswerQuestion]

# Pydantic model for the entire question paper
class QuestionPaper(BaseModel):
    section_a: List[Union[MCQQuestion, AssertionReasonQuestion]] = Field(..., min_items=20, max_items=20)
    section_b: List[ShortAnswerQuestion] = Field(..., min_items=5, max_items=5)
    section_c: List[ShortAnswerQuestion] = Field(..., min_items=6, max_items=6)
    section_d: List[LongAnswerQuestion] = Field(..., min_items=4, max_items=4)
    section_e: List[SourceBasedQuestion] = Field(..., min_items=3, max_items=3)

In [8]:
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from pylatexenc.latex2text import LatexNodes2Text
from pylatexenc.latex2text import LatexNodes2Text

latex_converter = LatexNodes2Text()

# Initialize the ChatOpenAI model
chat = ChatOpenAI(model="gpt-4o-mini", temperature=0.5)

# Create a prompt template
def create_prompt_template(subject, topic, grade, difficulty=None, custom_instructions=None, highlight_topics=None):
    difficulty_str = f"The difficulty level should be {difficulty}." if difficulty else ""
    custom_instructions_str = f"Custom Instructions: {custom_instructions}" if custom_instructions else ""
    highlight_topics_str = f"Highlight the following topics in the questions: {highlight_topics}" if highlight_topics else ""

    prompt_template = ChatPromptTemplate.from_template(
        f"""Generate a {subject} question paper for Class {grade} with the following structure:
        Section A: 18 MCQs and 2 Assertion-Reason questions (1 mark each)
        Section B: 5 Very Short Answer questions (2 marks each)
        Section C: 6 Short Answer questions (3 marks each)
        Section D: 4 Long Answer questions (5 marks each)
        Section E: 3 source-based/case-based/passage-based questions (4 marks each)

        Ensure that the questions cover the following topic: {topic}.
        {difficulty_str}

        {custom_instructions_str}

        {highlight_topics_str}

        Format each question as follows:

        Section A:
        MCQ1: Question text
        A) Option A
        B) Option B
        C) Option C
        D) Option D
        Correct Answer: X

        AR1: Assertion: Assertion text
        Reason: Reason text
        Correct Answer: True/False

        Section B:
        B1: Question text (2 marks)

        Section C:
        C1: Question text (3 marks)

        Section D:
        D1: Question text (5 marks)

        Section E:
        E1: Source text
        a) Question 1 (1 mark)
        b) Question 2 (1 mark)
        c) Question 3 (1 mark)
        d) Question 4 (1 mark)

        Generate the complete question paper following this format.
        """
    )
    return prompt_template
# Generate the question paper
def generate_question_paper(subject, topic, grade, difficulty=None, custom_instructions=None, highlight_topics=None):
    # Create a prompt template with user-defined parameters
    prompt_template = create_prompt_template(subject, topic, grade, difficulty, custom_instructions, highlight_topics)

    # Generate the question paper content using ChatOpenAI
    chain = LLMChain(llm=chat, prompt=prompt_template)
    response = chain.run({})

    # Format the response into a structured format
    content = response.strip()
    return content

# Add lines between sections and handle markdown-like syntax
def format_with_lines(content):
    sections = content.split('\nSection ')
    formatted_content = sections[0]
    for section in sections[1:]:
        formatted_content += '\n\n' + '─' * 80 + '\n\nSection ' + section
    return formatted_content

def apply_markdown_formatting(doc, line):
    if line.startswith('# '):
        # Heading 1
        p = doc.add_paragraph(latex_converter.latex_to_text(line[2:]))
        p.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        run = p.runs[0]
        run.bold = True
        run.font.size = Pt(16)
    elif line.startswith('## '):
        # Heading 2
        p = doc.add_paragraph(latex_converter.latex_to_text(line[3:]))
        run = p.runs[0]
        run.bold = True
        run.font.size = Pt(14)
    elif line.startswith('### '):
        # Heading 3
        p = doc.add_paragraph(latex_converter.latex_to_text(line[4:]))
        run = p.runs[0]
        run.bold = True
        run.font.size = Pt(12)
    elif '**' in line:
        # Bold text
        parts = line.split('**')
        p = doc.add_paragraph()
        for i, part in enumerate(parts):
            run = p.add_run(latex_converter.latex_to_text(part))
            if i % 2 == 1:
                run.bold = True
    else:
        # Normal text
        doc.add_paragraph(latex_converter.latex_to_text(line))

# Save the content to a .txt file
def save_to_txt(filename, content):
    with open(filename, "w") as f:
        f.write(content)

# Save the content to a .docx file
def save_to_docx(filename, content):
    doc = Document()
    for line in content.splitlines():
        if line.strip() == "─" * 80:
            doc.add_paragraph()
            doc.add_paragraph("─" * 80)
        else:
            apply_markdown_formatting(doc, line)
    doc.save(filename)

# Generate the question paper based on user input and print it
subject = "Mathematics"
topic = "Calculus"
grade = "XII"
difficulty = "Medium"  # Optional
custom_instructions = "Focus on application-based questions."  # Optional
highlight_topics = "Integration, Differentiation"  # Optional

generated_paper = generate_question_paper(subject, topic, grade, difficulty, custom_instructions, highlight_topics)
formatted_paper = format_with_lines(generated_paper)
print(formatted_paper)

# Save the formatted paper to a .txt file
save_to_txt("question_paper.txt", formatted_paper)

# Save the formatted paper to a .docx file
save_to_docx("question_paper.docx", formatted_paper)

# Class XII Mathematics Question Paper: Calculus

**Instructions:** All questions are compulsory. The question paper consists of 5 sections. 

---

### Section A: (20 Marks)

**MCQ1:** What is the derivative of \( f(x) = x^3 + 3x^2 - 5x + 7 \)?  
A) \( 3x^2 + 6x - 5 \)  
B) \( 3x^2 + 5 \)  
C) \( 3x^2 + 6x + 7 \)  
D) \( 3x^3 + 2x \)  
**Correct Answer:** A

**MCQ2:** Which of the following is the integral of \( \sin(x) \)?  
A) \( -\cos(x) + C \)  
B) \( \cos(x) + C \)  
C) \( \sin(x) + C \)  
D) \( -\sin(x) + C \)  
**Correct Answer:** A

**MCQ3:** If \( f(x) = e^{2x} \), what is \( f'(x) \)?  
A) \( 2e^{2x} \)  
B) \( e^{2x} \)  
C) \( 2xe^{2x} \)  
D) \( 2e^{x} \)  
**Correct Answer:** A

**MCQ4:** The area under the curve \( y = x^2 \) from \( x = 0 \) to \( x = 2 \) is:  
A) \( \frac{8}{3} \)  
B) \( 2 \)  
C) \( 4 \)  
D) \( \frac{4}{3} \)  
**Correct Answer:** A

**MCQ5:** What is the second derivative of \( f(x) = 3x^4 - 4x^3 + 2x - 1 \)?  
A) \( 36x^2 - 24x + 2 \)  
B) \( 12x

# pylatexenc testing

In [6]:
!pip install pylatexenc --quiet

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/162.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/162.6 kB[0m [31m620.7 kB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.0/162.6 kB[0m [31m793.4 kB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━[0m [32m143.4/162.6 kB[0m [31m1.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.6/162.6 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for pylatexenc (setup.py) ... [?25l[?25hdone


In [7]:
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from pylatexenc.latex2text import LatexNodes2Text
from pylatexenc.latex2text import LatexNodes2Text

latex_converter = LatexNodes2Text()

def apply_markdown_formatting(doc, line):
    if line.startswith('# '):
        # Heading 1
        p = doc.add_paragraph(latex_converter.latex_to_text(line[2:]))
        p.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        run = p.runs[0]
        run.bold = True
        run.font.size = Pt(16)
    elif line.startswith('## '):
        # Heading 2
        p = doc.add_paragraph(latex_converter.latex_to_text(line[3:]))
        run = p.runs[0]
        run.bold = True
        run.font.size = Pt(14)
    elif line.startswith('### '):
        # Heading 3
        p = doc.add_paragraph(latex_converter.latex_to_text(line[4:]))
        run = p.runs[0]
        run.bold = True
        run.font.size = Pt(12)
    elif '**' in line:
        # Bold text
        parts = line.split('**')
        p = doc.add_paragraph()
        for i, part in enumerate(parts):
            run = p.add_run(latex_converter.latex_to_text(part))
            if i % 2 == 1:
                run.bold = True
    else:
        # Normal text
        doc.add_paragraph(latex_converter.latex_to_text(line))

doc = Document()

# Sample text (a portion of what you provided)
sample_text = """
### Mathematics Question Paper - Class XII

#### Section A:
**MCQ1:** If \( f(x) = x^3 - 3x^2 + 5 \), then \( f'(2) \) is:
A) 1
B) 2
C) 5
D) 7
Correct Answer: D

**MCQ2:** The integral of \( \int e^{2x} \, dx \) is:
A) \( \frac{1}{2} e^{2x} + C \)
B) \( 2e^{2x} + C \)
C) \( e^{2x} + C \)
D) \( \frac{1}{2} e^{x} + C \)
Correct Answer: A

**MCQ3:** The derivative of \( \sin(x^2) \) with respect to \( x \) is:
A) \( \cos(x^2) \)
B) \( 2x \cos(x^2) \)
C) \( -2x \cos(x^2) \)
D) \( 2x \sin(x^2) \)
Correct Answer: B
"""

# Apply formatting to each line
for line in sample_text.split('\n'):
    apply_markdown_formatting(doc, line)

# Save the document
doc.save('formatted_math_paper.docx')