# Preview Quiz prompt

In [18]:
from pydantic import BaseModel
from openai import OpenAI
from openai.lib._parsing import type_to_response_format_param
import json
from getpass import getpass

# Ask for the API key interactively (getpass does not echo the input) so the
# secret is never stored in the notebook source.
openai_api_key = getpass("OPENAI_API_KEY")
# Shared OpenAI client used by the completion() helper further down.
client = OpenAI(api_key=openai_api_key)

In [19]:
from jinja2 import Template

# Jinja2 prompt for generating two fill-in-the-blank preview quizzes plus a
# sample script for one lesson.
#
# Placeholders: description, key_expression, sub_expression. Any other keyword
# passed to render() is ignored, because the template has no slot for it.
#
# The few-shot examples are kept as valid JSON that matches the QuizResponse
# schema (integer answer_index, no trailing commas, balanced braces) so they do
# not contradict the structured-output format requested from the API.
#
# NOTE(review): the first example's quizzes are not derived from its own
# KEY_EXPRESSION / SUB_EXPRESSION input, which conflicts with the
# "Put blanks on the KEY_EXPRESSION and SUB_EXPRESSION itself" rule — confirm
# whether the example should be rewritten.
prompt_template = Template("""
[PURPOSE]
Preview lesson content through quizzes to give a sneak peek before the actual lesson.
The quizzes must ask key expressions/the most grammatically important phrases themselves written in or based on the GOAL/DESCRIPTION. You must also provide Korean translation of each quiz.

[QUIZ GUIDELINES]
- Create two preview quizzes per topic. One from KEY_EXPRESSION, the other from SUB_EXPRESSIONS.
- Put blanks on the KEY_EXPRESSION and SUB_EXPRESSION itself, no elsewhere.
- No blanks or tildes in Korean translation
- No tildes in the quiz.
- You must make sure you have NO multiple "equally valid" answers
- Answer index = the position number of the answer
- You must make sure that the quiz asks the exact grammar written in the description.
- No proper nouns can be tested.
- Create SAMPLE SCRIPT for each topic at the end. SAMPLE SCRIPT must be 2 turns per group. The two groups MUST NOT be related in terms of the conversation flow, but maintain the grammatical structure. They must not be separated, but in one line.
  e.g.) "sample_script": 동사의 과거형, "A: What did you do last weekend? B: I met my friends and had lunch. A: Where did you travel last summer? B: I went to Vietnam with my family."


[POLICY]
Each quiz needs:
- Modify DESCRIPTION using '~해요' tone in Korean.
- ALL Korean translations must use '~해요' tone, with honorifics.
- A blank (test key patterns itself)
- Korean translation MUST NOT INCLUDE blanks or tildes.
- Three options (one correct, two wrong)
- No multiple "equally valid" answers/options.
- Wrong options must be grammatically wrong when put in the blank.
- Shuffle the order of answer choices='answer indexes (1-3)' to avoid predictability.
- You must count the number of each answer index to make sure the indexes are equally distributed.
- SAMPLE SCRIPT must not be the same as the quizzes. You must change one or two words to create a dialogue of different topic, but maintain the structure.

[Example]
```
Input:
{
    "DESCRIPTION": "동사의 과거형",
    "KEY_EXPRESSION": "My morning began peacefully.",
    "KEY_EXPRESSION_KOR": "내 아침은 평화롭게 시작했어요.",
    "SUB_EXPRESSION": "He prepared a simple breakfast yesterday. She finished her homework before dinner.",
    "SUB_EXPRESSION_KOR": "그는 어제 간단한 아침을 준비했어요. 그녀는 저녁 전에 숙제를 끝냈어요."
}


Output:
{
    "goal": [
        "동사의 과거형을 사용할 수 있어요."
    ],
    "preview_quizzes": [
        {
            "fill_in_blank_quiz": "I ___ a stylish notebook from the discount shop.",
            "korean_translation": "나는 할인 가게에서 멋진 공책을 샀어요.",
            "options": ["buy", "bought", "brought"],
            "answer": "bought",
            "answer_index": 2
        },
        {
            "fill_in_blank_quiz": "He ___ his money wisely at the store.",
            "korean_translation": "그는 가게에서 돈을 현명하게 썼어요.",
            "options": ["spin", "spend", "spent"],
            "answer": "spent",
            "answer_index": 3
        }
    ],
    "sample_script": [
        {
            "sample_script_eng": "A: What did you do last weekend? B: I met my friends and had lunch. A: Where did you travel last summer? B: I went to Vietnam with my family.",
            "sample_script_kor": "A: 저번 주말에 무엇을 했어요? B: 친구들을 만나서 점심을 먹었어요. A: 지난 여름에 어디로 여행갔어요? B: 가족들과 베트남에 갔어요."
        }
    ]
}
```
```
Input:
{
    "DESCRIPTION": "비교급, 최상급",
    "KEY_EXPRESSION": "She worked more steadily than anyone else.",
    "KEY_EXPRESSION_KOR": "그녀는 누구보다도 꾸준히 일했어요.",
    "SUB_EXPRESSION": "He ran more quickly than his classmates. Dreaming big is the best way.",
    "SUB_EXPRESSION_KOR": "그는 반 친구들보다 더 빠르게 달렸어요. 크게 꿈꾸는 것이 최고예요."
}


Output:
{
    "goal": [
        "비교급과 최상급을 사용할 수 있어요."
    ],
    "preview_quizzes": [
        {
            "fill_in_blank_quiz": "She worked ___ steadily than anyone else.",
            "korean_translation": "그녀는 누구보다도 꾸준하게 일했어요.",
            "options": [
                "better",
                "more",
                "good"
            ],
            "answer": "more",
            "answer_index": 2
        },
        {
            "fill_in_blank_quiz": "Dreaming big is the ___ way.",
            "korean_translation": "큰 꿈을 꾸는 것이 최고인 방법이에요.",
            "options": [
                "best",
                "better",
                "most"
            ],
            "answer": "best",
            "answer_index": 1
        }
    ],
    "sample_script": [
        {
            "sample_script_eng": "A: Has she won the race? B: No, there was someone faster than her. A: You are interested in physics, aren't you? B: Yes, but I am more interested in math.",
            "sample_script_kor": "A: 그녀가 경주를 이겼나요? B: 아니요, 그녀보다 빠른 사람이 있었어요. A: 당신은 물리학에 관심이 있어요, 그렇죠? B: 네, 그렇지만 저는 수학에 더 관심이 있어요."
        }
    ]
}
```

[Input]
```
DESCRIPTION: {{description}}
KEY_EXPRESSION: {{key_expression}}
SUB_EXPRESSION: {{sub_expression}}
```

""")

In [20]:
# Render the template once with a hand-written sample lesson (present
# progressive) so the resulting prompt can be inspected before batching.
# The *_kor entries are passed along, but prompt_template has no placeholder
# for them, so they do not appear in the rendered text.
prompt = prompt_template.render({
    "description": "현재진행형",
    "key_expression": "I'm exploring the traditional art gallery.",
    "key_expression_kor": "나는 전통 미술관을 탐험하고 있어요.",
    "sub_expression": "I'm visiting the local heritage site. I'm tasting authentic street cuisine.",
    "sub_expression_kor": "나는 지역 유산지를 방문하고 있어요. 나는 정통 길거리 음식을 맛보고 있어요.",
})

In [21]:
# print() (rather than a bare expression) so the multi-line prompt is shown
# with real line breaks instead of an escaped repr.
print(prompt)


[PURPOSE]
Preview lesson content through quizzes to give a sneak peek before the actual lesson.
The quizzes must ask key expressions/the most grammatically important phrases themselves written in or based on the GOAL/DESCRIPTION. You must also provide Korean translation of each quiz.

[QUIZ GUIDELINES]
- Create two preview quizzes per topic. One from KEY_EXPRESSION, the other from SUB_EXPRESSIONS.
- Put blanks on the KEY_EXPRESSION and SUB_EXPRESSION itself, no elsewhere.
- No blanks or tildes in Korean translation
- No tildes in the quiz.
- You must make sure you have NO multiple "equally valid" answers
- Answer index = the position number of the answer
- You must make sure that the quiz asks the exact grammar written in the description.
- No proper nouns can be tested.
- Create SAMPLE SCRIPT for each topic at the end. SAMPLE SCRIPT must be 2 turns per group. The two groups MUST NOT be related in terms of the conversation flow, but maintain the grammatical structure. They must not be

In [22]:
# Structured-output models for the quiz response.
# Documentation is kept in `#` comments on purpose: Pydantic copies class
# docstrings into the generated JSON schema, which would alter the
# response_format sent to the API.

# One fill-in-the-blank quiz item.
class PreviewQuiz(BaseModel):
    fill_in_blank_quiz: str   # English sentence containing the blank
    korean_translation: str   # Korean translation of the full sentence
    options: list[str]        # answer choices (the prompt asks for three)
    answer: str               # the correct choice
    answer_index: int         # position of the answer (the prompt asks for 1-3)


# English/Korean pair of one sample dialogue.
class SampleScript(BaseModel):
    sample_script_eng: str
    sample_script_kor: str


# Top-level object the model must return for one lesson.
class QuizResponse(BaseModel):
    goal: list[str]
    preview_quizzes: list[PreviewQuiz]
    sample_script: list[SampleScript]

In [23]:
# Convert the Pydantic model into the `response_format` dict that is embedded
# in each batch request body by QuizMake.
# NOTE(review): type_to_response_format_param is imported from a private module
# (openai.lib._parsing), so it may change between SDK releases.
response_format = type_to_response_format_param(QuizResponse)

In [24]:
# Display the generated JSON-schema payload for a quick visual check.
response_format

{'type': 'json_schema',
 'json_schema': {'schema': {'$defs': {'PreviewQuiz': {'properties': {'fill_in_blank_quiz': {'title': 'Fill In Blank Quiz',
       'type': 'string'},
      'korean_translation': {'title': 'Korean Translation', 'type': 'string'},
      'options': {'items': {'type': 'string'},
       'title': 'Options',
       'type': 'array'},
      'answer': {'title': 'Answer', 'type': 'string'},
      'answer_index': {'title': 'Answer Index', 'type': 'integer'}},
     'required': ['fill_in_blank_quiz',
      'korean_translation',
      'options',
      'answer',
      'answer_index'],
     'title': 'PreviewQuiz',
     'type': 'object',
     'additionalProperties': False},
    'SampleScript': {'properties': {'sample_script_eng': {'title': 'Sample Script Eng',
       'type': 'string'},
      'sample_script_kor': {'title': 'Sample Script Kor', 'type': 'string'}},
     'required': ['sample_script_eng', 'sample_script_kor'],
     'title': 'SampleScript',
     'type': 'object',
     '

In [25]:
def completion(prompt: str) -> "QuizResponse | None":
    """Request quizzes for one rendered prompt and return the parsed result.

    Sends the prompt to the ``o3-mini`` model through the SDK's structured-output
    ``parse`` helper, using ``QuizResponse`` as the response format.

    Args:
        prompt: Fully rendered user prompt text.

    Returns:
        The ``parsed`` attribute of the first choice's message, i.e. a
        ``QuizResponse`` instance rather than a string. It may be ``None`` when
        the SDK could not produce a parsed object (e.g. a refusal), so callers
        should check before using it.
    """
    response = client.beta.chat.completions.parse(
        model='o3-mini',
        reasoning_effort='low',
        messages=[
            {"role" : "system", "content" : "You will provide quizzes related to the topics students have learned, appropriately matching their CEFR level for middle school students."},
            {"role" : "user", "content" : prompt}
        ],
        response_format=QuizResponse,
    )
    return response.choices[0].message.parsed

In [26]:
# Single live API call with the sample prompt, used as a smoke test before
# building the batch file.
response = completion(prompt)

In [27]:
# Show the parsed object returned by completion().
response

QuizResponse(goal=['현재진행형을 사용할 수 있어요.'], preview_quizzes=[PreviewQuiz(fill_in_blank_quiz="I'm ___ the traditional art gallery.", korean_translation='저는 전통 미술관을 탐방하고 있어요.', options=['explored', 'exploring', 'explore'], answer='exploring', answer_index=2), PreviewQuiz(fill_in_blank_quiz="I'm ___ authentic street cuisine.", korean_translation='저는 정통 길거리 음식을 맛보고 있어요.', options=['tasting', 'taste', 'tasted'], answer='tasting', answer_index=1)], sample_script=[SampleScript(sample_script_eng='A: Are you studying in the library now? B: Yes, I am reading a fascinating novel. A: Is your friend cooking dinner currently? B: Yes, she is preparing pasta.', sample_script_kor='A: 지금 도서관에서 공부하고 계세요? B: 네, 매력적인 소설을 읽고 있어요. A: 친구분이 지금 저녁을 준비하고 계세요? B: 네, 파스타를 만들고 있어요.')])

In [28]:
# Pretty-print the parsed response as JSON. model_dump() replaces .dict(),
# which is deprecated in Pydantic v2; ensure_ascii=False keeps the Korean text
# readable instead of \u-escaped.
response_output = json.dumps(response.model_dump(), ensure_ascii=False, indent=4)
print(response_output)

{
    "goal": [
        "현재진행형을 사용할 수 있어요."
    ],
    "preview_quizzes": [
        {
            "fill_in_blank_quiz": "I'm ___ the traditional art gallery.",
            "korean_translation": "저는 전통 미술관을 탐방하고 있어요.",
            "options": [
                "explored",
                "exploring",
                "explore"
            ],
            "answer": "exploring",
            "answer_index": 2
        },
        {
            "fill_in_blank_quiz": "I'm ___ authentic street cuisine.",
            "korean_translation": "저는 정통 길거리 음식을 맛보고 있어요.",
            "options": [
                "tasting",
                "taste",
                "tasted"
            ],
            "answer": "tasting",
            "answer_index": 1
        }
    ],
    "sample_script": [
        {
            "sample_script_eng": "A: Are you studying in the library now? B: Yes, I am reading a fascinating novel. A: Is your friend cooking dinner currently? B: Yes, she is preparing pasta.",
            "sampl

In [29]:
# Plain-text (str) form of the same parsed object, for comparison with the repr above.
print(response)

goal=['현재진행형을 사용할 수 있어요.'] preview_quizzes=[PreviewQuiz(fill_in_blank_quiz="I'm ___ the traditional art gallery.", korean_translation='저는 전통 미술관을 탐방하고 있어요.', options=['explored', 'exploring', 'explore'], answer='exploring', answer_index=2), PreviewQuiz(fill_in_blank_quiz="I'm ___ authentic street cuisine.", korean_translation='저는 정통 길거리 음식을 맛보고 있어요.', options=['tasting', 'taste', 'tasted'], answer='tasting', answer_index=1)] sample_script=[SampleScript(sample_script_eng='A: Are you studying in the library now? B: Yes, I am reading a fascinating novel. A: Is your friend cooking dinner currently? B: Yes, she is preparing pasta.', sample_script_kor='A: 지금 도서관에서 공부하고 계세요? B: 네, 매력적인 소설을 읽고 있어요. A: 친구분이 지금 저녁을 준비하고 계세요? B: 네, 파스타를 만들고 있어요.')]


# for문 돌려서 Batch API jsonl 파일 형식 만들기

확인 사항
- A1, A2, B1, B2, C1, C2 CEFR level 별로 퀴즈만 만들어지면 되는건가?
- 여섯개의 레벨 모두 만들어야하는지, 일부 레벨(A1, B1, C1)만 만들면 되는지도 결정

1. 커리큘럼 정보 csv 파일 읽어오기
2. for문으로 한 줄씩 순회하면서 프롬프트 만들어주기
3. 렌더링한 프롬프트 파일에 써주기 (처음엔 조금만, ex. 5개 커리큘럼)
4. openai batch API 요청해서 task_id 받아오기 
5. batch task 완료되면 결과 받아오고 검수하기

In [30]:
def QuizMake(data, output_filename):
    """Write an OpenAI Batch API request file (JSONL) with one request per row.

    For every row of ``data`` the prompt template is rendered from the
    ``DESCRIPTION``, ``Key Expression (영어)`` and ``Sub Expression (영어)``
    columns and wrapped in a ``/v1/chat/completions`` batch request whose
    ``custom_id`` is ``request-<n>`` (1-based row position).

    Args:
        data: pandas DataFrame holding the three columns named above. Rows are
            read by position, so the index does not have to be 0..n-1.
        output_filename: Path of the JSONL file to create or overwrite.

    Raises:
        KeyError: If one of the required columns is missing.

    Notes:
        * The sub expression is passed to the template as plain text. Wrapping
          it in a list made Jinja render a Python list repr (``['...']``) in
          the prompt.
        * The file is written once after all requests are built, instead of
          being rewritten on every loop iteration.
        * An empty ``data`` produces an empty file and reports a count of 0.
        * NOTE(review): the interactive ``completion`` helper uses model
          ``o3-mini`` with ``reasoning_effort='low'``, while this body pins
          ``o3-mini-2025-01-31`` and sets no reasoning effort — confirm that
          the difference is intended.
    """
    source_columns = ["DESCRIPTION", "Key Expression (영어)", "Sub Expression (영어)"]
    rows = data[source_columns].itertuples(index=False, name=None)

    jsonl_data = []
    for request_no, (description, key_expression, sub_expression) in enumerate(rows, start=1):
        prompt = prompt_template.render(
            description=description,
            key_expression=key_expression,
            sub_expression=sub_expression,
        )

        jsonl_data.append({
            "custom_id" : f"request-{request_no}",
            "method" : "POST",
            "url" : "/v1/chat/completions",
            "body" : {
                "model" : "o3-mini-2025-01-31",
                "messages" : [
                    {"role": "system", "content": "You will provide quizzes related to the topics students have learned, appropriately matching their CEFR level for middle school students."},
                    {"role": "user", "content": prompt}
                ],
                "response_format" : response_format
            }
        })

    # One pass over the collected requests; ensure_ascii=False keeps Korean text as-is.
    with open(output_filename, 'w', encoding='utf-8') as jsonl_file:
        for item in jsonl_data:
            jsonl_file.write(json.dumps(item, ensure_ascii=False) + '\n')

    print(f'JSONL 파일 생성 완료 : {output_filename}-{len(jsonl_data)}')

## 중학교 jsonl 파일

In [31]:
import pandas as pd

# Load the merged middle-school curriculum sheet.
raw_data = pd.read_csv("../../Structure/Mid/MergedData_Mid.csv")

# Keep only the 'structure' rows and renumber them from 0; reset_index() also
# keeps the original row labels in a new 'index' column.
df = raw_data.loc[lambda frame: frame['구분'] == 'structure'].reset_index()

display(df.head(3))

Unnamed: 0,index,textbook ID,학년,출판사,교육과정,Lesson,구분,Title,DESCRIPTION,Key Expression (영어),...,Index 1,Quiz 2,Korean 2,Option 2-1,Option 2-2,Option 2-3,Answer 2,Index 2,샘플 스크립트 (영어),샘플 스크립트 (한국어)
0,495,701.0,중1,능률(김),2015.0,1.0,structure,Welcome to My World,be동사의 긍정문과 부정문,I am excited to share my story.,...,,,,,,,,,,
1,496,701.0,중1,능률(김),2015.0,2.0,structure,Discover Your Culture,현재진행형,I'm exploring the traditional art gallery.,...,,,,,,,,,,
2,497,701.0,중1,능률(김),2015.0,3.0,structure,"Spend Smart, Save Smart",동사의 과거형,I bought a stylish notebook from the discount ...,...,,,,,,,,,,


In [32]:
# List the available columns; QuizMake reads 'DESCRIPTION',
# 'Key Expression (영어)' and 'Sub Expression (영어)'.
df.columns

Index(['index', 'textbook ID', '학년', '출판사', '교육과정', 'Lesson', '구분', 'Title',
       'DESCRIPTION', 'Key Expression (영어)', 'Key Expression (한국어)',
       'Sub Expression (영어)', 'Sub Expression (한국어)', '학습목표', 'Quiz 1',
       'Korean 1', 'Option 1-1', 'Option 1-2', 'Option 1-3', 'Answer 1',
       'Index 1', 'Quiz 2', 'Korean 2', 'Option 2-1', 'Option 2-2',
       'Option 2-3', 'Answer 2', 'Index 2', '샘플 스크립트 (영어)', '샘플 스크립트 (한국어)'],
      dtype='object')

In [33]:
# Sanity check, one number per line: total rows, 'structure' rows kept,
# and rows filtered out.
print(len(raw_data), len(df), len(raw_data) - len(df), sep="\n")

988
493
495


In [34]:
# Build the batch request file for every middle-school 'structure' row.
QuizMake(df, output_filename = 'PreviewQuiz_Structure_Mid_batch.jsonl')

JSONL 파일 생성 완료 : PreviewQuiz_Structure_Mid_batch.jsonl-493
