# Preview Quiz prompt

In [1]:
from pydantic import BaseModel
from openai import OpenAI
from openai.lib._parsing import type_to_response_format_param
import json
from getpass import getpass

# Read the API key interactively so it is never stored in the notebook source.
openai_api_key = getpass("OPENAI_API_KEY")
# Shared OpenAI client; `completion` below sends its request through it.
client = OpenAI(api_key=openai_api_key)

In [2]:
from jinja2 import Template

# Prompt for the preview-quiz generator. Rendered with three variables:
# `description`, `key_expression` and `sub_expression`.
# The few-shot outputs are kept as valid JSON with an integer `answer_index`
# so that they agree with the QuizResponse schema (`answer_index : int`), and the
# [Input] labels use the same KEY_EXPRESSION / SUB_EXPRESSION spelling that the
# guidelines and examples refer to.
prompt_template = Template("""
[PURPOSE]
Preview lesson content through quizzes to give a sneak peek before the actual lesson.
The quizzes must ask key expressions/the most grammatically important phrases themselves written in or based on the GOAL/DESCRIPTION. You must also provide Korean translation of each quiz.

[QUIZ GUIDELINES]
- Create two preview quizzes per topic. One from KEY_EXPRESSION, the other from SUB_EXPRESSION.
- Put blanks on the KEY_EXPRESSION and SUB_EXPRESSION itself, nowhere else.
- No blanks or tildes in Korean translation
- No tildes in the quiz.
- You must make sure you have NO multiple "equally valid" answers
- Answer index = the position number of the answer
- You must make sure that the quiz asks the exact grammar written in the description.
- No proper nouns can be tested.
- Create SAMPLE SCRIPT for each topic at the end. SAMPLE SCRIPT must be 2 turns per group. The two groups MUST NOT be related in terms of the conversation flow, but maintain the grammatical structure. They must not be separated, but in one line.
  e.g.) "sample_script": 동사의 과거형, "A: What did you do last weekend? B: I met my friends and had lunch. A: Where did you travel last summer? B: I went to Vietnam with my family."


[POLICY]
Each quiz needs:
- Modify DESCRIPTION using '~해요' tone in Korean.
- ALL Korean translations must use '~해요' tone, with honorifics.
- A blank (test key patterns itself)
- Korean translation MUST NOT INCLUDE blanks or tildes.
- Three options (one correct, two wrong)
- No multiple "equally valid" answers/options.
- Wrong options must be grammatically wrong when put in the blank.
- Shuffle the order of answer choices='answer indexes (1-3)' to avoid predictability.
- You must count the number of each answer index to make sure the indexes are equally distributed.
- SAMPLE SCRIPT must not be the same as the quizzes. You must change one or two words to create a dialogue of different topic, but maintain the structure.

[Example]
```
Input:
{
    "DESCRIPTION": "동사의 과거형",
    "KEY_EXPRESSION": "My morning began peacefully.",
    "SUB_EXPRESSION": "He prepared a simple breakfast yesterday. She finished her homework before dinner."
}


Output:
{
    "goal": [
        "동사의 과거형을 사용할 수 있어요."
    ],
    "preview_quizzes": [
        {
            "fill_in_blank_quiz": "I ___ a stylish notebook from the discount shop.",
            "korean_translation": "나는 할인 가게에서 멋진 공책을 샀어요.",
            "options": ["buy", "bought", "brought"],
            "answer": "bought",
            "answer_index": 2
        },
        {
            "fill_in_blank_quiz": "He ___ his money wisely at the store.",
            "korean_translation": "그는 가게에서 돈을 현명하게 썼어요.",
            "options": ["spin", "spend", "spent"],
            "answer": "spent",
            "answer_index": 3
        }
    ],
    "sample_script": [
        {
            "sample_script_eng": "A: What did you do last weekend? B: I met my friends and had lunch. A: Where did you travel last summer? B: I went to Vietnam with my family.",
            "sample_script_kor": "A: 저번 주말에 무엇을 했어요? B: 친구들을 만나서 점심을 먹었어요. A: 지난 여름에 어디로 여행갔어요? B: 가족들과 베트남에 갔어요."
        }
    ]
}
```
```
Input:
{
    "DESCRIPTION": "비교급, 최상급",
    "KEY_EXPRESSION": "She worked more steadily than anyone else.",
    "SUB_EXPRESSION": "He ran more quickly than his classmates. Dreaming big is the best way."
}


Output:
{
    "goal": [
        "비교급과 최상급을 사용할 수 있어요."
    ],
    "preview_quizzes": [
        {
            "fill_in_blank_quiz": "She worked ___ steadily than anyone else.",
            "korean_translation": "그녀는 누구보다도 꾸준하게 일했어요.",
            "options": [
                "better",
                "more",
                "good"
            ],
            "answer": "more",
            "answer_index": 2
        },
        {
            "fill_in_blank_quiz": "Dreaming big is the ___ way.",
            "korean_translation": "큰 꿈을 꾸는 것이 최고인 방법이에요.",
            "options": [
                "best",
                "better",
                "most"
            ],
            "answer": "best",
            "answer_index": 1
        }
    ],
    "sample_script": [
        {
            "sample_script_eng": "A: Has she won the race? B: No, there was someone faster than her. A: You are interested in physics, aren't you? B: Yes, but I am more interested in math.",
            "sample_script_kor": "A: 그녀가 경주를 이겼나요? B: 아니요, 그녀보다 빠른 사람이 있었어요. A: 당신은 물리학에 관심이 있어요, 그렇죠? B: 네, 그렇지만 저는 수학에 더 관심이 있어요."
        }
    ]
}
```

[Input]
```
DESCRIPTION: {{description}}
KEY_EXPRESSION: {{key_expression}}
SUB_EXPRESSION: {{sub_expression}}
```

""")

In [42]:
# Render one prompt by hand to smoke-test the template before batching.
# All three template variables are passed as plain strings here.
prompt = prompt_template.render(
    description = "관계부사 where",
    key_expression = "This is the park where we played as kids.",
    sub_expression = "I remember the café where we first met. She visited the city where her parents were born."
)

In [4]:
# Show the rendered text; `completion` sends this string as the user message.
print(prompt)


[PURPOSE]
Preview lesson content through quizzes to give a sneak peek before the actual lesson.
The quizzes must ask key expressions/the most grammatically important phrases themselves written in or based on the GOAL/DESCRIPTION. You must also provide Korean translation of each quiz.

[QUIZ GUIDELINES]
- Create two preview quizzes per topic. One from KEY_EXPRESSION, the other from SUB_EXPRESSIONS.
- Put blanks on the KEY_EXPRESSION and SUB_EXPRESSION itself, no elsewhere.
- No blanks or tildes in Korean translation
- No tildes in the quiz.
- You must make sure you have NO multiple "equally valid" answers
- Answer index = the position number of the answer
- You must make sure that the quiz asks the exact grammar written in the description.
- No proper nouns can be tested.
- Create SAMPLE SCRIPT for each topic at the end. SAMPLE SCRIPT must be 2 turns per group. The two groups MUST NOT be related in terms of the conversation flow, but maintain the grammatical structure. They must not be

In [5]:
# One fill-in-the-blank preview question.
# Documented with `#` comments rather than a docstring on purpose: pydantic copies a
# model's docstring into the generated JSON schema as `description`, which would
# change the `response_format` payload built from these models.
class PreviewQuiz(BaseModel):
    # Quiz sentence containing the blank.
    fill_in_blank_quiz : str
    # Korean translation of the sentence (the prompt forbids blanks/tildes here).
    korean_translation : str
    # Answer choices; the prompt asks for three (one correct, two wrong).
    options : list[str]
    # The correct choice.
    answer : str
    # Position of the answer among `options`; the prompt describes indexes as 1-3.
    # NOTE(review): nothing here validates that options[answer_index - 1] == answer.
    answer_index : int

# Sample dialogue for a topic, in English and Korean.
# (`#` comments instead of a docstring so the generated JSON schema is unchanged.)
class SampleScript(BaseModel):
    # English dialogue on a single line ("A: ... B: ... A: ... B: ...").
    sample_script_eng : str
    # Korean rendering of the same dialogue.
    sample_script_kor : str

# Top-level structured output requested from the model for one topic.
# (`#` comments instead of a docstring so the generated JSON schema is unchanged.)
class QuizResponse(BaseModel):
    # Learning goal sentence(s) derived from DESCRIPTION.
    goal : list[str]
    # The preview quizzes; the prompt asks for two per topic.
    preview_quizzes : list[PreviewQuiz]
    # Sample dialogue(s) for the topic.
    sample_script : list[SampleScript]

In [6]:
# Build the JSON-schema `response_format` dict for QuizResponse; QuizMake embeds it
# in every batch request body.
# NOTE(review): `type_to_response_format_param` is imported from `openai.lib._parsing`,
# an underscore-prefixed (private) module, so it may change between SDK releases.
response_format = type_to_response_format_param(QuizResponse)

In [7]:
# Inspect the generated schema dict.
response_format

{'type': 'json_schema',
 'json_schema': {'schema': {'$defs': {'PreviewQuiz': {'properties': {'fill_in_blank_quiz': {'title': 'Fill In Blank Quiz',
       'type': 'string'},
      'korean_translation': {'title': 'Korean Translation', 'type': 'string'},
      'options': {'items': {'type': 'string'},
       'title': 'Options',
       'type': 'array'},
      'answer': {'title': 'Answer', 'type': 'string'},
      'answer_index': {'title': 'Answer Index', 'type': 'integer'}},
     'required': ['fill_in_blank_quiz',
      'korean_translation',
      'options',
      'answer',
      'answer_index'],
     'title': 'PreviewQuiz',
     'type': 'object',
     'additionalProperties': False},
    'SampleScript': {'properties': {'sample_script_eng': {'title': 'Sample Script Eng',
       'type': 'string'},
      'sample_script_kor': {'title': 'Sample Script Kor', 'type': 'string'}},
     'required': ['sample_script_eng', 'sample_script_kor'],
     'title': 'SampleScript',
     'type': 'object',
     '

In [8]:
def completion(prompt: str) -> "QuizResponse | None":
    """Send one rendered prompt to the chat model and return the structured result.

    The request goes through `client.beta.chat.completions.parse` with
    `response_format=QuizResponse`, so the SDK parses the reply into that model.

    Args:
        prompt: Fully rendered user prompt (e.g. from `prompt_template.render`).

    Returns:
        The `parsed` attribute of the first choice's message, i.e. a
        `QuizResponse` instance rather than a string.
        NOTE(review): the SDK types `parsed` as optional (for example when the
        model refuses), so callers should be prepared for `None` — confirm.
    """
    system_message = (
        "You will provide quizzes related to the topics students have learned, "
        "appropriately matching their CEFR level for high school students."
    )
    response = client.beta.chat.completions.parse(
        model='o3-mini',
        reasoning_effort='low',
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": prompt},
        ],
        response_format=QuizResponse,
    )
    return response.choices[0].message.parsed

In [43]:
# Send the hand-rendered prompt through `completion` (a live API request).
response = completion(prompt)

In [10]:
# Display the parsed result for manual review.
response

QuizResponse(goal=['현재완료 진행형을 사용할 수 있어요.'], preview_quizzes=[PreviewQuiz(fill_in_blank_quiz='I’ve ___ looking for new hobbies lately.', korean_translation='저는 최근에 새로운 취미를 찾고 있어요.', options=['had', 'been', 'was'], answer='been', answer_index=2), PreviewQuiz(fill_in_blank_quiz='She has ___ learning how to cook.', korean_translation='그녀는 요리하는 법을 배우고 있어요.', options=['been', 'did', 'was'], answer='been', answer_index=1)], sample_script=[SampleScript(sample_script_eng='A: Have you been studying for the exams? B: Yes, I have been reviewing my notes. A: Are you trying a new sport? B: Yes, I have been learning tennis.', sample_script_kor='A: 시험 준비를 하고 있었어요? B: 네, 노트를 복습하고 있었어요. A: 새로운 스포츠를 시도하고 있었어요? B: 네, 테니스를 배우고 있었어요.')])

In [44]:
# Pretty-print the parsed quiz as JSON; ensure_ascii=False keeps the Korean text readable.
# NOTE(review): `.dict()` is the pydantic v1 spelling (deprecated in v2 in favour of
# `.model_dump()`) — confirm against the installed pydantic version.
response_output = json.dumps(response.dict(), ensure_ascii=False, indent = 4)
print(response_output)

{
    "goal": [
        "관계부사 where를 사용할 수 있어요."
    ],
    "preview_quizzes": [
        {
            "fill_in_blank_quiz": "This is the park ___ we played as kids.",
            "korean_translation": "이곳은 우리가 어렸을 때 놀았던 공원이예요.",
            "options": [
                "when",
                "where",
                "which"
            ],
            "answer": "where",
            "answer_index": 2
        },
        {
            "fill_in_blank_quiz": "I remember the café ___ we first met.",
            "korean_translation": "나는 우리가 처음 만났던 카페를 기억해요.",
            "options": [
                "where",
                "which",
                "when"
            ],
            "answer": "where",
            "answer_index": 1
        }
    ],
    "sample_script": [
        {
            "sample_script_eng": "A: This is the bookstore where I discovered a rare book. B: Yes, it is the shop where I frequently browse. A: I visited the landmark where history comes alive. B: Indeed, it is the 

In [12]:
# Plain-text form of the same response object.
print(response)

goal=['부정어구 도치를 사용할 수 있어요.'] preview_quizzes=[PreviewQuiz(fill_in_blank_quiz='Not only ___ we prepare the dish carefully, but it turned out to be delicious.', korean_translation='우리는 음식을 조심스럽게 준비했을 뿐만 아니라 정말 맛있게 됐어요.', options=['did', 'do', 'done'], answer='did', answer_index=2), PreviewQuiz(fill_in_blank_quiz='Not only ___ the chef experiment with fresh herbs, but the recipe was a hit.', korean_translation='요리사는 신선한 허브로 실험했을 뿐만 아니라 그 레시피는 성공적이었어요.', options=['did', 'do', 'does'], answer='did', answer_index=1)] sample_script=[SampleScript(sample_script_eng='A: Not only did you finish the project early, but you also presented it clearly. B: Yes, I worked hard and organized everything well. A: Not only did they launch the new product successfully, but they also received great feedback. B: Indeed, their efforts were highly appreciated.', sample_script_kor='A: 프로젝트를 일찍 끝냈을 뿐만 아니라 명확하게 발표했어요? B: 네, 열심히 일하고 모든 것을 잘 조직했어요. A: 그들은 신제품을 성공적으로 출시했을 뿐만 아니라 훌륭한 반응도 받았어요? B: 맞아요, 그들의 노력이 크게 인정받았어요.

# for문 돌려서 Batch API jsonl 파일 형식 만들기

확인 사항
- A1, A2, B1, B2, C1, C2 CEFR level 별로 퀴즈만 만들어지면 되는건가?
- 여섯개의 레벨 모두 만들어야하는지, 일부 레벨(A1, B1, C1)만 만들면 되는지도 결정

1. 커리큘럼 정보 csv 파일 읽어오기
2. for문으로 한줄씩 순회하면서 프롬프트 만들어주기
3. 렌더링한 프롬프트 파일에 써주기 (처음엔 조금만, ex. 5개 커리큘럼)
4. openai batch API 요청해서 task_id 받아오기 
5. batch task 완료되면 결과 받아오고 검수하기

In [28]:
def QuizMake(data, output_filename, school_level="middle"):
    """Build an OpenAI Batch API input file with one quiz request per curriculum row.

    For every row of `data` a prompt is rendered from the module-level
    `prompt_template`, wrapped in a `/v1/chat/completions` batch request that
    carries the module-level `response_format`, and written as one JSON line.

    Args:
        data: DataFrame with `DESCRIPTION`, `KEY_EXPRESSION` and `SUB_EXPRESSION`
            columns. Rows are read by position, so any index is accepted.
        output_filename: Path of the JSONL file to create (overwritten if present).
        school_level: Word placed before "school students" in the system message.
            Defaults to "middle", which reproduces the original wording.

    Raises:
        KeyError: If one of the three required columns is missing.

    Side effects:
        Writes `output_filename` and prints a completion message that ends with
        the number of requests written.
    """
    source_columns = ["DESCRIPTION", "KEY_EXPRESSION", "SUB_EXPRESSION"]
    system_message = (
        "You will provide quizzes related to the topics students have learned, "
        f"appropriately matching their CEFR level for {school_level} school students."
    )
    jsonl_data = []

    rows = data[source_columns].itertuples(index=False, name=None)
    for request_number, (description, key_expression, sub_expression) in enumerate(rows, start=1):
        prompt = prompt_template.render(
            description=description,
            key_expression=key_expression,
            # Pass the cell value itself: wrapping it in a list makes Jinja render
            # the Python list repr (brackets and quotes) into the prompt.
            sub_expression=sub_expression,
        )

        jsonl_data.append({
            "custom_id": f"request-{request_number}",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": "o3-mini-2025-01-31",
                "messages": [
                    {"role": "system", "content": system_message},
                    {"role": "user", "content": prompt},
                ],
                "response_format": response_format,
            },
        })

    # Write once, after all requests are built, instead of rewriting the whole
    # file on every loop iteration.
    with open(output_filename, 'w', encoding='utf-8') as jsonl_file:
        for item in jsonl_data:
            jsonl_file.write(json.dumps(item, ensure_ascii=False) + '\n')

    # len(jsonl_data) equals the old `i+1` for non-empty input and is also
    # defined when `data` has no rows.
    print(f'JSONL 파일 생성 완료 : {output_filename}-{len(jsonl_data)}')

## 고등학교 jsonl 파일

In [29]:
import pandas as pd

# Load the curriculum table that feeds QuizMake. The path is relative, so it
# resolves against the kernel's current working directory.
df = pd.read_csv("../../../Structure/High/Structure_Curriculum_High.csv")

# Preview the first three rows.
display(df.head(3))

Unnamed: 0,제목,출판사(저자),교육과정,Lesson,Title,DESCRIPTION,KEY_EXPRESSION,KEY_EXPRESSION_Kor,SUB_EXPRESSION,SUB_EXPRESSION_Kor
0,영어,능률(김),2015,1,The Part You Play,전치사의 목적어로서의 동명사,I enjoy playing in team sports.,나는 팀 스포츠를 즐기는 것을 좋아해요.,She is excited about scoring goals in matches....,그녀는 경기에서 골을 넣는 것이 신나요. 그는 토너먼트에 참가하는 것이 떨려요.
1,영어,능률(김),2015,2,The Power of Creativity,명사를 수식하는 과거분사(구),The old bike fixed by my father is working smo...,아버지께서 고쳐주신 오래된 자전거가 부드럽게 작동해요.,The delicious cake baked by my teacher won fir...,선생님께서 구워주신 맛있는 케이크가 1등을 했어요. 학생이 쓴 감동적인 이야기가 많...
2,영어,능률(김),2015,3,Sound Life,사역동사+목적어+동사원형,They let me choose a beautiful song.,그들은 내가 아름다운 노래를 선택하게 해줬어요.,My teacher made me listen to a cheerful melody...,선생님은 내가 기분 좋은 멜로디를 듣게 했어요. 내 친구는 내가 상쾌한 커피를 마시...


In [30]:
# List the columns; QuizMake reads DESCRIPTION, KEY_EXPRESSION and SUB_EXPRESSION.
df.columns

Index(['제목', '출판사(저자)', '교육과정', 'Lesson', 'Title', 'DESCRIPTION',
       'KEY_EXPRESSION', 'KEY_EXPRESSION_Kor', 'SUB_EXPRESSION',
       'SUB_EXPRESSION_Kor'],
      dtype='object')

In [31]:
# Row count; QuizMake builds one batch request per row.
print(len(df))

329


In [32]:
# Write the Batch API input file for this curriculum table.
# NOTE(review): this section is the high-school curriculum, but the system message
# inside QuizMake says "middle school students" — confirm which is intended.
QuizMake(df, output_filename = 'PreviewQuiz_Structure_High_batch.jsonl')

JSONL 파일 생성 완료 : PreviewQuiz_Structure_High_batch.jsonl-329
