# Voca Quiz Mid and High prompt

In [1]:
from pydantic import BaseModel
from openai import OpenAI
from openai.lib._parsing import type_to_response_format_param
import json
from getpass import getpass

openai_api_key = getpass("OPENAI_API_KEY")
client = OpenAI(api_key=openai_api_key)

In [66]:
from jinja2 import Template

prompt_template = Template("""
[PURPOSE]
```
You are going to generate fill-in-the-blank quiz and its "korean_translation" for vocabularies based on the given input.
The students will guess the answer by reading the "korean_translation".
```

[QUIZ GUIDELINES]
```
- You must create one quizzes per word.
- A quiz sentence must not be over 8 words.
- You must put blanks on the GIVEN word, NOWHERE ELSE.
- You must put blanks on the matching part of "korean_translation".
- There must not be any English in the "korean_translation". 
- You must choose randomly ONLY ONE type of hints.
- You must not put any blanks in "korean_translation" but give full sentence. 
- The overall quizzes must be in the level of CEFR A1 to A2.
```

[HINT 1: ALPHABET]
```
- As a hint, provide the first letter of the word.
```

[HINT 2: UNSCRAMBLE]
```
- As a hint, scramble the letters of the word.
```
		
[HINT 3: LETTER]
```
- This only applies to a word that is under 8 letters, and non-phrases. 
- As a hint, provide the number of the letters in the word.
```

[HINT 4: EXPLAIN]
```
- As a hint, explain the word without mentioning it. W
- hen you explain, you must make sure your explanation is CEFR A2 level or easier.
```

[POLICY]
```
Each quiz needs:
- A blank to test the word. You must not include any grammatical suffix if any.
- Shuffle the hint types to prevent getting boring.
```

[Example : HINT 1]
```
Input: 
    {
    "WordText": "accent",
    "WordMeaning": "강세",
    "PoSText": "명사"
  }

Output:
    {
    "voca_quizzes":
      {
        "fill_in_blank_quiz": "She speaks with a unique ___.",
        "korean_translation": "그녀는 독특한 강세로 말해요.",
        "answer": "accent",
      },
    "hint": 
	    {
		    "hint_eng": "It starts with 'a'!"
		    "hint_kor": "b로 시작하는 단어예요!"
		  }

}
```

[Example : HINT 2]
```
Input: 
    {
    "WordText": "command",
    "WordMeaning": "명령하다",
    "PoSText": "동사"
  }

Output:
    {
    "voca_quizzes":
      {
        "fill_in_blank_quiz": "The officer ___ed the soldiers to move forward.",
        "korean_translation": "장교는 군인들에게 전진하라고 명령했어요.",
        "answer": "command",
      },
    "hint": 
	    {
		    "hint_eng": "Rearrange these letters!: O A N M M C D"
		    "hint_kor": "다음 글자들을 재배열 해보세요!: O A N M M C D"
		  }

}
```
[Example : HINT 3]
```
Input: 
    {
    "WordText": "young",
    "WordMeaning": "(동물의) 새끼",
    "PoSText": "명사"
  }

Output:
    {
    "voca_quizzes":
      {
        "fill_in_blank_quiz": "The ___ lamb stayed near its mother.",
        "korean_translation": "어미 양 옆에 새끼가 있었어요.",
        "answer": "young",
      },
    "hint": 
	    {
		    "hint_eng": "It has 5 letters!"
		    "hint_kor": "5글자예요!"
		  }

}
```

[Example : HINT 4]
```
Input: 
    {
    "WordText": "sunglasses",
    "WordMeaning": "선글라스",
    "PoSText": "명사"
  }

Output:
    {
    "voca_quizzes":
      {
        "fill_in_blank_quiz": "She wears ____ on sunny days.",
        "korean_translation": "그녀는 맑은 날에 선글라스를 써요.",
        "answer": "sunglasses",
      },
    "hint": 
	    {
		    "hint_eng": "Glasses that block sunlight."
		    "hint_kor": "햇빛을 막아주는 안경."
		  }

}
```

[Input]
```
WordText : {{WordText}}
WordMeaning : {{WordMeaning}}
PoSText : {{PoSText}}
```
"""
)

In [67]:
prompt = prompt_template.render(
    WordText = "work out",
    WordMeaning = "(일이) 잘 풀리다",
    PoSText = "숙어/관용어"
) 

In [68]:
print(prompt)


[PURPOSE]
```
You are going to generate fill-in-the-blank quiz and its "korean_translation" for vocabularies based on the given input.
The students will guess the answer by reading the "korean_translation".
```

[QUIZ GUIDELINES]
```
- You must create one quizzes per word.
- A quiz sentence must not be over 8 words.
- You must put blanks on the GIVEN word, NOWHERE ELSE.
- You must put blanks on the matching part of "korean_translation".
- There must not be any English in the "korean_translation". 
- You must choose randomly ONLY ONE type of hints.
- You must not put any blanks in "korean_translation" but give full sentence. 
- The overall quizzes must be in the level of CEFR A1 to A2.
```

[HINT 1: ALPHABET]
```
- As a hint, provide the first letter of the word.
```

[HINT 2: UNSCRAMBLE]
```
- As a hint, scramble the letters of the word.
```
		
[HINT 3: LETTER]
```
- This only applies to a word that is under 8 letters, and non-phrases. 
- As a hint, provide the number of the letters i

In [69]:
class VocaQuiz(BaseModel):
    fill_in_blank_quiz : str
    korean_translation : str
    answer : str

class QuizHint(BaseModel):
    hint_eng : str
    hint_kor : str

class QuizResponse(BaseModel):
    voca_quizzes : list[VocaQuiz]
    hint : list[QuizHint]

In [70]:
response_format = type_to_response_format_param(QuizResponse)

In [71]:
response_format

{'type': 'json_schema',
 'json_schema': {'schema': {'$defs': {'QuizHint': {'properties': {'hint_eng': {'title': 'Hint Eng',
       'type': 'string'},
      'hint_kor': {'title': 'Hint Kor', 'type': 'string'}},
     'required': ['hint_eng', 'hint_kor'],
     'title': 'QuizHint',
     'type': 'object',
     'additionalProperties': False},
    'VocaQuiz': {'properties': {'fill_in_blank_quiz': {'title': 'Fill In Blank Quiz',
       'type': 'string'},
      'korean_translation': {'title': 'Korean Translation', 'type': 'string'},
      'answer': {'title': 'Answer', 'type': 'string'}},
     'required': ['fill_in_blank_quiz', 'korean_translation', 'answer'],
     'title': 'VocaQuiz',
     'type': 'object',
     'additionalProperties': False}},
   'properties': {'voca_quizzes': {'items': {'$ref': '#/$defs/VocaQuiz'},
     'title': 'Voca Quizzes',
     'type': 'array'},
    'hint': {'items': {'$ref': '#/$defs/QuizHint'},
     'title': 'Hint',
     'type': 'array'}},
   'required': ['voca_quizzes

In [72]:
def completion(prompt : str) -> str:
    response = client.beta.chat.completions.parse(
        model = 'o3-mini',
        reasoning_effort='low',
        messages = [
            {"role" : "system", "content" : "You will provide a fill-in-the-blank quiz for each given word, a Korean translation, and a hint."},
            {"role" : "user", "content" : prompt}
        ],
        response_format = QuizResponse,
    )
    return response.choices[0].message.parsed

In [79]:
response = completion(prompt)

In [74]:
response

QuizResponse(voca_quizzes=[VocaQuiz(fill_in_blank_quiz='I believe it will ___ soon.', korean_translation='나는 곧 일이 잘 풀릴 거라고 믿어요.', answer='work out')], hint=[QuizHint(hint_eng='Rearrange these letters!: O T W K U R O', hint_kor='다음 글자들을 재배열 해보세요!: O T W K U R O')])

In [80]:
response_output = json.dumps(response.dict(), ensure_ascii=False, indent = 4)
print(response_output)

{
    "voca_quizzes": [
        {
            "fill_in_blank_quiz": "I hope it will ___ soon.",
            "korean_translation": "나는 일이 잘 풀리길 바래요.",
            "answer": "work out"
        }
    ],
    "hint": [
        {
            "hint_eng": "When things succeed with good results.",
            "hint_kor": "일이 좋은 결과를 내며 잘 될 때를 말해요."
        }
    ]
}


# for문 돌려서 Batch API jsonl 파일 형식 만들기

확인 사항
- A1, A2, B1, B2, C1, C2 CEFR level 별로 퀴즈만 만들어지면 되는건가?
- 여섯개의 레벨 모두 만들어야하는지, 일부 레벨(A1, B1, C1)만 만들면 되는지도 결정

1. 커리큘럼 정보 csv 파일 읽어오기
2. for문으로 한줄씩 순회하면서 프롬프트 만들어주기들어주기
3. 렌더링한 프롬프트 파일에 써주기 (처음엔 조금만, ex. 5개 커리큘럼)
4. openai batch API 요청해서 task_id 받아오기 
5. batch task 완료되면 결과 받아오고 검수하기

In [81]:
def VocaQuizMake(data, output_filename):
    jsonl_data = []

    for i in range(len(data)):
        prompt = prompt_template.render(
            WordText = data.loc[i,"WordText"],
            WordMeaning = data.loc[i,"WordMeaning"],
            PoSText = data.loc[i,"품사"]
        )

        quiz_request = {
            "custom_id" : f"request-{i+1}",
            "method" : "POST",
            "url" : "/v1/chat/completions",
            "body" : {
                "model" : "o3-mini-2025-01-31",
                "messages" : [
                    {"role": "system", "content": "You will provide a fill-in-the-blank quiz for each given word, a Korean translation, and a hint."},
                    {"role": "user", "content": prompt}
                ],
                "response_format" : response_format
            }
        }

        jsonl_data.append(quiz_request)

        with open(output_filename, 'w', encoding='utf-8') as jsonl_file:
            for item in jsonl_data:
                jsonl_file.write(json.dumps(item, ensure_ascii=False) + '\n')

    print(f'JSONL 파일 생성 완료 : {output_filename}-{i+1}')

In [82]:
import pandas as pd

df = pd.read_csv('Final_Voca_MidHigh.csv', skiprows=2)
print(len(df))
df1 = df[:1000]
print(len(df1))
df2 = df[1000:2000]
print(len(df2))
df3 = df[2000:3000]
print(len(df3))
df4 = df[3000:4000]
print(len(df4))
df5 = df[4000:5000]
print(len(df5))
df6 = df[5000:6000]
print(len(df6))
df7 = df[6000:]
print(len(df7))

print(f'total : {len(df1) + len(df2) + len(df3) + len(df4) + len(df5) + len(df6) + len(df7)}')

#인덱스 리셋
df2 = df2.reset_index()
df3 = df3.reset_index()
df4 = df4.reset_index()
df5 = df5.reset_index()
df6 = df6.reset_index()
df7 = df7.reset_index()

7251
1000
1000
1000
1000
1000
1000
1251
total : 7251


In [83]:
VocaQuizMake(df1,"VocaQuiz_MidHigh_batch_1.jsonl")
VocaQuizMake(df2,"VocaQuiz_MidHigh_batch_2.jsonl")
VocaQuizMake(df3,"VocaQuiz_MidHigh_batch_3.jsonl")
VocaQuizMake(df4,"VocaQuiz_MidHigh_batch_4.jsonl")
VocaQuizMake(df5,"VocaQuiz_MidHigh_batch_5.jsonl")
VocaQuizMake(df6,"VocaQuiz_MidHigh_batch_6.jsonl")
VocaQuizMake(df7,"VocaQuiz_MidHigh_batch_7.jsonl")

JSONL 파일 생성 완료 : VocaQuiz_MidHigh_batch_1.jsonl-1000
JSONL 파일 생성 완료 : VocaQuiz_MidHigh_batch_2.jsonl-1000
JSONL 파일 생성 완료 : VocaQuiz_MidHigh_batch_3.jsonl-1000
JSONL 파일 생성 완료 : VocaQuiz_MidHigh_batch_4.jsonl-1000
JSONL 파일 생성 완료 : VocaQuiz_MidHigh_batch_5.jsonl-1000
JSONL 파일 생성 완료 : VocaQuiz_MidHigh_batch_6.jsonl-1000
JSONL 파일 생성 완료 : VocaQuiz_MidHigh_batch_7.jsonl-1251
