# Preview Quiz prompt

In [1]:
from pydantic import BaseModel
from openai import OpenAI
from openai.lib._parsing import type_to_response_format_param
import json
from getpass import getpass

openai_api_key = getpass("OPENAI_API_KEY")
client = OpenAI(api_key=openai_api_key)

In [12]:
from jinja2 import Template

prompt_template = Template("""
[PURPOSE]
Please translate the content in SUB_EXPRESSION naturally into Korean.  
Make it sound casual, like talking to a friend.

[Example]
```
Input: 
    {
    "SUB_EXPRESSION": "A: What is this? B: It's a ~. A: What is that? B: It's a ~."
  }

Output:
    {
    "sub_expression_kor": "A: 이것은 뭐야? B: 이것은 ~야. A: 저것은 뭐야? B: 저것은 ~야."
  }
}
```
```
Input: 
    {
    "SUB_EXPRESSION": "A: I enjoy ~. A: I'm fond of ~."
  }

Output:
    {
    "sub_expression_kor": "A: 나는 ~을 즐겨. A: 나는 ~을 좋아해."
  }
}
```
```
Input: 
    {
    "SUB_EXPRESSION": "It’s no big deal. Don’t mention it."
  }

Output:
    {
    "sub_expression_kor": "별거 아니야. 천만에."
  }
}
```                        


[Input]
```
DESCRIPTION: {{description}}
KEY EXPRESSION: {{key_expression}}
SUB EXPRESSION: {{sub_expression}}
```

""")

In [13]:
prompt = prompt_template.render(
    sub_expression = [
        "A: Hi, I’m (). I come from (). B: Nice to meet you. A: Hello, my name is (). I’m from (). B: Great to meet you."
    ]
)

In [14]:
print(prompt)


[PURPOSE]
Please translate the content in SUB_EXPRESSION naturally into Korean.  
Make it sound casual, like talking to a friend.

[Example]
```
Input: 
    {
    "SUB_EXPRESSION": "A: What is this? B: It's a ~. A: What is that? B: It's a ~."
  }

Output:
    {
    "sub_expression_kor": "A: 이것은 뭐야? B: 이것은 ~야. A: 저것은 뭐야? B: 저것은 ~야."
  }
}
```
```
Input: 
    {
    "SUB_EXPRESSION": "A: I enjoy ~. A: I'm fond of ~."
  }

Output:
    {
    "sub_expression_kor": "A: 나는 ~을 즐겨. A: 나는 ~을 좋아해."
  }
}
```
```
Input: 
    {
    "SUB_EXPRESSION": "It’s no big deal. Don’t mention it."
  }

Output:
    {
    "sub_expression_kor": "별거 아니야. 천만에."
  }
}
```                        


[Input]
```
DESCRIPTION: 
KEY EXPRESSION: 
SUB EXPRESSION: ['A: Hi, I’m (). I come from (). B: Nice to meet you. A: Hello, my name is (). I’m from (). B: Great to meet you.']
```



In [15]:
class Translate(BaseModel):
    sub_expression_kor : str

In [16]:
response_format = type_to_response_format_param(Translate)

In [17]:
response_format

{'type': 'json_schema',
 'json_schema': {'schema': {'properties': {'sub_expression_kor': {'title': 'Sub Expression Kor',
     'type': 'string'}},
   'required': ['sub_expression_kor'],
   'title': 'Translate',
   'type': 'object',
   'additionalProperties': False},
  'name': 'Translate',
  'strict': True}}

In [18]:
def completion(prompt : str) -> str:
    response = client.beta.chat.completions.parse(
        model = 'o3-mini',
        reasoning_effort='low',
        messages = [
            {"role" : "system", "content" : "You will provide a Korean translation."},
            {"role" : "user", "content" : prompt}
        ],
        response_format = Translate,
    )
    return response.choices[0].message.parsed

In [19]:
response = completion(prompt)

In [20]:
response

Translate(sub_expression_kor='A: 안녕, 난 ()야. ()에서 왔어. B: 만나서 반가워. A: 안녕, 내 이름은 ()야. ()에서 왔어. B: 만나서 정말 반가워.')

In [21]:
response_output = json.dumps(response.dict(), ensure_ascii=False, indent = 4)
print(response_output)

{
    "sub_expression_kor": "A: 안녕, 난 ()야. ()에서 왔어. B: 만나서 반가워. A: 안녕, 내 이름은 ()야. ()에서 왔어. B: 만나서 정말 반가워."
}


In [22]:
print(response)

sub_expression_kor='A: 안녕, 난 ()야. ()에서 왔어. B: 만나서 반가워. A: 안녕, 내 이름은 ()야. ()에서 왔어. B: 만나서 정말 반가워.'


# for문 돌려서 Batch API jsonl 파일 형식 만들기

확인 사항
- A1, A2, B1, B2, C1, C2 CEFR level 별로 퀴즈만 만들어지면 되는건가?
- 여섯개의 레벨 모두 만들어야하는지, 일부 레벨(A1, B1, C1)만 만들면 되는지도 결정

1. 커리큘럼 정보 csv 파일 읽어오기
2. for문으로 한줄씩 순회하면서 프롬프트 만들어주기들어주기
3. 렌더링한 프롬프트 파일에 써주기 (처음엔 조금만, ex. 5개 커리큘럼)
4. openai batch API 요청해서 task_id 받아오기 
5. batch task 완료되면 결과 받아오고 검수하기

In [29]:
def Translate(data, output_filename):
    jsonl_data = []

    for i in range(len(data)):
        prompt = prompt_template.render(
            sub_expression = data.loc[i,"Sub Expression (영어)"]
        )

        quiz_request = {
            "custom_id" : f"request-{i+1}",
            "method" : "POST",
            "url" : "/v1/chat/completions",
            "body" : {
                "model" : "o3-mini-2025-01-31",
                "messages" : [
                    {"role": "system", "content": "You will provide a Korean translation."},
                    {"role": "user", "content": prompt}
                ],
                "response_format" : response_format
            }
        }

        jsonl_data.append(quiz_request)

        with open(output_filename, 'w', encoding='utf-8') as jsonl_file:
            for item in jsonl_data:
                jsonl_file.write(json.dumps(item, ensure_ascii=False) + '\n')

    print(f'JSONL 파일 생성 완료 : {output_filename}-{i+1}')

## 초등학교 jsonl 파일

In [None]:
import pandas as pd

df = pd.read_csv('MergedData_Mid.csv')
function_df = df.loc[df['구분'] == 'function']
structure_df = df.loc[df['구분'] == 'structure']

drop_col = ['textbook ID', '학년', '출판사', '교육과정', 'Lesson', '구분', 'Title',
       'DESCRIPTION', 'Key Expression (영어)', 'Key Expression (한국어)','학습목표', 'Quiz 1',
       'Korean 1', 'Option 1-1', 'Option 1-2', 'Option 1-3', 'Answer 1',
       'Index 1', 'Quiz 2', 'Korean 2', 'Option 2-1', 'Option 2-2',
       'Option 2-3', 'Answer 2', 'Index 2', '샘플 스크립트 (영어)', '샘플 스크립트 (한국어)', 'Sub Expression (한국어)'
       ]
function_df.drop(columns=drop_col, inplace=True)

print(len(function_df))

494


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  function_df.drop(columns=drop_col, inplace=True)


In [26]:
function_df.head()

Unnamed: 0,Sub Expression (영어)
0,"A: Hi, I’m (). I come from ().\n B: Nice to me..."
1,A: What do you like the most?\n B: I enjoy ()....
2,A: What are your plans for ~? B: I’m planning ...
3,You can find () ().\n () has ().
4,A: It’s a good idea to ().\n B: I’ll give it a...


In [31]:
Translate(function_df, output_filename = 'Translate_Mid_batch.jsonl')

JSONL 파일 생성 완료 : Translate_Mid_batch.jsonl-494
