# Voca 중고딩 예문 양산

In [22]:
from pydantic import BaseModel
from openai import OpenAI
from openai.lib._parsing import type_to_response_format_param
import json
from getpass import getpass

# Ask for the OpenAI API key interactively instead of hard-coding it in the notebook.
openai_api_key = getpass("OPENAI_API_KEY")
# Client object through which the OpenAI API is called.
client = OpenAI(api_key=openai_api_key)

In [23]:
from jinja2 import Template

# Jinja2 prompt asking the model for English example sentences and their Korean
# translations. The placeholders {{Wordtext}}, {{WordMeaning}} and {{PoSText}}
# are substituted when the template is rendered.
# NOTE(review): policy item 2 asks for tab-separated output, while the
# [EXAMPLE] section shows a JSON object with two fields — confirm which output
# format is actually intended before editing the prompt text.
prompt_template = Template("""
You are going to be given a set of English words and Korean meaning. Generate English sentences using each of those words, and provide Korean translation, too. For contextual reference, see the given Korean meaning.

[POLICY]
1. A sentence must be about 15 words.
2. Separate the sentence with a tab in between.
3. For Korean translation, use '~요' at the end of each translated sentence to soften the tone. Make it natural.

[EXAMPLE]
```
Input:
    {
    "WORDTEXT" : "challenge",
    "WORDMEANING" : "난제",
    "POSTEXT" : "명사"
    }

Output:
    {
     "example_sentence" : "This difficult problem was truly a challenge, and I had to think a lot to solve it.",
     "example_sentence_kor" : "이 어려운 문제는 정말 큰 난제였고, 그것을 해결하기 위해 많은 고민을 했어요."
    }
```                           
                           
[Input]
```
WORDTEXT : {{Wordtext}}
WORDMEANING : {{WordMeaning}}
POSTEXT : {{PoSText}}
```                   

""")

In [24]:
# Render the template for one sample word so the resulting prompt can be inspected.
# The context is passed as a single mapping; render() treats it the same as
# keyword arguments.
prompt = prompt_template.render(
    {"Wordtext": "alarm", "WordMeaning": "놀람,경보", "PoSText": "명사"}
)

In [25]:
# Display the rendered prompt to check that the placeholders were substituted.
print(prompt)


You are going to be given a set of English words and Korean meaning. Generate English sentences using each of those words, and provide Korean translation, too. For contextual reference, see the given Korean meaning.

[POLICY]
1. A sentence must be about 15 words.
2. Separate the sentence with a tab in between.
3. For Korean translation, use '~요' at the end of each translated sentence to soften the tone. Make it natural.

[EXAMPLE]
```
Input:
    {
    "WORDTEXT" : "challenge",
    "WORDMEANING" : "난제",
    "POSTEXT" : "명사"
    }

Output:
    {
     "example_sentence" : "This difficult problem was truly a challenge, and I had to think a lot to solve it.",
     "example_sentence_kor" : "이 어려운 문제는 정말 큰 난제였고, 그것을 해결하기 위해 많은 고민을 했어요."
    }
```                           
                           
[Input]
```
WORDTEXT : alarm
WORDMEANING : 놀람,경보
POSTEXT : 명사
```                   



In [26]:
# Structured-output schema for the model response: two string fields.
class Example(BaseModel):
    example_sentence : str  # English example sentence
    example_sentence_kor : str  # Korean translation of the example sentence

In [27]:
# Convert the Example model into a `response_format` value that can be placed
# directly in a request body.
# NOTE(review): type_to_response_format_param comes from openai.lib._parsing,
# an underscore-prefixed (private) module — confirm it still exists when
# upgrading the openai package.
response_format = type_to_response_format_param(Example)

In [28]:
# Bare expression: shows the generated response-format value as the cell output.
response_format

{'type': 'json_schema',
 'json_schema': {'schema': {'properties': {'example_sentence': {'title': 'Example Sentence',
     'type': 'string'},
    'example_sentence_kor': {'title': 'Example Sentence Kor',
     'type': 'string'}},
   'required': ['example_sentence', 'example_sentence_kor'],
   'title': 'Example',
   'type': 'object',
   'additionalProperties': False},
  'name': 'Example',
  'strict': True}}

In [29]:
def completion(prompt: str) -> "Example | None":
    """Request example sentences for one rendered prompt and return the parsed result.

    Args:
        prompt: The rendered user prompt (word, meaning and part of speech).

    Returns:
        The ``parsed`` attribute of the first choice's message. Because the
        request passes ``response_format=Example``, this is an ``Example``
        instance rather than a ``str``. It may be ``None`` when the SDK has
        nothing to parse (for example, if the model refuses), so callers
        should check before accessing its fields.
    """
    response = client.beta.chat.completions.parse(
        model='o3-mini',
        reasoning_effort='low',
        messages=[
            {"role" : "system", "content" : "Generate an example sentence for each word."},
            {"role" : "user", "content" : prompt},
        ],
        # Passing the pydantic class makes the SDK validate the JSON reply
        # and expose it as `message.parsed`.
        response_format=Example,
    )
    return response.choices[0].message.parsed

In [30]:
# Send the sample prompt to the model once and keep the returned value.
response = completion(prompt)

In [31]:
# Show the value returned by completion().
print(response)

example_sentence='During the morning meeting, the loud alarm unexpectedly startled everyone in the bustling busy office.' example_sentence_kor='아침 회의 중에 시끄러운 경보가 갑자기 울리면서 모든 사무실 직원들을 놀라게 했어요.'


In [32]:
# Serialize the result to indented JSON; ensure_ascii=False keeps the Korean
# text as-is instead of \uXXXX escapes.
# NOTE(review): `.dict()` is deprecated in pydantic v2 in favour of
# `.model_dump()` — confirm the installed pydantic version before switching.
response_output = json.dumps(response.dict(), ensure_ascii=False, indent = 4)
print(response_output)
# Bare expression: shows the type of the serialized value as the cell output.
type(response_output)

{
    "example_sentence": "During the morning meeting, the loud alarm unexpectedly startled everyone in the bustling busy office.",
    "example_sentence_kor": "아침 회의 중에 시끄러운 경보가 갑자기 울리면서 모든 사무실 직원들을 놀라게 했어요."
}


str

# for문 돌려서 Batch API jsonl 파일 형식 만들기

In [46]:
def SentenceMake(data, output_filename):
    """Write a Batch API input file (JSONL) with one chat-completion request per row.

    Each row of ``data`` is rendered through ``prompt_template`` and wrapped in
    a request object targeting ``/v1/chat/completions``; requests are numbered
    ``request-1``, ``request-2``, ... in row order.

    Args:
        data: DataFrame with the columns ``WordText``, ``WordMeaning`` and
            ``품사``. Rows are read by position, so any index works (a slice
            such as ``df[700:1400]`` does not need ``reset_index()`` first).
        output_filename: Path of the JSONL file to create or overwrite.

    Returns:
        None. The file is written and a completion message with the number of
        requests is printed.
    """
    # Iterate the three columns in lockstep instead of `data.loc[i, ...]`,
    # which only worked when the index labels happened to be 0..n-1.
    rows = zip(data["WordText"], data["WordMeaning"], data["품사"])

    jsonl_data = [
        {
            "custom_id" : f"request-{row_number}",
            "method" : "POST",
            "url" : "/v1/chat/completions",
            "body" : {
                "model" : "o3-mini-2025-01-31",
                "messages" : [
                    {"role": "system", "content": "Generate an example sentence for each word."},
                    {
                        "role": "user",
                        "content": prompt_template.render(
                            Wordtext=word,
                            WordMeaning=meaning,
                            PoSText=pos,
                        ),
                    },
                ],
                "response_format" : response_format,
            },
        }
        for row_number, (word, meaning, pos) in enumerate(rows, start=1)
    ]

    # Write the file once after all requests are built. The original rewrote
    # the whole file on every loop iteration, which is quadratic in the row count.
    with open(output_filename, 'w', encoding='utf-8') as jsonl_file:
        jsonl_file.writelines(
            json.dumps(item, ensure_ascii=False) + '\n' for item in jsonl_data
        )

    # Report the count from the list itself so an empty frame prints 0 instead
    # of raising NameError on an unbound loop variable.
    print(f'JSONL 파일 생성 완료 : {output_filename}-{len(jsonl_data)}')

In [56]:
import pandas as pd

# Load the vocabulary list; skiprows=1 skips the first line of the CSV file.
df = pd.read_csv('Voca_MidHigh.csv', skiprows=1)
sample = df.head(10)
print(len(df))

# Cut the frame into four consecutive batches by row position; the last
# batch takes whatever remains after row 2100.
df1, df2, df3, df4 = (
    df.iloc[start:stop]
    for start, stop in ((0, 700), (700, 1400), (1400, 2100), (2100, None))
)
for part in (df1, df2, df3, df4):
    print(len(part))
print(f'total : {sum(len(part) for part in (df1, df2, df3, df4))}')

# Reset the index of the later batches so their row labels start again at 0.
df2, df3, df4 = (part.reset_index() for part in (df2, df3, df4))

2842
700
700
700
742
total : 2842


In [57]:
# Generate one Batch API input file per dataframe slice, numbered from 1.
for batch_no, batch_df in enumerate((df1, df2, df3, df4), start=1):
    SentenceMake(batch_df, f"VocaList_batch_{batch_no}.jsonl")

JSONL 파일 생성 완료 : VocaList_batch_1.jsonl-700
JSONL 파일 생성 완료 : VocaList_batch_2.jsonl-700
JSONL 파일 생성 완료 : VocaList_batch_3.jsonl-700
JSONL 파일 생성 완료 : VocaList_batch_4.jsonl-742
