# Elem Example Sentence Prompt

In [53]:
import pandas as pd
from openai import OpenAI
from jinja2 import Template
from pydantic import BaseModel
from openai.lib._parsing import type_to_response_format_param
import json
from getpass import getpass

In [2]:
# Ask for the API key interactively instead of hard-coding it in the notebook.
openai_api_key = getpass("OPENAI_API_KEY")
# Shared OpenAI client; the completion() helper below reads this global.
client = OpenAI(api_key=openai_api_key)

# 초등 prompt 설정

In [3]:
# Jinja2 prompt for elementary-level example sentences.
# Placeholders: voca_NO, WordText, WordMeaning, PoSText. render() must be
# called with exactly these keyword names -- with Jinja2's default Undefined,
# a placeholder that is not supplied silently renders as an empty string.
# The JSON in [EXAMPLE] is kept syntactically valid (comma after "voca_NO")
# so the model is not shown malformed input.
prompt_voca_sentence_elem_template = Template("""
[PERSONA]
You are going to be given a set of English words and some of their Korean meaning. As an elementary school English teacher, generate English sentences using each of those words, and provide Korean translation, too. You must refer to the given Korean meaning to generate the examples. Overall sentences must be very easy and kids-friendly. You must refer to the given Korean meaning to generate the examples.


[POLICY]
1. A sentence must not be over 6 words.
2. Separate the sentence with a tab in between.
3. For Korean translation, use '~요' at the end of each translated sentence to soften the tone. Make it natural.
4. The whole data must strictly follow CEFR A1 level.

[EXAMPLE]
```
Input:
    {
    "voca_NO" : 123,
    "WordText" : "cat",
    "WordMeaning" : "고양이",
    "PoSText" : "명사"
    }

Output:
    {
     "example_sentence" : "The cat sleeps on the bed.",
     "example_sentence_kor" : "고양이가 침대에서 자요."
    }
```                           
                           
[Input]
```
voca_NO : {{voca_NO}}
WordText : {{WordText}}
WordMeaning : {{WordMeaning}}
PoSText : {{PoSText}}
```                   
"""
)


- 예시 출력

In [4]:
# Render one sample prompt; the mapping keys are the template's placeholder names.
prompt_voca_sentence_elem = prompt_voca_sentence_elem_template.render({
    "voca_NO": "123",
    "WordText": "everyone",
    "WordMeaning": "모든 사람",
    "PoSText": "대명사",
})

In [5]:
# Inspect the rendered prompt text to check that the placeholders were substituted.
print(prompt_voca_sentence_elem)


[PERSONA]
You are going to be given a set of English words and some of their Korean meaning. As an elementary school English teacher, generate English sentences using each of those words, and provide Korean translation, too. You must refer to the given Korean meaning to generate the examples. Overall sentences must be very easy and kids-friendly. You must refer to the given Korean meaning to generate the examples.


[POLICY]
1. A sentence must not be over 6 words.
2. Separate the sentence with a tab in between.
3. For Korean translation, use '~요' at the end of each translated sentence to soften the tone. Make it natural.
4. The whole data must strictly follow CEFR A1 level.

[EXAMPLE]
```
Input:
    {
    "voca_NO" : 123
    "WordText" : "cat",
    "WordMeaning" : "고양이",
    "PoSText" : "명사"
    }

Output:
    {
     "example_sentence" : "The cat sleeps on the bed.",
     "example_sentence_kor" : "고양이가 침대에서 자요."
    }
```                           
                           
[Input]


In [6]:
# Structured-output schema for one generated example (used as the response format).
# NOTE(review): documented with comments rather than a docstring on purpose --
# pydantic typically copies a class docstring into the JSON schema's
# "description", which would change response_format; confirm before adding one.
class ExampleSentence(BaseModel):
    example_sentence : str  # English example sentence
    example_sentence_kor : str  # Korean translation of the example sentence

In [7]:
# Convert the pydantic model into a plain response_format dict (JSON schema) so it
# can be embedded in the batch request bodies built by VocaQuizMake.
# NOTE(review): the helper is imported from a private module (openai.lib._parsing);
# it may move between SDK versions -- confirm when upgrading.
response_format = type_to_response_format_param(ExampleSentence)

In [8]:
# Display the generated schema dict.
response_format

{'type': 'json_schema',
 'json_schema': {'schema': {'properties': {'example_sentence': {'title': 'Example Sentence',
     'type': 'string'},
    'example_sentence_kor': {'title': 'Example Sentence Kor',
     'type': 'string'}},
   'required': ['example_sentence', 'example_sentence_kor'],
   'title': 'ExampleSentence',
   'type': 'object',
   'additionalProperties': False},
  'name': 'ExampleSentence',
  'strict': True}}

In [9]:
def completion(prompt : str, response_format : "type[BaseModel]", model : str = 'o3-mini', reasoning_effort : str = 'low') -> "BaseModel | None":
    """Request one structured completion and return the parsed result.

    Uses the module-level ``client``.

    Args:
        prompt: Fully rendered user prompt.
        response_format: Pydantic model class describing the expected output
            (for example ``ExampleSentence``); it is handed unchanged to the
            SDK's ``parse`` helper.
        model: Chat model name; defaults to the previously hard-coded value.
        reasoning_effort: Reasoning effort passed to the API; defaults to the
            previously hard-coded value.

    Returns:
        ``message.parsed`` of the first choice, i.e. an instance of
        ``response_format``. NOTE(review): the SDK types this attribute as
        optional (presumably ``None`` when the model refuses) -- callers
        should check before using it.
    """
    # Annotations are quoted so they are never evaluated at definition time.
    response = client.beta.chat.completions.parse(
        model = model,
        reasoning_effort = reasoning_effort,
        messages = [
            {"role" : "system", "content" : "Generate easy example sentences."},
            {"role" : "user", "content" : prompt}
        ],
        response_format = response_format
    )
    return response.choices[0].message.parsed

In [10]:
# Try the helper on the single rendered sample prompt; the model class is passed as the response format.
response = completion(prompt_voca_sentence_elem, response_format = ExampleSentence)

APIConnectionError: Connection error.

In [None]:
# Show the parsed object returned by completion().
print(response)

example_sentence='Everyone plays in the park.' example_sentence_kor='모든 사람이 공원에서 놀아요.'


In [None]:
# Serialize the parsed result as pretty-printed JSON; ensure_ascii=False keeps
# the Korean text readable instead of \u-escaped.
# model_dump() is the pydantic v2 replacement for the deprecated .dict().
response_output = json.dumps(response.model_dump(), ensure_ascii=False, indent=4)
print(response_output)

{
    "example_sentence": "Everyone plays in the park.",
    "example_sentence_kor": "모든 사람이 공원에서 놀아요."
}


# data 추가

In [42]:
# Source vocabulary sheet; the relative path is resolved against the current working directory.
file_name = "Final_ExampleSentence_Voca_Elem.csv"
df = pd.read_csv(file_name)
# Preview the first rows to check the columns that were loaded.
df.head(5)

Unnamed: 0,구분,voca NO,grade ID,grade NO,WordText,WordMeaning,PoS,품사,image ID,WordID(SoundID),WordSymbol,result_json,example_english,example_korean,단어 여부 검수,초등800 포함여부
0,초등800,10000,초3,1,a,하나의,10,관사,10743,6216,[ə],"{'example_sentence_eng': 'He has a car.', 'exa...",He has a car.,그는 차가 있어요.,True,True
1,초등800,10001,초3,1,about,~에 대하여,6,전치사,9612,3825,[əbáut],{'example_sentence_eng': 'We talk about the ga...,We talk about the game.,우리는 게임에 대하여 이야기해요.,True,True
2,초등800,10002,초3,1,about,대략,2,명사,884,16729,[əbáut],{'example_sentence_eng': 'I have about ten fri...,I have about ten friends.,나는 대략 열 명의 친구가 있어요.,True,True
3,초등800,10003,초등,5,above,위에,6,전치사,2574,4278,[əbʌ́v],{'example_sentence_eng': 'The bird is above th...,The bird is above the tree.,새가 나무 위에 있어요.,True,True
4,초등800,10004,초6,4,across,맞은편에,6,전치사,5589,16730,[əkrɔ́:s; əkrás],{'example_sentence_eng': 'The ball goes across...,The ball goes across street.,공이 길 맞은편에 가요.,True,True


In [43]:
# Number of rows loaded from the CSV.
len(df)

2453

# jsonl 파일 변환

In [44]:
def VocaQuizMake(data, output_filename, model = "o3-mini-2025-01-31"):
    """Write one chat-completion batch request per row of ``data`` to a JSONL file.

    Uses the module-level ``prompt_voca_sentence_elem_template`` and
    ``response_format``.

    Args:
        data: DataFrame with ``WordText`` and ``WordMeaning`` columns, an id
            column (``id`` or the original ``voca NO``) and a part-of-speech
            column (``PoSText`` or the original ``품사``). Any row index is
            accepted, so slices such as ``df[500:1000]`` work.
        output_filename: Path of the JSONL file to create (overwritten).
        model: Model name placed in every request body.

    Raises:
        KeyError: if none of the accepted names for a column is present.
    """
    def pick_column(*candidates):
        # Accept both the original CSV headers and the renamed ones.
        for name in candidates:
            if name in data.columns:
                return name
        raise KeyError(f"none of the columns {candidates} found in data")

    id_col = pick_column("id", "voca NO")
    pos_col = pick_column("PoSText", "품사")

    jsonl_data = []

    # Iterate positionally over the column values: label-based .loc[i] with
    # range(len(data)) fails on slices whose index does not start at 0.
    rows = zip(data[id_col], data["WordText"], data["WordMeaning"], data[pos_col])
    for request_no, (voca_no, word, meaning, pos) in enumerate(rows, start=1):
        # Keyword names must match the template placeholders; Jinja2 renders
        # unknown placeholders as empty strings without raising.
        prompt = prompt_voca_sentence_elem_template.render(
            voca_NO = voca_no,
            WordText = word,
            WordMeaning = meaning,
            PoSText = pos
        )

        quiz_request = {
            # custom_id is the 1-based position inside this file, not the vocabulary id.
            "custom_id" : f"request-{request_no}",
            "method" : "POST",
            "url" : "/v1/chat/completions",
            "body" : {
                "model" : model,
                "messages" : [
                    {"role": "system", "content": "Generate easy example sentences."},
                    {"role": "user", "content": prompt}
                ],
                "response_format" : response_format
            }
        }

        jsonl_data.append(quiz_request)

    # Write the file once, after all requests are built, instead of rewriting
    # it from scratch on every loop iteration.
    with open(output_filename, 'w', encoding='utf-8') as jsonl_file:
        for item in jsonl_data:
            jsonl_file.write(json.dumps(item, ensure_ascii=False) + '\n')

    # len(jsonl_data) equals the old i+1 and is also defined for empty input.
    print(f'JSONL 파일 생성 완료 : {output_filename}-{len(jsonl_data)}')

# 범용 데이터 전처리

In [45]:
# List the available columns before deciding which ones to drop.
df.columns

Index(['구분', 'voca NO', 'grade ID', 'grade NO', 'WordText', 'WordMeaning',
       'PoS', '품사', 'image ID', 'WordID(SoundID)', 'WordSymbol', 'result_json',
       'example_english', 'example_korean', '단어 여부 검수', '초등800 포함여부'],
      dtype='object')

In [46]:
# Columns that are not needed for prompt generation.
drop_col = ['구분', 'grade ID', 'grade NO','PoS', 'image ID', 'WordID(SoundID)', 'WordSymbol','example_english', 'example_korean', '초등800 포함여부', 'result_json', '단어 여부 검수']
# Reassign instead of mutating in place, and ignore columns that are already
# gone, so re-running this cell does not raise KeyError.
df = df.drop(columns=drop_col, errors="ignore")
df.head(3)

Unnamed: 0,voca NO,WordText,WordMeaning,품사
0,10000,a,하나의,관사
1,10001,about,~에 대하여,전치사
2,10002,about,대략,명사


In [47]:
# Give the part-of-speech and vocabulary-number columns their new names.
df = df.rename(columns={"품사" : "PoSText", "voca NO" : "id"})
df.head(3)

Unnamed: 0,id,WordText,WordMeaning,PoSText
0,10000,a,하나의,관사
1,10001,about,~에 대하여,전치사
2,10002,about,대략,명사


In [48]:
print(f"총 사이즈 {len(df)}")
# Fixed positional boundaries for the six chunks; the last one takes every row from 2433 on.
df1, df2, df3, df4, df5, df6 = (
    df[start:stop]
    for start, stop in (
        (None, 500),
        (500, 1000),
        (1000, 1500),
        (1500, 2000),
        (2000, 2433),
        (2433, None),
    )
)

# One print call, one chunk size per line.
print(
    f"df1 사이즈 {len(df1)}",
    f"df2 사이즈 {len(df2)}",
    f"df3 사이즈 {len(df3)}",
    f"df4 사이즈 {len(df4)}",
    f"df5 사이즈 {len(df5)}",
    f"df6 사이즈 {len(df6)}",
    sep="\n",
)

총 사이즈 2453
df1 사이즈 500
df2 사이즈 500
df3 사이즈 500
df4 사이즈 500
df5 사이즈 433
df6 사이즈 20


In [49]:
# Save every chunk next to the notebook; utf-8-sig writes a BOM at the start of each file.
for csv_name, chunk in (
    ("df1.csv", df1),
    ("df2.csv", df2),
    ("df3.csv", df3),
    ("df4.csv", df4),
    ("df5.csv", df5),
    ("df6.csv", df6),
):
    chunk.to_csv(csv_name, index=False, encoding='utf-8-sig')

In [52]:
# Preview the last chunk (rows from position 2433 onward).
df6.head(15)

Unnamed: 0,id,WordText,WordMeaning,PoSText
2433,12433,draw a picture,그림을 그리다,숙어/관용어
2434,12434,ink,잉크,명사
2435,12435,o'clock,시,명사
2436,12436,play badminton,배드민턴을 하다,숙어/관용어
2437,12437,play baseball,야구를 하다,숙어/관용어
2438,12438,play basketball,농구를 하다,숙어/관용어
2439,12439,play soccer,축구를 하다,숙어/관용어
2440,12440,raise,"올리다,일으키다",동사
2441,12441,raise,돈을 모으다,동사
2442,12442,raise,키우다,동사
