# Structure prompt

In [109]:
from pydantic import BaseModel
from openai import OpenAI
from openai.lib._parsing import type_to_response_format_param
import json
from getpass import getpass
from jinja2 import Template

# Ask for the API key interactively so it is never written into the notebook,
# then build the OpenAI client that the later cells use for requests.
openai_api_key = getpass('OPENAI_API_KEY')
client = OpenAI(api_key=openai_api_key)

In [110]:
# Jinja2 template for the user prompt sent with every request.
# It describes the persona and the three pieces of material to generate
# ([DESCRIPTION], [KEY_EXPRESSION], [SUB_EXPRESSION]), shows one worked
# Input/Output example, and ends with an [Input] section whose placeholders
# (textbook_id, grade, publisher, education, lesson, title, structures) are
# filled in by `prompt_template.render(...)`.
# The worked example is kept as well-formed JSON inside a properly closed
# code fence so the model is not shown a malformed target format.
prompt_template = Template("""
You are going to be given a set of data. Refine it according to the following instruction.

[PERSONA]
```
You are an English teacher. Your students are basic English learners, Korean middle school students.
Korean middle school students whose CEFR level is A2.
You must produce a set of material in order to teach them effectively.
The material should consist of [DESCRIPTION], [KEY_EXPRESSION], [SUB_EXPRESSION].
```
                           
[DESCRIPTION]
```
You must provide the "given description" as [DESCRIPTION].
e.g) • be동사의 긍정문과 부정문
    • 동사의 과거형 
```

[KEY_EXPRESSION]
```
Understand the sentence structure and create a sentence using the given grammar and the given sentences. Make it different from the given sentence while keeping the sentence structure. Stay related to 'Title' topic, but do not explicitly mention it.
Provide Korean translation using '~해요' tone. Make it natural.
e.g) Input: 'Title	Spend Smart, Save Smart'
	'동사의 과거형	We sold many things at the flea market.'
	 Output: 'I spent 5000 won to buy this diary.'
					 '이 다이어리를 사기 위해 5000원을 썼어요.'
```
                           
[SUB_EXPRESSION]
```
You must generate two new sentences using the grammar mentioned. Stay related to 'Title' topic, but do not explicitly mention it. Keep the word count and the difficulty similar to the given CEFR level.
Provide Korean translation using '~해요' tone. Make it natural.
e.g) Input: 'Title	Spend Smart, Save Smart'
	 '동사의 과거형	We sold many things at the flea market.'
	 Output: 'He bought fresh vegetables from the local farmer.'
			 'She picked up a pretty hat at the flea market.'
			 '그는 지역 농부에게서 신선한 야채를 샀어요'
			 '그녀는 벼룩시장에서 예쁜 모자를 집어들었어요.'
```
                           
[EXAMPLE]
```
Input:
    {
    "TEXTBOOK_ID" : "701",
    "GRADE" : "중1",
    "PUBLISHER" : "능률(김)",
    "EDUCATION" : "2015",
    "LESSON" : "1",
    "TITLE" : "Follow Your Dreams",
    "STRUCTURES" : "비교급, 최상급 The other team's players ran faster than us."
    }

Output:
    {
    "TEXTBOOK_ID" : "701",
    "GRADE" : "중1",
    "PUBLISHER" : "능률(김)",
    "EDUCATION" : "2015",
    "LESSON" : "1",
    "TITLE" : "Follow Your Dreams",
    "DESCRIPTION" : "비교급, 최상급",
    "KEY_EXPRESSION" : "I have practiced harder than anyone.",
    "KEY_EXPRESSION_Kor" : "나는 누구보다 열심히 연습했어요.",
    "SUB_EXPRESSION" : "He always practices longer than me. Following dreams is the best choice.",
    "SUB_EXPRESSION_Kor" : "그는 항상 저보다 더 오래 연습해요. 꿈을 좇는 것이 최선의 선택이에요."
    }
```
                           
[Input]
```
TEXTBOOK_ID : {{textbook_id}}
GRADE : {{grade}}
PUBLISHER : {{publisher}}
EDUCATION : {{education}}
LESSON : {{lesson}}
TITLE: {{title}}
STRUCTURES: {{structures}}
```
                           
"""
)

In [111]:
# Render the template once with a hand-written 중2 row so the prompt can be
# inspected and tried against the model before building the batch files.
prompt = prompt_template.render(**{
    "textbook_id": "801",
    "grade": "중2",
    "publisher": "능률(김)",
    "education": "2015",
    "lesson": "1",
    "title": "Express Yourself",
    "structures": "주어 역할을 하는 동명사 Taking selfies is part of daily life for many teens.",
})

In [112]:
print(prompt)


You are going to be given a set of data. Refine it according to the following instruction.

[PERSONA]
```
You are an English teacher. Your students are basic English learners, Korean middle school students.
Korean middle school students whose CEFR level is A2.
You must produce a set of material in order to teach them effectively.
The material should consist of [DESCRIPTION], [KEY_EXPRESSION], [SUB_EXPRESSION].
```
                           
[DESCRIPTION]
```
You must provide the "given description" as [DESCRIPTION].
e.g) • be동사의 긍정문과 부정문
    • 동사의 과거형 
```

[KEY_EXPRESSION]
```
Understand the sentence structure and create a sentence using the given grammar and the given sentences. Make it different from the given sentence while keeping the sentence structure. Stay related to 'Title' topic, but do not explicitly mention it.
Provide Korean translation using '~해요' tone. Make it natural.
e.g) Input: 'Title	Spend Smart, Save Smart'
	'동사의 과거형	We sold many things at the flea market.'
	 Outp

In [113]:
# Schema of the structured reply requested from the model: the textbook
# metadata fields shown in the prompt's example plus the generated material.
# Documented with comments rather than a class docstring so the generated JSON
# schema stays unchanged (pydantic is understood to copy a class docstring into
# the schema "description" -- confirm before adding one).
class Structure(BaseModel):
    # Textbook metadata; the prompt's example output repeats these from the input.
    TEXTBOOK_ID :int
    GRADE : str
    PUBLISHER : str
    EDUCATION : int
    LESSON : int
    TITLE: str
    # Grammar point taken from the given structure text.
    DESCRIPTION : str
    # One new English sentence using the grammar point, and its Korean translation.
    KEY_EXPRESSION : str
    KEY_EXPRESSION_Kor : str
    # Additional English sentences (the prompt asks for two, held in a single
    # string) and their Korean translations.
    SUB_EXPRESSION : str
    SUB_EXPRESSION_Kor : str

In [114]:
# Convert the pydantic model into a plain `response_format` dict (a
# 'json_schema' payload) that can be embedded in the JSONL batch request bodies.
# NOTE(review): the helper is imported from `openai.lib._parsing`; the leading
# underscore suggests a private module that may change between SDK versions -- confirm.
response_format = type_to_response_format_param(Structure)

In [115]:
response_format

{'type': 'json_schema',
 'json_schema': {'schema': {'properties': {'TEXTBOOK_ID': {'title': 'Textbook Id',
     'type': 'integer'},
    'GRADE': {'title': 'Grade', 'type': 'string'},
    'PUBLISHER': {'title': 'Publisher', 'type': 'string'},
    'EDUCATION': {'title': 'Education', 'type': 'integer'},
    'LESSON': {'title': 'Lesson', 'type': 'integer'},
    'TITLE': {'title': 'Title', 'type': 'string'},
    'DESCRIPTION': {'title': 'Description', 'type': 'string'},
    'KEY_EXPRESSION': {'title': 'Key Expression', 'type': 'string'},
    'KEY_EXPRESSION_Kor': {'title': 'Key Expression Kor', 'type': 'string'},
    'SUB_EXPRESSION': {'title': 'Sub Expression', 'type': 'string'},
    'SUB_EXPRESSION_Kor': {'title': 'Sub Expression Kor', 'type': 'string'}},
   'required': ['TEXTBOOK_ID',
    'GRADE',
    'PUBLISHER',
    'EDUCATION',
    'LESSON',
    'TITLE',
    'DESCRIPTION',
    'KEY_EXPRESSION',
    'KEY_EXPRESSION_Kor',
    'SUB_EXPRESSION',
    'SUB_EXPRESSION_Kor'],
   'title': 'Str

In [116]:
def completion(prompt : str) -> "Structure":
    """Send one prompt to the chat model and return the parsed structured reply.

    Three system messages carrying the teacher persona are sent ahead of the
    user message, and the reply is parsed against the `Structure` model.

    Args:
        prompt: Fully rendered user prompt.

    Returns:
        The `parsed` attribute of the first choice's message, i.e. the reply
        as a `Structure` instance (not a string).
        NOTE(review): `parsed` is presumably None when the model refuses to
        answer -- confirm, and check before calling methods on the result.
    """
    persona_lines = (
        "You are an English teacher. Your students are basic English learners, Korean middle school students.",
        "You must produce a set of material in order to teach them effectively.",
        "The material should consist of [DESCRIPTION], [KEY_EXPRESSION], [SUB_EXPRESSION].",
    )
    messages = [{"role" : "system", "content" : text} for text in persona_lines]
    messages.append({"role" : "user", "content" : prompt})

    response = client.beta.chat.completions.parse(
        model = 'o3-mini-2025-01-31',
        reasoning_effort='low',
        messages = messages,
        response_format = Structure,
    )
    return response.choices[0].message.parsed

In [117]:
response = completion(prompt)

In [118]:
# Serialise the parsed model as indented JSON; ensure_ascii=False keeps the
# Korean text readable instead of \u-escaped.
# NOTE(review): `.dict()` is presumably deprecated under pydantic v2 in favour
# of `.model_dump()` -- confirm against the installed pydantic version.
response_output = json.dumps(response.dict(), ensure_ascii=False, indent=2)
print(response_output)

{
  "TEXTBOOK_ID": 801,
  "GRADE": "중2",
  "PUBLISHER": "능률(김)",
  "EDUCATION": 2015,
  "LESSON": 1,
  "TITLE": "Express Yourself",
  "DESCRIPTION": "주어 역할을 하는 동명사",
  "KEY_EXPRESSION": "Drawing sketches is a way of expressing feelings.",
  "KEY_EXPRESSION_Kor": "스케치를 그리는 것이 감정을 표현하는 방법이에요.",
  "SUB_EXPRESSION": "Writing diaries is how some people share their thoughts. Cooking meals is a fun way of showing creativity.",
  "SUB_EXPRESSION_Kor": "일기를 쓰는 것이 어떤 사람들이 생각을 나누는 방법이에요. 맛있는 음식을 만드는 것이 창의력을 보여주는 재미있는 방법이에요."
}


# for문 돌려서 Batch API jsonl 파일 형식 만들기

- jsonl 파일 만드는 함수

In [119]:
def StructureCuriMake(data , output_filename : str) -> None:
    """Write one Batch API request per row of `data` to a JSONL file.

    Each row is rendered through `prompt_template` and wrapped in a
    `/v1/chat/completions` request that uses the module-level
    `response_format`. Requests are numbered `request-1`, `request-2`, ... in
    row order. Rows are read by position, so `data` does not need a 0..n-1
    index; an empty frame produces an empty file and a count of 0.

    Args:
        data: pandas DataFrame with the columns "textbook ID", "학년", "출판사",
            "교육과정", "Lesson", "Title" and "Structures".
        output_filename: Path of the JSONL file to create (overwritten if it
            already exists).

    Returns:
        None. The file is written and a completion message with the number of
        requests is printed.
    """
    jsonl_data = []

    # Iterate over row records rather than index labels so the function does
    # not depend on the caller having reset the index.
    for request_number, row in enumerate(data.to_dict("records"), start=1):
        prompt = prompt_template.render(
            textbook_id = row["textbook ID"],
            grade = row["학년"],
            publisher = row["출판사"],
            education = row["교육과정"],
            lesson = row["Lesson"],
            title = row["Title"],
            structures = row["Structures"],
        )

        structure_request = {
            "custom_id" : f"request-{request_number}",
            "method" : "POST",
            "url" : "/v1/chat/completions",
            "body" : {
                "model" : "o3-mini-2025-01-31",
                "reasoning_effort" : "low",
                "messages" : [
                    {"role" : "system", "content" : "You are an English teacher. Your students are basic English learners, Korean middle school students."},
                    {"role" : "system", "content" : "You must produce a set of material in order to teach them effectively."},
                    {"role" : "system", "content" : "The material should consist of [DESCRIPTION], [KEY_EXPRESSION], [SUB_EXPRESSION]."},
                    {"role": "user", "content": prompt}
                ],
                "response_format" : response_format
            }
        }

        jsonl_data.append(structure_request)

    # All prompts are rendered before the file is opened, so a rendering error
    # cannot leave a half-written file behind.
    with open(output_filename, 'w', encoding='utf-8') as jsonl_file:
        for item in jsonl_data:
            jsonl_file.write(json.dumps(item, ensure_ascii=False) + '\n')

    # Report the number of requests from the list itself; the loop variable is
    # unbound when `data` has no rows.
    print(f'JSONL 파일 생성 완료 : {output_filename}-{len(jsonl_data)}')

- 기존 raw_data 불러오기

In [120]:
import pandas as pd

raw_data = pd.read_csv("Structure_Extract_Mid.csv")

# Split the rows by grade ('학년') and reset each index in the same step,
# so every per-grade frame is labelled 0..n-1.
data_Mid1 = raw_data[raw_data['학년'] == '중1'].reset_index(drop=True)
data_Mid2 = raw_data[raw_data['학년'] == '중2'].reset_index(drop=True)
data_Mid3 = raw_data[raw_data['학년'] == '중3'].reset_index(drop=True)

# Preview the first three rows of each grade.
display(data_Mid1.head(3), data_Mid2.head(3), data_Mid3.head(3))

Unnamed: 0.1,Unnamed: 0,textbook ID,학년,출판사,교육과정,Lesson,Title,Structures
0,0,701,중1,능률(김),2015,1,Welcome to My World,be동사의 긍정문과 부정문 He is from France. She is not g...
1,1,701,중1,능률(김),2015,2,Discover Your Culture,현재진행형 I'm enjoying my trip to Korea.
2,2,701,중1,능률(김),2015,3,"Spend Smart, Save Smart",동사의 과거형 We sold many things at the flea market.


Unnamed: 0.1,Unnamed: 0,textbook ID,학년,출판사,교육과정,Lesson,Title,Structures
0,7,801,중2,능률(김),2015,1,Express Yourself,주어 역할을 하는 동명사 Taking selfies is part of daily ...
1,8,801,중2,능률(김),2015,2,"Eat Right, Be Happy",주격 관계대명사 What do students who live in other co...
2,9,801,중2,능률(김),2015,3,Understand the World,"현재완료 For a long time, Koreans have thought tha..."


Unnamed: 0.1,Unnamed: 0,textbook ID,학년,출판사,교육과정,Lesson,Title,Structures
0,14,901,중3,능률(김),2015,1,A Life Full of Experiences,현재완료진행형 I have been writing my shopping lists ...
1,15,901,중3,능률(김),2015,2,Take Care of Yourself,선행사를 부연 설명하는 계속적 용법의 관계사 Walnuts also have wri...
2,16,901,중3,능률(김),2015,3,"Always Aware, Always Prepared","과거완료 One night in February, after I had gone t..."


- 함수를 이용해서 raw_data를 jsonl 파일로 변환

In [121]:
# Build one Batch API JSONL file per grade; each call prints the file name
# followed by the number of requests written.
StructureCuriMake(data_Mid1, output_filename = 'Structure_Mid1_batch.jsonl')
StructureCuriMake(data_Mid2, output_filename = 'Structure_Mid2_batch.jsonl')
StructureCuriMake(data_Mid3, output_filename = 'Structure_Mid3_batch.jsonl')

JSONL 파일 생성 완료 : Structure_Mid1_batch.jsonl-207
JSONL 파일 생성 완료 : Structure_Mid2_batch.jsonl-144
JSONL 파일 생성 완료 : Structure_Mid3_batch.jsonl-142


In [122]:
import json

# Sanity check: parse only the first request of a generated batch file and
# pretty-print its "body"; the `break` stops after that first line.
# NOTE(review): the variable is named `mid1_json_data`, but the file opened
# here is the Mid2 batch -- confirm which grade was meant.
with open("Structure_Mid2_batch.jsonl","r", encoding="utf-8") as f:
    for line in f:
        mid1_json_data = json.loads(line)
        # print(json.dumps(mid1_json_data, indent=2, ensure_ascii=False))
        print(json.dumps(mid1_json_data['body'], indent=2, ensure_ascii=False))
        break

{
  "model": "o3-mini-2025-01-31",
  "reasoning_effort": "low",
  "messages": [
    {
      "role": "system",
      "content": "You are an English teacher. Your students are basic English learners, Korean middle school students."
    },
    {
      "role": "system",
      "content": "You must produce a set of material in order to teach them effectively."
    },
    {
      "role": "system",
      "content": "The material should consist of [DESCRIPTION], [KEY_EXPRESSION], [SUB_EXPRESSION]."
    },
    {
      "role": "user",
      "content": "\nYou are going to be given a set of data. Refine it according to the following instruction.\n\n[PERSONA]\n```\nYou are an English teacher. Your students are basic English learners, Korean middle school students.\nKorean middle school students whose CEFR level is A2.\nYou must produce a set of material in order to teach them effectively.\nThe material should consist of [DESCRIPTION], [KEY_EXPRESSION], [SUB_EXPRESSION].\n```\n                       

- sample data 만들기

In [123]:
# Take the first 10 rows of the 중1 data and write them to a small batch file
# for a trial run.
sample = data_Mid1[:10]
StructureCuriMake(sample, output_filename='Structure_sample_batch.jsonl')

JSONL 파일 생성 완료 : Structure_sample_batch.jsonl-10
