# Uploading Batch Input File
 - batch file 보낼 때 너무 많이 보내지 말고 구분해서 보내기

In [2]:
from getpass import getpass

# Prompt for the API key interactively so it is never written into the notebook.
openai_api_key = getpass("OPENAI_API_KEY")

In [3]:
from openai import OpenAI
import json
import pandas as pd

# Client object used for the file and batch API calls in this notebook.
client = OpenAI(api_key=openai_api_key)

In [9]:
# Upload the JSONL batch input file. The context manager guarantees the local
# file handle is closed once the upload call returns (or raises).
with open("Translate_High_Function_batch.jsonl", "rb") as batch_file:
    batch_input_file = client.files.create(
        file=batch_file,
        purpose="batch",
    )

print(batch_input_file)

FileObject(id='file-YNxqCy2Kp4CXpmSmzbkX8Y', bytes=481464, created_at=1739778832, filename='Translate_High_Function_batch.jsonl', object='file', purpose='batch', status='processed', status_details=None)


# Create the Batch

In [10]:
# Keep the ID of the uploaded batch input file; it is passed to batches.create.
batch_input_file_id = batch_input_file.id
print(batch_input_file_id)

file-YNxqCy2Kp4CXpmSmzbkX8Y


In [11]:
# Create the batch job from the uploaded input file.
batch_info = client.batches.create(
    input_file_id = batch_input_file_id,
    endpoint = "/v1/chat/completions", # chat completions API
    completion_window = "24h", # currently the only option available
    metadata = {
        "description": "translate"
    }
)

In [12]:
# Convert the batch object returned by batches.create into a plain dict and
# pretty-print it. model_dump_json() is used instead of the deprecated
# pydantic-v1-style .json(); ensure_ascii=False keeps non-ASCII text readable.
response_json = json.loads(batch_info.model_dump_json())
response_output = json.dumps(response_json, ensure_ascii=False, indent=2)
print(response_output)

{
  "id": "batch_67b2eb1a730081908d18a26c22462d60",
  "completion_window": "24h",
  "created_at": 1739778842,
  "endpoint": "/v1/chat/completions",
  "input_file_id": "file-YNxqCy2Kp4CXpmSmzbkX8Y",
  "object": "batch",
  "status": "validating",
  "cancelled_at": null,
  "cancelling_at": null,
  "completed_at": null,
  "error_file_id": null,
  "errors": null,
  "expired_at": null,
  "expires_at": 1739865242,
  "failed_at": null,
  "finalizing_at": null,
  "in_progress_at": null,
  "metadata": {
    "description": "translate"
  },
  "output_file_id": null,
  "request_counts": {
    "completed": 0,
    "failed": 0,
    "total": 0
  }
}


# Checking the Status of Batch
 - validating : the input file is being validated before the batch can begin
 - failed : the input file has failed the validation process
 - in_progress : the input file was successfully validated and the batch is currently being run
 - finalizing : the batch has completed and the results are being prepared
 - completed : the batch has been completed and the results are ready
 - expired : the batch was not able to be completed within the 24-hour time window
 - cancelling : the batch is being cancelled (may take up to 10 minutes)
 - cancelled : the batch was cancelled

- batch_id = 'batch_67b2ea20b91c81908c1476a03211c887' : previewquiz_high
- batch_id = 'batch_67b2eb1a730081908d18a26c22462d60' : translate_high

In [25]:
# Fetch the current state of a batch by ID and convert it to a plain dict.
# model_dump_json() replaces the deprecated pydantic-v1-style .json().
batch_id = "batch_67b2ea20b91c81908c1476a03211c887"
batch = client.batches.retrieve(batch_id)
batch_json = json.loads(batch.model_dump_json())
batch_json

{'id': 'batch_67b2ea20b91c81908c1476a03211c887',
 'completion_window': '24h',
 'created_at': 1739778592,
 'endpoint': '/v1/chat/completions',
 'input_file_id': 'file-7et6iU7Xsvek2fiuh9pqyf',
 'object': 'batch',
 'status': 'completed',
 'cancelled_at': None,
 'cancelling_at': None,
 'completed_at': 1739779540,
 'error_file_id': None,
 'errors': None,
 'expired_at': None,
 'expires_at': 1739864992,
 'failed_at': None,
 'finalizing_at': 1739779513,
 'in_progress_at': 1739778593,
 'metadata': {'description': 'preview quiz'},
 'output_file_id': 'file-8SmXqTfkKLD7wWRrg3N4eo',
 'request_counts': {'completed': 326, 'failed': 0, 'total': 326}}

In [10]:
# Display the retrieved batch's completion time (a Unix timestamp, or None while unfinished).
batch_json['completed_at']

1739264134

## 시간 확인

In [44]:
from datetime import datetime
import pytz

# Timezone that convert_timestamp renders its results in (Asia/Seoul).
kst = pytz.timezone('Asia/Seoul')

# Conversion helper
def convert_timestamp(starttime, endtime):
    """Format two Unix timestamps as datetimes in the module-level ``kst`` zone.

    Args:
        starttime: Start time as a Unix timestamp (e.g. a batch's
            ``created_at``).
        endtime: End time as a Unix timestamp (e.g. a batch's
            ``completed_at``).

    Returns:
        A single string containing both converted datetimes.

    Raises:
        TypeError: If either timestamp is ``None`` (for example when the
            batch has not completed yet).
    """
    # Build timezone-aware UTC datetimes directly. datetime.utcfromtimestamp()
    # is deprecated and returns naive values that then need a manual
    # replace(tzinfo=...) step.
    dt_kst_st = datetime.fromtimestamp(starttime, tz=pytz.utc).astimezone(kst)
    dt_kst_end = datetime.fromtimestamp(endtime, tz=pytz.utc).astimezone(kst)

    return f"시작 시간 : {dt_kst_st}, 끝나는 시간 : {dt_kst_end}"

# Show when the retrieved batch was created and when it completed.
convert_timestamp(starttime = batch_json['created_at'], endtime = batch_json['completed_at'])


  st = datetime.utcfromtimestamp(starttime)
  end = datetime.utcfromtimestamp(endtime)


'시작 시간 : 2025-02-11 18:26:02+09:00, 끝나는 시간 : 2025-02-11 18:58:31+09:00'

# Retrieving the Results
 - file-EVCbFfNykgV9HphK2h9wsk

In [26]:
# Download the content of the batch's output file and print the raw text.
file_response = client.files.content('file-8SmXqTfkKLD7wWRrg3N4eo') # uses the batch's output_file_id
print(file_response.text)

{"id": "batch_req_67b2edb9ad8c81908cafbbd929d49c04", "custom_id": "request-1", "response": {"status_code": 200, "request_id": "bcbfd7e46eafff89e1a0c4f0311176c6", "body": {"id": "chatcmpl-B1qCTMiPZDnoheih9ecE2JPESXGy1", "object": "chat.completion", "created": 1739778685, "model": "o3-mini-2025-01-31", "choices": [{"index": 0, "message": {"role": "assistant", "content": "{\n  \"goal\": [\n    \"be going to ~\ub97c \uc0ac\uc6a9\ud558\uc5ec \uc790\uc2e0\uc758 \uc758\ub3c4\ub97c \ud45c\ud604\ud560 \uc218 \uc788\uc5b4\uc694.\",\n    \"\uacc4\ud68d\uc774\ub098 \uc758\ub3c4\ub97c \ub098\ud0c0\ub0b4\ub294 \ub2e4\uc591\ud55c \ud45c\ud604\uc744 \uc0ac\uc6a9\ud560 \uc218 \uc788\uc5b4\uc694.\"\n  ],\n  \"preview_quizzes\": [\n    {\n      \"fill_in_blank_quiz\": \"I\u2019m ___ write a letter to explain my concerns clearly.\",\n      \"korean_translation\": \"\ub098\ub294 \ub0b4 \uc6b0\ub824\ub97c \uba85\ud655\ud788 \uc124\uba85\ud558\uae30 \uc704\ud574 \ud3b8\uc9c0\ub97c \uc4f8 \uac70\uc57c.\",\n  

In [27]:
# Break the downloaded text into one string per line, dropping any line that
# is empty or contains only whitespace.
sample_list = [
    line
    for line in file_response.text.split("\n")
    if line.strip()
]
sample_list

['{"id": "batch_req_67b2edb9ad8c81908cafbbd929d49c04", "custom_id": "request-1", "response": {"status_code": 200, "request_id": "bcbfd7e46eafff89e1a0c4f0311176c6", "body": {"id": "chatcmpl-B1qCTMiPZDnoheih9ecE2JPESXGy1", "object": "chat.completion", "created": 1739778685, "model": "o3-mini-2025-01-31", "choices": [{"index": 0, "message": {"role": "assistant", "content": "{\\n  \\"goal\\": [\\n    \\"be going to ~\\ub97c \\uc0ac\\uc6a9\\ud558\\uc5ec \\uc790\\uc2e0\\uc758 \\uc758\\ub3c4\\ub97c \\ud45c\\ud604\\ud560 \\uc218 \\uc788\\uc5b4\\uc694.\\",\\n    \\"\\uacc4\\ud68d\\uc774\\ub098 \\uc758\\ub3c4\\ub97c \\ub098\\ud0c0\\ub0b4\\ub294 \\ub2e4\\uc591\\ud55c \\ud45c\\ud604\\uc744 \\uc0ac\\uc6a9\\ud560 \\uc218 \\uc788\\uc5b4\\uc694.\\"\\n  ],\\n  \\"preview_quizzes\\": [\\n    {\\n      \\"fill_in_blank_quiz\\": \\"I\\u2019m ___ write a letter to explain my concerns clearly.\\",\\n      \\"korean_translation\\": \\"\\ub098\\ub294 \\ub0b4 \\uc6b0\\ub824\\ub97c \\uba85\\ud655\\ud788 \\uc124

In [28]:
# Collect every parsed response, each preceded by a label giving its position.
# Each output line is processed exactly once; the earlier nested loop over
# range(len(sample_list)) re-parsed and re-appended every line
# len(sample_list) times, with labels unrelated to the line's real position.
sample_output = []

for i, sample in enumerate(sample_list):
    sample_json = json.loads(sample)
    # The assistant message content is itself a JSON document stored as a
    # string, so it needs a second decode.
    content = sample_json["response"]["body"]["choices"][0]["message"]["content"]
    content_json = json.loads(content)
    sample_output.append(f"{i} 번째")
    sample_output.append(content_json)

# print(json.dumps(sample_output))


# batch 결과

In [29]:
# Preview: decode and pretty-print only the first output record.
for sample in sample_list:
    sample_json = json.loads(sample)
    # The message content is a JSON string, so it is decoded a second time.
    content = sample_json["response"]["body"]["choices"][0]["message"]["content"]
    content_json = json.loads(content)
    print(json.dumps(content_json, indent=2, ensure_ascii=False))
    # Stop after the first record.
    break

{
  "goal": [
    "be going to ~를 사용하여 자신의 의도를 표현할 수 있어요.",
    "계획이나 의도를 나타내는 다양한 표현을 사용할 수 있어요."
  ],
  "preview_quizzes": [
    {
      "fill_in_blank_quiz": "I’m ___ write a letter to explain my concerns clearly.",
      "korean_translation": "나는 내 우려를 명확히 설명하기 위해 편지를 쓸 거야.",
      "options": [
        "plan to",
        "going to",
        "intending to"
      ],
      "answer": "going to",
      "answer_index": 2
    },
    {
      "fill_in_blank_quiz": "I ___ in order to express my intentions.",
      "korean_translation": "나는 나의 의도를 표현하기 위해 그렇게 할 거야.",
      "options": [
        "intending to",
        "plan to",
        "going to"
      ],
      "answer": "plan to",
      "answer_index": 2
    }
  ],
  "sample_script": [
    {
      "sample_script_eng": "A: I’m going to study for the upcoming exam. B: That sounds like a solid plan! A: I plan to review all my notes tonight to ensure success. B: I'm sure it will pay off.",
      "sample_script_kor": "A: 다가오는 시험을 위해 공부할 거야. B: 그거

# CSV 파일로 변환

In [30]:
# Decode every output record in turn. Nothing is collected, so once the loop
# finishes sample_json / content / content_json hold the last record's values.
for sample in sample_list:
    sample_json = json.loads(sample)
    content = sample_json["response"]["body"]["choices"][0]["message"]["content"]
    content_json = json.loads(content)
    # print(json.dumps(content_json, indent=2, ensure_ascii=False))
    # break

In [31]:
# Decode the message content of whichever record sample_json currently holds.
content = sample_json["response"]["body"]["choices"][0]["message"]["content"]
content_json = json.loads(content)

In [32]:
def _flatten_quiz(quiz: dict, number: int) -> dict:
    """Return the CSV columns for one preview quiz, labelled with *number*."""
    return {
        f"Quiz {number}": quiz["fill_in_blank_quiz"],
        f"Korean {number}": quiz["korean_translation"],
        f"Option {number}-1": quiz["options"][0],
        f"Option {number}-2": quiz["options"][1],
        f"Option {number}-3": quiz["options"][2],
        f"Answer {number}": quiz["answer"],
        f"Index {number}": quiz["answer_index"],
    }


def JsonToCSV(data: list[str], file_name: str) -> None:
    """Flatten preview-quiz batch output lines into ``<file_name>.csv``.

    Each element of *data* is one JSON-encoded batch output line. The
    assistant message content inside it is a second JSON document, which must
    provide ``goal``, at least two ``preview_quizzes`` (each with at least
    three ``options``) and at least one ``sample_script`` entry.

    Args:
        data: Batch output lines, one JSON string per request.
        file_name: Output path without the ``.csv`` extension.

    Returns:
        None. The CSV is written to disk and a confirmation is printed.

    Raises:
        json.JSONDecodeError: If a line or its message content is not JSON.
        KeyError, IndexError, TypeError: If a record lacks the expected
            fields (for example a line with no usable response body).
    """
    data_list = []

    for sample in data:
        # Decode the output line, then the JSON stored in the message content.
        sample_json = json.loads(sample)
        content = sample_json["response"]["body"]["choices"][0]["message"]["content"]
        content_json = json.loads(content)

        # Learning goals are joined into one multi-line cell.
        goal = "\n".join(content_json["goal"])

        quizzes = content_json["preview_quizzes"]
        quiz1_data = _flatten_quiz(quizzes[0], 1)
        quiz2_data = _flatten_quiz(quizzes[1], 2)

        # Only the first sample script is exported.
        script = content_json["sample_script"][0]

        # One CSV row per record; key order defines the column order.
        data_list.append({
            "학습목표": goal,
            **quiz1_data,
            **quiz2_data,
            "샘플 스크립트 (영어)": script["sample_script_eng"],
            "샘플 스크립트 (한국어)": script["sample_script_kor"],
        })

    df = pd.DataFrame(data_list)

    # utf-8-sig writes a BOM at the start of the file.
    df.to_csv(f"{file_name}.csv", index=False, encoding='utf-8-sig')

    print(f"CSV 변환 완료 파일명: {file_name}.csv")


In [21]:
def JsonToCSV_Translate(data: list[str], file_name: str) -> None:
    """Write the ``sub_expression_kor`` of each output line to ``<file_name>.csv``.

    Each element of *data* is one JSON-encoded batch output line whose
    assistant message content is a second JSON document containing a
    ``sub_expression_kor`` field.

    Args:
        data: Batch output lines, one JSON string per request.
        file_name: Output path without the ``.csv`` extension.

    Returns:
        None. The CSV is written to disk and a confirmation is printed.

    Raises:
        json.JSONDecodeError: If a line or its message content is not JSON.
        KeyError, IndexError, TypeError: If a record lacks the expected fields.
    """
    data_list = []

    for sample in data:
        # Decode the output line, then the JSON stored in the message content.
        sample_json = json.loads(sample)
        content = sample_json["response"]["body"]["choices"][0]["message"]["content"]
        content_json = json.loads(content)

        # One single-column row per record.
        data_list.append(
            {"Sub Expression (한국어)": content_json["sub_expression_kor"]}
        )

    df = pd.DataFrame(data_list)

    # utf-8-sig writes a BOM at the start of the file.
    df.to_csv(f"{file_name}.csv", index=False, encoding='utf-8-sig')

    print(f"CSV 변환 완료 파일명: {file_name}.csv")


In [33]:
# Export the parsed batch output lines; JsonToCSV appends ".csv" to this base name.
name = 'PreviewQuiz_High_Function'
JsonToCSV(sample_list, name)

CSV 변환 완료 파일명: PreviewQuiz_High_Function.csv


In [34]:
# Read the exported CSV back and display its first three rows as a sanity check.
df = pd.read_csv(f'{name}.csv')
df.head(3)

Unnamed: 0,학습목표,Quiz 1,Korean 1,Option 1-1,Option 1-2,Option 1-3,Answer 1,Index 1,Quiz 2,Korean 2,Option 2-1,Option 2-2,Option 2-3,Answer 2,Index 2,샘플 스크립트 (영어),샘플 스크립트 (한국어)
0,be going to ~를 사용하여 자신의 의도를 표현할 수 있어요.\n계획이나 의...,I’m ___ write a letter to explain my concerns ...,나는 내 우려를 명확히 설명하기 위해 편지를 쓸 거야.,plan to,going to,intending to,going to,2,I ___ in order to express my intentions.,나는 나의 의도를 표현하기 위해 그렇게 할 거야.,intending to,plan to,going to,plan to,2,A: I’m going to study for the upcoming exam. B...,A: 다가오는 시험을 위해 공부할 거야. B: 그거 좋은 계획이야! A: 오늘 밤 ...
1,부사 maybe를 사용하여 가능성의 정도를 표현할 수 있어요.\n부사 perhaps...,___ this event will occur as planned.,이 행사가 계획대로 진행될 수도 있다.,Perhaps,Maybe,Probably,Maybe,2,___ ~ will work out in the expected way.,어쩌면 일이 예상대로 진행될 거야.,Certainly,Perhaps,Maybe,Perhaps,2,A: Maybe the plan will be executed perfectly. ...,A: 어쩌면 계획이 완벽하게 실행될 거야. B: 정말 기대돼! A: 어쩌면 모든 세...
2,‘Why don’t you ~’ 표현을 활용하여 제안을 할 수 있어요.\nYou c...,Why don’t you ______ taking a short break?,짧은 휴식을 취하는 것을 고려해 보는 건 어때?,avoid,consider,skip,consider,2,Why don’t you ______ doing exercise?,운동하는 걸 해보는 건 어때?,forget,try,quit,try,2,A: Why don’t you consider going for a walk? B:...,"A: 산책을 고려해 보는 건 어때? B: 좋은 생각이야, 나도 같이 가볼게! A: ..."


# Cancelling a Batch

In [29]:
# cancel_batch = client.batches.cancel("")
# batch_json = json.loads(cancel_batch.json())
# batch_ouput = json.dumps(batch_json, ensure_ascii=False, indent=2)
# print(batch_ouput)