# Uploading Batch Input File

In [1]:
from openai.lib._parsing import type_to_response_format_param
from pydantic import BaseModel
from getpass import getpass
from openai import OpenAI
import json
import pandas as pd

In [45]:
# Ask for the API key interactively so it is never stored in the notebook,
# then build the client that every later cell uses.
openai_api_key = getpass(prompt="OPENAI_API_KEY")
client = OpenAI(api_key=openai_api_key)

In [46]:
# Upload the JSONL request file with purpose='batch'.
# The context manager closes the local file handle once the upload call returns
# (a bare open() here would leave it open for the lifetime of the kernel).
with open('sample_voca.jsonl', 'rb') as batch_requests_fp:
    batch_input_file = client.files.create(
        file=batch_requests_fp,
        purpose='batch'
    )

print(batch_input_file)

FileObject(id='file-UwQG9EmV39VNZKT53f8vHk', bytes=40791, created_at=1739434851, filename='sample_voca.jsonl', object='file', purpose='batch', status='processed', status_details=None)


# Create the Batch

In [47]:
# Keep the ID of the uploaded file; it is passed as input_file_id when the batch is created.
batch_input_file_id = batch_input_file.id
# Bare expression so the ID is shown as the cell output.
batch_input_file_id

'file-UwQG9EmV39VNZKT53f8vHk'

In [48]:
# Create a batch job that runs every request in the uploaded file.
batch_info = client.batches.create(
    input_file_id=batch_input_file_id,
    endpoint="/v1/chat/completions",  # chat completions API
    completion_window="24h",  # currently the only available option
    metadata={"voca tag": "mid voca tag"},
)

In [49]:
# Round-trip the batch object through JSON so it can be pretty-printed.
# model_dump_json() replaces the deprecated pydantic v1-style .json(), which
# emits a deprecation warning under pydantic v2.
response_json = json.loads(batch_info.model_dump_json())
# ensure_ascii=False keeps any non-ASCII text readable instead of \u-escaped.
response_output = json.dumps(response_json, ensure_ascii=False, indent=2)
print(response_output)

{
  "id": "batch_67adab678bb08190b6ba59dc2b95a0d7",
  "completion_window": "24h",
  "created_at": 1739434855,
  "endpoint": "/v1/chat/completions",
  "input_file_id": "file-UwQG9EmV39VNZKT53f8vHk",
  "object": "batch",
  "status": "validating",
  "cancelled_at": null,
  "cancelling_at": null,
  "completed_at": null,
  "error_file_id": null,
  "errors": null,
  "expired_at": null,
  "expires_at": 1739521255,
  "failed_at": null,
  "finalizing_at": null,
  "in_progress_at": null,
  "metadata": {
    "voca tag": "mid voca tag"
  },
  "output_file_id": null,
  "request_counts": {
    "completed": 0,
    "failed": 0,
    "total": 0
  }
}


# Checking the Status of the Batch

- batch_id for `sample`: 'batch_67ad9b5f1cf48190b786b780cc7d2bd5'
- batch_id for `sample_basemodel`: 'batch_67ad9f549890819090f5c4555d02093b'

In [51]:
# Look up the current state of the batch created above.
# Taking the ID from batch_info (instead of a pasted literal) keeps this cell
# correct after "Restart & Run All", where a new batch with a new ID is created.
# To inspect an earlier batch instead, assign its ID string to batch_id here.
batch_id = batch_info.id
batch = client.batches.retrieve(batch_id)
# model_dump_json() replaces the deprecated pydantic v1-style .json().
batch_json = json.loads(batch.model_dump_json())
batch_json

{'id': 'batch_67adab678bb08190b6ba59dc2b95a0d7',
 'completion_window': '24h',
 'created_at': 1739434855,
 'endpoint': '/v1/chat/completions',
 'input_file_id': 'file-UwQG9EmV39VNZKT53f8vHk',
 'object': 'batch',
 'status': 'completed',
 'cancelled_at': None,
 'cancelling_at': None,
 'completed_at': 1739434926,
 'error_file_id': None,
 'errors': None,
 'expired_at': None,
 'expires_at': 1739521255,
 'failed_at': None,
 'finalizing_at': 1739434924,
 'in_progress_at': 1739434856,
 'metadata': {'voca tag': 'mid voca tag'},
 'output_file_id': 'file-QD2jc2MCG3JDCuZf24R1ub',
 'request_counts': {'completed': 13, 'failed': 0, 'total': 13}}

# Retrieving the Results
- output_file_id for `sample`: 'file-HvfB4XnYoZ9eP5Bbuzhgww'

In [52]:
# Download the batch results using the output_file_id reported by the status check,
# rather than a pasted literal that goes stale whenever a new batch is created.
output_file_id = batch.output_file_id
if output_file_id is None:
    # output_file_id stays None until the batch has produced its output file.
    raise RuntimeError(
        f"Batch {batch.id} has no output file yet (status: {batch.status}); "
        "re-run the status cell and try again once it has completed."
    )
file_response = client.files.content(output_file_id)
print(file_response.text)

{"id": "batch_req_67adabacfe30819097a35b52aaa58261", "custom_id": "request-1", "response": {"status_code": 200, "request_id": "41c5af17e4818b859e727bf39c92ba82", "body": {"id": "chatcmpl-B0Ol7zN1iFYyEixB92NDqTbU2Lfpp", "object": "chat.completion", "created": 1739434873, "model": "o3-mini-2025-01-31", "choices": [{"index": 0, "message": {"role": "assistant", "content": "{\"voca_id\": 1843}", "refusal": null}, "finish_reason": "stop"}], "usage": {"prompt_tokens": 851, "completion_tokens": 83, "total_tokens": 934, "prompt_tokens_details": {"cached_tokens": 0, "audio_tokens": 0}, "completion_tokens_details": {"reasoning_tokens": 64, "audio_tokens": 0, "accepted_prediction_tokens": 0, "rejected_prediction_tokens": 0}}, "service_tier": "default", "system_fingerprint": "fp_8bcaa0ca21"}}, "error": null}
{"id": "batch_req_67adabad0e2481909b7e582c7b91ba3f", "custom_id": "request-2", "response": {"status_code": 200, "request_id": "cae0b32c872f33457fe62175d1d95a3c", "body": {"id": "chatcmpl-B0OlAQ

# Batch Results

In [53]:
# Break the downloaded JSONL text into one string per line, dropping blank lines
# (for example the empty string left after a trailing newline).
sample_list = [line for line in file_response.text.split("\n") if line.strip()]

In [54]:
# Print the assistant message of every batch result line.
# Each line is a JSON object carrying "custom_id", "response" and "error".
for s in sample_list:
    s_json = json.loads(s)
    response = s_json.get('response')
    # A line without a successful response has no chat completion to read;
    # report it by custom_id instead of failing mid-loop with TypeError/KeyError.
    if (
        s_json.get('error') is not None
        or response is None
        or response.get('status_code') != 200
    ):
        print(f"[{s_json.get('custom_id')}] request failed: {s_json.get('error') or response}")
        continue
    contents = response['body']
    print(contents["choices"][0]["message"]["content"])

{"voca_id": 1843}
{"voca_id": 1843}
{"voca_id": 1849}
{"voca_id": null}
{"voca_id": 1845}
{
  "voca_id": 832
}
{"voca_id": 827}
{"voca_id": 828}
{"voca_id": 827}
{"voca_id": 827}
{"voca_id": 826}
{"voca_id": 832}
{"voca_id": 832}
