In [24]:
from dotenv import load_dotenv
from glob import glob
from openai import OpenAI

import os
import json

## OpenAI Setup

In [25]:
# Load settings from a local .env file into the environment before the client is built.
# NOTE(review): presumably this is where OPENAI_API_KEY comes from — confirm the .env location.
load_dotenv()

True

In [26]:
# Shared OpenAI API client used by every cell below; the key is read from the
# OPENAI_API_KEY environment variable (None is passed through if it is unset).
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

## Parameters

In [27]:
# Experiment selection: these three values determine both the input folder read
# below and the result folder the notebook writes to.
#
# Earlier configurations, kept for reference (swap into the assignment to reuse):
#   dataset = "explanations", model = "gpt-4.1-mini-2025-04-14", prompt_version = "wiki"
#   dataset = "tgbl-wiki",    model = "gpt-4o-mini-2024-07-18",  prompt_version = "base_v1_icl"
dataset, model, prompt_version = (
    "tgbl-lastfm",
    "gpt-4.1-mini-2025-04-14",
    "base_v1_icl",
)

# Folder holding the prepared batch input (.jsonl) files for this configuration.
folder = "/".join((".", "output", dataset, model, prompt_version))

In [28]:
# Layout of everything this notebook writes for the current configuration:
#   <result_folder>/batch_input_files  - one record per uploaded input file
#   <result_folder>/batch_requests     - one record per submitted batch job
#   <result_folder>/output             - downloaded batch output files
#   <result_folder>/error              - downloaded batch error files
result_folder = f"./result/{dataset}/{model}/{prompt_version}"
batch_input_files_folder = f"{result_folder}/batch_input_files"
batch_requests_folder = f"{result_folder}/batch_requests"

# Create every directory up front; exist_ok makes the cell safe to re-run.
for _path in (
    result_folder,
    batch_input_files_folder,
    f"{result_folder}/output",
    f"{result_folder}/error",
    batch_requests_folder,
):
    os.makedirs(_path, exist_ok=True)


## Batch File Upload

In [None]:
# Collect every .jsonl batch input file found directly inside the input folder.
batch_files = glob(folder + "/*.jsonl")

In [None]:
# Show the discovered input files so they can be checked before anything is uploaded.
batch_files

In [None]:
# Upload each batch input file and submit one batch job per uploaded file.
# The returned objects are collected in two lists so a later cell can save them to disk.
batch_input_files = []
batch_requests = []

for batch in batch_files:
    # Open the file in a context manager so the handle is closed as soon as the
    # upload returns (or fails); the bare open() call used before leaked one
    # file descriptor per uploaded batch.
    with open(batch, "rb") as batch_fh:
        batch_input_file = client.files.create(
            file=batch_fh,
            purpose="batch",
        )

    batch_input_files.append(batch_input_file)

    # Submit the batch job against the file that was just uploaded.
    batch_request = client.batches.create(
        input_file_id=batch_input_file.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={
            "description": f"{dataset} {model} {prompt_version}",
        }
    )
    batch_requests.append(batch_request)

    print(f"Batch input file created: {batch_input_file.id}")
    print(f"Batch request created: {batch_request.id}")

In [None]:
# Persist the uploaded-file objects and the batch-request objects, one file per
# object, named after the object's id and containing its string representation.
# The later cells recover the batch ids from the file names under
# batch_requests_folder, so they can be run in a different session.
for target_folder, api_objects in (
    (batch_input_files_folder, batch_input_files),
    (batch_requests_folder, batch_requests),
):
    for api_object in api_objects:
        with open(f"{target_folder}/{api_object.id}", "w") as f:
            f.write(str(api_object))

## Batch File Progress Check

In [33]:
# Status summary for every batch recorded under batch_requests_folder.
# Works from the ids saved on disk, so it can be run in a later session
# without the in-memory lists from the upload cell.
saved_request_paths = glob(f"{batch_requests_folder}/*")

total = len(saved_request_paths)
in_progress_count = 0
completed_count = 0

for request_path in saved_request_paths:
    # Each saved file is named after the batch id it describes.
    batch_id = os.path.basename(request_path)
    try:
        batch_output = client.batches.retrieve(batch_id)
        if batch_output.status == "completed":
            completed_count += 1
        elif batch_output.status == "in_progress":
            in_progress_count += 1
        elif batch_output.status == "failed":
            print(f"Error: {batch_output.errors}")
        else:
            # Any other status is reported individually.
            print(f"Batch {batch_id} Status: {batch_output.status}")
    except Exception as e:
        # Report the failure and move on to the next batch.
        print(f"Error retrieving batch `{batch_id}`: {e}")

print(f"Total in progress batches: {in_progress_count}")
print(f"Total completed batches: {completed_count}")
print(f"Total batches: {total}")

Batch batch_681ae48f7f448190b0ab0e3529fc4b93 Status: expired
Total in progress batches: 0
Total completed batches: 19
Total batches: 20


## Batch Cancel (run only if you need to cancel the jobs)

In [None]:
# Request cancellation of every batch recorded under batch_requests_folder.
# A batch that cannot be cancelled is reported and skipped so the rest are still processed.
for request_path in glob(f"{batch_requests_folder}/*"):
    batch_id = os.path.basename(request_path)
    try:
        client.batches.cancel(batch_id)
    except Exception as e:
        print(f"Couldn't cancel batch `{batch_id}`: {e}")
    else:
        print(f"Batch `{batch_id}` cancelled")

## Batch File Download

In [None]:
# Download the output and error files of every completed batch recorded under
# batch_requests_folder. Works from the ids saved on disk, so it can be run in
# a later session.
#
# Each batch is handled inside its own try/except, matching the progress-check
# and cancel cells: a single failed retrieve or download is reported and the
# remaining batches are still processed instead of the whole cell aborting.
for file in glob(f"{batch_requests_folder}/*"):
    batch_id = os.path.basename(file)
    try:
        batch_output = client.batches.retrieve(batch_id)

        if batch_output.status != "completed":
            # NOTE(review): OpenAI's Batch docs suggest expired batches may still
            # expose partial results through output_file_id — confirm whether
            # those should be downloaded here as well.
            print(f"Batch `{batch_output.id}` with status `{batch_output.status}` is not dealt with.")
            continue

        # Successful responses, if the batch produced any.
        if batch_output.output_file_id:
            output = client.files.content(batch_output.output_file_id)
            output.write_to_file(f"{result_folder}/output/{batch_output.output_file_id}.jsonl")

        # Failed requests, if the batch produced any.
        if batch_output.error_file_id:
            error_file = client.files.content(batch_output.error_file_id)
            error_file.write_to_file(f"{result_folder}/error/{batch_output.error_file_id}.jsonl")
    except Exception as e:
        print(f"Error downloading batch `{batch_id}`: {e}")

Batch `batch_681ae48f7f448190b0ab0e3529fc4b93` with status `expired` is not dealt with.
