In [18]:
import os
from openai import AzureOpenAI
    
# Azure OpenAI client used by every later cell. The API key is read from the
# AZURE_API_KEY environment variable so that it never appears in the notebook.
client = AzureOpenAI(
    api_key=os.getenv("AZURE_API_KEY"),
    api_version="2025-03-01-preview",
    azure_endpoint = "https://aisg-sj.openai.azure.com/"
    # azure_endpoint = "https://decla-mbncunfi-australiaeast.cognitiveservices.azure.com/" # o3-mini
    )

# Upload the JSONL request file with a purpose of "batch".
# The file is opened in a `with` block so the handle is closed as soon as the
# upload call returns (or raises) instead of staying open for the kernel's lifetime.
with open("example_batch_input.jsonl", "rb") as batch_input:
    file = client.files.create(
        file=batch_input,
        purpose="batch",
        # Optional. `seconds` may be set between 1209600 and 2592000 (14-30 days),
        # counted from the file's creation time.
        extra_body={"expires_after": {"seconds": 1209600, "anchor": "created_at"}},
    )


print(file.model_dump_json(indent=2))

{
  "id": "file-7ba74b86dcb14621be65d4095c4ed1ce",
  "bytes": 1625,
  "created_at": 1749622941,
  "filename": "example_batch_input.jsonl",
  "object": "file",
  "purpose": "batch",
  "status": "processed",
  "expires_at": 1750832541,
  "status_details": null
}


In [19]:
import datetime

# Show when the uploaded file expires; `expires_at` is a Unix timestamp and is
# rendered in the local timezone, or as 'Not set' when the service returned none.
if file.expires_at is None:
    expiration = 'Not set'
else:
    expiration = datetime.datetime.fromtimestamp(file.expires_at)
print(f"File expiration: {expiration}")

# Keep the uploaded file's id for the batch submission cells.
file_id = file.id

File expiration: 2025-06-25 14:22:21


In [20]:
# Submit a batch job for the uploaded file.
# The output-expiry setting is optional; `seconds` may be set between
# 1209600 and 2592000 (14-30 days), counted from creation time.
output_expiry = {"seconds": 1209600, "anchor": "created_at"}

batch_response = client.batches.create(
    input_file_id=file_id,
    endpoint="/chat/completions",
    completion_window="24h",
    extra_body={"output_expires_after": output_expiry},
)

# Remember the batch id so later cells can poll the job.
batch_id = batch_response.id

print(batch_response.model_dump_json(indent=2))

{
  "id": "batch_8d9364c9-7bba-470e-b80d-a6278e99a62e",
  "completion_window": "24h",
  "created_at": 1749622944,
  "endpoint": "/chat/completions",
  "input_file_id": "file-7ba74b86dcb14621be65d4095c4ed1ce",
  "object": "batch",
  "status": "validating",
  "cancelled_at": null,
  "cancelling_at": null,
  "completed_at": null,
  "error_file_id": "",
  "errors": null,
  "expired_at": null,
  "expires_at": 1749709341,
  "failed_at": null,
  "finalizing_at": null,
  "in_progress_at": null,
  "metadata": null,
  "output_file_id": "",
  "request_counts": {
    "completed": 0,
    "failed": 0,
    "total": 0
  }
}


In [22]:
import time
import datetime 

# Poll the batch job until it reaches a state it will not leave again.
#
# "cancelled" (the spelling the service uses, cf. the job's `cancelled_at`
# field) and "expired" are included: without them the loop would spin forever
# on a cancelled or expired job. The legacy "canceled" spelling is kept so the
# loop still stops for anything that relied on it.
TERMINAL_STATUSES = ("completed", "failed", "canceled", "cancelled", "expired")
POLL_INTERVAL_SECONDS = 60

while True:
    batch_response = client.batches.retrieve(batch_id)
    status = batch_response.status
    print(f"{datetime.datetime.now()} Batch Id: {batch_id},  Status: {status}")
    if status in TERMINAL_STATUSES:
        break
    # Sleep only between checks, so an already-finished job returns immediately.
    time.sleep(POLL_INTERVAL_SECONDS)

if batch_response.status == "failed":
    # `errors` (and its `data` list) can be null, so guard before iterating.
    batch_errors = batch_response.errors
    error_items = (batch_errors.data or []) if batch_errors is not None else []
    for error in error_items:
        print(f"Error code {error.code} Message {error.message}")

2025-06-11 14:43:52.267612 Batch Id: batch_8d9364c9-7bba-470e-b80d-a6278e99a62e,  Status: completed


In [23]:
import json

# Download and pretty-print the batch results.
# Prefer the output file; fall back to the error file when there is no output.
output_file_id = batch_response.output_file_id

if not output_file_id:
    output_file_id = batch_response.error_file_id

if output_file_id:
    file_response = client.files.content(output_file_id)
    # The file is JSON Lines: one JSON document per line. Blank lines (including
    # the single empty string produced by an empty file) are dropped so that
    # json.loads is never handed an empty string.
    raw_responses = [
        line.strip() for line in file_response.text.split('\n') if line.strip()
    ]

    for raw_response in raw_responses:
        json_response = json.loads(raw_response)
        formatted_json = json.dumps(json_response, indent=2)
        print(formatted_json)

{
  "custom_id": "request-1",
  "response": {
    "body": {
      "choices": [
        {
          "content_filter_results": {
            "hate": {
              "filtered": false,
              "severity": "safe"
            },
            "self_harm": {
              "filtered": false,
              "severity": "safe"
            },
            "sexual": {
              "filtered": false,
              "severity": "safe"
            },
            "violence": {
              "filtered": false,
              "severity": "safe"
            }
          },
          "finish_reason": "stop",
          "index": 0,
          "logprobs": null,
          "message": {
            "annotations": [],
            "content": "It\u2019s a yellow, round \u201csmiley\u201d face with an open-mouthed grin and two red hearts in place of its eyes. It conveys adoration, love, infatuation or enthusiastic approval.",
            "refusal": null,
            "role": "assistant"
          }
        }
      ]

In [24]:
# Collect every batch job visible to this client. Iterating the list result
# automatically fetches more pages as needed; `limit` is passed as 20.
all_jobs = [job for job in client.batches.list(limit=20)]
print(all_jobs)

[Batch(id='batch_8d9364c9-7bba-470e-b80d-a6278e99a62e', completion_window='24h', created_at=1749622944, endpoint='/chat/completions', input_file_id='file-7ba74b86dcb14621be65d4095c4ed1ce', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1749623742, error_file_id=None, errors=None, expired_at=None, expires_at=1749709341, failed_at=None, finalizing_at=1749623676, in_progress_at=1749623199, metadata=None, output_file_id='file-ad5fb152-3cc1-4094-8591-67e7e94fd419', request_counts=BatchRequestCounts(completed=1, failed=0, total=1)), Batch(id='batch_350476ad-c6e5-4fd7-87a7-fe0ddde4c794', completion_window='24h', created_at=1749622329, endpoint='/chat/completions', input_file_id='file-da50b8e98976444eb2e50163a3184320', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1749622923, error_file_id='file-b07f3b18-8594-4cfd-a5f0-6dd66f556786', errors=None, expired_at=None, expires_at=1749708726, failed_at=None, finalizing

In [None]:
import time
from openai import BadRequestError

# Submit the batch job, retrying with exponential backoff while the service
# rejects it with a token-limit error. Any other BadRequestError is re-raised
# immediately.
max_retries = 10      # number of retries allowed after the first attempt
retries = 0           # retries performed so far
initial_delay = 5     # seconds to wait before the first retry
delay = initial_delay

while True:
    try:
        batch_response = client.batches.create(
            input_file_id=file_id,
            endpoint="/chat/completions",
            completion_window="24h",
        )
    except BadRequestError as e:
        error_message = str(e)

        # Only a token limit error is worth retrying.
        if 'token_limit_exceeded' not in error_message:
            print(f"❌ Encountered non-token limit error: {error_message}")
            raise

        # Check the budget BEFORE counting the next retry, so that exactly
        # `max_retries` retries are performed (incrementing first gave up one
        # retry early while still announcing "retry N/max").
        if retries >= max_retries:
            print(f"❌ Maximum retries ({max_retries}) reached. Giving up.")
            raise

        retries += 1
        print(f"⏳ Token limit exceeded. Waiting {delay} seconds before retry {retries}/{max_retries}...")
        time.sleep(delay)

        # Exponential backoff - double the delay for the next attempt
        delay *= 2
    else:
        # Save batch ID for later use
        batch_id = batch_response.id

        print(f"✅ Batch created successfully after {retries} retries")
        print(batch_response.model_dump_json(indent=2))
        break