## Working with Azure OpenAI Batch Jobs ##
Requirement: before running this code you need a deployment resource in Azure OpenAI with the deployment type 'Global-Batch' enabled.
First, install the library packages that are needed.

In [None]:
# Installs openai
!pip install -r requirements.txt

## Preparing the files we require to call for a batch file ##
The format is very similar to the one used for fine-tuning: a .jsonl (JSON Lines) file. Here is an example we can use.

In [None]:
{"custom_id": "request-1", "method": "POST", "url": "/chat/completions", "body": {"model": "REPLACE-WITH-MODEL-DEPLOYMENT-NAME", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": [{"type": "text", "text": "What’s in this image?"},{"type": "image_url","image_url": {"url": "https://raw.githubusercontent.com/MicrosoftDocs/azure-docs/main/articles/ai-services/openai/media/how-to/generated-seattle.png"}}]}],"max_tokens": 1000}}
{"custom_id": "request-2", "method": "POST", "url": "/chat/completions", "body": {"model": "REPLACE-WITH-MODEL-DEPLOYMENT-NAME", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": [{"type": "text", "text": "What’s in this image?"},{"type": "image_url","image_url": {"url": "https://raw.githubusercontent.com/MicrosoftDocs/azure-docs/main/articles/ai-services/openai/media/how-to/generated-seattle.png"}}]}],"max_tokens": 1000}}

## Upload the batch file ##
You can use images.jsonl as an example if you are trying to follow along; for the purposes of the blog post I'm using images.jsonl to demonstrate the image capability.

In [None]:
import os
import openai
from dotenv import load_dotenv
from openai import AzureOpenAI
    
# Load the environment variables from a local .env file (the file is listed in .gitignore)
load_dotenv()


# Initialize the AzureOpenAI client from the AZURE_OPENAI_API_KEY and
# AZURE_OPENAI_ENDPOINT environment variables.
client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version="2024-07-01-preview",
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
)

# Upload the input file with purpose "batch". The path is relative to the current
# directory; change it to the file you want to submit (images.jsonl is used here).
# The context manager closes the file handle once the upload call has returned,
# instead of leaving it open for the lifetime of the kernel.
with open("images.jsonl", "rb") as batch_input:
    file = client.files.create(
        file=batch_input,
        purpose="batch",
    )

print(file.model_dump_json(indent=2))
# Keep the id: the following cells use it to poll the file and to create the batch job
file_id = file.id

## Tracking the actual job from the submission to the batch job ##

In [None]:
# Wait until the uploaded file is in the "processed" state.
# Stops with an exception if the service reports the file as "error", because
# such a file never becomes "processed" and the loop would otherwise run forever.
import time
import datetime 

status = "pending"
while status != "processed":
    time.sleep(15)  # poll every 15 seconds
    file_response = client.files.retrieve(file_id)
    status = file_response.status
    print(f"{datetime.datetime.now()} File Id: {file_id}, Status: {status}")
    if status == "error":
        # status_details may be absent depending on the API version, hence getattr
        details = getattr(file_response, "status_details", None)
        raise RuntimeError(f"File {file_id} failed processing: {details}")

## Creating our batch job ##
We now have a file that is processed and ready to submit to the API for processing. This code uses client.batches.create() for this operation.

In [None]:
# Arguments for the batch job: the uploaded input file, the endpoint the
# requests target, and the completion window.
batch_request = {
    "input_file_id": file_id,
    "endpoint": "/chat/completions",
    "completion_window": "24h",
}

# Submit the job and remember its id for the polling cell below
batch_response = client.batches.create(**batch_request)
batch_id = batch_response.id

# Show the full batch object as indented JSON
print(batch_response.model_dump_json(indent=2))

## Tracking the batch job in progress ##
So we've submitted our job, but what does the process look like? We can track it with the following code.

In [None]:
import time
import datetime 

# Statuses after which the job will not change any more. The service spells the
# cancelled state with two l's and can also report "expired"; the single-l
# "canceled" is kept so the previous behaviour is preserved.
TERMINAL_STATUSES = ("completed", "failed", "canceled", "cancelled", "expired")

# A job is in "validating" right after submission; poll once a minute until it
# reaches one of the terminal statuses.
status = "validating"
while status not in TERMINAL_STATUSES:
    time.sleep(60)
    batch_response = client.batches.retrieve(batch_id)
    status = batch_response.status
    print(f"{datetime.datetime.now()} Batch Id: {batch_id},  Status: {status}")

In [None]:
# Serialize the most recently retrieved batch object to indented JSON and show it
batch_json = batch_response.model_dump_json(indent=2)
print(batch_json)

## Retrieving the batch job output file ##
If you need to debug, you'll have to reference the error_file_id; the results are retrieved through the separate output_file_id. To retrieve them we run the following loop over the JSON lines.

In [None]:
# Request the list of batch jobs from the service; as the last expression of the
# cell, the returned object is displayed as the cell output.
client.batches.list()

In [None]:
import json

# Prefer the output file; when the job produced none (output_file_id is None),
# fall back to the error file so the failure details can still be inspected.
result_file_id = batch_response.output_file_id or batch_response.error_file_id
if result_file_id is None:
    raise RuntimeError(
        f"Batch {batch_response.id} has neither an output file nor an error file "
        f"(status: {batch_response.status})"
    )

# Download the file content; it is JSON Lines, i.e. one JSON document per line
file_response = client.files.content(result_file_id)
# Skip blank lines so empty content or a stray newline does not break json.loads
raw_responses = [line for line in file_response.text.split('\n') if line.strip()]
# Parse every line and print it as indented JSON
for raw_response in raw_responses:
    json_response = json.loads(raw_response)
    formatted_json = json.dumps(json_response, indent=2)
    print(formatted_json)