In [1]:
import json
import os
from collections import defaultdict
from random import randint,seed

import numpy as np
import pandas as pd
from openai import OpenAI

In [2]:
# Read the API key from the OPENAI_API_KEY environment variable so the secret
# is never saved inside the notebook. The "xxx" placeholder is kept as a
# fallback so the key can still be pasted in manually to run.
key = os.environ.get("OPENAI_API_KEY", "xxx")

In [3]:
# Build the API client with the key defined above. Passing api_key explicitly
# mirrors the SDK default and could be omitted.
client = OpenAI(api_key=key)

In [4]:
# Prompt modified for a single task.
# The model receives one image and must answer with a single word ("Yes" or
# "No") on whether the image should be sent for manual review under the policy
# quoted in the prompt. This string is sent as the system message of every
# batch request.
system_prompt = """
You are a content moderation expert. You will receive an image. Your task is to decide whether it should be sent for manual review according to the following policy:

We are committed to combating abuse motivated by hatred, prejudice or intolerance, particularly abuse that seeks to silence the voices of those who have been historically marginalized. For this reason, we prohibit behavior that targets individuals or groups with abuse based on their perceived membership in a protected category. You may not directly attack other people on the basis of race, ethnicity, national origin, caste, sexual orientation, gender, gender identity, religious affiliation, age, disability, or serious disease.

The output should always be one of the two options: Yes or No. Only output a single word with no additional tokens. Never refuse to answer.
"""

In [6]:
# URL prefix for the hosted images. When requests are built, the image ID and
# ".png" are appended directly to this prefix (no separator is inserted).
base_url = "https://xxx.cloudfront.net/output/tweet"

Reading in randomized images.

In [7]:
# Load the randomized image indices; the `a_images` and `b_images` columns of
# this frame are read below.
image_numbers = pd.read_csv("image_indices_30k.csv")

In [8]:
# Pull the two index columns out of the frame as plain Python lists.
a_images, b_images = (
    list(image_numbers[column]) for column in ("a_images", "b_images")
)

In [9]:
# Combine both columns into one list of image IDs (a_images first, then
# b_images). The list is rebuilt from the source column rather than extended in
# place, so re-running this cell cannot append `b_images` a second time.
a_images = list(image_numbers['a_images']) + b_images

In [10]:
# Total number of image IDs in the combined list (duplicates included).
len(a_images)

60000

In [11]:
# Number of distinct image IDs in the combined list.
len(set(a_images)) # There are some duplicates, but we're just going to keep them in there
# It will be interesting to see if any are coded differently.

52193

Testing with a small number. Creating a batch file including the queries.

In [11]:
# Only 50k requests can be added to one batch input, so the requests are
# written in two parts. This cell writes the first 50,000 (request-1 through
# request-50000), one JSON object per line.
with open("batch_input_baseline_single_part1.jsonl", "w") as batch_file:
    for request_number, image_id in enumerate(a_images[:50000], start=1):
        # The user turn carries a short text stub plus the hosted image URL.
        user_content = [
            {"type": "text", "text": "Image"},
            {"type": "image_url", "image_url": {"url": base_url + str(image_id) + ".png"}},
        ]
        payload = {
            "custom_id": f"request-{request_number}",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": "gpt-4o",
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_content},
                ],
                # Yes and No both consume a single token
                "max_tokens": 1,
                "temperature": 0,
            },
        }
        batch_file.write(json.dumps(payload) + "\n")

In [12]:
# Only 50k requests can be added to one batch input, so the requests are
# written in two parts. This cell writes everything after the first 50,000
# (request-50001 onward), one JSON object per line.
with open("batch_input_baseline_single_part2.jsonl", "w") as batch_file:
    for request_number, image_id in enumerate(a_images[50000:], start=50001):
        # The user turn carries a short text stub plus the hosted image URL.
        user_content = [
            {"type": "text", "text": "Image"},
            {"type": "image_url", "image_url": {"url": base_url + str(image_id) + ".png"}},
        ]
        payload = {
            "custom_id": f"request-{request_number}",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": "gpt-4o",
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_content},
                ],
                # Yes and No both consume a single token
                "max_tokens": 1,
                "temperature": 0,
            },
        }
        batch_file.write(json.dumps(payload) + "\n")

In [13]:
# Uploading batch (part 1).
# The input file is opened in a `with` block so the handle is closed as soon
# as the upload call returns instead of being left open in the kernel.
with open("batch_input_baseline_single_part1.jsonl", "rb") as part1_file:
    batch_input_file = client.files.create(
        file=part1_file,
        purpose="batch"
    )

In [14]:
# Keep the uploaded file's ID; it is passed as `input_file_id` when the batch
# job is created. The bare expression displays it as the cell output.
batch_input_file_id = batch_input_file.id
batch_input_file_id

'file-xxx'

In [15]:
# Start the batch job for the uploaded part-1 file. The call is left as the
# cell's last expression so the returned Batch object is displayed.
client.batches.create(
    input_file_id=batch_input_file_id,
    endpoint="/v1/chat/completions",
    completion_window="24h",  # cannot be changed
    metadata={"description": "Single eval p1 v2"},
)

Batch(id='batch_xxx', completion_window='24h', created_at=1747520505, endpoint='/v1/chat/completions', input_file_id='file-xxx', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1747606905, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'Single eval p1 v2'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))

The `output_file_id` field will appear below once batches begin processing. This can be used to retrieve the results.

In [5]:
# Fetch the current state of the part-1 batch by its ID; the displayed Batch
# object includes its status, request counts and output/error file IDs.
client.batches.retrieve("batch_xxx") # Can also be viewed in OpenAI platform

Batch(id='batch_xxx', completion_window='24h', created_at=1747520505, endpoint='/v1/chat/completions', input_file_id='file-xxx', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1747528951, error_file_id='file-xxx', errors=None, expired_at=None, expires_at=1747606905, failed_at=None, finalizing_at=1747525045, in_progress_at=1747520514, metadata={'description': 'Single eval p1 v2'}, output_file_id='file-xxx', request_counts=BatchRequestCounts(completed=49994, failed=6, total=50000))

Part 2

In [17]:
# Uploading batch 2.
# The input file is opened in a `with` block so the handle is closed as soon
# as the upload call returns instead of being left open in the kernel.
with open("batch_input_baseline_single_part2.jsonl", "rb") as part2_file:
    batch_input_file2 = client.files.create(
        file=part2_file,
        purpose="batch"
    )

In [18]:
# Keep the part-2 uploaded file's ID; it is passed as `input_file_id` when the
# second batch job is created. The bare expression displays it.
batch_input_file_id2 = batch_input_file2.id

batch_input_file_id2

'file-xxx'

In [19]:
# Start the batch job for the uploaded part-2 file. The call is left as the
# cell's last expression so the returned Batch object is displayed.
client.batches.create(
    input_file_id=batch_input_file_id2,
    endpoint="/v1/chat/completions",
    completion_window="24h",  # cannot be changed
    metadata={"description": "Single eval p2 v2"},
)

Batch(id='batch_xxx', completion_window='24h', created_at=1747520516, endpoint='/v1/chat/completions', input_file_id='file-xxx', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1747606916, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'Single eval p2 v2'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))

In [6]:
# Fetch the current state of the part-2 batch by its ID; the displayed Batch
# object includes its status, request counts and output/error file IDs.
client.batches.retrieve("batch_xxx") # Can also be viewed in OpenAI platform

Batch(id='batch_xxx', completion_window='24h', created_at=1747520516, endpoint='/v1/chat/completions', input_file_id='file-xxx', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1747523253, error_file_id='file-xxx', errors=None, expired_at=None, expires_at=1747606916, failed_at=None, finalizing_at=1747522280, in_progress_at=1747520521, metadata={'description': 'Single eval p2 v2'}, output_file_id='file-xxx', request_counts=BatchRequestCounts(completed=9997, failed=3, total=10000))

Dealing with failures

In [8]:
# Collect every request that failed in the two baseline batches, write them to
# a new JSONL file, and resubmit them as one retry batch.

# Error-file IDs of the two batches, and the input files those batches used
error_ids = ["file-xxx", "file-xxx"]
input_files = ["batch_input_baseline_single_part1.jsonl", "batch_input_baseline_single_part2.jsonl"]

# Initialize lists to store failed requests
all_failed_requests = []
all_failed_inputs = []

# Step 1: Identify failed requests in both batches
for error_id in error_ids:
    # Load batch errors directly from the API response
    error_file_response = client.files.content(error_id)

    failed_requests = []

    # Parse error file line by line
    for line in error_file_response.iter_lines():
        # Skip blank lines (e.g. a trailing newline) so json.loads does not fail
        if not line.strip():
            continue
        error_entry = json.loads(line)
        # "response" may be present but null; `or {}` covers both the missing
        # and the null case, and either one counts as a failure.
        response = error_entry.get("response") or {}
        if response.get("status_code") != 200:
            failed_requests.append(error_entry["custom_id"])

    all_failed_requests.extend(failed_requests)

# Set for constant-time membership checks while scanning the input files
failed_ids = set(all_failed_requests)

# Step 2: Extract failed requests from the original input files
for input_file in input_files:
    with open(input_file, "r") as f:
        for line in f:
            request = json.loads(line)
            if request["custom_id"] in failed_ids:
                all_failed_inputs.append(request)

print(f"Total failed requests: {len(all_failed_inputs)}")

# Step 3: Save failed requests to a new JSONL file for resubmission
failed_batch_file = "batch_input_baseline_single_failed.jsonl"

with open(failed_batch_file, "w") as f:
    for request in all_failed_inputs:
        f.write(json.dumps(request) + "\n")

# Step 4: Submit new batch for failed requests (only if there are any)
if all_failed_inputs:
    # Open inside `with` so the handle is closed once the upload returns
    with open(failed_batch_file, "rb") as retry_file:
        batch_input_file = client.files.create(
            file=retry_file,
            purpose="batch"
        )

    batch_input_file_id = batch_input_file.id

    new_batch = client.batches.create(
        input_file_id=batch_input_file_id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={"description": "Retry failed requests from both baseline batches, single task"}
    )

    print(f"New batch job created: {new_batch.id}")
else:
    print("No failed requests found. No need to re-run.")

Total failed requests: 9
New batch job created: batch_xxx


In [10]:
# Fetch the current state of the retry batch by its ID to confirm whether the
# resubmitted requests completed.
client.batches.retrieve("batch_xxx")

Batch(id='batch_xxx', completion_window='24h', created_at=1747931774, endpoint='/v1/chat/completions', input_file_id='file-xxx', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1747931788, error_file_id=None, errors=None, expired_at=None, expires_at=1748018174, failed_at=None, finalizing_at=1747931788, in_progress_at=1747931775, metadata={'description': 'Retry failed requests from both baseline batches, single task'}, output_file_id='file-xxx', request_counts=BatchRequestCounts(completed=9, failed=0, total=9))

In [11]:
# List of file responses to combine: the content of the output file from each
# of the three batches, fetched by file ID.
file_responses = [
    client.files.content('file-xxx'), # Part 1
    client.files.content('file-xxx'), # Part 2
    client.files.content('file-xxx') # Error
]

In [12]:
# Concatenate the three batch output files into a single JSONL file.
with open("batch_result_baseline_single_GPT4o_combined.jsonl", 'w', encoding='utf-8') as file:
    for file_response in file_responses:
        # Write the entire content of each response as text
        content = file_response.text
        file.write(content)
        # Guard the JSONL structure: if a file does not end with a newline, its
        # last record would otherwise be glued to the next file's first record.
        if content and not content.endswith("\n"):
            file.write("\n")