In [None]:
import json
import glob
import pandas as pd

In [None]:
# Collect the batch output files in the working directory. glob returns paths
# in whatever order the file system yields them, so sort to keep the row order
# of everything built from this list reproducible across machines and runs.
files = sorted(glob.glob("batch_result_*"))

In [None]:
# Retain only the paths whose name contains "result".
files = list(filter(lambda path: "result" in path, files))

In [None]:
# Discard any path whose name contains "alt" or "single".
files = [path for path in files if not ("alt" in path or "single" in path)]

In [None]:
# Display the filtered file list to check it before loading.
files

Each file has up to two versions. The `_failures` version contains the rows that were missed in the first batch. In every case, the missed rows were successfully coded when those observations were resubmitted in a second batch.

In [None]:
# Read every batch output file (one JSON object per line) into a list of
# records, then flatten the nested JSON into a dataframe.
data = []

for file in files:
    # Explicit UTF-8 so decoding does not depend on the platform default.
    with open(file, 'r', encoding='utf-8') as f:
        for line in f:
            # Skip blank lines (e.g. a trailing newline), which json.loads rejects.
            if not line.strip():
                continue

            record = json.loads(line)
            # Remember which file the row came from; the model and prompt
            # labels are derived from this name further down.
            record['source_file'] = file

            # Promote the first choice's 'content' and 'refusal' to top-level
            # fields. Each level is checked in case a row carries a null or
            # partial 'response'; such rows are kept and simply lack these
            # two fields instead of aborting the whole load.
            response = record.get('response')
            body = response.get('body') if isinstance(response, dict) else None
            choices = body.get('choices') if isinstance(body, dict) else None
            if isinstance(choices, list) and choices:
                first_choice = choices[0]
                message = first_choice.get('message') if isinstance(first_choice, dict) else None
                if not isinstance(message, dict):
                    message = {}
                record['content'] = message.get('content')
                record['refusal'] = message.get('refusal')

            data.append(record)

# Nested keys become dotted column names (e.g. 'response.body.usage.prompt_tokens').
df = pd.json_normalize(data)

In [None]:
# Derive two labels from each row's source file name:
#   model  - "mini" when the name contains "mini", otherwise "base"
#   prompt - the third "_"-separated piece of the name, or None when the
#            name has fewer than three pieces
df = df.assign(
    model=lambda frame: frame["source_file"].map(
        lambda name: "mini" if "mini" in name else "base"
    ),
    prompt=lambda frame: frame["source_file"].map(
        lambda name: name.split("_")[2] if name.count("_") >= 2 else None
    ),
)

In [None]:
# Number of rows for each (model, prompt) combination.
(
    df.groupby(by=["model", "prompt"])
    .size()
    .reset_index(name="count")
)

Next, merge in the image IDs that correspond to the request numbers. To do this, first add a `custom_id` column (`request-1`, `request-2`, …) to the image index table, then use it as the key to merge with `df`.

In [None]:
# Load the image index table that will be joined onto the responses.
image_numbers = pd.read_csv(
    "../image_indices_30k.csv",
)

In [None]:
# Label the rows "request-1" ... "request-30000" in table order. Assigning a
# 30,000-element list requires the table to have exactly 30,000 rows.
image_numbers['custom_id'] = list(map("request-{}".format, range(1, 30001)))

In [None]:
# Drop the 'Unnamed: 0' column (presumably an index saved with the CSV - TODO confirm).
# errors='ignore' keeps this cell safe to re-run: the name is rebound to the
# result, so on a second run the column is already gone.
image_numbers = image_numbers.drop(columns=['Unnamed: 0'], errors='ignore')

In [None]:
# Attach the image columns to every response row through the shared
# 'custom_id' key. The left join keeps every row of `df`, matched or not.
# `df` is the frame built above and `merged_df` is the name the next cell reads.
merged_df = pd.merge(df, image_numbers, on='custom_id', how='left')
# Alias so the result is also reachable under this cell's earlier output name.
merged_main = merged_df

In [None]:
# Keep only the columns needed in the exported results.
merged_df_ = merged_df.loc[
    :,
    [
        "source_file",
        "content",
        "model",
        "prompt",
        "response.body.usage.prompt_tokens",
        "a_images",
        "b_images",
    ],
]

Verify that there are 30,000 rows for each combination of model and prompt, and 29,664 rows for the identical conditions.

In [None]:
# Number of rows for each (model, prompt) combination after the merge.
(
    merged_df_.groupby(by=["model", "prompt"])
    .size()
    .reset_index(name="count")
)

In [None]:
# Write the trimmed results to disk (pandas includes the row index by default).
merged_df_.to_csv(path_or_buf="gpt4o-experiments-results-main.csv")