In [1]:
import json
import glob
import pandas as pd

In [2]:
# Collect every batch result file in the working directory. glob returns
# matches in arbitrary, OS-dependent order, so sort the list to make it (and
# any later positional indexing into it) reproducible across machines and runs.
files = sorted(glob.glob("batch_result_*"))

In [3]:
# Keep only the result files whose name contains the substring "alt".
files = list(filter(lambda name: "alt" in name, files))

In [5]:
# Drop the first entry of the file list and keep the rest.
# NOTE(review): which file is dropped depends on the order of `files`, and
# re-running this cell removes one more file each time — confirm the intended
# file is the one at index 0, or filter it out by name instead.
files = files[1:]

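Ignoring the first version of the results by slicing off the first file in the list. Note that this relies on the order in which the files were listed.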

In [7]:
def _first_choice_message(record):
    """Return the ``message`` dict of the first choice in a batch record.

    Walks ``record['response']['body']['choices'][0]['message']`` and returns
    an empty dict if any level is missing, null, or of an unexpected type, so
    a record without a usable completion never raises.
    """
    response = record.get('response')
    body = response.get('body') if isinstance(response, dict) else None
    choices = body.get('choices') if isinstance(body, dict) else None
    if not (isinstance(choices, list) and choices):
        return {}
    first_choice = choices[0]
    message = first_choice.get('message') if isinstance(first_choice, dict) else None
    return message if isinstance(message, dict) else {}


# Accumulate one dict per result line; the DataFrame is built once at the end.
data = []

for file in files:
    # Read as UTF-8 explicitly so decoding does not depend on the platform's
    # default encoding.
    with open(file, 'r', encoding='utf-8') as f:
        # Each non-empty line is parsed as one JSON object.
        for line in f:
            if not line.strip():
                # Tolerate blank or trailing empty lines instead of crashing
                # in json.loads.
                continue
            record = json.loads(line)
            # Remember which file the record came from.
            record['source_file'] = file

            # Always set both fields (None when there is no usable first
            # choice) so the 'content' and 'refusal' columns exist even if
            # no record carries a completion.
            message = _first_choice_message(record)
            record['content'] = message.get('content')
            record['refusal'] = message.get('refusal')

            data.append(record)

# Flatten nested keys into dotted column names (e.g. response.status_code).
df = pd.json_normalize(data)

                                           id  custom_id error  \
0  batch_req_682d1e05841081908aab5daf157bf874  request-1  None   
1  batch_req_682d1e05adfc81909a6ea36e400a42d9  request-2  None   
2  batch_req_682d1e05d1b081908019144979064fc4  request-3  None   
3  batch_req_682d1e05f1b0819089526e3de3ccf1bb  request-4  None   
4  batch_req_682d1e06176081908104c693603fbbd3  request-5  None   

                                         source_file content refusal  \
0  batch_result_baseline_alt_GPT4o_combined_v2.jsonl       B    None   
1  batch_result_baseline_alt_GPT4o_combined_v2.jsonl       A    None   
2  batch_result_baseline_alt_GPT4o_combined_v2.jsonl       A    None   
3  batch_result_baseline_alt_GPT4o_combined_v2.jsonl       A    None   
4  batch_result_baseline_alt_GPT4o_combined_v2.jsonl       B    None   

   response.status_code               response.request_id  \
0                   200  6089970e413090275885687ea91082ed   
1                   200  91af04b12aa9b871d5463ee

In [9]:
# Dimensions (rows, columns) of the normalized batch-results frame.
df.shape

(199500, 24)

Now we merge in the image IDs that correspond to the request numbers. To do this, we first create a `custom_id` column of request identifiers for the image table, then use it as the key to merge with `df`. Requests were coded numerically from one upwards.

In [10]:
# Load the table of image pairs (a_images / b_images and their paths) from the
# parent directory.
# NOTE(review): the "Unnamed: 0" column suggests the CSV was saved with its
# index; presumably row order matches request numbering — confirm.
image_numbers = pd.read_csv("../image_indices_alt_99750.csv")

In [16]:
# Build the "request-<n>" merge key, numbering rows from 1 in their current
# order. The count is taken from the frame itself rather than hard-coded, so
# the cell keeps working if the input table changes size.
# NOTE(review): assumes the CSV row order matches the request numbering.
image_numbers['custom_id'] = [
    f"request-{i}" for i in range(1, len(image_numbers) + 1)
]

In [17]:
# Dimensions (rows, columns) of the image table.
image_numbers.shape

(99750, 6)

In [19]:
# Preview the first rows of the image table, including the custom_id key.
image_numbers.head()

Unnamed: 0.1,Unnamed: 0,a_images,b_images,a_paths,b_paths,custom_id
0,0,555755,183551,output_alt/tweet585754.png,output_2024/tweet183551.png,request-1
1,1,419943,440191,output_alt/tweet449942.png,output_alt/tweet470190.png,request-2
2,2,497331,251346,output_alt/tweet527330.png,output_alt/tweet281345.png,request-3
3,3,32817,110536,output_2024/tweet32817.png,output_2024/tweet110536.png,request-4
4,4,224661,460665,output_alt/tweet254660.png,output_alt/tweet490664.png,request-5


In [20]:
# Attach the image pair to every result row via the shared custom_id key.
# A left merge keeps every row of df. custom_id repeats in df (one row per
# result file) but must be unique in image_numbers; validate="many_to_one"
# raises a MergeError if it is not, instead of silently duplicating rows.
merged_df = pd.merge(
    df,
    image_numbers,
    on='custom_id',
    how='left',
    validate='many_to_one',
)

In [21]:
# Preview the merged frame to check that the image columns were attached.
merged_df.head()

Unnamed: 0.1,id,custom_id,error,source_file,content,refusal,response.status_code,response.request_id,response.body.id,response.body.object,...,response.body.usage.completion_tokens_details.audio_tokens,response.body.usage.completion_tokens_details.accepted_prediction_tokens,response.body.usage.completion_tokens_details.rejected_prediction_tokens,response.body.service_tier,response.body.system_fingerprint,Unnamed: 0,a_images,b_images,a_paths,b_paths
0,batch_req_682d1e05841081908aab5daf157bf874,request-1,,batch_result_baseline_alt_GPT4o_combined_v2.jsonl,B,,200,6089970e413090275885687ea91082ed,chatcmpl-BZQ86rtKdRig9f1MkXIxNiT7oavA1,chat.completion,...,0,0,0,default,fp_b7faba9ef5,0,555755,183551,output_alt/tweet585754.png,output_2024/tweet183551.png
1,batch_req_682d1e05adfc81909a6ea36e400a42d9,request-2,,batch_result_baseline_alt_GPT4o_combined_v2.jsonl,A,,200,91af04b12aa9b871d5463eef65af22c0,chatcmpl-BZQAvMRt4mAYYRJiXc9k20KrflTpg,chat.completion,...,0,0,0,default,fp_b7faba9ef5,1,419943,440191,output_alt/tweet449942.png,output_alt/tweet470190.png
2,batch_req_682d1e05d1b081908019144979064fc4,request-3,,batch_result_baseline_alt_GPT4o_combined_v2.jsonl,A,,200,a91bb0868dc76d2a4f35506530d133bd,chatcmpl-BZQBDS81l7nQjwixAmzYbt8HxXstE,chat.completion,...,0,0,0,default,fp_b7faba9ef5,2,497331,251346,output_alt/tweet527330.png,output_alt/tweet281345.png
3,batch_req_682d1e05f1b0819089526e3de3ccf1bb,request-4,,batch_result_baseline_alt_GPT4o_combined_v2.jsonl,A,,200,6861f98f50b085400fdb7f38c196de62,chatcmpl-BZQAwZFemhT4irD9QqptLR7dGuoaM,chat.completion,...,0,0,0,default,fp_b7faba9ef5,3,32817,110536,output_2024/tweet32817.png,output_2024/tweet110536.png
4,batch_req_682d1e06176081908104c693603fbbd3,request-5,,batch_result_baseline_alt_GPT4o_combined_v2.jsonl,B,,200,88abeb1a6e9cfc26b56c00f0033affd8,chatcmpl-BZQAwRuzlkoYF3TdK9Ht5NquJFRO5,chat.completion,...,0,0,0,default,fp_b7faba9ef5,4,224661,460665,output_alt/tweet254660.png,output_alt/tweet490664.png


In [22]:
# Trim the merged frame down to the source file, the model's answer, the
# prompt token count, and the image identifiers/paths.
analysis_columns = [
    "source_file",
    "content",
    "response.body.usage.prompt_tokens",
    "a_images",
    "b_images",
    "a_paths",
    "b_paths",
]
merged_df_ = merged_df.loc[:, analysis_columns]

In [23]:
# Dimensions (rows, columns) of the trimmed frame.
merged_df_.shape

(199500, 7)

In [24]:
# Preview the first rows of the trimmed frame.
merged_df_.head()

Unnamed: 0,source_file,content,response.body.usage.prompt_tokens,a_images,b_images,a_paths,b_paths
0,batch_result_baseline_alt_GPT4o_combined_v2.jsonl,B,1365,555755,183551,output_alt/tweet585754.png,output_2024/tweet183551.png
1,batch_result_baseline_alt_GPT4o_combined_v2.jsonl,A,1875,419943,440191,output_alt/tweet449942.png,output_alt/tweet470190.png
2,batch_result_baseline_alt_GPT4o_combined_v2.jsonl,A,2385,497331,251346,output_alt/tweet527330.png,output_alt/tweet281345.png
3,batch_result_baseline_alt_GPT4o_combined_v2.jsonl,A,2385,32817,110536,output_2024/tweet32817.png,output_2024/tweet110536.png
4,batch_result_baseline_alt_GPT4o_combined_v2.jsonl,B,1875,224661,460665,output_alt/tweet254660.png,output_alt/tweet490664.png


In [25]:
def _model_label(filename):
    """Return "mini" if the file name contains "mini", otherwise "base"."""
    if "mini" in filename:
        return "mini"
    return "base"


def _prompt_label(filename):
    """Return the third underscore-separated token of the name, or None if absent."""
    parts = filename.split("_")
    return parts[2] if len(parts) > 2 else None


# Derive the model and prompt labels from each row's source file name.
source_names = merged_df_["source_file"]
merged_df_ = merged_df_.assign(
    model=source_names.apply(_model_label),
    prompt=source_names.apply(_prompt_label),
)

In [26]:
# Inspect the last rows, which now carry the derived model and prompt columns.
merged_df_.tail()

Unnamed: 0,source_file,content,response.body.usage.prompt_tokens,a_images,b_images,a_paths,b_paths,model,prompt
199495,batch_result_baseline_alt_GPT4o_mini_combined....,B,73845,565266,218822,output_alt/tweet595265.png,output_alt/tweet248821.png,mini,baseline
199496,batch_result_baseline_alt_GPT4o_mini_combined....,B,56844,494672,439779,output_alt/tweet524671.png,output_alt/tweet469778.png,mini,baseline
199497,batch_result_baseline_alt_GPT4o_mini_combined....,B,73845,318998,517816,output_alt/tweet348997.png,output_alt/tweet547815.png,mini,baseline
199498,batch_result_baseline_alt_GPT4o_mini_combined....,A,73845,248397,97965,output_alt/tweet278396.png,output_2024/tweet97965.png,mini,baseline
199499,batch_result_baseline_alt_GPT4o_mini_combined....,A,56844,347662,11343,output_alt/tweet377661.png,output_2024/tweet11343.png,mini,baseline


Verifying that we have the same number of rows (99,750, one per request) for each combination of model and prompt.

In [27]:
# Tabulate the number of rows for each (model, prompt) pair.
group_sizes = merged_df_.groupby(["model", "prompt"]).size()
group_sizes.rename("count").reset_index()

Unnamed: 0,model,prompt,count
0,base,baseline,99750
1,mini,baseline,99750


In [28]:
# Persist the final table next to the notebook.
# NOTE(review): with the default index=True the row index is written as an
# unnamed first column; pass index=False if downstream readers do not need it.
output_path = "gpt4o-experiments-results-alt.csv"
merged_df_.to_csv(output_path)