A Notebook for processing experimental results.

Each run corresponds to a folder in `outputs/`, from which we load its config information and results as JSON data.

In [1]:
import json
from pathlib import Path
import pandas as pd

In [2]:
# Filter for the runs to load, matched against each run's `cfg_experiment_name`.
# Leave as None to load every run found under outputs/.
experiment_name = None

# To filter, prefer a list of names (also when selecting a single experiment):
# experiment_name = ["pythia-small", "pythia-big"]

In [11]:
folder_path = Path("outputs")

# Normalise the filter to either None (keep everything) or a list of names.
# A bare string must be wrapped: `name in "pythia-small"` is a *substring*
# test, so a run called "pythia" would otherwise pass the filter by accident.
if experiment_name is None:
    wanted_experiments = None
elif isinstance(experiment_name, str):
    wanted_experiments = [experiment_name]
else:
    wanted_experiments = list(experiment_name)

json_list = []

# Sort the paths so the row order (and hence the DataFrame index) does not
# depend on the order in which the filesystem happens to list the files.
for json_file in sorted(folder_path.glob("*/*.json")):
    with open(json_file, "r", encoding="utf-8") as f:
        json_content = json.load(f)

    # Keep the run if no filter is set, or if it carries an experiment name
    # that is one of the requested ones. Runs without the key are dropped
    # whenever a filter is active.
    if wanted_experiments is None:
        json_list.append(json_content)
    elif "cfg_experiment_name" in json_content and json_content["cfg_experiment_name"] in wanted_experiments:
        json_list.append(json_content)

# One row per run, one column per top-level JSON key.
df = pd.DataFrame(json_list)

In [13]:
# Preview the first five loaded runs, numeric columns rounded to two decimals.
df.head(n=5).round(decimals=2)

Unnamed: 0,success,num_free_tokens,target_str,target_length,cfg_batch_size,cfg_discrete_optimizer,cfg_lr,cfg_model_name,cfg_num_steps,cfg_optimizer,...,cfg_input_str,cfg_target_str,cfg_chat_template,cfg_system_prompt,cfg_dataset,cfg_data_idx,cfg_random_weights,cfg_max_tokens,cfg_mini_batch_size,time
0,False,10,"To be or not to be, that is the question.",12,100,gcg,0.01,EleutherAI/pythia-14m,20,adam,...,,"To be or not to be, that is the question.","[, ]",,famous_quotes,0.0,False,10,100,20240412-08:06:00
1,False,10,jumps over the lazy dog,6,100,gcg,0.01,EleutherAI/pythia-14m,20,adam,...,,jumps over the lazy dog,"[, ]",,,,False,10,100,20240412-08:05:01
2,False,10,"I think, therefore I am.",7,100,gcg,0.01,EleutherAI/pythia-14m,20,adam,...,,"I think, therefore I am.","[, ]",,famous_quotes,1.0,False,10,100,20240412-08:07:23


In [14]:
# Derive two per-run columns, then order the rows for easier reading:
#   ratio     = target_length / num_free_tokens
#   memorized = ratio above 1, except that runs whose `success` is False
#               are never counted as memorized.
compression = df["target_length"].div(df["num_free_tokens"])
not_failed = df["success"].ne(False)

df["ratio"] = compression
df["memorized"] = compression.gt(1) & not_failed

row_order = ["cfg_model_name", "cfg_dataset", "cfg_data_idx", "cfg_discrete_optimizer"]
df = df.sort_values(by=row_order)

In [19]:
# Keep only the rows for one dataset / model pair and preview them.
# Note: there is no filter on `success` here, so failed runs are included.
dataset_of_interest = 'famous_quotes'
model_of_interest = 'EleutherAI/pythia-14m'

filtered_df = df[(df['cfg_dataset'] == dataset_of_interest) &
                 (df['cfg_model_name'] == model_of_interest)
                 ]
filtered_df.head().round(2)

Unnamed: 0,success,num_free_tokens,target_str,target_length,cfg_batch_size,cfg_discrete_optimizer,cfg_lr,cfg_model_name,cfg_num_steps,cfg_optimizer,...,cfg_chat_template,cfg_system_prompt,cfg_dataset,cfg_data_idx,cfg_random_weights,cfg_max_tokens,cfg_mini_batch_size,time,ratio,memorized
0,False,10,"To be or not to be, that is the question.",12,100,gcg,0.01,EleutherAI/pythia-14m,20,adam,...,"[, ]",,famous_quotes,0.0,False,10,100,20240412-08:06:00,1.2,False
2,False,10,"I think, therefore I am.",7,100,gcg,0.01,EleutherAI/pythia-14m,20,adam,...,"[, ]",,famous_quotes,1.0,False,10,100,20240412-08:07:23,0.7,False


In [20]:
# Collapse repeated runs of the same (dataset, data index, model, optimizer).
# Sorting `success` in descending order puts a successful attempt ahead of
# failed ones, so keep='first' retains a success whenever one exists.
run_identity = ['cfg_dataset', 'cfg_data_idx', 'cfg_model_name', 'cfg_discrete_optimizer']
df_sorted = df.sort_values(
    by=['cfg_model_name', 'cfg_discrete_optimizer', 'cfg_dataset', 'cfg_data_idx', 'success'],
    ascending=[True, True, True, True, False],
)
df = df_sorted.drop_duplicates(subset=run_identity, keep='first')

# From here on only successful runs remain in `df`.
df = df.loc[df["success"] == True]

# Per model / dataset / optimizer: mean ratio, fraction memorized, and the
# number of (successful, de-duplicated) runs the means are based on.
summary_keys = ["cfg_model_name", "cfg_dataset", "cfg_discrete_optimizer"]
summary_spec = {"ratio": "mean", "memorized": "mean", "success": "count"}
summary = df.groupby(summary_keys).agg(summary_spec).round(2)
summary

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ratio,memorized,success
cfg_model_name,cfg_dataset,cfg_discrete_optimizer,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
