# Ablations for experiments

In this notebook, we conduct the ablation studies for the following experiments:
- **P1**: Second autocorrelation inequality (AlphaEvolve's B2) 
- **P2**: Minimizing the ratio of maximum to minimum distance (AlphaEvolve's B8) 
- **P3**: Packing circles inside a unit square to maximize sum of radii (AlphaEvolve's B12) 
- **P4**: Packing circles inside a rectangle of perimeter 4 to maximize sum of radii (AlphaEvolve's B13)  

In [None]:
import sys

sys.path.insert(0, "../")

import os

import pandas as pd

from data_analysis_utils.processing_utils import process_experiments
from data_analysis_utils.plotting_utils import (
    plot_experiments_statistical_summary,
    plot_experiments_mean_std,
)

## Loading and processing experiments

In [None]:
# Models handed to process_experiments together with their per-model prices.
# NOTE(review): "prompt_pm"/"compl_pm" look like prices per million prompt /
# completion tokens — confirm against process_experiments.
model_names = ["GOOGLE_GEMINI-2.5-FLASH", "GOOGLE_GEMINI-2.5-PRO"]
model2cost = {
    "GOOGLE_GEMINI-2.5-FLASH": {"prompt_pm": 0.3, "compl_pm": 2.5},
    "GOOGLE_GEMINI-2.5-PRO": {"prompt_pm": 1.25, "compl_pm": 10},
}
# Root folders of the problem definitions and of the experiment outputs.
prob_path = "../problems/alphaevolve_math_problems"
exp_path = "../experiments/alphaevolve_math_problems"

# Ablation settings that exist as numbered repeats (`gemini_<variant>_<run>/`).
ablation_variants = ["no_evolve", "no_mp_or_insp", "mp", "insp", "mp_insp"]
ablation_runs = (1, 2, 3)


def repeated_ablation_dirs(problem_subdir):
    """Return the output folders of every repeated ablation run of a problem.

    Args:
        problem_subdir: Problem folder relative to ``exp_path``, without
            leading or trailing slash (e.g. ``"circle_packing_square/26"``).

    Returns:
        A list of ``f"{exp_path}/{problem_subdir}/gemini_{variant}_{run}/"``
        strings, ordered run by run and, within a run, in the order of
        ``ablation_variants``.
    """
    return [
        f"{exp_path}/{problem_subdir}/gemini_{variant}_{run}/"
        for run in ablation_runs
        for variant in ablation_variants
    ]


# One entry per problem: where its definition lives ("inpt_dir"), which
# experiment folders to process ("out_dirs") and the value of "sol_func_name"
# forwarded to process_experiments.
args_list = [
    {
        "inpt_dir": f"{prob_path}/second_autocorr_ineq/",
        "out_dirs": repeated_ablation_dirs("second_autocorr_ineq"),
        "sol_func_name": "run",
    },
    {
        "inpt_dir": f"{prob_path}/minimizing_max_min_dist/2/",
        "out_dirs": [
            f"{exp_path}/minimizing_max_min_dist/2/gemini_mp/",
            f"{exp_path}/minimizing_max_min_dist/2/gemini_mp2/",
            f"{exp_path}/minimizing_max_min_dist/2/gemini_mp_insp/",
            f"{exp_path}/minimizing_max_min_dist/2/gemini_mp_insp_nocontext/",
            f"{exp_path}/minimizing_max_min_dist/2/gemini_rank/",
            f"{exp_path}/minimizing_max_min_dist/2/gemini_no_evolve/",
            f"{exp_path}/minimizing_max_min_dist/2/gemini_insp/",
        ],
        "sol_func_name": "min_max_dist_dim2_16",
    },
    {
        "inpt_dir": f"{prob_path}/minimizing_max_min_dist/3/",
        "out_dirs": [
            f"{exp_path}/minimizing_max_min_dist/3/gemini_mp_insp/",
            f"{exp_path}/minimizing_max_min_dist/3/gemini_mp_insp_nocontext/",
        ],
        "sol_func_name": "min_max_dist_dim3_14",
    },
    {
        "inpt_dir": f"{prob_path}/circle_packing_square/26/",
        "out_dirs": repeated_ablation_dirs("circle_packing_square/26"),
        "sol_func_name": "circle_packing26",
    },
    {
        "inpt_dir": f"{prob_path}/circle_packing_square/32/",
        "out_dirs": repeated_ablation_dirs("circle_packing_square/32"),
        "sol_func_name": "circle_packing32",
    },
    {
        "inpt_dir": f"{prob_path}/circle_packing_rect/",
        "out_dirs": [
            f"{exp_path}/circle_packing_rect/gemini_mp_insp/",
            f"{exp_path}/circle_packing_rect/gemini_mp_insp_nocontext/",
        ],
        "sol_func_name": "circle_packing21",
    },
]
# Reverse lookup: input directory -> position of its entry in args_list.
inpt_dir2index = {entry["inpt_dir"]: pos for pos, entry in enumerate(args_list)}

# Process every problem once, cache the results per input directory in
# `all_exp`, and collect one overview row per experiment folder.
overview_df_rows = []
all_exp = {}

for args in args_list:
    experiments_res = process_experiments(args, model_names, model2cost)
    all_exp[args["inpt_dir"]] = experiments_res

    for exp in experiments_res.keys():
        exp_res = experiments_res[exp]
        exp_df = exp_res["df"]
        # idxmax() yields an index *label*, so the row has to be read back with
        # label-based .loc; positional .iloc only matches when the frame
        # happens to carry a default 0..n-1 index.
        best_isl = exp_df["best_fitness"].idxmax()
        overview_df_rows.append(
            [
                f"{args['inpt_dir']}{exp}",
                exp_df.loc[best_isl, "best_fitness"],
                exp_df.loc[best_isl, "epoch_best_found"],
                exp_df.loc[best_isl, "num_epochs"],
                exp_res["cost"],
            ]
        )

# One row per experiment: values taken from the row with the highest
# best_fitness, plus the "cost" entry reported by process_experiments.
overview_df = pd.DataFrame(
    overview_df_rows,
    columns=["exp_name", "best_fitness", "epoch_best_found", "num_epochs", "est_cost"],
)

overview_df

## Ablations

In [None]:
# Make sure the folder that the plots below are saved into ("figs/") exists;
# exist_ok=True keeps this cell safe to re-run.
os.makedirs("figs/", exist_ok=True)

### P1

In [None]:
# Pick the P1 entry of args_list via its input directory, show it, and fetch
# the matching processed results.
p1_inpt_dir = f"{prob_path}/second_autocorr_ineq/"
args = args_list[inpt_dir2index[p1_inpt_dir]]
print(args)

experiments_res = all_exp[args["inpt_dir"]]

In [None]:
# Legend label -> folder-name fragment of the corresponding ablation setting.
p1_dir = f"{exp_path}/second_autocorr_ineq"
p1_variants = {
    "no evolution": "no_evolve",
    "no mp or insp": "no_mp_or_insp",
    "mp": "mp",
    "insp": "insp",
    "mp + insp": "mp_insp",
}

# Each label is mapped to the "res" entries of its three repeated runs.
plot_experiments_mean_std(
    {
        label: [
            experiments_res[f"{p1_dir}/gemini_{variant}_{run}/"]["res"] for run in (1, 2, 3)
        ]
        for label, variant in p1_variants.items()
    },
    epsilon=1e-4,
    title="Ablations for experiment P1",
    save_path="figs/P1_ablations.pdf",
)

### P2

#### P2.A (n = 16, d = 2)

In [None]:
# Pick the P2.A entry of args_list via its input directory, show it, and fetch
# the matching processed results.
p2a_inpt_dir = f"{prob_path}/minimizing_max_min_dist/2/"
args = args_list[inpt_dir2index[p2a_inpt_dir]]
print(args)

experiments_res = all_exp[args["inpt_dir"]]

In [None]:
# Legend label -> experiment folder used for that ablation setting.
p2a_dir = f"{exp_path}/minimizing_max_min_dist/2"
p2a_folders = {
    "no evolution": "gemini_no_evolve",
    "no mp or insp": "gemini_rank",
    "mp": "gemini_mp2",
    "insp": "gemini_insp",
    "mp + insp": "gemini_mp_insp_nocontext",
}

# Each label is mapped to the "res" entry of its single experiment folder.
plot_experiments_statistical_summary(
    {
        label: experiments_res[f"{p2a_dir}/{folder}/"]["res"]
        for label, folder in p2a_folders.items()
    },
    epsilon=1e-4,
    title="Ablations for experiment P2.A",
    save_path="figs/P2A_ablations.pdf",
)

#### P2.B (n = 14, d = 3)

In [None]:
# Pick the P2.B entry of args_list via its input directory, show it, and fetch
# the matching processed results.
p2b_inpt_dir = f"{prob_path}/minimizing_max_min_dist/3/"
args = args_list[inpt_dir2index[p2b_inpt_dir]]
print(args)

experiments_res = all_exp[args["inpt_dir"]]

### P3

#### P3.A (n = 26)

In [None]:
# Pick the P3.A entry of args_list via its input directory, show it, and fetch
# the matching processed results.
p3a_inpt_dir = f"{prob_path}/circle_packing_square/26/"
args = args_list[inpt_dir2index[p3a_inpt_dir]]
print(args)

experiments_res = all_exp[args["inpt_dir"]]

In [None]:
# Legend label -> folder-name fragment of the corresponding ablation setting.
p3a_dir = f"{exp_path}/circle_packing_square/26"
p3a_variants = {
    "no evolution": "no_evolve",
    "no mp or insp": "no_mp_or_insp",
    "mp": "mp",
    "insp": "insp",
    "mp + insp": "mp_insp",
}

# Each label is mapped to the "res" entries of its three repeated runs.
plot_experiments_mean_std(
    {
        label: [
            experiments_res[f"{p3a_dir}/gemini_{variant}_{run}/"]["res"] for run in (1, 2, 3)
        ]
        for label, variant in p3a_variants.items()
    },
    epsilon=1e-4,
    title="Ablations for experiment P3.A",
    save_path="figs/P3A_ablations.pdf",
)

#### P3.B (n = 32)

In [None]:
# Pick the P3.B entry of args_list via its input directory, show it, and fetch
# the matching processed results.
p3b_inpt_dir = f"{prob_path}/circle_packing_square/32/"
args = args_list[inpt_dir2index[p3b_inpt_dir]]
print(args)

experiments_res = all_exp[args["inpt_dir"]]

In [None]:
# Legend label -> folder-name fragment of the corresponding ablation setting.
p3b_dir = f"{exp_path}/circle_packing_square/32"
p3b_variants = {
    "no evolution": "no_evolve",
    "no mp or insp": "no_mp_or_insp",
    "mp": "mp",
    "insp": "insp",
    "mp + insp": "mp_insp",
}

# Each label is mapped to the "res" entries of its three repeated runs.
plot_experiments_mean_std(
    {
        label: [
            experiments_res[f"{p3b_dir}/gemini_{variant}_{run}/"]["res"] for run in (1, 2, 3)
        ]
        for label, variant in p3b_variants.items()
    },
    epsilon=1e-4,
    title="Ablations for experiment P3.B",
    save_path="figs/P3B_ablations.pdf",
)

### P4

In [None]:
# Pick the P4 entry of args_list via its input directory, show it, and fetch
# the matching processed results.
p4_inpt_dir = f"{prob_path}/circle_packing_rect/"
args = args_list[inpt_dir2index[p4_inpt_dir]]
print(args)

experiments_res = all_exp[args["inpt_dir"]]