In [1]:
import os
# Set CUDA_VISIBLE_DEVICES to "1" for this process. This is done in the first
# cell, ahead of the remaining imports.
# NOTE(review): the device index is hard-coded — confirm it suits the machine in use.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [2]:


from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter, PercentFormatter
import seaborn as sns
import matplotlib.lines as mlines
from scipy.stats import wilcoxon, ranksums, mannwhitneyu
from statsmodels.stats.multitest import multipletests


from utils import get_df

In [3]:
# Algorithm labels retained for the analysis; rows whose label is not listed
# here are dropped when the results frame is filtered with `isin(ALGO_LIST)`.
ALGO_LIST = ["mcpg_me", "dcg_me", "pga_me", "me", "memes", "ppga"]

In [4]:
def filter(df_row):
    """Relabel one results row, or reject it when its batch size is off-target.

    Each known algorithm is accepted at exactly one batch size; a row of a
    known algorithm with any other batch size yields ``None``.  Accepted
    ``mcpg_me`` rows are further split into variant labels according to their
    ``proportion_mutation_ga`` and ``greedy`` values.  Every other row keeps
    its ``algo`` value unchanged.

    The name shadows the builtin ``filter``; it is kept because the notebook
    passes this function by name to ``DataFrame.apply``.

    Args:
        df_row: Mapping-like row (e.g. a pandas Series) exposing ``algo`` and,
            for the known algorithms, ``batch_size``; ``mcpg_me`` rows must
            also expose ``proportion_mutation_ga`` and ``greedy``.

    Returns:
        The label to store for the row, or ``None`` when the row is rejected.
    """
    algo = df_row["algo"]

    # The single batch size accepted for each known algorithm.
    required_batch_size = {
        "pga_me": 1024,
        "me": 8192,
        "ppga": 6000,
        "memes": 8192,
        "dcg_me": 2048,
        "mcpg_me": 4096,
    }
    if algo in required_batch_size and df_row["batch_size"] != required_batch_size[algo]:
        return None

    if algo == "mcpg_me":
        # (proportion_mutation_ga, greedy, label) rules, tried in this order.
        # Combinations not listed (including NaN values) fall through and
        # keep the plain "mcpg_me" label.
        variant_rules = (
            (0, 0, "mcpg_only"),
            (0, 0.5, "mcpg_only_05"),
            (0, 1, "mcpg_only_1"),
            (0.5, 0.5, "mcpg_me_05"),
            (0.5, 1, "mcpg_me_1"),
        )
        for mutation_share, greedy_level, label in variant_rules:
            if df_row["proportion_mutation_ga"] == mutation_share and df_row["greedy"] == greedy_level:
                return label

    return algo

In [5]:
# Build the results frame via the project helper get_df.
# NOTE(review): get_df presumably reads the run logs under results_dir — confirm in utils.
results_dir = Path("fig1/output/")
EPISODE_LENGTH = 250
df = get_df(results_dir, EPISODE_LENGTH)
# Relabel every row with `filter`; rows it rejects receive None as their label.
df['algo'] = df.apply(filter, axis=1)
# Keep only rows whose label is listed in ALGO_LIST (this also drops the None rows).
df = df[df["algo"].isin(ALGO_LIST)]
# Disabled extra filters, kept for reference:
# df = df[df["batch_size"].isin(BATCH_LIST)]
#df = df[df["num_evaluations"] <= 1_005_000]

ant_omni_250
ant_uni_250
anttrap_omni_250
hopper_uni_250
walker2d_uni_250


In [115]:
# Narrow `df` to the two environments analysed in the cells below.
# NOTE(review): this rebinds `df`, so rows of the other environments are gone
# until the load cell is run again.
df = df[df['env'].isin(['ant_omni_250', "anttrap_omni_250"])]

In [136]:
# Rows logged at or below the evaluation budget.
# NOTE(review): 1_001_024 looks like one million plus some slack — confirm the
# intent and consider a named constant.
df_1mil_ev = df[df['num_evaluations'] <= 1_001_024]

In [117]:
# Group the evaluation-limited rows by environment.
per_env_groups = df_1mil_ev.groupby('env')

# Largest `time` value logged in each environment among those rows.
max_time_per_env = per_env_groups['time'].max()

In [118]:
max_time_per_env

env
ant_omni_250        834.290894
anttrap_omni_250    943.068481
Name: time, dtype: float32

In [119]:
# NOTE(review): despite the "half" in its name, this is the unscaled
# per-environment maximum time. It is the cut-off later used to build
# df_half_time; rename or rescale once the intent is confirmed.
half_time_per_env = max_time_per_env 

In [120]:
half_time_per_env

env
ant_omni_250        834.290894
anttrap_omni_250    943.068481
Name: time, dtype: float32

In [121]:
# Keep only rows logged no later than their environment's time cut-off.
# `Series.map` looks every row's env up in `half_time_per_env` in a single
# vectorized pass instead of calling a Python lambda once per row.  The cast
# to float64 keeps the comparison numeric whatever dtype the lookup returns.
# An env missing from `half_time_per_env` maps to NaN, so its rows are dropped.
time_cutoff_per_row = df["env"].map(half_time_per_env).astype("float64")
df_half_time = df[df["time"] <= time_cutoff_per_row]

In [122]:
# Index label of the row with the largest `time` in each (env, algo, run) group.
idx = df_half_time.groupby(['env', 'algo', 'run'])['time'].idxmax()
# Select those rows (one per group, assuming index labels are unique — TODO confirm).
df_half_time_last_itr = df_half_time.loc[idx]
df_half_time_last_itr


Unnamed: 0,ai_offspring_added,coverage,ga_offspring_added,iteration,max_fitness,qd_score,qpg_offspring_added,time,env,algo,batch_size,num_critic_training_steps,num_pg_training_steps,training_batch_size,run,num_evaluations,proportion_mutation_ga,no_epochs,greedy,evaluation
19490,0.0,0.989258,7.0,1001,1006.637329,967720.562500,0.0,828.225037,ant_omni_250,dcg_me,2048,3000.0,150.0,100.0,2025-01-20_182537_017372,2050048,,,,
20720,9.0,0.992188,12.0,1001,1006.596252,970777.875000,0.0,831.550049,ant_omni_250,dcg_me,2048,3000.0,150.0,100.0,2025-01-20_182539_621908,2050048,,,,
21940,6.0,0.991211,17.0,1001,1006.555664,950646.375000,0.0,830.249146,ant_omni_250,dcg_me,2048,3000.0,150.0,100.0,2025-01-20_182717_481258,2050048,,,,
23170,5.0,0.978516,1.0,1001,1006.638550,960608.687500,0.0,829.842712,ant_omni_250,dcg_me,2048,3000.0,150.0,100.0,2025-01-20_182745_627814,2050048,,,,
24390,2.0,0.995117,8.0,991,1006.534851,964432.687500,0.0,827.225403,ant_omni_250,dcg_me,2048,3000.0,150.0,100.0,2025-01-20_182751_572627,2029568,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1037617,,0.132812,,61,895.361572,107969.640625,,931.493835,anttrap_omni_250,ppga,6000,,,,2025-01-22_231241_258658,2422920,,,,2422920.0
1037716,,0.095703,,61,941.617249,76403.687500,,940.016052,anttrap_omni_250,ppga,6000,,,,2025-01-22_231248_204271,2422920,,,,2422920.0
1037815,,0.133789,,61,932.129456,99209.539062,,934.782166,anttrap_omni_250,ppga,6000,,,,2025-01-22_231430_314186,2422920,,,,2422920.0
1037913,,0.017578,,61,997.976807,15164.122070,,933.127136,anttrap_omni_250,ppga,6000,,,,2025-01-22_231510_376254,2422920,,,,2422920.0


In [123]:
# Keep only the grouping columns and the two metrics compared below.
df_half_time_last_itr = df_half_time_last_itr[['env', 'algo', 'qd_score', 'time']]

In [124]:
# Restrict the time-limited comparison to the mcpg_me and dcg_me rows.
df_half_time_last_itr = df_half_time_last_itr[df_half_time_last_itr['algo'].isin(['mcpg_me', "dcg_me"])]

In [125]:
# Median qd_score and time over the selected rows of each (env, algo) pair.
df_half_time_last_itr_median = df_half_time_last_itr.groupby(['env', 'algo'])[['qd_score', 'time']].median()
df_half_time_last_itr_median

Unnamed: 0_level_0,Unnamed: 1_level_0,qd_score,time
env,algo,Unnamed: 2_level_1,Unnamed: 3_level_1
ant_omni_250,dcg_me,969795.6875,829.466248
ant_omni_250,mcpg_me,926686.625,832.403625
anttrap_omni_250,dcg_me,950421.9375,938.665497
anttrap_omni_250,mcpg_me,888099.46875,940.474304


In [126]:
# Settings for the p-value cell that follows.
# NOTE(review): this rebinds ALGO_LIST, which the load cell also reads;
# re-running that cell after this one would filter `df` with the shorter list.
METRICS_LIST = ["qd_score", "time"]
ENV_LIST = ["ant_omni_250", "anttrap_omni_250"]
ALGO_LIST = ["mcpg_me", "dcg_me"]

# One [metric, env, algo_1, algo_2] entry per comparison to report: mcpg_me
# against dcg_me for every metric in every environment, metrics varying slowest.
P_VALUE_LIST = [
    [metric, env, "mcpg_me", "dcg_me"]
    for metric in METRICS_LIST
    for env in ENV_LIST
]

In [127]:
# Compute p-values: one Mann-Whitney U test (scipy defaults) per comparison
# listed in P_VALUE_LIST.  Only the listed [metric, env, algo_1, algo_2]
# entries are tested, so no work is spent on pairs that would be discarded,
# and the records are gathered in a list so the frame is built once rather
# than grown row by row.
p_value_records = []
for metric, env, algo_1, algo_2 in P_VALUE_LIST:
    env_rows = df_half_time_last_itr[df_half_time_last_itr["env"] == env]
    stat = mannwhitneyu(
        env_rows.loc[env_rows["algo"] == algo_1, metric],
        env_rows.loc[env_rows["algo"] == algo_2, metric],
    )
    p_value_records.append(
        {"metric": metric, "env": env, "algo_1": algo_1, "algo_2": algo_2, "p_value": stat.pvalue}
    )
p_value_df = pd.DataFrame(p_value_records, columns=["metric", "env", "algo_1", "algo_2", "p_value"])

# Correct p-values: Holm adjustment across all listed comparisons together,
# then one row per (env, algo_1, algo_2) and one column per metric.
p_value_df["p_value_corrected"] = multipletests(p_value_df["p_value"], method="holm")[1]
p_value_df = p_value_df.pivot(index=["env", "algo_1", "algo_2"], columns="metric", values="p_value_corrected")

In [128]:
p_value_df

Unnamed: 0_level_0,Unnamed: 1_level_0,metric,qd_score,time
env,algo_1,algo_2,Unnamed: 3_level_1,Unnamed: 4_level_1
ant_omni_250,mcpg_me,dcg_me,0.000182,0.000276
anttrap_omni_250,mcpg_me,dcg_me,0.002834,0.007717


In [137]:
# Index label of the row with the largest `time` in each (env, algo, run)
# group of the evaluation-limited rows.
idx = df_1mil_ev.groupby(['env', 'algo', 'run'])['time'].idxmax()
# Select those rows (one per group, assuming index labels are unique — TODO confirm).
df_1mil_ev_last_itr = df_1mil_ev.loc[idx]

In [139]:
df_1mil_ev_last_itr

Unnamed: 0,ai_offspring_added,coverage,ga_offspring_added,iteration,max_fitness,qd_score,qpg_offspring_added,time,env,algo,batch_size,num_critic_training_steps,num_pg_training_steps,training_batch_size,run,num_evaluations,proportion_mutation_ga,no_epochs,greedy,evaluation
18970,2.0,0.902344,16.0,481,1006.618896,865821.500000,0.0,409.327606,ant_omni_250,dcg_me,2048,3000.0,150.0,100.0,2025-01-20_182537_017372,985088,,,,
20200,1.0,0.917969,22.0,481,1006.596252,877842.187500,0.0,411.018738,ant_omni_250,dcg_me,2048,3000.0,150.0,100.0,2025-01-20_182539_621908,985088,,,,
21420,5.0,0.762695,32.0,481,1006.538879,703154.062500,0.0,409.938904,ant_omni_250,dcg_me,2048,3000.0,150.0,100.0,2025-01-20_182717_481258,985088,,,,
22650,9.0,0.891602,23.0,481,1006.539978,850385.500000,0.0,409.822601,ant_omni_250,dcg_me,2048,3000.0,150.0,100.0,2025-01-20_182745_627814,985088,,,,
23880,3.0,0.912109,26.0,481,1006.516968,862076.312500,0.0,412.916595,ant_omni_250,dcg_me,2048,3000.0,150.0,100.0,2025-01-20_182751_572627,985088,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1037581,,0.096680,,25,869.964600,76744.507812,,391.875519,anttrap_omni_250,ppga,6000,,,,2025-01-22_231241_258658,993000,,,,993000.0
1037680,,0.088867,,25,789.847107,61193.281250,,400.597351,anttrap_omni_250,ppga,6000,,,,2025-01-22_231248_204271,993000,,,,993000.0
1037779,,0.119141,,25,771.435608,80035.531250,,393.021942,anttrap_omni_250,ppga,6000,,,,2025-01-22_231430_314186,993000,,,,993000.0
1037877,,0.017578,,25,977.091675,14977.866211,,394.135010,anttrap_omni_250,ppga,6000,,,,2025-01-22_231510_376254,993000,,,,993000.0


In [140]:
# Settings for the p-value cell that follows.
# NOTE(review): this rebinds ALGO_LIST, which the load cell also reads.
METRICS_LIST = ["qd_score", "time"]
ENV_LIST = ["ant_omni_250", "anttrap_omni_250"]
ALGO_LIST = ["mcpg_me", "dcg_me", "pga_me", "me", "memes", "ppga"]

# One [metric, env, algo_1, algo_2] entry per comparison to report, all on
# qd_score: mcpg_me against dcg_me in each environment first, followed by
# mcpg_me against each remaining baseline, environment by environment.
P_VALUE_LIST = [
    ["qd_score", env, "mcpg_me", "dcg_me"]
    for env in ENV_LIST
] + [
    ["qd_score", env, "mcpg_me", baseline]
    for env in ENV_LIST
    for baseline in ("pga_me", "me", "memes", "ppga")
]


In [141]:
# Compute p-values: one Mann-Whitney U test (scipy defaults) per comparison
# listed in P_VALUE_LIST.  Only the listed [metric, env, algo_1, algo_2]
# entries are tested, so no work is spent on pairs that would be discarded,
# and the records are gathered in a list so the frame is built once rather
# than grown row by row.
p_value_records = []
for metric, env, algo_1, algo_2 in P_VALUE_LIST:
    env_rows = df_1mil_ev_last_itr[df_1mil_ev_last_itr["env"] == env]
    stat = mannwhitneyu(
        env_rows.loc[env_rows["algo"] == algo_1, metric],
        env_rows.loc[env_rows["algo"] == algo_2, metric],
    )
    p_value_records.append(
        {"metric": metric, "env": env, "algo_1": algo_1, "algo_2": algo_2, "p_value": stat.pvalue}
    )
p_value_df = pd.DataFrame(p_value_records, columns=["metric", "env", "algo_1", "algo_2", "p_value"])

# Correct p-values: Holm adjustment across all listed comparisons together,
# then one row per (env, algo_1, algo_2) and one column per metric.
p_value_df["p_value_corrected"] = multipletests(p_value_df["p_value"], method="holm")[1]
p_value_df = p_value_df.pivot(index=["env", "algo_1", "algo_2"], columns="metric", values="p_value_corrected")

In [142]:
p_value_df

Unnamed: 0_level_0,Unnamed: 1_level_0,metric,qd_score
env,algo_1,algo_2,Unnamed: 3_level_1
ant_omni_250,mcpg_me,dcg_me,0.005004
ant_omni_250,mcpg_me,me,0.000359
ant_omni_250,mcpg_me,memes,0.000359
ant_omni_250,mcpg_me,pga_me,0.000359
ant_omni_250,mcpg_me,ppga,0.000516
anttrap_omni_250,mcpg_me,dcg_me,0.074116
anttrap_omni_250,mcpg_me,me,0.000359
anttrap_omni_250,mcpg_me,memes,0.000359
anttrap_omni_250,mcpg_me,pga_me,0.000359
anttrap_omni_250,mcpg_me,ppga,0.000516
