In [1]:
import os
# Set CUDA_VISIBLE_DEVICES to "1" so CUDA-based libraries started from this
# kernel only see the device with index 1.
# NOTE(review): presumably this has to run before any GPU library is imported
# or initialized, which is why it sits in the first cell — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [2]:
import sys
sys.path.append("/project/")

from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter, PercentFormatter
import seaborn as sns
import matplotlib.lines as mlines

from scipy.stats import wilcoxon, ranksums, mannwhitneyu
from statsmodels.stats.multitest import multipletests


from utils import get_df

In [3]:
# Algorithm labels kept when the results frame is filtered with
# `df["algo"].isin(ALGO_LIST)` after rows have been relabelled by `filter`.
# NOTE: this name is rebound to a two-element list in a later cell (the
# statistical-comparison constants), so re-running the loading cell after that
# one would keep fewer algorithms than listed here.
ALGO_LIST = [
    "mcpg_me",
    "dcg_me",
    "pga_me",
    "me",
    "memes",
    "ppga",
]

In [4]:
# Batch size a row must have, per algorithm, for the row to be kept.
_REQUIRED_BATCH_SIZE = {
    "pga_me": 1024,
    "me": 8192,
    "ppga": 6000,
    "memes": 8192,
    "dcg_me": 2048,
    "mcpg_me": 4096,
}

# (proportion_mutation_ga, greedy, label) triples recognised for "mcpg_me".
# They are tested in this order and the first match wins.
_MCPG_ME_VARIANTS = (
    (0, 0, "mcpg_only"),
    (0, 0.5, "mcpg_only_05"),
    (0, 1, "mcpg_only_1"),
    (0.5, 0.5, "mcpg_me_05"),
    (0.5, 1, "mcpg_me_1"),
)


def filter(df_row):
    """Return the label to store in the "algo" column for one results row.

    Note: the name shadows the built-in ``filter``; it is kept because the
    loading cell passes this function to ``df.apply(filter, axis=1)``.

    Args:
        df_row: Mapping-like row (e.g. a pandas Series) with an "algo" key.
            "batch_size" is read for the algorithms that have a required
            batch size; "proportion_mutation_ga" and "greedy" are read only
            for "mcpg_me" rows.

    Returns:
        None when the row's algorithm has a required batch size and the row
        does not match it. For "mcpg_me" rows, one of the variant labels
        ("mcpg_only", "mcpg_only_05", "mcpg_only_1", "mcpg_me_05",
        "mcpg_me_1") when the hyper-parameter pair matches a known variant.
        Otherwise the original value of ``df_row["algo"]``.
    """
    algo = df_row["algo"]

    # Drop rows that were not produced with the reference batch size.
    required = _REQUIRED_BATCH_SIZE.get(algo)
    if required is not None and df_row["batch_size"] != required:
        return None

    # Split "mcpg_me" into its named hyper-parameter variants; any other
    # combination falls through and keeps the plain "mcpg_me" label.
    if algo == "mcpg_me":
        for ga_share, greedy_level, label in _MCPG_ME_VARIANTS:
            if df_row["proportion_mutation_ga"] == ga_share and df_row["greedy"] == greedy_level:
                return label

    return algo

In [5]:
# Build the results frame with the project's `get_df` helper, relabel every
# row through `filter` (which yields None for rows with an unexpected batch
# size), then keep only rows whose label appears in ALGO_LIST.
results_dir = Path("fig1/output/")
EPISODE_LENGTH = 250

df = get_df(results_dir, EPISODE_LENGTH)
df["algo"] = df.apply(filter, axis=1)

keep_algo = df["algo"].isin(ALGO_LIST)
df = df.loc[keep_algo]

# Optional extra filters, currently disabled:
# df = df[df["batch_size"].isin(BATCH_LIST)]
# df = df[df["num_evaluations"] <= 1_005_000]

ant_omni_250
ant_uni_250
anttrap_omni_250
hopper_uni_250
walker2d_uni_250


In [6]:
# Restrict the analysis to the two unidirectional locomotion environments.
df = df.loc[df["env"].isin(["hopper_uni_250", "walker2d_uni_250"])]


In [7]:
# Keep only rows recorded within the evaluation budget.
# NOTE(review): 1_004_096 = 1_000_000 + 4096; presumably one million
# evaluations plus one 4096-sized batch of slack — confirm.
df_1mil_ev = df.loc[df["num_evaluations"] <= 1_004_096]


In [10]:
# For every (env, algo, run) group, find the index label of the row with the
# highest iteration number, then pull those rows out of the budgeted frame.
# NOTE(review): `.loc` selects by label, so if the frame's index contains
# duplicate labels more than one row per run would be returned — confirm that
# the index is unique.
idx = (
    df_1mil_ev
    .groupby(["env", "algo", "run"])["iteration"]
    .idxmax()
)
df_1mil_ev_last_itr = df_1mil_ev.loc[idx]

In [11]:
# Metrics, environments and algorithms involved in the statistical comparison.
# NOTE: ALGO_LIST is rebound here; from this cell on it holds only the two
# algorithms being compared.
METRICS_LIST = ["qd_score", "time"]
ENV_LIST = ["hopper_uni_250", "walker2d_uni_250"]
ALGO_LIST = ["mcpg_me", "dcg_me"]

# One [metric, env, algo_1, algo_2] entry per test to report: mcpg_me against
# dcg_me for every metric in every environment (metric-major order).
P_VALUE_LIST = [
    [metric_name, env_name, "mcpg_me", "dcg_me"]
    for metric_name in METRICS_LIST
    for env_name in ENV_LIST
]

In [12]:
# Two-sample Mann-Whitney U test for every comparison listed in P_VALUE_LIST,
# followed by a Holm correction over that family of tests.
#
# Only the requested [metric, env, algo_1, algo_2] comparisons are computed.
# This avoids running self-comparisons and mirrored pairs that would be
# discarded anyway, and removes the dependency on the current contents of
# METRICS_LIST / ENV_LIST / ALGO_LIST.
#
# This cell expects `df_1mil_ev_last_itr` to still hold one row per run; the
# later aggregation cell rebinds that name to per-(env, algo) medians.
p_value_records = []
for metric, env, algo_1, algo_2 in P_VALUE_LIST:
    env_rows = df_1mil_ev_last_itr[df_1mil_ev_last_itr["env"] == env]
    sample_1 = env_rows.loc[env_rows["algo"] == algo_1, metric]
    sample_2 = env_rows.loc[env_rows["algo"] == algo_2, metric]

    # Fail loudly instead of letting an empty sample produce an error deep
    # inside scipy or a NaN that would contaminate the Holm correction.
    if sample_1.empty or sample_2.empty:
        raise ValueError(
            f"No data for comparison metric={metric!r}, env={env!r}, "
            f"algo_1={algo_1!r}, algo_2={algo_2!r}"
        )

    stat = mannwhitneyu(sample_1, sample_2)
    p_value_records.append(
        {"metric": metric, "env": env, "algo_1": algo_1, "algo_2": algo_2, "p_value": stat.pvalue}
    )

# Build the frame once from the collected records so that `p_value` gets a
# numeric dtype (growing an empty frame row by row leaves object columns).
p_value_df = pd.DataFrame(
    p_value_records,
    columns=["metric", "env", "algo_1", "algo_2", "p_value"],
)

# Correct p-values (Holm) across all reported comparisons.
p_value_df["p_value_corrected"] = multipletests(p_value_df["p_value"], method="holm")[1]

# One row per (env, algo_1, algo_2), one column per metric.
p_value_df = p_value_df.pivot(index=["env", "algo_1", "algo_2"], columns="metric", values="p_value_corrected")

In [13]:
# Display the Holm-corrected p-values: one row per (env, algo_1, algo_2) and
# one column per metric.
p_value_df

Unnamed: 0_level_0,Unnamed: 1_level_0,metric,qd_score,time
env,algo_1,algo_2,Unnamed: 3_level_1,Unnamed: 4_level_1
hopper_uni_250,mcpg_me,dcg_me,0.000188,0.000188
walker2d_uni_250,mcpg_me,dcg_me,0.000188,0.000188


In [14]:
# Collapse the per-run rows into the median qd_score and time for each
# (env, algo) pair.
# NOTE: this rebinds `df_1mil_ev_last_itr` from per-run rows to per-group
# medians, so the statistical-test cell must run before this one.
df_1mil_ev_last_itr = (
    df_1mil_ev_last_itr
    .groupby(["env", "algo"])[["qd_score", "time"]]
    .median()
    .reset_index()
)
df_1mil_ev_last_itr

Unnamed: 0,env,algo,qd_score,time
0,hopper_uni_250,dcg_me,526575.5,325.476532
1,hopper_uni_250,mcpg_me,680010.875,63.191025
2,hopper_uni_250,me,401396.375,15.809587
3,hopper_uni_250,memes,115590.742188,363.499847
4,hopper_uni_250,pga_me,474019.890625,694.029846
5,hopper_uni_250,ppga,157146.15625,273.221466
6,walker2d_uni_250,dcg_me,644697.75,368.894531
7,walker2d_uni_250,mcpg_me,767375.59375,77.591843
8,walker2d_uni_250,me,433819.34375,24.913748
9,walker2d_uni_250,memes,109119.800781,378.308105


In [15]:
# Keep only the median rows of the two algorithms being compared.
df_1mil_ev_last_itr_top_2 = df_1mil_ev_last_itr.loc[
    df_1mil_ev_last_itr["algo"].isin(["mcpg_me", "dcg_me"])
]
df_1mil_ev_last_itr_top_2


Unnamed: 0,env,algo,qd_score,time
0,hopper_uni_250,dcg_me,526575.5,325.476532
1,hopper_uni_250,mcpg_me,680010.875,63.191025
6,walker2d_uni_250,dcg_me,644697.75,368.894531
7,walker2d_uni_250,mcpg_me,767375.59375,77.591843


In [16]:
# Calculate mean QD score for each algorithm
mcpg_qd = df_1mil_ev_last_itr_top_2[df_1mil_ev_last_itr_top_2['algo'] == 'mcpg_me']['qd_score'].mean()
dcg_qd = df_1mil_ev_last_itr_top_2[df_1mil_ev_last_itr_top_2['algo'] == 'dcg_me']['qd_score'].mean()

# Calculate mean time for each algorithm
mcpg_time = df_1mil_ev_last_itr_top_2[df_1mil_ev_last_itr_top_2['algo'] == 'mcpg_me']['time'].mean()
dcg_time = df_1mil_ev_last_itr_top_2[df_1mil_ev_last_itr_top_2['algo'] == 'dcg_me']['time'].mean()

# Calculate differences and ratios
qd_diff = mcpg_qd - dcg_qd
qd_percent = (mcpg_qd / dcg_qd - 1) * 100
time_speedup = dcg_time / mcpg_time

print(f"MCPG-ME achieves {qd_diff:.2f} higher QD score on average ({qd_percent:.1f}% improvement)")
print(f"MCPG-ME is {time_speedup:.1f}x faster than DCG-ME")

MCPG-ME achieves 138056.61 higher QD score on average (23.6% improvement)
MCPG-ME is 4.9x faster than DCG-ME
