In [1]:
import pickle
import os

with open(os.path.join("..", "data", "runs", "iqm_envs_c3po.pickle"), "rb") as f:
    iqm_scores, iqm_cis = pickle.load(f)

iqm_scores, iqm_cis

({'SafetyAntVelocity-v1': {'Reward': {'C3PO': array([  94.14362916, 1143.03286142, 1782.62650643, 2244.59333351,
           2439.08288733, 2630.29018628, 2715.0704779 , 2857.27203204,
           2974.47572551, 3043.58323649]),
    'C-TRPO': array([1766.57742302, 2621.18400222, 2791.08953942, 2910.7076351 ,
           2964.39357956, 3008.96305544, 3007.18883487, 3030.92420698,
           3062.32053972, 3019.66926891]),
    'CPO': array([1522.69467923, 2289.93662492, 2646.59903545, 2816.13363018,
           2989.45625349, 2996.67725151, 3003.39784295, 3063.91800921,
           3087.27596258, 3106.7091492 ]),
    'PPO-Lag': array([  44.64752112,  850.46453855, 1740.87764482, 2406.07964188,
           2611.01639871, 2825.70691856, 2972.83608587, 3019.75313962,
           3159.7161256 , 3210.73767024]),
    'CPPO-PID': array([  44.64752112,  850.46453855, 1641.2859461 , 2288.81459177,
           2580.97370037, 2816.48576643, 2968.07666147, 3029.28647766,
           3161.48292977, 3205.25982

In [2]:
from helpers import *
ALGOS = [
    "C3PO",
    "C-TRPO",
    "CPO",
    "PPO-Lag",
    "CPPO-PID",
    "P2BPO",
    "P3O",
    "PPO",
]

new_eval_score_dict = {}
for env in ENVS:
    for algo in ALGOS:
        new_eval_score_dict[algo.upper()] = {}
        for metric in ["Reward", "Final Cost"]:
            metric_name = metric #legacy
            new_eval_score_dict[algo.upper()][metric_name] = {}
            for env in ENVS:
                mean = round(iqm_scores[env][metric][algo][-1], 1)
                ci_up = round(iqm_cis[env][metric][algo][0,-1] - mean, 1)
                ci_down = round(iqm_cis[env][metric][algo][1,-1] - mean, 1)
                ci = max(ci_up, ci_down)
                mean, ci = (mean, ci) if mean < 100 else (int(mean), int(ci))
                new_eval_score_dict[algo.upper()][metric_name][env.replace("Safety", "").split("-")[0].strip("1")] = f"{mean} ± {ci}"

latex_lookup = {
    "Reward": r'$R$', "Final Cost": r'$C$'
}

df_eval = pd.DataFrame.from_dict({(i, latex_lookup[j]): new_eval_score_dict[i][j] 
                           for i in new_eval_score_dict.keys() 
                           for j in new_eval_score_dict[i].keys()},
                           orient='index')

df_eval

Unnamed: 0,Unnamed: 1,AntVelocity,HalfCheetahVelocity,HumanoidVelocity,HopperVelocity,CarButton,PointGoal,RacecarCircle,PointPush
C3PO,$R$,3043 ± 44,2458 ± 436,5389 ± 93,1674 ± 35,2.3 ± 0.7,23.8 ± 0.9,25.9 ± 5.1,4.5 ± 2.6
C3PO,$C$,15.0 ± 4.7,13.3 ± 6.4,1.2 ± 0.9,9.9 ± 1.7,53.4 ± 22.3,37.9 ± 1.7,5.0 ± 1.7,20.2 ± 10.0
C-TRPO,$R$,3019 ± 149,2841 ± 41,5746 ± 248,1621 ± 82,1.1 ± 0.2,19.3 ± 0.9,29.5 ± 3.1,1.0 ± 6.6
C-TRPO,$C$,13.2 ± 9.2,12.1 ± 7.6,12.2 ± 5.9,17.7 ± 8.0,34.0 ± 10.2,23.3 ± 3.6,20.2 ± 4.0,25.3 ± 7.0
CPO,$R$,3106 ± 21,2824 ± 104,5569 ± 349,1696 ± 19,1.1 ± 0.2,20.4 ± 2.0,29.8 ± 1.9,0.7 ± 2.9
CPO,$C$,25.1 ± 11.3,23.1 ± 8.0,16.2 ± 8.6,25.7 ± 4.4,33.5 ± 8.7,28.2 ± 4.1,23.1 ± 4.5,28.9 ± 20.0
PPO-LAG,$R$,3210 ± 85,3033 ± 1,5814 ± 122,240 ± 159,0.3 ± 0.8,9.4 ± 1.8,30.9 ± 1.8,0.6 ± 0.0
PPO-LAG,$C$,28.9 ± 8.7,23.2 ± 1.9,12.7 ± 31.0,38.8 ± 36.4,39.2 ± 41.1,22.5 ± 10.1,31.7 ± 2.7,18.2 ± 9.5
CPPO-PID,$R$,3205 ± 76,3036 ± 10,5877 ± 84,1657 ± 61,-1.2 ± 0.6,6.1 ± 4.8,8.1 ± 4.3,1.0 ± 1.1
CPPO-PID,$C$,26.2 ± 4.4,26.5 ± 7.2,20.3 ± 6.0,18.6 ± 8.1,23.8 ± 6.0,21.8 ± 6.8,33.3 ± 5.9,22.8 ± 9.9


In [3]:
ids = pd.IndexSlice
admissible = {
    env: list(df_eval[env].loc[ids[:, r"$C$"]].apply(lambda x: float(x.split(" ± ")[0]))[df_eval[env].loc[ids[:, r"$C$"]].apply(lambda x: float(x.split(" ± ")[0]) #+ float(x.split(" ± ")[1])
                                                                                                                                                                        ) < 25.0].index) for env in df_eval.columns
}
admissible

{'AntVelocity': ['C3PO', 'C-TRPO', 'P3O'],
 'HalfCheetahVelocity': ['C3PO', 'C-TRPO', 'CPO', 'PPO-LAG'],
 'HumanoidVelocity': ['C3PO',
  'C-TRPO',
  'CPO',
  'PPO-LAG',
  'CPPO-PID',
  'P2BPO',
  'P3O'],
 'HopperVelocity': ['C3PO', 'C-TRPO', 'CPPO-PID', 'P2BPO', 'P3O'],
 'CarButton': ['CPPO-PID'],
 'PointGoal': ['C-TRPO', 'PPO-LAG', 'CPPO-PID', 'P3O'],
 'RacecarCircle': ['C3PO', 'C-TRPO', 'CPO', 'P2BPO', 'P3O'],
 'PointPush': ['C3PO', 'PPO-LAG', 'CPPO-PID', 'P3O']}

In [5]:
from functools import partial

def bold_formatter(x, value):
    xn = round(float(x.split(" ± ")[0]), 1)
    if xn == value:
        return f"{{\\bfseries {x}}}"
    else:
        return f"{{{x}}}"

def underline_formatter(x, value):
    xn = round(float(x.split(" ± ")[0]), 1)
    if xn == value:
        return f"{{\\fbox{{{x}}}}}"
    else:
        return f"{{{x}}}"

ids = pd.IndexSlice

fmts1 = {column: partial(bold_formatter, value=df_eval[column].loc[ids[admissible[column], r"$R$"]].apply(lambda x: round(float(x.split(" ± ")[0]), 1)).max()) for column in df_eval.columns}

styled = df_eval.style.format(subset=ids[:, r"$R$", :], formatter=fmts1)

print(styled.to_latex(position_float="centering", hrules=True, multirow_align="t", multicol_align="r", clines="skip-last;data",))

\begin{table}
\centering
\begin{tabular}{llllllllll}
\toprule
 &  & AntVelocity & HalfCheetahVelocity & HumanoidVelocity & HopperVelocity & CarButton & PointGoal & RacecarCircle & PointPush \\
\midrule
\multirow[t]{2}{*}{C3PO} & $R$ & {3043 ± 44} & {2458 ± 436} & {5389 ± 93} & {\bfseries 1674 ± 35} & {2.3 ± 0.7} & {23.8 ± 0.9} & {25.9 ± 5.1} & {\bfseries 4.5 ± 2.6} \\
 & $C$ & 15.0 ± 4.7 & 13.3 ± 6.4 & 1.2 ± 0.9 & 9.9 ± 1.7 & 53.4 ± 22.3 & 37.9 ± 1.7 & 5.0 ± 1.7 & 20.2 ± 10.0 \\
\cline{1-10}
\multirow[t]{2}{*}{C-TRPO} & $R$ & {3019 ± 149} & {2841 ± 41} & {5746 ± 248} & {1621 ± 82} & {1.1 ± 0.2} & {\bfseries 19.3 ± 0.9} & {29.5 ± 3.1} & {1.0 ± 6.6} \\
 & $C$ & 13.2 ± 9.2 & 12.1 ± 7.6 & 12.2 ± 5.9 & 17.7 ± 8.0 & 34.0 ± 10.2 & 23.3 ± 3.6 & 20.2 ± 4.0 & 25.3 ± 7.0 \\
\cline{1-10}
\multirow[t]{2}{*}{CPO} & $R$ & {3106 ± 21} & {2824 ± 104} & {5569 ± 349} & {1696 ± 19} & {1.1 ± 0.2} & {20.4 ± 2.0} & {\bfseries 29.8 ± 1.9} & {0.7 ± 2.9} \\
 & $C$ & 25.1 ± 11.3 & 23.1 ± 8.0 & 16.2 ± 8.6 & 25.7 

In [6]:
from functools import partial

def bold_formatter(x, value):
    xn = round(float(x.split(" ± ")[0]), 1)
    if xn == value:
        return f"{{\\bfseries {x}}}"
    else:
        return f"{{{x}}}"

def underline_formatter(x, value):
    xn = round(float(x.split(" ± ")[0]), 1)
    if xn == value:
        return f"{{\\fbox{{{x}}}}}"
    else:
        return f"{{{x}}}"

ids = pd.IndexSlice

fmts1 = {column: partial(bold_formatter, value=df_eval[column].loc[ids[admissible[column], r"$R(\pi_\textrm{final})$"]].apply(lambda x: round(float(x.split(" ± ")[0]), 1)).max()) for column in df_eval.columns}

styled = df_eval[["AntVelocity", "HalfCheetahVelocity",	"HumanoidVelocity",	"HopperVelocity"]].style.format(subset=ids[:, r"$R(\pi_\textrm{final})$", :], formatter=fmts1)

styled

KeyError: '$R(\\pi_\\textrm{final})$'

In [None]:
print(styled.to_latex(position_float="centering", hrules=True, multirow_align="t", multicol_align="r", clines="skip-last;data",))

\begin{table}
\centering
\begin{tabular}{llllll}
\toprule
 &  & AntVelocity & HalfCheetahVelocity & HumanoidVelocity & HopperVelocity \\
\midrule
\multirow[t]{2}{*}{C3PO (OURS)} & $R(\pi_\textrm{final})$ & {2662 ± 114} & {2421 ± 209} & {5531 ± 110} & {1599 ± 63} \\
 & $C(\pi_\textrm{final})$ & 17.9 ± 12.1 & 22.7 ± 9.7 & 12.7 ± 7.3 & 21.3 ± 3.7 \\
\cline{1-6}
\multirow[t]{2}{*}{C-TRPO} & $R(\pi_\textrm{final})$ & {3019 ± 149} & {2841 ± 41} & {5746 ± 248} & {1621 ± 82} \\
 & $C(\pi_\textrm{final})$ & 13.2 ± 9.2 & 12.1 ± 7.6 & 12.2 ± 5.9 & 17.7 ± 8.0 \\
\cline{1-6}
\multirow[t]{2}{*}{CPO} & $R(\pi_\textrm{final})$ & {3106 ± 21} & {2824 ± 104} & {5569 ± 349} & {1696 ± 19} \\
 & $C(\pi_\textrm{final})$ & 25.1 ± 11.3 & 23.1 ± 8.0 & 16.2 ± 8.6 & 25.7 ± 4.4 \\
\cline{1-6}
\multirow[t]{2}{*}{PPO-LAG} & $R(\pi_\textrm{final})$ & {3210 ± 85} & {\bfseries 3033 ± 1} & {5814 ± 122} & {240 ± 159} \\
 & $C(\pi_\textrm{final})$ & 28.9 ± 8.7 & 23.2 ± 1.9 & 12.7 ± 31.0 & 38.8 ± 36.4 \\
\cline{1-6}
\multi

In [7]:
from functools import partial

def bold_formatter(x, value):
    xn = round(float(x.split(" ± ")[0]), 1)
    if xn == value:
        return f"{{\\bfseries{x}}}"
    else:
        return f"{{{x}}}"

def underline_formatter(x, value):
    xn = round(float(x.split(" ± ")[0]), 1)
    if xn == value:
        return f"{{\\fbox{{{x}}}}}"
    else:
        return f"{{{x}}}"

ids = pd.IndexSlice

fmts1 = {column: partial(bold_formatter, value=df_eval[column].loc[ids[admissible[column], r"$R(\pi_\textrm{final})$"]].apply(lambda x: round(float(x.split(" ± ")[0]), 1)).max()) for column in df_eval.columns}

styled = df_eval[["PointGoal", "PointPush", "CarButton", "RacecarCircle"]].style.format(subset=ids[:, r"$R(\pi_\textrm{final})$", :], formatter=fmts1)

styled

KeyError: '$R(\\pi_\\textrm{final})$'

In [8]:
print(styled.to_latex(position_float="centering", hrules=True, multirow_align="t", multicol_align="r", clines="skip-last;data",))

\begin{table}
\centering
\begin{tabular}{llllllllll}
\toprule
 &  & AntVelocity & HalfCheetahVelocity & HumanoidVelocity & HopperVelocity & CarButton & PointGoal & RacecarCircle & PointPush \\
\midrule
\multirow[t]{2}{*}{C3PO} & $R$ & {3043 ± 44} & {2458 ± 436} & {5389 ± 93} & {\bfseries 1674 ± 35} & {2.3 ± 0.7} & {23.8 ± 0.9} & {25.9 ± 5.1} & {\bfseries 4.5 ± 2.6} \\
 & $C$ & 15.0 ± 4.7 & 13.3 ± 6.4 & 1.2 ± 0.9 & 9.9 ± 1.7 & 53.4 ± 22.3 & 37.9 ± 1.7 & 5.0 ± 1.7 & 20.2 ± 10.0 \\
\cline{1-10}
\multirow[t]{2}{*}{C-TRPO} & $R$ & {3019 ± 149} & {2841 ± 41} & {5746 ± 248} & {1621 ± 82} & {1.1 ± 0.2} & {\bfseries 19.3 ± 0.9} & {29.5 ± 3.1} & {1.0 ± 6.6} \\
 & $C$ & 13.2 ± 9.2 & 12.1 ± 7.6 & 12.2 ± 5.9 & 17.7 ± 8.0 & 34.0 ± 10.2 & 23.3 ± 3.6 & 20.2 ± 4.0 & 25.3 ± 7.0 \\
\cline{1-10}
\multirow[t]{2}{*}{CPO} & $R$ & {3106 ± 21} & {2824 ± 104} & {5569 ± 349} & {1696 ± 19} & {1.1 ± 0.2} & {20.4 ± 2.0} & {\bfseries 29.8 ± 1.9} & {0.7 ± 2.9} \\
 & $C$ & 25.1 ± 11.3 & 23.1 ± 8.0 & 16.2 ± 8.6 & 25.7 