In [11]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Render every float in displayed DataFrames with three decimal places.
pd.options.display.float_format = "{:.3f}".format

In [12]:
def seed_read_results(env_name, files, keys, seeds, update=True):
    """Load per-seed result CSVs and aggregate them across seeds.

    For every seed, each entry of ``files`` is read from
    ``results_chessworld/<env_name>/<seed>/<file>.csv``, indexed by
    ``("Task Set", "Task ID")`` and concatenated column-wise under the
    matching label from ``keys``.

    Parameters
    ----------
    env_name : str
        Sub-directory of ``results_chessworld/`` holding the seed folders.
    files : sequence of str
        CSV base names (without the ``.csv`` extension), one per method.
    keys : sequence of str
        Top-level column labels, one per entry of ``files``.
    seeds : iterable
        Seed identifiers; each is used as a directory name via ``str(seed)``.
    update : bool, default True
        When true, the seed-5 ``ChessWorld-v1_gcn_formula_update2`` file is
        replaced by ``ChessWorld-v1_gcn_formula_update_quick.csv``.

    Returns
    -------
    final_dfs : dict
        Maps each seed to its concatenated DataFrame.
    df_mean : DataFrame
        Element-wise mean of the per-seed frames.
    df_std : DataFrame
        Element-wise population standard deviation across seeds (the squared
        deviations are divided by the number of seeds, not by n - 1).

    Raises
    ------
    ValueError
        If ``seeds`` is empty.
    """
    seeds = list(seeds)
    if not seeds:
        raise ValueError("seeds must contain at least one seed")

    parent = "results_chessworld/" + env_name + "/"
    final_dfs = {}

    for seed in seeds:
        all_dfs = []
        for file in files:
            # Seed 5 of the GCN run was redone on the correct curriculum; use
            # that file in place of the old, incorrect-curriculum results.
            if seed == 5 and file == 'ChessWorld-v1_gcn_formula_update2' and update:
                cur_name = parent + str(seed) + '/ChessWorld-v1_gcn_formula_update_quick.csv'
            else:
                cur_name = parent + str(seed) + "/" + file + ".csv"

            cur_df = pd.read_csv(cur_name).set_index(["Task Set", "Task ID"])

            # Rescale the success column by 100/29 (column name suggests a
            # count out of 29, i.e. this turns it into a percentage).
            if "Successes x/29" in cur_df.columns:
                cur_df["Successes x/29"] = cur_df["Successes x/29"] * (float(100) / 29)

            # Files reporting a discounted return normally also carry an
            # "Avg Steps" column that is not wanted here. errors="ignore"
            # keeps a file without that column from raising KeyError.
            if "Avg Discounted Return" in cur_df.columns:
                cur_df = cur_df.drop(columns=["Avg Steps"], errors="ignore")

            all_dfs.append(cur_df)

        final_dfs[seed] = pd.concat(all_dfs, axis=1, keys=keys)

    n_seeds = len(final_dfs)
    df_mean = sum(final_dfs.values()) / n_seeds
    df_std = sum(
        [(df_mean - df_cur) ** 2 / n_seeds for df_cur in final_dfs.values()]
    ).apply(np.sqrt)
    return final_dfs, df_mean, df_std

In [13]:
def actual_stds(final_dfs, df_mean):
    """Standard deviation across seeds of the per-task-set means.

    Each per-seed frame is first averaged within every "Task Set" group; the
    spread of those group means around the grouped ``df_mean`` is returned.

    Parameters
    ----------
    final_dfs : iterable of DataFrame, or dict of DataFrame
        One frame per seed. A dict (such as the first value returned by
        ``seed_read_results``) is accepted and its values are used.
    df_mean : DataFrame
        Across-seed mean frame with the same index/columns as the per-seed
        frames.

    Returns
    -------
    DataFrame
        Population standard deviation (squared deviations divided by the
        number of frames) indexed by "Task Set".

    Raises
    ------
    ValueError
        If ``final_dfs`` contains no frames.
    """
    if isinstance(final_dfs, dict):
        final_dfs = final_dfs.values()
    # Materialise once so generators work and the count is well defined.
    frames = list(final_dfs)
    if not frames:
        raise ValueError("final_dfs must contain at least one DataFrame")

    df_mean_grouped = df_mean.groupby("Task Set").mean()
    squared_devs = [
        (cur_df.groupby("Task Set").mean() - df_mean_grouped) ** 2 / len(frames)
        for cur_df in frames
    ]

    return sum(squared_devs).apply(np.sqrt)

In [14]:
# Result files (one per method) for the finite-horizon tasks.
files_finite_seeds = [
    'ChessWorld-v1_deepsets_formula_update',
    'ChessWorld-v1_gcn_formula_update2',
    'ChessWorld-v1_transformer_formula_update',
]

# Column labels, in the same order as the files above.
keys_seeds = ["DeepLTL", "LTL-GNN", "LTL-ENC"]

# Per-seed frames plus their across-seed mean and std, for seeds 1..5.
dfs_ra, df_ra_mean, df_ra_std = seed_read_results(
    env_name="ChessWorld-v1",
    files=files_finite_seeds,
    keys=keys_seeds,
    seeds=list(range(1, 6)),
)

# Inspect the per-task seed averages with: df_ra_mean

In [15]:
# Average the seed-averaged metrics over the tasks within each task set.
df_ra_mean.groupby("Task Set").mean()

Unnamed: 0_level_0,DeepLTL,DeepLTL,LTL-GNN,LTL-GNN,LTL-ENC,LTL-ENC
Unnamed: 0_level_1,Successes x/29,Avg Discounted Return,Successes x/29,Avg Discounted Return,Successes x/29,Avg Discounted Return
Task Set,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Avoid 2,92.299,0.886,95.172,0.913,57.586,0.554
Avoid 3,68.793,0.658,82.586,0.785,51.724,0.494
Avoid 4,91.172,0.886,92.69,0.902,76.966,0.746
Avoid 5,67.126,0.642,74.253,0.709,41.724,0.398
Reach,99.08,0.906,99.31,0.915,70.575,0.642
Reach Avoid General,91.527,0.873,93.596,0.892,76.552,0.724
Reach Avoid X and not Y,91.862,0.897,91.034,0.888,80.828,0.785


In [16]:
# Across-seed standard deviation of the per-task-set means for the finite-horizon results.
actual_stds(dfs_ra.values(), df_ra_mean)

Unnamed: 0_level_0,DeepLTL,DeepLTL,LTL-GNN,LTL-GNN,LTL-ENC,LTL-ENC
Unnamed: 0_level_1,Successes x/29,Avg Discounted Return,Successes x/29,Avg Discounted Return,Successes x/29,Avg Discounted Return
Task Set,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Avoid 2,3.77,0.037,3.243,0.029,16.431,0.159
Avoid 3,3.055,0.03,6.597,0.061,20.982,0.2
Avoid 4,2.738,0.027,3.867,0.036,15.425,0.153
Avoid 5,5.31,0.05,8.52,0.078,18.052,0.171
Reach,1.839,0.017,0.92,0.009,20.913,0.187
Reach Avoid General,2.319,0.023,1.946,0.016,17.134,0.162
Reach Avoid X and not Y,1.337,0.014,3.235,0.031,11.132,0.11


In [17]:
# Result files (one per method) for the infinite-horizon tasks.
files_infinite_seeds = [
    'ChessWorld-v1_deepsets_formula_update_inf',
    'ChessWorld-v1_gcn_formula_update_inf',
    'ChessWorld-v1_transformer_formula_update_inf',
]

# Column labels, in the same order as the files above.
keys_seeds = ["DeepLTL", "LTL-GNN", "LTL-ENC"]

# Per-seed frames plus their across-seed mean and std, for seeds 1..5.
dfs_rs, df_rs_mean, df_rs_std = seed_read_results(
    env_name="ChessWorld-v1",
    files=files_infinite_seeds,
    keys=keys_seeds,
    seeds=list(range(1, 6)),
)

# Inspect the per-task seed averages with: df_rs_mean

In [18]:
# The last row of the results is a task that was mistakenly repeated,
# so drop it before aggregating.
df_rs_m1 = df_rs_mean.iloc[:-1]

# Per-task view (uncomment to inspect): df_rs_m1
# Average the remaining tasks within each task set.
df_rs_m1.groupby("Task Set").mean()

Unnamed: 0_level_0,DeepLTL,LTL-GNN,LTL-ENC
Unnamed: 0_level_1,Success rate,Success rate,Success rate
Task Set,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Reach avoid always eventually,0.957,0.928,0.676
Reach infinite training,0.4,0.86,0.431
Reach stay infinite,0.357,0.767,0.336


In [19]:
# Drop the last row from every per-seed frame so they line up with df_rs_m1,
# then compute the across-seed standard deviation of the per-task-set means.
actual_stds([cdf.iloc[:-1] for cdf in dfs_rs.values()], df_rs_m1)

Unnamed: 0_level_0,DeepLTL,LTL-GNN,LTL-ENC
Unnamed: 0_level_1,Success rate,Success rate,Success rate
Task Set,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Reach avoid always eventually,0.017,0.093,0.285
Reach infinite training,0.49,0.28,0.422
Reach stay infinite,0.44,0.367,0.339
