# Plot training results from CSV files

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
from rliable import library as rly
from rliable import metrics, plot_utils

In [None]:
# Display and plotting defaults shared by every figure in this notebook.
pd.set_option("display.max_columns", 50)

sns.set_theme(style="darkgrid")
sns.set_context(
    "notebook",
    font_scale=1.5,
    rc={"font.size": 15, "axes.titlesize": 20, "axes.labelsize": 20},
)

# Set after the seaborn calls, so these values are the ones left in rcParams.
plt.rcParams.update(
    {
        "figure.figsize": (10, 10),
        "font.size": 15,
        "font.family": "sans-serif",
        "mathtext.fontset": "cm",
        "xtick.labelsize": 15,
        "ytick.labelsize": 15,
        "legend.frameon": True,
    }
)

In [None]:
# Experiment to load: selects the ../data/<data_name>/ directory as well as the
# algorithm relabelling and colour palette branches used later in the notebook.
data_name = "test"  # "target_kl"

In [None]:
# Load the training log of the selected experiment and discard the
# "Unnamed: 0" column.
df = (
    pd.read_csv(f"../data/{data_name}/train.csv")
    .drop(columns="Unnamed: 0")
)
df

In [None]:
# Rewrite the "algorithm" column into the display labels used in the plots.
if data_name == "target_kl":
    # Upper-cased algorithm name, a slash, then the initial target KL.
    kl_suffix = df["init_target_kl"].astype("str")
    df["algorithm"] = df["algorithm"].str.upper() + "/" + kl_suffix
elif data_name == "test":
    # Rows are selected by their initial target KL using a tolerant float
    # comparison; rows matching none of the values keep their original label.
    for kl_value, display_name in (
        (0.0025347, "TRPO (Walker2D)"),
        (0.00029658, "TRPO (Hopper)"),
        (0.00556098, "ATRPO (Walker2D)"),
        (0.00128343, "ATRPO (Hopper)"),
    ):
        df.loc[np.isclose(df["init_target_kl"], kl_value), "algorithm"] = display_name

df

In [None]:
# Load the final-score table of the selected experiment and discard the
# "Unnamed: 0" column.
df_final_score = (
    pd.read_csv(f"../data/{data_name}/final_score.csv")
    .drop(columns="Unnamed: 0")
)
df_final_score

In [None]:
# Rewrite the "algorithm" column of the final-score table into display labels.
if data_name == "target_kl":
    # Upper-cased algorithm name, a slash, then the initial target KL.
    kl_suffix = df_final_score["init_target_kl"].astype("str")
    df_final_score["algorithm"] = df_final_score["algorithm"].str.upper() + "/" + kl_suffix
elif data_name == "test":
    # Rows are selected by their initial target KL using a tolerant float
    # comparison; rows matching none of the values keep their original label.
    for kl_value, display_name in (
        (0.0025347, "TRPO (Walker2D)"),
        (0.00029658, "TRPO (Hopper)"),
        (0.00556098, "ATRPO (Walker2D)"),
        (0.00128343, "ATRPO (Hopper)"),
    ):
        matches = np.isclose(df_final_score["init_target_kl"], kl_value)
        df_final_score.loc[matches, "algorithm"] = display_name

df_final_score

## Plot results

In [None]:
# Distinct algorithm labels in sorted order; later cells iterate over this
# list when building score matrices and plots.
algorithms = sorted(df["algorithm"].unique())
algorithms

In [None]:
# Distinct seeds found in the training log, as strings.
seeds = list(map(str, df["seed"].unique()))
seeds

In [None]:
# Environments included in the evaluation. The order matters: position i in
# this list is column i of every per-algorithm score matrix built below, and
# the first entry is the task used for the target-KL plot.
tasks = [
    "halfcheetah",
    "ant",
    "hopper",
    "walker2d",
    "swimmer",
    "reacher",
    "pusher",
    "inverted_pendulum",
    "inverted_double_pendulum",
]
tasks

In [None]:
# Number of tasks; used as the column count of each score matrix.
num_tasks = len(tasks)

In [None]:
# Per-task (min, max) reference rewards. Final rewards are rescaled below as
# (reward - min) / (max - min).
# NOTE(review): the origin of these reference values is not shown in this
# notebook -- confirm where they were measured before changing them.
min_max_scores = {
    "ant": (-6.7080652713775635, 3420.190185546875),
    "halfcheetah": (-323.6818895675242, 5679.3515625),
    "hopper": (14.777723520994186, 3838.20751953125),
    "walker2d": (0.7858000923879445, 4437.2666015625),
    "swimmer": (1.0794172366004204, 363.65631103515625),
    "reacher": (-818.4641944468021, -54.64520263671875), 
    "pusher": (-1542.1742627620697, -155.21102905273438),
    "inverted_pendulum": (5.0, 1000.0),
    "inverted_double_pendulum": (155.97544860839844, 9359.9853515625),
}

### Compare final returns

In [None]:
def IQM(x):
    """Return ``metrics.aggregate_iqm(x)`` (interquartile mean)."""
    return metrics.aggregate_iqm(x)


def MEAN(x):
    """Return ``metrics.aggregate_mean(x)``."""
    return metrics.aggregate_mean(x)


def MEDIAN(x):
    """Return ``metrics.aggregate_median(x)``."""
    return metrics.aggregate_median(x)


def aggregate_func(x):
    """Return the aggregates of ``x`` as an array ordered [median, IQM, mean].

    The order has to match the metric names given to the interval plot.
    """
    return np.array([MEDIAN(x), IQM(x), MEAN(x)])

In [None]:
# Each score matrix is allocated with len(seeds) * num_demo_runs rows.
# NOTE(review): presumably the number of evaluation ("demo") runs recorded per
# seed in final_score.csv -- confirm against the script that wrote the file.
num_demo_runs = 20
num_demo_runs

In [None]:
# Column of the final-score table that holds the rewards to be normalized.
rewards_key = "rewards"

In [None]:
# Build one (runs x tasks) matrix of min-max normalized final rewards per
# algorithm. Every algorithm/task pair is expected to provide exactly
# len(seeds) * num_demo_runs rows; otherwise the column assignment fails.
final_score_dict = {}
for algorithm in algorithms:
    algorithm_rows = df_final_score[df_final_score["algorithm"] == algorithm]
    scores = np.zeros((len(seeds) * num_demo_runs, num_tasks))
    for column, task in enumerate(tasks):
        low, high = min_max_scores[task]
        task_rows = algorithm_rows[algorithm_rows["env"] == task]
        scores[:, column] = task_rows[rewards_key].values
        # Rescale in place: (reward - min) / (max - min).
        scores[:, column] -= low
        scores[:, column] /= high - low
    final_score_dict[algorithm] = scores

In [None]:
# Colours used for the algorithms in every plot below.
if data_name == "target_kl":
    color_palette = sns.color_palette("icefire", n_colors=len(algorithms))
elif data_name == "test":
    # Entries 2, 3, 6 and 7 of the "Paired" palette.
    palette = sns.color_palette("Paired")
    color_palette = [palette[k] for k in (2, 3, 6, 7)]
color_palette

#### Performance Profiles

In [None]:
# Score thresholds (tau) at which the performance profile is evaluated.
thresholds = np.linspace(start=0.0, stop=1.1, num=100)

# Per-algorithm profile values and the matching (lower, upper) interval bounds.
score_distributions, score_distributions_cis = rly.create_performance_profile(final_score_dict, thresholds)

In [None]:
# Reshape the profiles into long form: one row per (algorithm, threshold).
profile_wide = pd.DataFrame(score_distributions)
df_perfprof = profile_wide.melt(var_name="algorithm", value_name="score")
# melt() stacks the algorithm columns one after another, so the thresholds are
# repeated once per algorithm.
df_perfprof["tau"] = np.tile(thresholds, len(algorithms))
df_perfprof

In [None]:
# Axis labels of the performance-profile plot (raw strings keep the LaTeX
# backslashes intact).
x_label = r"Normalized Score $(\tau )$"
y_label = r"Fraction of runs with score $> \tau$"

In [None]:
# Explicit algorithm -> colour mapping shared by the profile lines and their
# confidence bands, so both always use the same colour for an algorithm.
colors = dict(zip(algorithms, color_palette))

# NOTE: despite its name, `fig` is the matplotlib Axes returned by seaborn.
fig = sns.lineplot(
    data=df_perfprof,
    x="tau",
    y="score",
    hue="algorithm",
    hue_order=algorithms,  # same ordering as the other algorithm plots
    style="algorithm",
    palette=colors,
)
# Shade the interval between the lower and upper bound of each profile.
for algorithm in algorithms:
    lower, upper = score_distributions_cis[algorithm]
    fig.fill_between(thresholds, y1=lower, y2=upper, color=colors[algorithm], alpha=0.2)

fig.set_xlabel(x_label)
fig.set_ylabel(y_label)
# Alternative placement (legend to the right of the axes):
# fig.legend(title="Algorithm", loc='center left', bbox_to_anchor=(1., .5), fontsize=20)
fig.legend(title="Algorithm", loc='upper center', bbox_to_anchor=(.5, 1.2), fontsize=20, ncol=2)

fig.figure.savefig("fig/perfprof.pdf", bbox_inches="tight")

#### Metrics with 95% stratified bootstrap CIs

In [None]:
# Point estimates and interval estimates of [median, IQM, mean] per algorithm.
_interval_results = rly.get_interval_estimates(
    score_dict=final_score_dict,
    func=aggregate_func,
)
aggregate_final_scores, aggregate_final_interval_estimates = _interval_results

In [None]:
# Smaller base font from here on; this changes the global rcParams, so it also
# applies to any figure created after this cell.
plt.rcParams.update({'font.size': 8})
y_label = "Normalized Score"

In [None]:
# Metric names in the same order as the values returned by aggregate_func.
metric_labels = ['Median', 'IQM', 'Mean']

fig, axes = plot_utils.plot_interval_estimates(
    aggregate_final_scores,
    aggregate_final_interval_estimates,
    metric_names=metric_labels,
    algorithms=algorithms,
    color_palette=color_palette,
    xlabel=y_label,
    xlabel_y_coordinate=-0.25,
)

fig.figure.savefig("fig/final_return.pdf", bbox_inches="tight")

### Plot target KL divergence

In [None]:
# Task whose target-KL curves are plotted below (the first entry of `tasks`).
task = tasks[0]

In [None]:
# Axis labels of the target-KL plot. The x label is a raw string because "\#"
# is not a valid escape sequence in a normal string literal (Python warns
# about it); the resulting text is unchanged.
x_label = r"$\#$ Updates"
y_label = "Trust Region Radius"

In [None]:
# Target KL over the number of updates for the selected task. With `units` and
# `estimator=None`, one line is drawn per seed instead of an aggregate.
# NOTE: despite its name, `fig` is the matplotlib Axes returned by seaborn.
fig = sns.lineplot(
    data=df[df["env"] == task],
    x="n_updates",
    y="train/target_kl",
    hue="algorithm",
    hue_order=algorithms,
    style="algorithm",
    units="seed",
    estimator=None,
    dashes=True,
    # markers=True,
    errorbar="ci",
    palette=color_palette
)
# The x ticks are left to matplotlib's defaults. The earlier call
# `fig.set_xticks(x_ticks)` raised a NameError because `x_ticks` is not defined
# anywhere in this notebook; set explicit positions here if they are needed.

fig.set_xlabel(x_label)
fig.set_ylabel(y_label)
# Alternative legend placements:
# fig.legend(title="Algorithm", loc='center left', bbox_to_anchor=(1., .5), fontsize=20)
fig.legend(title="Algorithm", loc='upper center', bbox_to_anchor=(.5, 1.2), fontsize=20, ncol=2)
# fig.legend(title="Algorithm", loc='best', fontsize=20)

fig.figure.savefig("fig/target_kl.pdf", bbox_inches="tight")