# Meta RL Experiments with Garage

We use the [garage library](https://github.com/rlworkgroup/garage) to evaluate some of the popular meta RL algorithms on our problems. In particular, we test *MAML-TRPO*, *RL*$^2$ and *PEARL*.


## Installation

Start by installing the garage package [as described in their docs](https://garage.readthedocs.io/en/latest/user/installation.html). Note that because garage is no longer very actively maintained, installing it can be a bit tricky. Here are some tips for debugging:

1. Use Python 3.7; with any newer version we ran into errors about package dependencies that could not be resolved.
2. Really ensure that mujoco is installed correctly (and on the correct path, although this is needed only for testing installation).
3. You may need to install patchelf [as described here](https://github.com/openai/mujoco-py/issues/652).


## Garage experiments

Next, we run the actual experiments using the experiment scripts provided in `bauwerk/baselines`. Assuming this notebook is run from its own directory in the repo, you can run the experiments using the following commands:

In [None]:
! python ../../baselines/maml.py --epochs=1

In [None]:
! python ../../baselines/pearl.py --num_epochs=10

In [None]:
! python ../../baselines/rl2_ppo_tf.py --n_epochs=2

## Plotting

Once all of the above experiments have completed, we can move on to plotting the results.

In [None]:
import matplotlib.pyplot as plt

# Setup and helper code
import bauwerk
import bauwerk.eval
import bauwerk.benchmarks
from bauwerk.utils.garage import DEFAULT_EPISODE_LEN
import gym
import numpy as np

# The length of each task, taken from bauwerk's garage default episode length.
# It is passed as the episode/evaluation length to the bauwerk helpers used in
# this notebook and is also the divisor that turns logged returns into
# per-step averages.
TASK_LEN = DEFAULT_EPISODE_LEN

In [None]:
# Get log files for all experiments
import pathlib
from tensorboard.backend.event_processing import event_accumulator

# Directory (relative to the current working directory) that is scanned for
# one sub-folder per experiment run; presumably where garage writes its logs
# when the baseline scripts are launched from this notebook -- TODO confirm.
exp_dir = pathlib.Path("./data/local/experiment/")

def get_first_exp_log(algorithm_name):
    """Return the absolute path of the first events file logged for an algorithm.

    The module-level ``exp_dir`` is searched for sub-directories whose name
    contains ``algorithm_name``. Inside the first match (in sorted order) the
    first file (in sorted order) whose name contains ``"events.out"`` is
    returned.

    Sorting makes the choice reproducible when several runs of the same
    algorithm exist; ``Path.iterdir`` itself yields entries in arbitrary order.

    Args:
        algorithm_name: Substring identifying the experiment folder,
            e.g. ``"maml"``.

    Returns:
        Absolute path to the events file, as a string.

    Raises:
        FileNotFoundError: If ``exp_dir`` does not exist, no matching
            experiment folder exists, or the folder holds no events file.
    """
    # Match on the entry name only, so that the parent path can never
    # accidentally satisfy the substring test.
    exp_folders = sorted(
        entry
        for entry in exp_dir.iterdir()
        if entry.is_dir() and algorithm_name in entry.name
    )
    if not exp_folders:
        raise FileNotFoundError(
            f"No experiment folder containing {algorithm_name!r} found in "
            f"{exp_dir.absolute()}. Has the experiment been run?"
        )

    folder = exp_folders[0]
    event_files = sorted(
        entry for entry in folder.iterdir() if "events.out" in entry.name
    )
    if not event_files:
        raise FileNotFoundError(
            f"No 'events.out' file found in experiment folder {folder.absolute()}."
        )
    return str(event_files[0].absolute())

# Keys used to find each algorithm's experiment folder, paired with the label
# shown in the plots.
algs = ["maml", "rl2_ppo", "pearl"]
display_names = ["MAML-TRPO", "RL$^2$", "PEARL"]
pretty_alg_names = list(zip(algs, display_names))

# First create one accumulator per algorithm from its events file ...
accumulators = [
    event_accumulator.EventAccumulator(get_first_exp_log(alg)) for alg in algs
]
# ... then load all the data. The dict stores whatever Reload() returns
# (per the TensorBoard API this is the accumulator itself).
tensorboard_eas = dict(zip(algs, [acc.Reload() for acc in accumulators]))



In [None]:


# Create the SolarBatteryHouse environment from the BuildDistB benchmark.
# A single environment instance is re-used and re-tasked for every battery size.
build_dist_b = bauwerk.benchmarks.BuildDistB(seed=0, episode_len=TASK_LEN)
test_env = build_dist_b.make_env()

# Battery sizes to evaluate (kWh, as in the logged tag names); 25 is left out.
battery_sizes = [1, 5, 15, 20]

# env_data[size][method] -> performance of `method` on the house with that
# battery size. Insertion order of the methods matters: the plotting code
# derives bar order and colours from it.
env_data = {}

for size in battery_sizes:
    env_data[size] = {}
    size_results = env_data[size]

    # Point the shared environment at a house with this battery size.
    house_cfg = bauwerk.envs.solar_battery_house.EnvConfig(
        battery_size=size,
        episode_len=TASK_LEN,
    )
    task = bauwerk.benchmarks.Task(cfg=house_cfg)
    test_env.set_task(task)

    # Reference baselines computed directly on the environment.
    size_results["optimal"] = bauwerk.eval.get_optimal_perf(
        test_env, eval_len=TASK_LEN
    )
    idle_actions = np.zeros((TASK_LEN, 1))  # an all-zero action at every step
    size_results["no charge"] = bauwerk.eval.evaluate_actions(idle_actions, test_env)
    random_perf, _ = bauwerk.eval.get_avg_rndm_perf(
        test_env,
        eval_len=TASK_LEN,
        num_samples=10,
    )
    size_results["random"] = random_perf

    # Add the algorithms' performance based on tensorboard data: the last
    # logged average return for this house, converted to a per-step value.
    scalar_tag = f"MetaTest/bauwerk/House-{size}kWh/AverageReturn"
    for alg_name, pretty_name in pretty_alg_names:
        last_event = tensorboard_eas[alg_name].Scalars(scalar_tag)[-1]
        size_results[pretty_name] = last_event.value / TASK_LEN
        print(f"Adding data from {pretty_name} on size {size}: {env_data[size][pretty_name]}.")
    

In [None]:
env_data[15]["no charge"]

In [None]:
# relevant tutorial https://www.geeksforgeeks.org/bar-plot-in-matplotlib/

import copy
import seaborn as sns
# Global plot styling; `palette` is indexed by the plotting functions to give
# every method its own colour.
sns.set_theme(style="white", context="paper", font="serif")
palette = sns.color_palette("deep")

# Layout of the grouped horizontal bars. Each battery size occupies one unit
# on the y-axis; `height` is the thickness of a single bar, chosen so that
# `num_values_per_house` bars plus a gap worth `space_between_houses` bars
# fill exactly one unit.
# NOTE(review): `get_loc` centres a group on its tick only when the group has
# exactly `num_values_per_house` bars -- confirm this matches each chart.
num_values_per_house = 4
space_between_houses = 2.5
height = 1/(num_values_per_house + space_between_houses)


def get_rel_perf(maximum, minimum, perf):
    """Rescale ``perf`` linearly so that ``minimum`` maps to 0 and ``maximum`` to 1.

    Values outside the ``[minimum, maximum]`` range map outside ``[0, 1]``.
    No guard is applied for ``maximum == minimum``.
    """
    above_minimum = perf - minimum
    full_range = maximum - minimum
    return above_minimum / full_range

def get_loc(house, idx):
    """Return the y-position of bar number ``idx`` in the group of row ``house``.

    Consecutive bars are spaced ``height`` apart. The offset of the first bar
    is chosen such that a group of ``num_values_per_house`` bars is centred
    on ``house``.
    """
    first_bar = house - height * (num_values_per_house / 2 - 0.5)
    return first_bar + height * idx



def create_bar_chart(
    env_data,
    max_key="optimal",
    min_key="random",
    remove_keys=None,
    include_legend=True,
    file_name="test.png",
    ax=None,
):
    """Draw grouped horizontal bars of performance relative to two baselines.

    Every value is rescaled with ``get_rel_perf`` so that the ``min_key``
    entry of its battery size maps to 0 and the ``max_key`` entry maps to 1.
    The two baseline entries themselves are not drawn.

    Args:
        env_data: Mapping ``battery size -> {method name: performance}``.
            One group of bars is drawn per battery size, in mapping order.
        max_key: Method whose performance defines the value 1.
        min_key: Method whose performance defines the value 0.
        remove_keys: Optional iterable of further method names to leave out.
            Names that are not present are ignored.
        include_legend: Whether to add a legend with one entry per method.
        file_name: Unused; kept for backward compatibility. The figure is
            not saved by this function.
        ax: Axes to draw on. If ``None``, a new 4.5x4.5 figure is created.

    Returns:
        The newly created figure if ``ax`` was ``None``, otherwise ``None``.

    Raises:
        KeyError: If ``max_key`` or ``min_key`` is missing for a battery size.
    """
    # A figure is only created (and returned) when no axes are supplied.
    fig = None
    if ax is None:
        fig, ax = plt.subplots(figsize=(4.5, 4.5))

    # Consistent colour index per method, assigned in order of first
    # appearance across all battery sizes.
    col_code = {}
    for house_perf in env_data.values():
        for key in house_perf:
            col_code.setdefault(key, len(col_code))

    hidden_keys = {max_key, min_key}
    if remove_keys is not None:
        hidden_keys.update(remove_keys)

    ys = []
    y_labels = []
    for i, (size, house_perf) in enumerate(env_data.items()):
        best = house_perf[max_key]
        worst = house_perf[min_key]
        shown_keys = [key for key in house_perf if key not in hidden_keys]

        # NOTE(review): get_loc centres the group on tick `i` only when it
        # contains `num_values_per_house` bars.
        for j, key in enumerate(shown_keys):
            ax.barh(
                get_loc(i, j),
                width=get_rel_perf(maximum=best, minimum=worst, perf=house_perf[key]),
                height=height,
                # wrap around instead of failing if there are more methods
                # than palette colours
                color=palette[col_code[key] % len(palette)],
                label=key,
            )

        ys.append(i)
        y_labels.append(f"{size}kWh")

    # Annotate every bar with its (rounded) value, just beyond the bar's end.
    x_len = ax.get_xbound()[1] - ax.get_xbound()[0]
    for bar in ax.patches:
        width = bar.get_width()
        ax.text(
            width + x_len * 0.007 * np.sign(width),
            bar.get_y() + bar.get_height() * 0.55,
            str(round(width, 3)),
            fontsize=8,
            color="black",
            horizontalalignment=("left" if width > 0 else "right"),
            verticalalignment="center",
        )

    # add the battery size labels
    ax.set_yticks(ys, y_labels)
    ax.set_xticks(
        [0, 1],
        [f"({min_key}) 0", f"({max_key}) 1"],
        rotation=35,
        horizontalalignment="right",
    )
    ax.tick_params(axis="x", pad=-10)  # reduce the padding of tick labels

    # add axis labels
    ax.set_ylabel("Battery size")

    if include_legend:
        # every barh call adds a label; keep one handle per label
        handles, labels = ax.get_legend_handles_labels()
        by_label = dict(zip(labels, handles))
        ax.legend(by_label.values(), by_label.keys(), loc="lower left")

    # change order to from smallest to largest battery size
    ax.invert_yaxis()
    ax.set_title(f"Rel. to {min_key} and {max_key}")

    # reference lines at the two baselines: solid at 0, dotted at 1
    ax.vlines(
        [0, 1],
        *ax.get_ylim(),
        colors=["grey", "grey"],
        linestyles=["solid", "dotted"],
    )

    # remove default frame around figure
    for side in ("top", "bottom", "right", "left"):
        ax.spines[side].set_visible(False)

    # extend figure slightly to left to show full vline at 0
    ax.set_xlim(left=ax.get_xlim()[0] - 0.005)

    return fig

In [None]:
def create_bar_chart_absolute(env_data, remove_keys=None, include_legend=True, file_name="test.png", ax=None):
    """Draw grouped horizontal bars of the absolute performance values.

    Args:
        env_data: Mapping ``battery size -> {method name: performance}``.
            One group of bars is drawn per battery size, in mapping order.
        remove_keys: Optional iterable of method names to leave out.
            Names that are not present are ignored.
        include_legend: Whether to add a legend with one entry per method.
        file_name: Unused; kept for backward compatibility. The figure is
            not saved by this function.
        ax: Axes to draw on. If ``None``, a new 4.5x4.5 figure is created.

    Returns:
        The newly created figure if ``ax`` was ``None``, otherwise ``None``.
    """
    # A figure is only created (and returned) when no axes are supplied.
    fig = None
    if ax is None:
        fig, ax = plt.subplots(figsize=(4.5, 4.5))

    # Consistent colour index per method, assigned in order of first
    # appearance across all battery sizes.
    col_code = {}
    for house_perf in env_data.values():
        for key in house_perf:
            col_code.setdefault(key, len(col_code))

    hidden_keys = set(remove_keys) if remove_keys is not None else set()

    ys = []
    y_labels = []
    for i, (size, house_perf) in enumerate(env_data.items()):
        shown = [(key, value) for key, value in house_perf.items() if key not in hidden_keys]

        # NOTE(review): get_loc centres the group on tick `i` only when it
        # contains `num_values_per_house` bars; the number of bars drawn here
        # is len(shown) and may differ.
        for j, (key, value) in enumerate(shown):
            ax.barh(
                get_loc(i, j),
                width=value,
                height=height,
                # wrap around instead of failing if there are more methods
                # than palette colours
                color=palette[col_code[key] % len(palette)],
                label=key,
            )

        ys.append(i)
        y_labels.append(f"{size}kWh")

    # Annotate every bar with its (rounded) value, just beyond the bar's end.
    x_len = ax.get_xbound()[1] - ax.get_xbound()[0]
    for bar in ax.patches:
        width = bar.get_width()
        ax.text(
            width + x_len * 0.007 * np.sign(width),
            bar.get_y() + bar.get_height() * 0.55,
            str(round(width, 3)),
            fontsize=8,
            color="black",
            horizontalalignment=("left" if width > 0 else "right"),
            verticalalignment="center",
        )

    # add the battery size labels
    ax.set_yticks(ys, y_labels)

    # add axis labels
    ax.set_xlabel("Absolute performance (price paid per timestep)")
    ax.set_ylabel("Battery size")

    if include_legend:
        # every barh call adds a label; keep one handle per label
        handles, labels = ax.get_legend_handles_labels()
        by_label = dict(zip(labels, handles))
        ax.legend(by_label.values(), by_label.keys(), loc="lower left")

    # change order to from smallest to largest battery size
    ax.invert_yaxis()
    ax.set_title("Building distribution B performance")

    # remove default frame around figure, keeping only the x-axis line
    ax.spines["top"].set_visible(False)
    ax.spines["bottom"].set_visible(True)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(False)

    # widen the x-range slightly on the left-hand side
    ax.set_xlim(left=ax.get_xlim()[0] - 0.005)

    return fig

In [None]:
# Create shared plot with different relative performances

fig, axs = plt.subplots(1, 2)
fig.suptitle("Performance on distribution $B$")
create_bar_chart(env_data, "optimal","random", file_name="exp_barchart_randopt.png", ax=axs[0])
create_bar_chart(env_data, "optimal","no charge", remove_keys=["random"], file_name="exp_barchart_nochargeopt.png", ax=axs[1], include_legend=False)
axs[1].set_ylabel("")
axs[1].set_yticks([])
fig.tight_layout()
fig.savefig("combined_build_distb_plot.png", dpi=300)

In [None]:
fig = create_bar_chart_absolute(env_data=env_data, file_name="exp_barchart_absolute.png")
fig.savefig("exp_barchart_b_absolute.png", dpi=300)