In [None]:
%load_ext autoreload
%autoreload 2

import os
import json
from itertools import zip_longest
import numpy as np
import matplotlib.pyplot as plt
from experiments.base.iqm import get_iqm_and_conf_per_epoch
from experiments.atari import scale_score


# Games for which results are loaded and plotted (one plot column per game).
games = ["BattleZone", "DoubleDunk", "NameThisGame"]

# Experiment type -> result folders to compare, relative to the output
# directory. The "*" in each folder name is a placeholder that is substituted
# with the game name when the results are loaded.
experiment_folders = {
    "activation": [
        "activation_tanh_relu_sigmoid_*/adadqnstatic",
        "activation_sigmoid_*/dqn",
        "lr_5e-5_*/dqn",
        "activation_tanh_*/dqn",
    ],
    "learning_rate": [
        "lr_1e-5_5e-5_1e-4_*/adadqnstatic",
        "lr_1e-5_*/dqn",
        "lr_5e-5_*/dqn",
        "lr_1e-4_*/dqn",
    ],
    "architecture": [
        "architecture_44_52_64_512__32_64_64_512__46_46_46_714_*/adadqnstatic",
        "architecture_44_52_64_512_*/dqn",
        "lr_5e-5_*/dqn",
        "architecture_46_46_46_714_*/dqn",
    ],
    # Disabled experiment type; its folder list currently duplicates the
    # "architecture" entry above.
    # "epsilon_adam": [
    #     "architecture_44_52_64_512__32_64_64_512__46_46_46_714_*/adadqnstatic",
    #     "architecture_44_52_64_512_*/dqn",
    #     "lr_5e-5_*/dqn",
    #     "architecture_46_46_46_714_*/dqn",
    # ],
}

# Result skeleton: experiment type -> folder -> game -> {} (filled in later).
# Every leaf is its own fresh dict, so filling one entry never affects another.
experiment_data = {
    kind: {
        folder: {game: {} for game in games}
        for folder in folders
    }
    for kind, folders in experiment_folders.items()
}

# Root directory of the experiment outputs, resolved against the current
# working directory.
base_path = os.path.join(os.path.abspath(''), "exp_output")


def _read_json(path):
	"""Return the parsed content of the JSON file at *path*.

	The file is opened in a ``with`` block so the handle is closed right
	after parsing instead of being left to the garbage collector.
	"""
	with open(path, "r") as json_file:
		return json.load(json_file)


# For every (experiment type, experiment folder, game) triple, load the
# per-seed episode returns and store the aggregated curves in experiment_data
# under the keys "iqm", "confidence" and "x_values".
for experiment_type, list_experiments in experiment_folders.items():
	print(experiment_type)
	for experiment in list_experiments:
		print(experiment)
		for game in games:
			experiment_name = experiment.replace("*", game)
			experiment_path = os.path.join(base_path, experiment_name, "episode_returns_and_lenghts")

			# List the directory once: the same listing provides both the rows
			# of the returns matrix and the seed labels used in the warning
			# below, so the two are guaranteed to stay aligned.
			seed_files = os.listdir(experiment_path)

			# One list per seed file holding, for each epoch, the mean episode
			# return passed through scale_score (presumably a human-normalised
			# score -- confirm in experiments.atari).
			returns_experiment_ = [
				[
					scale_score(np.mean(episode_returns), game)
					for episode_returns in _read_json(os.path.join(experiment_path, seed_file))["episode_returns"]
				]
				for seed_file in seed_files
			]

			# Shape (n_seeds, n_epochs_of_longest_seed); shorter seeds are
			# padded with NaN on the right.
			returns_experiment = np.array(list(zip_longest(*returns_experiment_, fillvalue=np.nan))).T

			print(f"Plot {experiment_name} with {returns_experiment.shape[0]} seeds.")
			n_epochs = _read_json(os.path.join(experiment_path, "..", "..", "parameters.json"))["n_epochs"]
			if returns_experiment.shape[1] != n_epochs:
				# Even the longest seed does not have the configured number of epochs.
				print("!!! None of the seeds are complete !!!")
			else:
				incomplete_seeds = np.isnan(returns_experiment).any(axis=1)
				if incomplete_seeds.any():
					# splitext removes exactly the extension; str.strip(".json")
					# would strip any of the characters ".", "j", "s", "o", "n"
					# from both ends of the name.
					seeds = np.array([int(os.path.splitext(seed_file)[0]) for seed_file in seed_files])
					print(f"!!! Seeds {seeds[incomplete_seeds]} are not complete !!!")

			game_data = experiment_data[experiment_type][experiment][game]
			game_data["iqm"], game_data["confidence"] = get_iqm_and_conf_per_epoch(returns_experiment)
			# Epochs are numbered from 1 on the x axis.
			game_data["x_values"] = np.arange(1, returns_experiment.shape[1] + 1)

In [None]:
from matplotlib.lines import Line2D
from experiments.atari import NAMES, COLORS, STYLES, ORDERS


# Global matplotlib style for the figure.
plt.rc("font", family="serif", serif="Times New Roman", size=15)
plt.rc("lines", linewidth=3)

# Layout: each experiment type occupies two grid rows -- a thin header row
# (axes hidden, carries the section name) and a tall row with one plot per
# game. The grid is derived from the data so that adding an experiment type or
# a game does not require touching hard-coded sizes; with 3 types and 3 games
# this is the 6 x 3 grid on a 12 x 12 inch figure.
n_experiment_types = len(experiment_data)
n_games = len(games)
fig, axes = plt.subplots(
	2 * n_experiment_types,
	n_games,
	figsize=(4 * n_games, 4 * n_experiment_types),
	height_ratios=[0.5, 2] * n_experiment_types,
	squeeze=False,  # keep `axes` two-dimensional whatever the grid shape
)

for idx_y_axis, experiment_type in enumerate(experiment_data):
	# Header row: hide every axis, then write the section name in the middle column.
	for header_ax in axes[2 * idx_y_axis]:
		header_ax.set_axis_off()
	axes[2 * idx_y_axis, n_games // 2].text(
		0.5,
		0.1,
		NAMES[experiment_type]["name"],
		horizontalalignment='center',
		verticalalignment='center',
		fontsize=18,
	)

	for idx_x_axis, game in enumerate(games):
		ax = axes[2 * idx_y_axis + 1, idx_x_axis]

		# One IQM curve with its confidence band per experiment.
		for experiment, data_per_game in experiment_data[experiment_type].items():
			curve = data_per_game[game]
			ax.plot(
				curve["x_values"],
				curve["iqm"],
				color=COLORS[experiment_type][experiment],
				linestyle=STYLES[experiment_type][experiment],
				zorder=ORDERS[experiment_type][experiment],
			)
			ax.fill_between(
				curve["x_values"],
				curve["confidence"][0],
				curve["confidence"][1],
				color=COLORS[experiment_type][experiment],
				alpha=0.3,
				zorder=ORDERS[experiment_type][experiment],
			)

		# Axis decoration is independent of the experiment, so it is applied
		# once per axis rather than once per plotted curve.
		ax.set_title(game, fontsize=15)
		ax.grid(zorder=0)

		ax.set_xlabel("Env Steps (in millions)")
		ax.set_xticks(range(0, 41, 10))
		if idx_x_axis == 0:
			# Only the left-most plot of each row carries the y label.
			ax.set_ylabel("IQM Human Norm Score")

In [None]:
# Add one legend per experiment type, placed above the middle plot of its row.
for idx_y_axis, experiment_type in enumerate(experiment_data):
	experiments = list(experiment_data[experiment_type])
	legend_labels = [NAMES[experiment_type][experiment] for experiment in experiments]
	# Empty proxy lines: they only carry the colour and line style of each curve.
	legend_handles = [
		Line2D(
			[],
			[],
			color=COLORS[experiment_type][experiment],
			linestyle=STYLES[experiment_type][experiment],
		)
		for experiment in experiments
	]

	middle_plot = axes[2 * idx_y_axis + 1, 1]
	middle_plot.legend(
		legend_handles,
		legend_labels,
		ncols=4,
		frameon=False,
		loc="center",
		bbox_to_anchor=(0.5, 1.22),
	)

# Tighten the spacing between subplots, write the figure to disk, and leave
# `fig` as the last expression so the notebook renders it.
fig.subplots_adjust(wspace=0.2, hspace=0.55)
fig.savefig(f"exp_output/performance.pdf", bbox_inches='tight')
fig