In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
from experiments.atari import EXPERIMENTED_GAME_SHORT, EXPERIMENTED_GAME_MEDIUM, EXPERIMENTED_GAME


# --- What to load -----------------------------------------------------------
# Experiment names are the folder names directly below `figures/`.
experiments_dqn = ["ut30_uh6000_episode", "ut30_uh6000"]
experiments_iqn = ["baseline_8000"]
# Read by the loading cell when show["rem"] is enabled. It used to be missing,
# so switching that flag on raised a NameError. Fill in the REM experiment
# folder names before enabling show["rem"].
experiments_rem = []
experiments = experiments_dqn + experiments_iqn

# Values of k for the k-indexed variants: k becomes the "_{k}" suffix of the
# score key and the "{k}_" prefix of the loaded file name.
ks_idqn = [5]
ks_iiqn = [6]
ks_iiqn_weak = []

# Published baselines handed to get_baselines_scores.
baselines = []  # ['DQN (Nature)', 'Quantile (JAX)_dopamine', 'DQN (Adam)', 'C51', 'REM', 'Rainbow', 'IQN', 'M-IQN']
baselines_performance_profile = []

games = ["Asterix"]
seeds = [11, 21, 12, 22, 13]

# --- What to plot -----------------------------------------------------------
# Epoch indices at which curves are evaluated: every 10th epoch plus the last one.
selected_epochs = np.array(list(np.arange(0, 200, 10)) + [199])
# Thresholds used as x axis of the performance profile.
taus = np.linspace(0.0, 8.0, 40)
# Switches for the optional curves/cells; "std" toggles the shaded confidence band.
show = {"dqn": False, "iqn": False, "rem": False, "head_std": False, "approximation_error": False, "std": True}

### Extract data

In [16]:
from idqn.utils.baselines_scores import get_baselines_scores

def collect_data(scores, algorithm, experiments, idqn_key_k="", idqn_key_path=""):
    """Fill `scores` in place with the `J_{seed}.npy` arrays of one algorithm.

    For each experiment, `scores[f"{algorithm}_{experiment}{idqn_key_k}"]` becomes a
    dictionary mapping every game of the notebook-level `games` list to an array of
    shape (200, len(seeds)); column i is loaded from
    `figures/{experiment}/{game}/{algorithm}/{idqn_key_path}J_{seed}.npy` for the
    i-th entry of the notebook-level `seeds` list.

    Args:
        scores: dictionary that receives the loaded arrays (mutated, nothing is returned).
        algorithm: name of the algorithm sub-folder, also the prefix of the key.
        experiments: iterable of experiment folder names.
        idqn_key_k: suffix appended to the key (e.g. "_5").
        idqn_key_path: prefix prepended to the file name (e.g. "5_").
    """
    for experiment in experiments:
        per_game = {}
        scores[f"{algorithm}_{experiment}{idqn_key_k}"] = per_game
        for game in games:
            folder = f"figures/{experiment}/{game}/{algorithm}"
            # Start from NaN so that a column that was never written stays visible.
            per_seed = np.full((200, len(seeds)), np.nan)
            per_game[game] = per_seed
            for column, seed in enumerate(seeds):
                per_seed[:, column] = np.load(f"{folder}/{idqn_key_path}J_{seed}.npy")

# Every family gets its own dictionary, filled in place by collect_data. A
# dictionary only exists when its switch (or its list of k values) enables it.
if show["dqn"]:
    dqn_scores = {}
    collect_data(dqn_scores, "DQN", experiments_dqn)

if show["iqn"]:
    iqn_scores = {}
    collect_data(iqn_scores, "IQN", experiments_iqn)

if show["rem"]:
    # experiments_rem has to be provided by the configuration cell.
    rem_scores = {}
    collect_data(rem_scores, "REM", experiments_rem)

# k-indexed variants: the key ends with "_{k}" and the file name starts with "{k}_".
if ks_idqn:
    idqn_scores = {}
    for k in ks_idqn:
        collect_data(idqn_scores, "iDQN", experiments_dqn, idqn_key_k=f"_{k}", idqn_key_path=f"{k}_")

if ks_iiqn:
    iiqn_scores = {}
    for k in ks_iiqn:
        # iiqn_scores currently holds the "BootstrapDQN" runs; the "iIQN" variant is kept for reference:
        # collect_data(iiqn_scores, "iIQN", experiments_iqn, f"_{k}", f"{k}_")
        collect_data(iiqn_scores, "BootstrapDQN", experiments_iqn, idqn_key_k=f"_{k}", idqn_key_path=f"{k}_")

if ks_iiqn_weak:
    iiqn_weak_scores = {}
    for k in ks_iiqn_weak:
        collect_data(iiqn_weak_scores, "iIQN_weak", experiments_iqn, idqn_key_k=f"_{k}", idqn_key_path=f"{k}_")

# Baseline scores are always loaded, once per list of baseline names.
baselines_scores = get_baselines_scores(baselines, games)
baselines_performance_profile_scores = get_baselines_scores(baselines_performance_profile, games)

### IQM vs iterations & performance profile

In [None]:
import matplotlib.pyplot as plt
from idqn.utils.process_scores import compute_iqm_and_confidence_interval
from experiments.atari import COLORS, LABEL, ORDER

# Font size guide: 21 for the main paper, 18 for the table of figures and 15 for big figures.
plt.rc("font", size=28, family="serif", serif="Times New Roman")
plt.rc("lines", linewidth=3)

# Curves are drawn on `ax`; the legend is rendered on its own figure from the
# handles collected in `lines`.
fig = plt.figure("Main figure")
ax = fig.add_subplot(1, 1, 1)
fig_legend = plt.figure("Legend figure")
lines = []


def plot_iqm(scores, normalize=True, **kwargs):
    """Draw one curve per entry of `scores` on the notebook-level `ax`.

    Each value of `scores` is passed, together with `selected_epochs` and
    `normalize`, to compute_iqm_and_confidence_interval. The first result is
    plotted against `selected_epochs + 1`; rows 0 and 1 of the second result bound
    the shaded band, which is drawn only when show["std"] is set.

    Args:
        scores: dictionary keyed by experiment; every key must exist in LABEL, COLORS and ORDER.
        normalize: forwarded unchanged to compute_iqm_and_confidence_interval.
        **kwargs: extra line properties forwarded to `ax.plot` (e.g. linestyle).

    Side effects:
        Appends the created line to the notebook-level `lines` list, used for the legend.
    """
    for experiment, experiment_scores in scores.items():
        values, band = compute_iqm_and_confidence_interval(experiment_scores, selected_epochs, normalize)
        x_values = selected_epochs + 1
        curve = ax.plot(
            x_values,
            values,
            label=LABEL[experiment],
            color=COLORS[experiment],
            zorder=ORDER[experiment],
            **kwargs,
        )[0]
        lines.append(curve)
        if not show["std"]:
            continue
        ax.fill_between(x_values, band[0, :], band[1, :], color=COLORS[experiment], zorder=ORDER[experiment], alpha=0.3)


# plot_iqm({"iDQN_ut30_uh6000_5": idqn_scores["iDQN_ut30_uh6000_5"]}, linestyle="dashed")
# plot_iqm({"iDQN_ut30_uh6000_3_steps_5": idqn_scores["iDQN_ut30_uh6000_3_steps_5"]}, linestyle="dashed")
# plot_iqm({"iIQN_ut30_uh6000_3_steps_3": iiqn_scores["iIQN_ut30_uh6000_3_steps_3"]}, linestyle="dashed")
# plot_iqm({"DQN (Adam)": baselines_scores["DQN (Adam)"]})
# plot_iqm({"Rainbow": baselines_scores["Rainbow"]})
# plot_iqm({"IQN": baselines_scores["IQN"]})
            

# The call order below is also the order of the legend entries.
if show["dqn"]:
    plot_iqm(dqn_scores)

if show["iqn"]:
    plot_iqm(iqn_scores)

plot_iqm(baselines_scores)

if show["rem"]:
    plot_iqm(rem_scores)

if ks_idqn:
    plot_iqm(idqn_scores, linestyle="dashed")

if ks_iiqn:
    # Drawn with a solid line here; the dashed variant is kept for reference:
    # plot_iqm(iiqn_scores, linestyle="dashed")
    plot_iqm(iiqn_scores)

if ks_iiqn_weak:
    plot_iqm(iiqn_weak_scores)

ax.grid(zorder=0)
# ax.set_xticklabels([])
ax.set(xlabel="Num Frames (in millions)", ylabel="IQM Human Norm Score")

# ax.scatter([50], [0.85], s=170, c="red", zorder=100)
# ax.scatter([200], [1.2], s=170, c="red", zorder=100)
# ax.scatter([25], [0.68], s=170, c="orange", zorder=100)
# ax.scatter([100], [0.75], s=170, c="orange", zorder=100)


# The legend is drawn on its own figure from the handles collected in `lines`.
labels = [line.get_label() for line in lines]
if len(lines) < 4:
    fig_legend.legend(lines, labels, ncols=2, frameon=False)
else:
    import itertools

    # Two rows, hence ceil(n / 2) columns. True division is required: with `//`
    # the value is floored before np.ceil sees it, so an odd number of curves
    # got one column too few and spilled into a third row.
    ncols = int(np.ceil(len(lines) / 2))

    def flip(items):
        """Reorder `items` so that a legend filled column by column reads row by row."""
        return list(itertools.chain(*[items[i::ncols] for i in range(ncols)]))

    fig_legend.legend(flip(lines), flip(labels), ncols=ncols, frameon=False)

# Nothing is saved (and no title is set) when no experiment is configured.
if len(experiments) > 0:
    # Output goes below the folder of the first experiment; a single game gets its own sub-folder.
    if len(games) == 1:
        ax.set_title(games[0])
        output_stem = f"figures/{experiments[0]}/{games[0]}/J"
    else:
        ax.set_title("Atari")
        output_stem = f"figures/{experiments[0]}/J"
    fig.savefig(f"{output_stem}.pdf", bbox_inches="tight")
    _ = fig_legend.savefig(f"{output_stem}_legend.pdf", bbox_inches="tight")

In [None]:
import matplotlib.pyplot as plt
from idqn.utils.process_scores import compute_optimality_gap_and_confidence_interval
from experiments.atari import COLORS, LABEL, ORDER

# Font size guide: 21 for the main paper, 18 for the table of figures and 15 for big figures.
plt.rc("font", size=28, family="serif", serif="Times New Roman")
plt.rc("lines", linewidth=3)

# Curves are drawn on `ax`; the legend is rendered on its own figure from the
# handles collected in `lines`.
fig = plt.figure("Main figure")
ax = fig.add_subplot(1, 1, 1)
fig_legend = plt.figure("Legend figure")
lines = []


def plot_optimality_gap(scores, normalize=True, **kwargs):
    """Draw one optimality-gap curve per entry of `scores` on the notebook-level `ax`.

    Each value of `scores` is passed, together with `selected_epochs` and
    `normalize`, to compute_optimality_gap_and_confidence_interval. The first
    result is plotted against `selected_epochs + 1`; rows 0 and 1 of the second
    result bound the shaded band, which is drawn only when show["std"] is set.

    Args:
        scores: dictionary keyed by experiment; every key must exist in LABEL, COLORS and ORDER.
        normalize: forwarded unchanged to compute_optimality_gap_and_confidence_interval.
        **kwargs: extra line properties forwarded to `ax.plot` (e.g. linestyle).

    Side effects:
        Appends the created line to the notebook-level `lines` list, used for the legend.
    """
    for experiment, experiment_scores in scores.items():
        gaps, band = compute_optimality_gap_and_confidence_interval(experiment_scores, selected_epochs, normalize)
        x_values = selected_epochs + 1
        curve = ax.plot(
            x_values,
            gaps,
            label=LABEL[experiment],
            color=COLORS[experiment],
            zorder=ORDER[experiment],
            **kwargs,
        )[0]
        lines.append(curve)
        if not show["std"]:
            continue
        ax.fill_between(x_values, band[0, :], band[1, :], color=COLORS[experiment], zorder=ORDER[experiment], alpha=0.3)


# plot_iqm({"iDQN_ut30_uh6000_5": idqn_scores["iDQN_ut30_uh6000_5"]}, linestyle="dashed")
# plot_iqm({"iDQN_ut30_uh6000_3_steps_5": idqn_scores["iDQN_ut30_uh6000_3_steps_5"]}, linestyle="dashed")
# plot_iqm({"iIQN_ut30_uh6000_3_steps_3": iiqn_scores["iIQN_ut30_uh6000_3_steps_3"]}, linestyle="dashed")
# plot_iqm({"DQN (Adam)": baselines_scores["DQN (Adam)"]})
# plot_iqm({"Rainbow": baselines_scores["Rainbow"]})
# plot_iqm({"IQN": baselines_scores["IQN"]})
            

# The call order below is also the order of the legend entries.
if show["dqn"]:
    plot_optimality_gap(dqn_scores)

if show["iqn"]:
    plot_optimality_gap(iqn_scores)

plot_optimality_gap(baselines_scores)

if show["rem"]:
    plot_optimality_gap(rem_scores)

# Both k-indexed families are drawn dashed in this figure.
if ks_idqn:
    plot_optimality_gap(idqn_scores, linestyle="dashed")

if ks_iiqn:
    plot_optimality_gap(iiqn_scores, linestyle="dashed")

if ks_iiqn_weak:
    plot_optimality_gap(iiqn_weak_scores)

ax.grid(zorder=0)
# ax.set_xticklabels([])
ax.set(xlabel="Num Frames (in millions)", ylabel="Optimality Gap")

# ax.scatter([50], [0.85], s=170, c="red", zorder=100)
# ax.scatter([200], [1.2], s=170, c="red", zorder=100)
# ax.scatter([25], [0.68], s=170, c="orange", zorder=100)
# ax.scatter([100], [0.75], s=170, c="orange", zorder=100)


# The legend is drawn on its own figure from the handles collected in `lines`.
labels = [line.get_label() for line in lines]
if len(lines) < 4:
    fig_legend.legend(lines, labels, ncols=2, frameon=False)
else:
    import itertools

    # Two rows, hence ceil(n / 2) columns. True division is required: with `//`
    # the value is floored before np.ceil sees it, so an odd number of curves
    # got one column too few.
    ncols = int(np.ceil(len(lines) / 2))

    def flip(items):
        """Reorder `items` so that a legend filled column by column reads row by row."""
        return list(itertools.chain(*[items[i::ncols] for i in range(ncols)]))

    # The legend must use the same column count that `flip` reordered for; a
    # hard-coded 4 scrambled the entry order whenever ncols != 4.
    fig_legend.legend(flip(lines), flip(labels), ncols=ncols, frameon=False)

# Nothing is saved (and no title is set) when no experiment is configured.
if len(experiments) > 0:
    # Output goes below the folder of the first experiment; a single game gets its own sub-folder.
    if len(games) == 1:
        ax.set_title(games[0])
        output_stem = f"figures/{experiments[0]}/{games[0]}/optimality_gap"
    else:
        ax.set_title("Atari")
        output_stem = f"figures/{experiments[0]}/optimality_gap"
    fig.savefig(f"{output_stem}.pdf", bbox_inches="tight")
    _ = fig_legend.savefig(f"{output_stem}_legend.pdf", bbox_inches="tight")

In [None]:
# Time per epoch of the base algorithm, in the unit of the numerator (divided by
# 60 below to print hours). 1.31 and 1.52 are the factors applied to the "i-"
# variants. NOTE(review): the numerators look like measured durations in minutes
# (7 h 31 min, 18 h 23 min) and 106 / 42 like epoch counts of a mixed run --
# confirm against the run logs.
dqn_time_per_epoch = (7*60+31) / (200 - 106 * 1.31)
iqn_time_per_epoch = (18*60+23) / (200 - 42 * 1.52)

print("DQN training time", np.around((dqn_time_per_epoch * 200) / 60, 2), "h. Per epoch", np.around(dqn_time_per_epoch / 60, 4), "h.")
print("i-DQN training time", np.around((dqn_time_per_epoch * 200) / 60 * 1.31, 2), "h. Per epoch", np.around(dqn_time_per_epoch * 1.31 / 60, 4), "h.")
print("IQN training time", np.around((iqn_time_per_epoch * 200) / 60, 2), "h. Per epoch", np.around(iqn_time_per_epoch / 60, 4), "h.")
print("i-IQN training time", np.around((iqn_time_per_epoch * 200) / 60 * 1.52, 2), "h. Per epoch", np.around(iqn_time_per_epoch * 1.52 / 60, 4), "h.")

In [20]:
# min_step_overlap_sac = np.min(np.arange(aggregated_metrics["iSAC"]["x_values"].shape[0])[aggregated_metrics["iSAC"]["values"] > baselines_scores["DQN (Adam)"]["intervals"][0][-1]])
# 1e6 * (2 * 60 + 1) / 50000 - aggregated_metrics["iSAC"]["x_values"][min_step_overlap_sac] * (2 * 60 + 13) / 50000

In [None]:
import matplotlib.pyplot as plt
from idqn.utils.process_scores import compute_iqm_and_confidence_interval
from experiments.atari import COLORS, LABEL, ORDER

# Font size guide: 21 for the main paper, 18 for the table of figures and 15 for big figures.
plt.rc("font", size=28, family="serif", serif="Times New Roman")
plt.rc("lines", linewidth=3)
fig = plt.figure("Main figure", figsize=(6.4, 4.8))
ax = fig.add_subplot(111)

# One row per curve: (experiment key, score table holding it, hours per epoch, extra line properties).
# The hours per epoch are the values printed by the training-time cell:
# DQN training time 24.59 h. Per epoch 0.1229 h.
# i-DQN training time 32.21 h. Per epoch 0.1611 h.
# IQN training time 27.0 h. Per epoch 0.135 h.
# i-IQN training time 41.04 h. Per epoch 0.2052 h.
timed_curves = [
    ("DQN (Adam)", baselines_scores, 0.1229, {}),
    ("IQN", baselines_scores, 0.135, {}),
    ("iDQN_ut30_uh6000_5", idqn_scores, 0.1611, {"linestyle": "dashed"}),
    ("iIQN_ut30_uh6000_3_steps_3", iiqn_scores, 0.2052, {"linestyle": "dashed"}),
]

for experiment, score_table, hours_per_epoch, line_style in timed_curves:
    iqms, iqms_confidence_interval = compute_iqm_and_confidence_interval(score_table[experiment], selected_epochs, True)
    # Rescale the epoch axis into wall-clock hours for this algorithm.
    clock_time = (selected_epochs + 1) * hours_per_epoch
    ax.plot(clock_time, iqms, label=LABEL[experiment], color=COLORS[experiment], zorder=ORDER[experiment], **line_style)
    ax.fill_between(clock_time, iqms_confidence_interval[0, :], iqms_confidence_interval[1, :], color=COLORS[experiment], zorder=ORDER[experiment], alpha=0.3)

ax.grid(zorder=0)

# Ticks sit at epochs 1, 50, 100, 150 and 200 on the time scale of the 0.2052 h/epoch curve,
# labelled with the rounded number of hours.
xticks = np.array([1] + list(range(50, 201, 50))) * 0.2052
ax.set_xticks(xticks)
ax.set_xticklabels(np.around(xticks).astype(int))
ax.set(xlabel="Clock time (in hours)", ylabel="IQM Human Norm Score")

ax.set_title("Atari")
_ = fig.savefig(f"figures/{experiments[0]}/J_time.pdf", bbox_inches="tight")

In [None]:
# The performance profile is only drawn when several games are configured.
if len(games) > 1:
    from idqn.utils.process_scores import compute_performance_profile_and_confidence_interval

    plt.rc("font", size=28, family="serif", serif="Times New Roman")
    plt.rc("lines", linewidth=3)
    fig = plt.figure("Main figure")
    ax = fig.add_subplot(1, 1, 1)
    fig_legend = plt.figure("Legend figure")
    lines = []

    def plot_performance_profile(scores, **kwargs):
        """Draw one performance-profile curve per entry of `scores` on `ax`.

        Each value of `scores` is passed with `taus` to
        compute_performance_profile_and_confidence_interval. The first result is
        plotted against `taus`; rows 0 and 1 of the second result bound the shaded
        band, drawn only when show["std"] is set. Every created line is appended to
        `lines`. Keys must exist in LABEL, COLORS and ORDER; `kwargs` go to `ax.plot`.
        """
        for experiment, experiment_scores in scores.items():
            profile, band = compute_performance_profile_and_confidence_interval(experiment_scores, taus)
            curve = ax.plot(taus, profile, label=LABEL[experiment], color=COLORS[experiment], zorder=ORDER[experiment], **kwargs)[0]
            lines.append(curve)
            if not show["std"]:
                continue
            ax.fill_between(taus, band[0, :], band[1, :], color=COLORS[experiment], zorder=ORDER[experiment], alpha=0.3)

    # Hand-picked curves, in legend order: (experiment key, score table holding it, extra line properties).
    selected_curves = [
        ("iDQN_ut30_uh6000_5", idqn_scores, {"linestyle": "dashed"}),
        ("DQN (Adam)", baselines_performance_profile_scores, {}),
        ("iIQN_ut30_uh6000_3_steps_3", iiqn_scores, {"linestyle": "dashed"}),
        ("IQN", baselines_performance_profile_scores, {}),
    ]
    for selected_key, score_table, line_style in selected_curves:
        plot_performance_profile({selected_key: score_table[selected_key]}, **line_style)

    # Alternative kept for reference: plot every loaded table instead of the hand-picked curves.
    # if show["dqn"]:
    #     plot_performance_profile(dqn_scores)
    # if show["iqn"]:
    #     plot_performance_profile(iqn_scores)
    # if show["rem"]:
    #     plot_performance_profile(rem_scores)
    # if len(ks_idqn) > 0:
    #     plot_performance_profile(idqn_scores)
    # if len(ks_iiqn) > 0:
    #     plot_performance_profile(iiqn_scores)
    # if len(ks_iiqn_weak) > 0:
    #     plot_performance_profile(iiqn_weak_scores)
    # plot_performance_profile(baselines_performance_profile_scores)

    ax.grid(zorder=0)
    ax.set_title("Atari")
    ax.set_xlabel(r"Human Norm Score $(\zeta)$")
    ax.set_ylabel("Fraction of runs \n" + r"with score $> \zeta$")
    fig_legend.legend(lines, [line.get_label() for line in lines], ncols=2, frameon=False)

    # Saved below the folder of the first experiment, provided at least one experiment is configured.
    if experiments_dqn + experiments_iqn:
        fig.savefig(f"figures/{experiments[0]}/P.pdf", bbox_inches="tight")
        _ = fig_legend.savefig(f"figures/{experiments[0]}/P_legend.pdf", bbox_inches="tight")

In [None]:
# Display what get_baselines_scores returned for `baselines_performance_profile`.
baselines_performance_profile_scores

### Head std

In [None]:
# Optional figure: plots the iDQN `{k}_S_{seed}.npy` arrays, one curve per (experiment, k).
if show["head_std"]:
    plt.rc("font", size=28, family="serif", serif="Times New Roman")
    plt.rc("lines", linewidth=3)
    fig = plt.figure("Main figure")
    ax = fig.add_subplot(1, 1, 1)
    fig_legend = plt.figure("Legend figure")
    lines = []

    # Same layout as the score tables: key -> game -> array of shape (200, len(seeds)).
    head_stds = {}
    for experiment in experiments:
        for k in ks_idqn:
            head_std_key = f"iDQN_{experiment}_{k}"
            head_stds[head_std_key] = {}
            for game in games:
                # Start from NaN so that a column that was never written stays visible.
                per_seed = np.full((200, len(seeds)), np.nan)
                head_stds[head_std_key][game] = per_seed
                for idx_seed, seed in enumerate(seeds):
                    per_seed[:, idx_seed] = np.load(f"figures/{experiment}/{game}/iDQN/{k}_S_{seed}.npy")

    # plot_iqm draws on the `ax` and `lines` that were just rebound above.
    plot_iqm(head_stds, normalize=False, linestyle="dashed")

    ax.grid(zorder=0)
    ax.set_xlabel("Num Frames (in millions)")
    # ax.set_ylabel("IQM inter-head std")
    fig_legend.legend(lines, [line.get_label() for line in lines], ncols=len(lines))

    # The figure is titled and saved only for a single game with at least one experiment.
    if len(games) == 1 and len(experiments) > 0:
        output_stem = f"figures/{experiments[0]}/{games[0]}/S"
        ax.set_title(games[0])
        fig.savefig(f"{output_stem}.pdf", bbox_inches="tight")
        _ = fig_legend.savefig(f"{output_stem}_legend.pdf", bbox_inches="tight")

### Approximation error

In [None]:
# Optional figure: plots the iDQN `{k}_A_{seed}.npy` arrays, one curve per (experiment, k).
if show["approximation_error"]:
    plt.rc("font", size=15)
    plt.rc("lines", linewidth=3)
    fig = plt.figure("Main figure")
    ax = fig.add_subplot(1, 1, 1)
    fig_legend = plt.figure("Legend figure")
    lines = []

    # Same layout as the score tables: key -> game -> array of shape (200, len(seeds)).
    # Unlike the other tables, the keys carry no algorithm prefix.
    approximation_errors = {}
    for experiment in experiments:
        for k in ks_idqn:
            error_key = f"{experiment}_{k}"
            approximation_errors[error_key] = {}
            for game in games:
                # Start from NaN so that a column that was never written stays visible.
                per_seed = np.full((200, len(seeds)), np.nan)
                approximation_errors[error_key][game] = per_seed
                for idx_seed, seed in enumerate(seeds):
                    per_seed[:, idx_seed] = np.load(f"figures/{experiment}/{game}/iDQN/{k}_A_{seed}.npy")

    # plot_iqm draws on the `ax` and `lines` that were just rebound above.
    # NOTE(review): called with the default normalize=True, whereas the head-std
    # cell passes normalize=False -- confirm normalization is intended here.
    plot_iqm(approximation_errors)

    ax.grid(zorder=0)
    ax.set(xlabel="Number of Frames (in millions)", ylabel="IQM approximation error")
    fig_legend.legend(lines, [line.get_label() for line in lines], ncols=len(lines))

    # The figure is titled and saved only for a single game with at least one experiment.
    if len(games) == 1 and len(experiments) > 0:
        output_stem = f"figures/{experiments[0]}/{games[0]}/A"
        ax.set_title(games[0])
        fig.savefig(f"{output_stem}.pdf", bbox_inches="tight")
        _ = fig_legend.savefig(f"{output_stem}_legend.pdf", bbox_inches="tight")