In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
from experiments.atari import EXPERIMENTED_GAME


# --- What to compare ---------------------------------------------------------
# Experiment names: they select the `figures/<experiment>/...` folders that the
# cells below read scores from and save figures to.
experiments = ["ut30_uh6000"]
games = EXPERIMENTED_GAME
ks = [5]  # values of `k` used in the iDQN file names `<k>_J_<seed>.npy`
seeds = [11, 21, 12, 22, 13]

# Baselines drawn on the IQM figure. Another choice that has been used here:
# ['DQN (Nature)', 'Quantile (JAX)_dopamine', 'DQN (Adam)', 'C51', 'REM', 'Rainbow', 'IQN', 'M-IQN']
baselines = ["DQN (Nature)", "DQN (Adam)", "C51", "REM"]
# Baselines drawn on the performance-profile figure,
# e.g. ['DQN (Nature)', 'DQN (Adam)', 'C51', 'REM'].
baselines_performance_profile = []

# --- How to plot -------------------------------------------------------------
# When True, the band between the two confidence-interval rows is shaded.
plot_std = True
# Indices handed to the IQM computation; curves are drawn at `selected_epochs + 1`.
# Sparse alternative: np.array([1, 10, 25, 50, 75, 100, 125, 150, 175, 200]) - 1
selected_epochs = np.arange(0, 200)
# Thresholds of the performance profile: 0.0 to 8.0 in steps of 0.1.
taus = np.linspace(0.0, 8.0, num=81)

# --- Optional sections -------------------------------------------------------
add_dqn = False  # also load and plot the DQN runs of each experiment
add_head_std = False  # run the "Head std" cell
add_approximation_error = False  # run the "Approximation error" cell

### Extract data

In [None]:
from idqn.utils.baselines_scores import get_baselines_scores

# Scores are stored as one .npy file per (experiment, game, seed). Each file is
# copied into one column of a NaN-initialised (200, n_seeds) array
# (200 rows: presumably one per epoch -- TODO confirm).

# DQN runs: dqn_scores[experiment][game] -> array of shape (200, len(seeds)).
if add_dqn:
    dqn_scores = {}
    for experiment in experiments:
        per_game = dqn_scores[experiment] = {}
        for game in games:
            runs = per_game[game] = np.full((200, len(seeds)), np.nan)
            for column, seed in enumerate(seeds):
                runs[:, column] = np.load(f"figures/{experiment}/{game}/DQN/J_{seed}.npy")


# iDQN runs: idqn_scores["<experiment>_<k>"][game] -> array of shape (200, len(seeds)).
idqn_scores = {}
for experiment in experiments:
    for k in ks:
        per_game = idqn_scores[f"{experiment}_{k}"] = {}
        for game in games:
            runs = per_game[game] = np.full((200, len(seeds)), np.nan)
            for column, seed in enumerate(seeds):
                runs[:, column] = np.load(f"figures/{experiment}/{game}/iDQN/{k}_J_{seed}.npy")

# Reference scores of the baselines listed in `baselines`, indexed by baseline name below.
baselines_scores = get_baselines_scores(baselines, games)

### IQM vs iterations & performance profile

In [None]:
import matplotlib.pyplot as plt
from idqn.utils.process_scores import compute_iqm_and_confidence_interval
from experiments.atari import COLORS, LABEL, ORDER


def _plot_iqm_curve(ax, scores, key, label):
    """Draw the IQM curve of `scores` on `ax` and return the plotted line.

    `key` selects the colour and z-order from COLORS / ORDER. When `plot_std`
    is set, the band between the two rows of the confidence interval is shaded.
    """
    iqms, iqms_confidence_interval = compute_iqm_and_confidence_interval(scores, selected_epochs)
    x = selected_epochs + 1
    line = ax.plot(x, iqms, label=label, color=COLORS[key], zorder=ORDER[key])[0]
    if plot_std:
        ax.fill_between(x, iqms_confidence_interval[0, :], iqms_confidence_interval[1, :], color=COLORS[key], alpha=0.3, zorder=ORDER[key])
    return line


plt.rc("font", size=15)
plt.rc("lines", linewidth=3)
# clear=True: if a figure with this label is still open (cell re-run, or a backend
# that keeps figures alive between cells), start from a blank figure instead of
# stacking new axes on top of the previous plot.
fig = plt.figure("Main figure", clear=True)
ax = fig.add_subplot(111)
fig_legend = plt.figure("Legend figure", clear=True)
lines = []

# iDQN curves, one per (experiment, k).
for experiment in experiments:
    for k in ks:
        key = f"{experiment}_{k}"
        lines.append(_plot_iqm_curve(ax, idqn_scores[key], key, f"iDQN {LABEL[key]}"))

# Optional DQN curves, one per experiment.
if add_dqn:
    for experiment in experiments:
        lines.append(_plot_iqm_curve(ax, dqn_scores[experiment], experiment, f"DQN {LABEL[experiment]}"))

# Baseline curves.
for baseline in baselines:
    lines.append(_plot_iqm_curve(ax, baselines_scores[baseline], baseline, LABEL[baseline]))

ax.grid(zorder=0)
ax.set_xlabel("Number of Frames (in millions)")
ax.set_ylabel("IQM Human Normalized Score")

# The legend lives in its own figure. It is skipped when nothing was plotted,
# because ncols=0 is not a usable column count.
labels = [line.get_label() for line in lines]
if 0 < len(lines) < 6:
    fig_legend.legend(lines, labels, ncols=len(lines))
elif lines:
    import itertools
    ncols = int(np.ceil(len(lines) / 2))
    def flip(items):
        """Reorder `items` so a legend filled column by column reads row by row."""
        return itertools.chain(*[items[i::ncols] for i in range(ncols)])
    fig_legend.legend(list(flip(lines)), list(flip(labels)), ncols=ncols)

# Figures are saved under the first experiment's folder; a single-game run gets
# the game name as title and is saved inside that game's sub-folder.
if len(experiments) > 0:
    if len(games) == 1:
        ax.set_title(games[0])
        output_dir = f"figures/{experiments[0]}/{games[0]}"
    else:
        output_dir = f"figures/{experiments[0]}"
    fig.savefig(f"{output_dir}/J.pdf", bbox_inches="tight")
    fig_legend.savefig(f"{output_dir}/J_legend.pdf", bbox_inches="tight")

In [None]:
import matplotlib.pyplot as plt
from idqn.utils.process_scores import compute_performance_profile_and_confidence_interval
from experiments.atari import COLORS, LABEL, ORDER


# Performance profile: for each threshold in `taus`, the profile value returned by
# compute_performance_profile_and_confidence_interval. Only drawn when several
# games are selected.
if len(games) > 1:
    plt.rc("font", size=15)
    plt.rc("lines", linewidth=3)
    # clear=True: if a figure with this label is still open (cell re-run, or a
    # backend that keeps figures alive between cells), start from a blank figure
    # instead of drawing on top of a previous plot.
    fig = plt.figure("Main figure", clear=True)
    ax = fig.add_subplot(111)
    fig_legend = plt.figure("Legend figure", clear=True)
    lines = []

    # iDQN profiles, one per (experiment, k).
    for experiment in experiments:
        for k in ks:
            key = f"{experiment}_{k}"
            performance_profile, performance_profile_confidence_interval = compute_performance_profile_and_confidence_interval(idqn_scores[key], taus)
            lines.append(ax.plot(taus, performance_profile, label=f"iDQN {LABEL[key]}", color=COLORS[key], zorder=ORDER[key])[0])
            if plot_std:
                ax.fill_between(taus, performance_profile_confidence_interval[0, :], performance_profile_confidence_interval[1, :], color=COLORS[key], zorder=ORDER[key], alpha=0.3)

    # Baseline profiles.
    # NOTE(review): baselines_scores is built from `baselines`; a name listed only in
    # baselines_performance_profile would presumably be missing here -- confirm.
    for baseline in baselines_performance_profile:
        performance_profile, performance_profile_confidence_interval = compute_performance_profile_and_confidence_interval(baselines_scores[baseline], taus)
        lines.append(ax.plot(taus, performance_profile, label=LABEL.get(baseline), color=COLORS[baseline], zorder=ORDER[baseline])[0])
        if plot_std:
            ax.fill_between(taus, performance_profile_confidence_interval[0, :], performance_profile_confidence_interval[1, :], color=COLORS[baseline], zorder=ORDER[baseline], alpha=0.3)

    ax.grid(zorder=0)
    ax.set_xlabel(r"Human Normalized Score $(\tau)$")
    ax.set_ylabel(r"Fraction of runs with score $> \tau$")
    # Single-row legend in its own figure; skipped when nothing was plotted,
    # because ncols=0 is not a usable column count.
    if lines:
        fig_legend.legend(lines, [line.get_label() for line in lines], ncols=len(lines))
    # Saved under the first experiment's folder.
    if len(experiments) > 0:
        fig.savefig(f"figures/{experiments[0]}/P.pdf", bbox_inches="tight")
        fig_legend.savefig(f"figures/{experiments[0]}/P_legend.pdf", bbox_inches="tight")

### Head std

In [None]:
import matplotlib.pyplot as plt
from idqn.utils.process_scores import compute_iqm_and_confidence_interval
from experiments.atari import COLORS, LABEL


# IQM of the values stored in the `<k>_S_<seed>.npy` files (plotted as
# "inter-head standard deviation"), one curve per (experiment, k).
if add_head_std:
    plt.rc("font", size=15)
    plt.rc("lines", linewidth=3)
    # clear=True: if a figure with this label is still open (cell re-run, or a
    # backend that keeps figures alive between cells), start from a blank figure
    # instead of drawing on top of a previous plot.
    fig = plt.figure("Main figure", clear=True)
    ax = fig.add_subplot(111)
    fig_legend = plt.figure("Legend figure", clear=True)
    lines = []

    # head_stds["<experiment>_<k>"][game] -> NaN-initialised (200, len(seeds)) array,
    # one column per seed file.
    head_stds = {}
    for experiment in experiments:
        for k in ks:
            key = f"{experiment}_{k}"
            head_stds[key] = {}
            for game in games:
                head_stds[key][game] = np.full((200, len(seeds)), np.nan)
                for idx_seed, seed in enumerate(seeds):
                    head_stds[key][game][:, idx_seed] = np.load(f"figures/{experiment}/{game}/iDQN/{k}_S_{seed}.npy")

    for experiment in experiments:
        for k in ks:
            key = f"{experiment}_{k}"
            # normalize=False is passed here, unlike for the score curves.
            iqms, iqms_confidence_interval = compute_iqm_and_confidence_interval(head_stds[key], selected_epochs, normalize=False)
            lines.append(ax.plot(selected_epochs + 1, iqms, label=f"iDQN {LABEL[key]}", color=COLORS[key])[0])
            if plot_std:
                ax.fill_between(selected_epochs + 1, iqms_confidence_interval[0, :], iqms_confidence_interval[1, :], color=COLORS[key], alpha=0.3)

    ax.grid(zorder=0)
    ax.set_xlabel("Number of Frames (in millions)")
    ax.set_ylabel("IQM inter-head standard deviation")
    # Single-row legend in its own figure; skipped when nothing was plotted,
    # because ncols=0 is not a usable column count.
    if lines:
        fig_legend.legend(lines, [line.get_label() for line in lines], ncols=len(lines))
    # The figure is only titled and saved for a single-game selection.
    if len(games) == 1 and len(experiments) > 0:
        ax.set_title(games[0])
        fig.savefig(f"figures/{experiments[0]}/{games[0]}/S.pdf", bbox_inches="tight")
        fig_legend.savefig(f"figures/{experiments[0]}/{games[0]}/S_legend.pdf", bbox_inches="tight")

### Approximation error

In [None]:
import matplotlib.pyplot as plt
from idqn.utils.process_scores import compute_iqm_and_confidence_interval
from experiments.atari import COLORS, LABEL


# IQM of the values stored in the `<k>_A_<seed>.npy` files (plotted as
# "approximation error"), one curve per (experiment, k).
if add_approximation_error:
    plt.rc("font", size=15)
    plt.rc("lines", linewidth=3)
    # clear=True: if a figure with this label is still open (cell re-run, or a
    # backend that keeps figures alive between cells), start from a blank figure
    # instead of drawing on top of a previous plot.
    fig = plt.figure("Main figure", clear=True)
    ax = fig.add_subplot(111)
    fig_legend = plt.figure("Legend figure", clear=True)
    lines = []

    # approximation_errors["<experiment>_<k>"][game] -> NaN-initialised
    # (200, len(seeds)) array, one column per seed file.
    approximation_errors = {}
    for experiment in experiments:
        for k in ks:
            key = f"{experiment}_{k}"
            approximation_errors[key] = {}
            for game in games:
                approximation_errors[key][game] = np.full((200, len(seeds)), np.nan)
                for idx_seed, seed in enumerate(seeds):
                    approximation_errors[key][game][:, idx_seed] = np.load(f"figures/{experiment}/{game}/iDQN/{k}_A_{seed}.npy")

    for experiment in experiments:
        for k in ks:
            key = f"{experiment}_{k}"
            # normalize=False is passed here, unlike for the score curves.
            iqms, iqms_confidence_interval = compute_iqm_and_confidence_interval(approximation_errors[key], selected_epochs, normalize=False)
            # Runs with k <= 1 are labelled "DQN (Adam)" instead of iDQN.
            label = f"iDQN {LABEL[key]}" if k > 1 else "DQN (Adam)"
            lines.append(ax.plot(selected_epochs + 1, iqms, label=label, color=COLORS[key])[0])
            if plot_std:
                ax.fill_between(selected_epochs + 1, iqms_confidence_interval[0, :], iqms_confidence_interval[1, :], color=COLORS[key], alpha=0.3)

    ax.grid(zorder=0)
    ax.set_xlabel("Number of Frames (in millions)")
    ax.set_ylabel("IQM approximation error")
    # Single-row legend in its own figure; skipped when nothing was plotted,
    # because ncols=0 is not a usable column count.
    if lines:
        fig_legend.legend(lines, [line.get_label() for line in lines], ncols=len(lines))
    # The figure is only titled and saved for a single-game selection.
    if len(games) == 1 and len(experiments) > 0:
        ax.set_title(games[0])
        fig.savefig(f"figures/{experiments[0]}/{games[0]}/A.pdf", bbox_inches="tight")
        fig_legend.savefig(f"figures/{experiments[0]}/{games[0]}/A_legend.pdf", bbox_inches="tight")