In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
from experiments.atari import EXPERIMENTED_GAME


# --- Notebook configuration: the cells below read these names. ---

# Experiment folder names; score files are read from figures/<experiment>/<game>/...
experiments = ["std_head"]
# Baselines drawn on the IQM plot (also the list handed to get_baselines_scores).
baselines = []  # e.g. ['IQN', 'Rainbow', 'REM', 'DQN (Adam)']
# Baselines drawn on the performance-profile plot.
baselines_performance_profile = [] # e.g. ['IQN', 'REM', 'DQN (Adam)']
# Games to load; with a single game the plots are titled with it and saved in its folder.
games = ["Breakout"] # alternative: EXPERIMENTED_GAME
# Values of k appearing in the iDQN file names ({k}_J_{seed}.npy, {k}_S_{seed}.npy).
# NOTE(review): the meaning of k is not visible in this notebook - confirm.
ks = [5]
# Run seeds; each seed fills one column of the score arrays.
seeds = [11]
# When True, the confidence interval is shaded around every curve.
plot_std = True
# Epochs at which the IQM is evaluated, written 1-based and shifted to 0-based
# indices here; the plotting cells add 1 back for the x axis.
selected_epochs = np.array([1, 10, 25, 50, 75, 100, 125, 150, 175, 200]) - 1
# Thresholds for the performance profile: 81 evenly spaced values from 0.0 to 8.0.
taus = np.linspace(0.0, 8.0, 81)
# When True, the DQN runs of each experiment are loaded and plotted as well.
add_dqn = False
# When True, the head-std cell at the end of the notebook is executed.
add_head_std = True

### Extract data

In [None]:
from idqn.utils.baselines_scores import get_baselines_scores

# Optional DQN runs: dqn_scores[experiment][game] is a (200, len(seeds)) array,
# NaN-initialised and then filled one column per seed.
if add_dqn:
    dqn_scores = {}
    for experiment in experiments:
        dqn_scores[experiment] = {}
        for game in games:
            game_scores = np.full((200, len(seeds)), np.nan)
            dqn_scores[experiment][game] = game_scores
            for idx_seed, seed in enumerate(seeds):
                # game_scores is the array stored in the dict, so this fills it in place.
                game_scores[:, idx_seed] = np.load(f"figures/{experiment}/{game}/DQN/J_{seed}.npy")


# iDQN runs: idqn_scores["<experiment>_<k>"][game] has the same layout as above.
idqn_scores = {}
for experiment in experiments:
    for k in ks:
        experiment_k = f"{experiment}_{k}"
        idqn_scores[experiment_k] = {}
        for game in games:
            game_scores = np.full((200, len(seeds)), np.nan)
            idqn_scores[experiment_k][game] = game_scores
            for idx_seed, seed in enumerate(seeds):
                game_scores[:, idx_seed] = np.load(f"figures/{experiment}/{game}/iDQN/{k}_J_{seed}.npy")

# Baseline scores for the selected baselines and games (project helper).
baselines_scores = get_baselines_scores(baselines, games)

### IQM vs iterations & performance profile

In [None]:
import matplotlib.pyplot as plt
from idqn.utils.process_scores import compute_iqm_and_confidence_interval
from experiments.atari import COLORS, LABEL

plt.rc("font", size=15)
plt.rc("lines", linewidth=3)

# Collect every curve as (scores, legend label, colour) so that a single loop
# draws them all. Order: DQN runs (optional), iDQN runs, then baselines.
iqm_curves = []
if add_dqn:
    for experiment in experiments:
        iqm_curves.append((dqn_scores[experiment], f"DQN {LABEL.get(experiment)}", COLORS[experiment]))

for experiment in experiments:
    for k in ks:
        experiment_k = f"{experiment}_{k}"
        iqm_curves.append((idqn_scores[experiment_k], f"iDQN {LABEL.get(experiment_k)}", COLORS[experiment_k]))

for baseline in baselines:
    iqm_curves.append((baselines_scores[baseline], baseline, COLORS[baseline]))

# selected_epochs holds 0-based indices; the x axis shows them 1-based.
epochs_one_based = selected_epochs + 1
for scores, label, color in iqm_curves:
    iqms, iqms_confidence_interval = compute_iqm_and_confidence_interval(scores, selected_epochs)
    plt.plot(epochs_one_based, iqms, label=label, color=color)
    if plot_std:
        plt.fill_between(epochs_one_based, iqms_confidence_interval[0, :], iqms_confidence_interval[1, :], color=color, alpha=0.3)

plt.grid()
plt.xlabel("Number of Frames (in millions)")
plt.ylabel("IQM Human Normalized Score")
if len(experiments) > 0:
    # One legend column per curve. Clamp to 1 because matplotlib raises on
    # ncols=0 (e.g. when `ks` is empty and no baseline is selected).
    plt.legend(ncols=max(1, len(iqm_curves)), bbox_to_anchor=[1, 1])
    if len(games) == 1:
        plt.title(games[0])
        plt.savefig(f"figures/{experiments[0]}/{games[0]}/J.pdf", bbox_inches='tight')
    else:
        plt.savefig(f"figures/{experiments[0]}/J.pdf", bbox_inches='tight')
else:
    # No experiment folder to save into: show a default legend only.
    plt.legend()

In [None]:
import matplotlib.pyplot as plt
from idqn.utils.process_scores import compute_performance_profile_and_confidence_interval

plt.rc("font", size=15)
plt.rc("lines", linewidth=3)


# The performance profile is only drawn when several games are selected.
if len(games) > 1:
    # Collect every curve as (scores, legend label, colour): iDQN runs first,
    # then the baselines chosen for this plot.
    profile_curves = []
    for experiment in experiments:
        for k in ks:
            experiment_k = f"{experiment}_{k}"
            profile_curves.append((idqn_scores[experiment_k], f"iDQN {LABEL.get(experiment_k)}", COLORS[experiment_k]))

    # NOTE(review): baselines_scores is built from `baselines`; presumably every
    # name in baselines_performance_profile must also be listed there - confirm.
    for baseline in baselines_performance_profile:
        profile_curves.append((baselines_scores[baseline], baseline, COLORS[baseline]))

    for scores, label, color in profile_curves:
        performance_profile, performance_profile_confidence_interval = compute_performance_profile_and_confidence_interval(scores, taus)
        plt.plot(taus, performance_profile, label=label, color=color)
        if plot_std:
            plt.fill_between(taus, performance_profile_confidence_interval[0, :], performance_profile_confidence_interval[1, :], color=color, alpha=0.3)

    plt.grid()
    plt.xlabel(r"Human Normalized Score $(\tau)$")
    plt.ylabel(r"Fraction of runs with score $> \tau$")
    # The former `len(games) == 1` branch could never run inside this block and
    # has been removed.
    if len(experiments) > 0:
        # One legend column per curve actually drawn (the previous count used
        # `baselines` instead of `baselines_performance_profile`). Clamp to 1
        # because matplotlib raises on ncols=0.
        plt.legend(ncols=max(1, len(profile_curves)), bbox_to_anchor=[1, 1])
        plt.savefig(f"figures/{experiments[0]}/P.pdf", bbox_inches='tight')
    else:
        # No experiment folder to save into: show a default legend only,
        # as the other plotting cells do.
        plt.legend()

### Inter-head standard deviation

In [None]:
if add_head_std:
    # head_stds["<experiment>_<k>"][game] is a (200, len(seeds)) array, NaN-initialised.
    head_stds = {}
    for experiment in experiments:
        for k in ks:
            experiment_k = f"{experiment}_{k}"
            head_stds[experiment_k] = {}
            for game in games:
                game_stds = np.full((200, len(seeds)), np.nan)
                head_stds[experiment_k][game] = game_stds
                for idx_seed, seed in enumerate(seeds):
                    # Only the first 20 rows are filled; the remaining rows stay NaN.
                    # NOTE(review): selected_epochs indexes up to row 199 - confirm
                    # that the 20-row limit is intended.
                    game_stds[:20, idx_seed] = np.load(f"figures/{experiment}/{game}/iDQN/{k}_S_{seed}.npy")

    # selected_epochs holds 0-based indices; the x axis shows them 1-based.
    epochs_one_based = selected_epochs + 1
    for experiment in experiments:
        for k in ks:
            experiment_k = f"{experiment}_{k}"
            iqms, iqms_confidence_interval = compute_iqm_and_confidence_interval(head_stds[experiment_k], selected_epochs, normalize=False)
            plt.plot(epochs_one_based, iqms, label=f"iDQN {LABEL.get(experiment_k)}", color=COLORS[experiment_k])
            if plot_std:
                plt.fill_between(epochs_one_based, iqms_confidence_interval[0, :], iqms_confidence_interval[1, :], color=COLORS[experiment_k], alpha=0.3)

    plt.grid()
    plt.xlabel("Number of Frames (in millions)")
    plt.ylabel("IQM inter-head standard deviation")
    legend_columns = len(baselines) + len(experiments) * len(ks) * (1 + int(add_dqn))
    plt.legend(ncols=legend_columns, bbox_to_anchor=[1, 1])
    if len(experiments) == 0:
        # Nothing to save: replace the legend above with a default one.
        _ = plt.legend()
    elif len(games) == 1:
        plt.title(games[0])
        _ = plt.savefig(f"figures/{experiments[0]}/{games[0]}/S.pdf", bbox_inches='tight')
    else:
        _ = plt.savefig(f"figures/{experiments[0]}/S.pdf", bbox_inches='tight')