In [26]:
import pandas
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter

from scipy import stats
def r2(x, y):
    """Return the squared Pearson correlation coefficient between ``x`` and ``y``."""
    # pearsonr returns (coefficient, p-value); only the coefficient is needed.
    coefficient = stats.pearsonr(x, y)[0]
    return coefficient ** 2

In [2]:
# Per-game average scores for the baseline labelled "der" in the comparison
# cell; compare_dfs also uses them as the reference for GameScoreDERNormalized.
# NOTE(review): the source of these numbers is not recorded in the notebook
# (presumably published Data-Efficient Rainbow results) — confirm.
der_scores = dict(
    alien=739.9, 
    amidar=188.6,
    assault=431.2,
    asterix=470.8,
    bank_heist=51.0,
    battle_zone=10124.6,
    boxing=0.2,
    breakout=1.9, 
    chopper_command=861.8,
    crazy_climber=16185.3,
    demon_attack=508, 
    freeway=27.9, 
    frostbite=866.8,
    gopher=349.5, 
    hero=6857.0, 
    jamesbond=301.6,
    kangaroo=779.3,
    krull=2851.5,
    kung_fu_master=14346.1,
    ms_pacman=1204.1, 
    pong=-19.3,
    private_eye=97.8,
    qbert=1152.9,
    road_runner=9600.0,
    seaquest=354.1, 
    up_n_down=2877.4,
)

# Per-game baseline scores subtracted from every score before normalisation
# (see compare_dfs and the pairwise score helpers).
# NOTE(review): presumably random-policy scores; the source is not recorded
# in the notebook — confirm.
random_scores = dict(
    alien=227.8,
    amidar=5.8,
    assault=222.4,
    asterix=210.0,
    bank_heist=14.2,
    battle_zone=2360,
    boxing=0.1,
    breakout=1.7, 
    chopper_command=811.0,
    crazy_climber=10780.5, 
    demon_attack=152.1,
    freeway=0.0, 
    frostbite=65.2,
    gopher=257.6, 
    hero=1027.0, 
    jamesbond=29.0,
    kangaroo=52.0,
    krull=1598.0,
    kung_fu_master=258.5,
    ms_pacman=307.3, 
    pong=-20.7,
    private_eye=24.9,
    qbert=163.9,
    road_runner=11.5, 
    seaquest=68.4,
    up_n_down=533.4
)

# Per-game average scores for the baseline labelled "curl" in the comparison
# cell.
# NOTE(review): the source of these numbers is not recorded in the notebook
# (presumably the published CURL results) — confirm.
curl_scores = dict(
    alien=1148.2,
    amidar=232.3,
    assault=543.7,
    asterix=524.3,
    bank_heist=193.7,
    battle_zone=11208.0,
    boxing=4.8,
    breakout=18.2, 
    chopper_command=1198.0,
    crazy_climber=27805.6, 
    demon_attack=834.0,
    freeway=27.9, 
    frostbite=924.0,
    gopher=801.4, 
    hero=6235.1, 
    jamesbond=400.1,
    kangaroo=345.3,
    krull=3833.6,
    kung_fu_master=14280.0,
    ms_pacman=1492.8, 
    pong=2.1,
    private_eye=105.2,
    qbert=1225.6,
    road_runner=6786.7, 
    seaquest=408.0,
    up_n_down=2735.2
)


# Per-game reference scores used by compare_dfs as the upper anchor of
# GameScoreNormalized: (score - random) / (human - random).
# NOTE(review): presumably human-player scores; the source is not recorded in
# the notebook — confirm.
human_scores = dict(
    alien=7127.7,
    amidar=1719.5,
    assault=742,
    asterix=8503.3,
    bank_heist=753.1,
    battle_zone=37187.5,
    boxing=12.1,
    breakout=30.5,
    chopper_command=7387.8,
    crazy_climber=35829.4,
    demon_attack=1971,
    freeway=29.6,
    frostbite=4334.7,
    gopher=2412.5,
    hero=30826.4,
    jamesbond=302.8,
    kangaroo=3035.0, 
    krull=2665.5, 
    kung_fu_master=22736.3,
    ms_pacman=6951.6,
    pong=14.6,
    private_eye=69571.3,
    qbert=13455.0, 
    road_runner=7845.0,
    seaquest=42054.7,
    up_n_down=11693.2
)


# Per-game reference scores used by compare_dfs as the upper anchor of
# GameScoreNatureNormalized: (score - random) / (nature - random).
# NOTE(review): presumably the DQN results from the Nature paper; the source
# is not recorded in the notebook — confirm.
nature_scores = dict(
    alien=3069, amidar=739.5, assault=3359,
    asterix=6012, bank_heist=429.7, battle_zone=26300.,
    boxing=71.8, breakout=401.2, chopper_command=6687.,
    crazy_climber=114103, demon_attack=9711., freeway=30.3,
    frostbite=328.3, gopher=8520., hero=19950., jamesbond=576.7,
    kangaroo=6740., krull=3805., kung_fu_master=23270.,
    ms_pacman=2311., pong=18.9, private_eye=1788.,
    qbert=10596., road_runner=18257., seaquest=5286., up_n_down=8456.
)

# Per-game average scores for the baseline labelled "drq" / "old_drq" in the
# comparison cells. Built from [game, score] pairs rather than keyword
# arguments, but the resulting dict has the same layout as the tables above.
# NOTE(review): the source of these numbers is not recorded in the notebook —
# confirm.
ilya_scores = dict([
 ['alien', 761.4339839039528],
 ['amidar', 97.27108214038698],
 ['assault', 489.0723402551533],
 ['asterix', 637.4622709981793],
 ['bank_heist', 196.57212118002022],
 ['battle_zone', 13520.618932259886],
 ['boxing', 6.865714285714286],
 ['breakout', 14.517297825886795],
 ['chopper_command', 646.5952380952382],
 ['crazy_climber', 19694.06343491709],
 ['demon_attack', 1222.1726236854836],
 ['freeway', 15.406451612903222],
 ['frostbite', 449.6546661390856],
 ['gopher', 598.3884550163659],
 ['hero', 4001.6417938101877],
 ['jamesbond', 272.33614614912597],
 ['kangaroo', 1052.3677908693933],
 ['krull', 4002.30714259636],
 ['kung_fu_master', 7106.436288099127],
 ['ms_pacman', 1065.5951100912523],
 ['pong', -11.374356103459368],
 ['private_eye', 49.17872340425532],
 ['qbert', 1100.9153082502767],
 ['road_runner', 8069.789540523381],
 ['seaquest', 321.8232367384383],
 ['up_n_down', 3924.879963371488]])

def to_df(score_dict):
    """Turn a ``{game: score}`` mapping into a one-column DataFrame.

    The result is indexed by the mapping's keys (in insertion order) and has
    a single column named ``"GameScoreAverage"`` holding the values.
    """
    games = list(score_dict.keys())
    # Column vector: one row per game.
    column = np.array(list(score_dict.values())).reshape(-1, 1)
    return pandas.DataFrame(column, index=games, columns=["GameScoreAverage"])

# Build one single-column frame per score table and attach the game names as
# a regular "game" column (in addition to the index), which is the column the
# comparison helpers group and filter on.
atari_der_scores = to_df(der_scores)
atari_der_scores["game"] = list(der_scores)

atari_human_scores = to_df(human_scores)
atari_human_scores["game"] = list(human_scores)

atari_random_scores = to_df(random_scores)
atari_random_scores["game"] = list(random_scores)

atari_nature_scores = to_df(nature_scores)
atari_nature_scores["game"] = list(nature_scores)

atari_ilya_scores = to_df(ilya_scores)
atari_ilya_scores["game"] = list(ilya_scores)

atari_curl_scores = to_df(curl_scores)
atari_curl_scores["game"] = list(curl_scores)

In [87]:
def split_on_fields(df, fields=("n_step", "encoder", "tag",
                                "batch_size", "replay_ratio",
                                "batch_t", "jumps", "nce",
                                "use_all_targets", "time_contrastive",
                                "model_rl_weight", "detach_model"),
                   min_len=0,
                   min_games=0):
    """Recursively partition ``df`` by the distinct values of ``fields``.

    Fields are processed in order. For every current group, a field that
    takes a single value (compared as strings) leaves the group untouched;
    otherwise the group is split into one sub-group per distinct value and
    ``"<first 5 chars of field>: <value>"`` is appended to its label.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to split. It must contain every column named in ``fields`` and
        a ``game`` column. It is never modified.
    fields : sequence of str
        Column names to split on, in order.
    min_len : int
        A newly created sub-group is kept only if it has MORE than
        ``min_len`` rows.
    min_games : int
        A newly created sub-group is kept only if it covers AT LEAST
        ``min_games`` distinct games.

    Returns
    -------
    (list of pandas.DataFrame, list of str)
        The groups and their labels, in matching order. Values are visited in
        sorted order, so the result is deterministic across runs.
    """
    if "tag" in df.columns:
        # Normalise the "q_l1" spelling. ``assign`` builds a new frame, so the
        # caller's DataFrame is left untouched (the original wrote to df.tag).
        df = df.assign(tag=[str(t).replace("q_l1", "ql1") for t in df["tag"]])

    groups = [(df, "")]

    for field in fields:
        next_groups = []
        for frame, label in groups:
            as_text = frame[field].astype(str)
            # Sorted rather than raw set order: string hashing is randomised
            # per process, which made the group order differ between runs.
            values = sorted(set(as_text))
            if len(values) == 1:
                next_groups.append((frame, label))
                continue
            for value in values:
                subset = frame[(as_text == value).to_numpy()]
                if len(subset) > min_len and len(set(subset.game)) >= min_games:
                    new_label = label + "{}: {}  ".format(field[:5], value)
                    next_groups.append((subset, new_label))
        groups = next_groups

    # Every label fragment ends with two spaces; strip the trailing pair.
    return [frame for frame, _ in groups], [label[:-2] for _, label in groups]

In [88]:
def mean_pairwise_score(df1, df2):
    """Mean of df1's gain relative to df2 minus the mean of the reverse ratio.

    "Gain" is ``GameScoreAverage`` minus the module-level
    ``atari_random_scores`` baseline; the three frames are aligned on their
    index by pandas.
    """
    baseline = atari_random_scores["GameScoreAverage"]
    gain_1 = df1["GameScoreAverage"] - baseline
    gain_2 = df2["GameScoreAverage"] - baseline
    forward = gain_1 / gain_2
    backward = gain_2 / gain_1
    return forward.mean() - backward.mean()

def median_pairwise_score(df1, df2):
    """Median of df1's gain relative to df2 minus the median of the reverse ratio.

    "Gain" is ``GameScoreAverage`` minus the module-level
    ``atari_random_scores`` baseline; the three frames are aligned on their
    index by pandas.
    """
    baseline = atari_random_scores["GameScoreAverage"]
    gain_1 = df1["GameScoreAverage"] - baseline
    gain_2 = df2["GameScoreAverage"] - baseline
    forward = gain_1 / gain_2
    backward = gain_2 / gain_1
    return forward.median() - backward.median()

def group_dfs(dfs):
    """Collapse each frame to one row of numeric column means per game.

    Frames with a ``seed`` column are first averaged within every
    (seed, game) pair and then across seeds, so each seed carries equal
    weight regardless of how many rows it contributed. Frames without a
    ``seed`` column are averaged per game directly.

    ``numeric_only=True`` is passed explicitly: the run tables also carry
    string columns, and recent pandas versions raise on them instead of
    silently dropping them.

    Parameters
    ----------
    dfs : iterable of pandas.DataFrame
        Each frame must have a ``game`` column.

    Returns
    -------
    list of pandas.DataFrame
        One frame per input, indexed by game.
    """
    proc_dfs = []
    for df in dfs:
        if "seed" in df.columns:
            per_seed = df.groupby(["seed", "game"]).mean(numeric_only=True)
            proc_dfs.append(per_seed.groupby("game").mean(numeric_only=True))
        else:
            proc_dfs.append(df.groupby("game").mean(numeric_only=True))

    return proc_dfs

def _add_normalized_column(df, column, reference_scores):
    """Add ``column`` = (score - random) / (reference - random) per game to ``df``."""
    df[column] = np.array([
        (score - random_scores[game]) / (reference_scores[game] - random_scores[game])
        for game, score in zip(df.index, df["GameScoreAverage"])
    ])


def compare_dfs(dfs, names, min_games=0, pairwise_top_n=-1):
    """Compare several result tables on the games they all share.

    Steps:

    1. If ``min_games > 0``, take the ``min_games`` most frequent games
       (row counts summed over all frames) and drop every frame that lacks
       any of them.
    2. Restrict to the games present in every remaining frame, reduce each
       frame to per-game means with ``group_dfs`` and add three columns
       normalised against the module-level score tables:
       ``GameScoreNormalized`` (human), ``GameScoreNatureNormalized``
       (nature) and ``GameScoreDERNormalized`` (der).
    3. Sort frames by descending median ``GameScoreNormalized`` and print
       the median/mean summaries followed by the pairwise comparisons.

    Parameters
    ----------
    dfs : list of pandas.DataFrame
        Tables with ``game`` and ``GameScoreAverage`` columns.
    names : list of str
        Labels, positionally paired with ``dfs``.
    min_games : int
        See step 1; ``0`` disables the filter.
    pairwise_top_n : int
        Number of top-ranked frames included in the pairwise printout; a
        negative value means all of them. Values larger than the number of
        frames are clamped.

    Returns
    -------
    (numpy.ndarray of DataFrame, numpy.ndarray of str)
        The per-game frames and their names, both in ranked order.

    Raises
    ------
    ValueError
        If no frame is left to compare.
    """
    if min_games > 0:
        game_counts = Counter()
        for df in dfs:
            game_counts.update(df.game)
        mandatory_games = {game for game, _ in game_counts.most_common()[:min_games]}
        kept = [(df, name) for df, name in zip(dfs, names)
                if set(df.game) >= mandatory_games]
        dfs = [df for df, _ in kept]
        names = [name for _, name in kept]

    if len(dfs) == 0:
        raise ValueError("compare_dfs: no data frames left to compare")

    games = set(dfs[0].game)
    for df in dfs[1:]:
        games &= set(df.game)

    # Copy each filtered frame so the new columns are written to an
    # independent object rather than to a slice of the grouped frame.
    games_dfs = [df.loc[df.index.isin(games)].copy() for df in group_dfs(dfs)]

    for df in games_dfs:
        _add_normalized_column(df, "GameScoreNormalized", human_scores)
        _add_normalized_column(df, "GameScoreNatureNormalized", nature_scores)
        _add_normalized_column(df, "GameScoreDERNormalized", der_scores)

    scores = [-df["GameScoreNormalized"].median() for df in games_dfs]
    indices = np.argsort(scores)

    # Fill a 1-D object array slot by slot. np.array(list_of_frames) would try
    # to interpret the frames as numeric data: it yields an N-D array when
    # the frames share a shape and fails on ragged input in recent NumPy.
    ordered = np.empty(len(games_dfs), dtype=object)
    for slot, source in enumerate(indices):
        ordered[slot] = games_dfs[source]
    games_dfs = ordered
    names = np.array(names)[indices]

    print()
    print("Nature Scores:")
    for df, name in zip(games_dfs, names):
        print("{0}: Median: {1:.3f}, Mean: {2:.3f}".format(
            name,
            df["GameScoreNatureNormalized"].median(),
            df["GameScoreNatureNormalized"].mean()))

    print()
    print("Human Scores:")
    for df, name in zip(games_dfs, names):
        print("{0}: Median: {1:.3f}, Mean: {2:.3f}".format(
            name,
            df["GameScoreNormalized"].median(),
            df["GameScoreNormalized"].mean()))

    print()
    print("Comparison over {} games:".format(len(games)))
    if pairwise_top_n < 0:
        pairwise_top_n = len(games_dfs)
    # Never index past the frames that survived filtering.
    pairwise_top_n = min(pairwise_top_n, len(games_dfs))
    for i in range(pairwise_top_n):
        print()
        for j in range(pairwise_top_n):
            if i == j:
                continue
            better_games = np.sum(games_dfs[i]["GameScoreAverage"] > games_dfs[j]["GameScoreAverage"])
            mean_comp_score = mean_pairwise_score(games_dfs[i], games_dfs[j])
            median_comp_score = median_pairwise_score(games_dfs[i], games_dfs[j])

            # Printed order: games won, median pairwise score, mean pairwise score.
            print("{} above {}: {}, {:.3f}, {:.3f}".format(names[i], names[j],
                                                           better_games,
                                                           median_comp_score,
                                                           mean_comp_score))

    return games_dfs, names

def sort_games(game_df, key="GameScoreNormalized"):
    """Print the games of ``game_df`` ranked by ascending ``key``.

    One line is printed per game: 1-based rank, game name (the frame's
    index label), the value of ``key`` and the raw ``GameScoreAverage``.
    The row(s) at the median rank are tagged with ``median``: the two middle
    ranks for an even number of games, the single middle rank for an odd
    number.

    Parameters
    ----------
    game_df : pandas.DataFrame
        Frame indexed by game with ``key`` and ``GameScoreAverage`` columns.
    key : str
        Column to rank by.
    """
    # Work on the raw values so the result is a plain array of positions.
    order = np.argsort(game_df[key].to_numpy())

    n_games = len(game_df)
    if n_games % 2:
        median_ranks = {n_games // 2 + 1}
    else:
        median_ranks = {n_games // 2, n_games // 2 + 1}

    for rank, pos in enumerate(order, start=1):
        # .iloc: `pos` is a position, while the index holds game names.
        print("{} {} : {:.3f} {:.3f} {}".format(rank,
                                                game_df.index[pos],
                                                game_df[key].iloc[pos],
                                                game_df["GameScoreAverage"].iloc[pos],
                                                "median" if rank in median_ranks else ""))

In [89]:
# Load result tables from CSV files in the working directory.
# NOTE(review): the file names suggest Weights & Biases exports — confirm.
tui_1_base = pandas.read_csv("wandb_3232_tui_1.csv")
uat_tui1_no_dueling_c51 = pandas.read_csv("uat_tui1_c51_no_aug.csv")
hn_1_base = pandas.read_csv("wandb_hn1_base.csv")
# `df` is the table that the controls / our_der / jat_1_gl filters slice.
df = pandas.read_csv("wandb_100k_v3.csv")

In [90]:
# Load further result tables; each is passed unchanged to compare_dfs in the
# main comparison cell (fixed_jat_gl is labelled "UAT" there, new_base "Base").
fixed_jat_gl = pandas.read_csv("uat_fixed.csv")
no_noisy_5k = pandas.read_csv("no_noisy_5k_eps.csv")
new_base = pandas.read_csv("wandb_new_base.csv")
new_base_3232 = pandas.read_csv("new_base_3232.csv")
uat_no_noisy = pandas.read_csv("uat_no_noisy.csv")
base_nt_dueling = pandas.read_csv("base_nt_dueling.csv")

In [91]:
# Load the DrQ result table and split it by the `dueling` and
# `distributional` columns:
#   our_drq_nd  - dueling == 0, distributional != 0
#   our_drq_d   - dueling == 1, distributional != 0
#   our_drq_dqn - dueling == 1, distributional == 0
our_drq = pandas.read_csv("drq_initial_results.csv")
our_drq_nd = our_drq[(our_drq["dueling"] == 0) &
                   (our_drq["distributional"] != 0)]
our_drq_d = our_drq[(our_drq["dueling"] == 1) &
                   (our_drq["distributional"] != 0)]
our_drq_dqn = our_drq[(our_drq["dueling"] == 1) &
                   (our_drq["distributional"] == 0)]
# Two more DrQ tables, loaded as-is.
our_drq_512_no_pri = pandas.read_csv("drq_512_no_pri.csv")
our_drq_256_nonoops = pandas.read_csv("drq_256_nonoops.csv")

In [92]:
# Load the augmented UAT table and split it on the `norm_type` column
# ("ln" vs "bn").
uat_aug = pandas.read_csv("g_uat_aug.csv")
uat_aug_ln = uat_aug[uat_aug["norm_type"] == "ln"]
uat_aug_bn = uat_aug[uat_aug["norm_type"] == "bn"]
nce_aug = pandas.read_csv("wandb_drq_nce.csv")

In [93]:
# Load the table from "nce1_no_dist.csv" and split it on the
# `prioritized_replay` flag (1 vs 0).
nce_no_dist = pandas.read_csv("nce1_no_dist.csv")
nce_no_dist_pri = nce_no_dist[nce_no_dist["prioritized_replay"] == 1]
nce_no_dist_no_pri = nce_no_dist[nce_no_dist["prioritized_replay"] == 0]

In [94]:
# Runs by "ankeshanand" with n_step 10, the "nature" encoder, batch size 32
# and no tag (NaN renders as "nan"), with nce switched on.
controls = df.loc[
    (df["n_step"] == 10)
    & (df["nce"] == 1)
    & (df["encoder"] == "nature")
    & (df["User"] == "ankeshanand")
    & (df["batch_size"] == 32)
    & (df["tag"].astype(str) == "nan")
]

# Same selection with nce switched off.
our_der = df.loc[
    (df["n_step"] == 10)
    & (df["nce"] == 0)
    & (df["encoder"] == "nature")
    & (df["User"] == "ankeshanand")
    & (df["batch_size"] == 32)
    & (df["tag"].astype(str) == "nan")
]


# Runs by "maxa-schwarzer": nce on, 5 jumps, all targets used, and the
# time-contrastive, global-nce and model-rl-weight settings all zero.
jat_1_gl = df.loc[
    (df["n_step"] == 10)
    & (df["nce"] == 1)
    & (df["encoder"] == "nature")
    & (df["User"] == "maxa-schwarzer")
    & (df["batch_size"] == 32)
    & (df["time_contrastive"] == 0)
    & (df["global_nce"] == 0)
    & (df["model_rl_weight"] == 0)
    & (df["jumps"] == 5)
    & (df["use_all_targets"] == 1)
]

In [95]:
# Load the BYOL results and split them into one frame per distinct
# combination of the listed hyper-parameter columns.
byol_data = pandas.read_csv("byol_results_v7.csv")

# The positional 100 is `min_len`: a sub-group is kept only if it has more
# than 100 rows and covers at least 26 distinct games.
byol_sweeps, byol_names = split_on_fields(byol_data, ["tag", "noisy_nets", 
                                                      "distributional", 
                                                      "replay_ratio", "jumps",
                                                      "classifier",
                                                      "target_update_interval"], 100,
                                         min_games=26)


In [96]:
# Main comparison. `names` and `dfs` are paired by POSITION: the i-th label
# describes the i-th frame, so an entry commented out in one list must be
# commented out at the matching position in the other.
names =  ["Base",
          "New Base 32/32",
          "Base w/o target", 
          "Base NT dueling",
          "Base w/ hard negs",
          "Base no-dist Pri 64/32",
          "Base no-dist NoPri 64/32",
          "Base no-dist Pri Aug",
          *byol_names,
          "UAT", 
          "uat_tui1_no_dueling_c51",
          "UAT no-dist pri aug",
#           "UAT aug DQN LN T1",
          "5k-eps-no-target",
          "der", 
#           "nature",
          "our_der",
          "curl",
          "our drq c51 no dueling",
          "our drq c51 dueling",
          "our drq dqn dueling no-dist",
          "our drq 512 no pri",
          "our drq 256 no noops",
          "drq",
             ]
dfs = [new_base,
       new_base_3232,
       tui_1_base,
       base_nt_dueling,
       hn_1_base, 
       nce_no_dist_pri,
       nce_no_dist_no_pri,
       nce_aug,
       *byol_sweeps,
#        jat_1_gl,
       fixed_jat_gl,
       uat_tui1_no_dueling_c51,
       uat_aug_bn,
       no_noisy_5k,
       atari_der_scores, 
#        atari_nature_scores,
       our_der,
       atari_curl_scores,
       our_drq_nd,
       our_drq_d,
       our_drq_dqn,
       our_drq_512_no_pri,
       our_drq_256_nonoops,
       atari_ilya_scores,
                         ]
# compare_dfs drops frames that lack any of the 26 most frequent games and
# returns the per-game frames plus their names in ranked order; `names` is
# rebound to that reordered array.
games_dfs, names = compare_dfs(dfs, names, min_games=26)


Nature Scores:
tag: byol_ql1_aug: Median: 0.220, Mean: 0.569
tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: bilinear  targe: 1000: Median: 0.236, Mean: 0.440
drq: Median: 0.153, Mean: 0.287
tag: byol_l_j5_no_t0: Median: 0.201, Mean: 0.384
curl: Median: 0.277, Mean: 0.419
tag: byol_l_j5_aug_no_t0: Median: 0.235, Mean: 0.444
Base: Median: 0.172, Mean: 0.260
UAT: Median: 0.191, Mean: 0.296
tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: bilinear  targe: 1: Median: 0.234, Mean: 0.321
tag: byol_ql1_rew_test: Median: 0.164, Mean: 0.487
tag: byol_g: Median: 0.129, Mean: 0.346
tag: byol_ql1_ema_test: Median: 0.172, Mean: 0.486
UAT no-dist pri aug: Median: 0.180, Mean: 0.409
tag: byol_aug_l: Median: 0.181, Mean: 0.382
Base no-dist NoPri 64/32: Median: 0.160, Mean: 0.404
tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: none  targe: 1: Median: 0.258, Mean: 0.377
our_der: Median: 0.129, Mean: 0.265
Base no-dist Pri 64/32: Median: 0.155, Mean: 0.251
Base w/ hard negs:

drq above tag: byol_ql1_aug: 11, -0.281, -0.305
drq above tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: bilinear  targe: 1000: 7, -0.424, -0.185
drq above tag: byol_l_j5_no_t0: 13, 0.043, 0.594
drq above curl: 9, -0.613, -0.411
drq above tag: byol_l_j5_aug_no_t0: 11, -0.475, -0.057
drq above Base: 15, 0.200, 0.571
drq above UAT: 11, -0.143, -0.044
drq above tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: bilinear  targe: 1: 11, -0.043, 0.548
drq above tag: byol_ql1_rew_test: 12, -0.259, 0.415
drq above tag: byol_g: 17, 0.574, 1.410
drq above tag: byol_ql1_ema_test: 12, -0.002, 0.250
drq above UAT no-dist pri aug: 10, -0.092, -0.281
drq above tag: byol_aug_l: 12, -0.138, -0.085
drq above Base no-dist NoPri 64/32: 18, 0.239, 3.116
drq above tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: none  targe: 1: 10, -0.506, 0.087
drq above our_der: 18, 0.517, 3.363
drq above Base no-dist Pri 64/32: 18, 0.334, 3.308
drq above Base w/ hard negs: 11, -0.058, 1.180
drq

Base above der: 14, 0.214, 3.496
Base above our drq dqn dueling no-dist: 11, -0.160, -0.233
Base above Base w/o target: 13, 0.044, 1.101
Base above our drq 256 no noops: 10, -0.246, 12.785
Base above our drq 512 no pri: 11, -0.131, 0.011
Base above tag: nan  noisy: 1  distr: 1  repla: 64  jumps: 1: 14, 0.126, 0.123
Base above tag: byol_l_j5_aug: 15, 0.046, -4.644
Base above New Base 32/32: 15, 0.300, 0.372
Base above our drq c51 dueling: 11, -0.102, 0.257
Base above Base no-dist Pri Aug: 15, 0.172, 0.721
Base above our drq c51 no dueling: 13, -0.042, 1.482

UAT above tag: byol_ql1_aug: 10, -0.473, -1.863
UAT above tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: bilinear  targe: 1000: 10, -0.396, -0.610
UAT above drq: 15, 0.143, 0.044
UAT above tag: byol_l_j5_no_t0: 13, 0.011, -1.522
UAT above curl: 6, -0.943, -2.193
UAT above tag: byol_l_j5_aug_no_t0: 11, -0.241, -1.623
UAT above Base: 15, 0.028, -0.641
UAT above tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: bilin

tag: byol_ql1_ema_test above our drq dqn dueling no-dist: 13, 0.108, -0.194
tag: byol_ql1_ema_test above Base w/o target: 14, 0.317, 1.561
tag: byol_ql1_ema_test above our drq 256 no noops: 12, 0.021, -1.767
tag: byol_ql1_ema_test above our drq 512 no pri: 14, 0.091, 0.042
tag: byol_ql1_ema_test above tag: nan  noisy: 1  distr: 1  repla: 64  jumps: 1: 14, 0.123, 0.090
tag: byol_ql1_ema_test above tag: byol_l_j5_aug: 18, 0.650, -3.854
tag: byol_ql1_ema_test above New Base 32/32: 16, 0.464, 0.777
tag: byol_ql1_ema_test above our drq c51 dueling: 14, 0.256, 0.525
tag: byol_ql1_ema_test above Base no-dist Pri Aug: 13, 0.260, 0.841
tag: byol_ql1_ema_test above our drq c51 no dueling: 15, 0.381, 1.236

UAT no-dist pri aug above tag: byol_ql1_aug: 10, -0.268, -0.814
UAT no-dist pri aug above tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: bilinear  targe: 1000: 12, -0.202, -0.185
UAT no-dist pri aug above drq: 16, 0.092, 0.281
UAT no-dist pri aug above tag: byol_l_j5_no_t0: 13, 0.00

our_der above our drq dqn dueling no-dist: 8, -0.797, -1.522
our_der above Base w/o target: 10, -0.177, -0.565
our_der above our drq 256 no noops: 9, -0.808, -3.852
our_der above our drq 512 no pri: 10, -0.615, 0.257
our_der above tag: nan  noisy: 1  distr: 1  repla: 64  jumps: 1: 9, -0.463, 0.166
our_der above tag: byol_l_j5_aug: 12, -0.150, -5.204
our_der above New Base 32/32: 8, -0.557, -1.252
our_der above our drq c51 dueling: 7, -0.270, -1.891
our_der above Base no-dist Pri Aug: 8, -0.468, -1.524
our_der above our drq c51 no dueling: 11, -0.365, -2.091

Base no-dist Pri 64/32 above tag: byol_ql1_aug: 8, -1.096, -2.266
Base no-dist Pri 64/32 above tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: bilinear  targe: 1000: 4, -0.820, -1.352
Base no-dist Pri 64/32 above drq: 8, -0.334, -3.308
Base no-dist Pri 64/32 above tag: byol_l_j5_no_t0: 7, -0.321, -0.745
Base no-dist Pri 64/32 above curl: 3, -1.109, -3.071
Base no-dist Pri 64/32 above tag: byol_l_j5_aug_no_t0: 6, -0.738, -

uat_tui1_no_dueling_c51 above Base: 12, -0.124, -0.507
uat_tui1_no_dueling_c51 above UAT: 8, -0.099, -0.911
uat_tui1_no_dueling_c51 above tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: bilinear  targe: 1: 9, -0.240, -0.645
uat_tui1_no_dueling_c51 above tag: byol_ql1_rew_test: 10, -0.452, -1.167
uat_tui1_no_dueling_c51 above tag: byol_g: 15, 0.234, 1.382
uat_tui1_no_dueling_c51 above tag: byol_ql1_ema_test: 11, -0.327, -1.177
uat_tui1_no_dueling_c51 above UAT no-dist pri aug: 10, -0.303, -1.225
uat_tui1_no_dueling_c51 above tag: byol_aug_l: 10, -0.493, -1.223
uat_tui1_no_dueling_c51 above Base no-dist NoPri 64/32: 18, 0.205, -0.550
uat_tui1_no_dueling_c51 above tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: none  targe: 1: 11, -0.354, -0.894
uat_tui1_no_dueling_c51 above our_der: 16, 0.248, 0.657
uat_tui1_no_dueling_c51 above Base no-dist Pri 64/32: 15, 0.299, 0.331
uat_tui1_no_dueling_c51 above Base w/ hard negs: 10, -0.233, -0.377
uat_tui1_no_dueling_c51 above ta

Base w/o target above Base no-dist Pri Aug: 14, 0.222, -0.443
Base w/o target above our drq c51 no dueling: 12, -0.201, -0.048

our drq 256 no noops above tag: byol_ql1_aug: 11, -0.425, -24.872
our drq 256 no noops above tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: bilinear  targe: 1000: 13, 0.029, -4.705
our drq 256 no noops above drq: 13, -0.007, 9.298
our drq 256 no noops above tag: byol_l_j5_no_t0: 13, 0.028, -30.048
our drq 256 no noops above curl: 7, -0.800, -27.715
our drq 256 no noops above tag: byol_l_j5_aug_no_t0: 11, -0.450, -17.354
our drq 256 no noops above Base: 16, 0.246, -12.785
our drq 256 no noops above UAT: 14, 0.076, -1.385
our drq 256 no noops above tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: bilinear  targe: 1: 16, 0.361, -3.266
our drq 256 no noops above tag: byol_ql1_rew_test: 11, -0.605, -18.530
our drq 256 no noops above tag: byol_g: 16, 0.711, -4.526
our drq 256 no noops above tag: byol_ql1_ema_test: 14, -0.021, 1.767
our drq 256 no

tag: byol_l_j5_aug above curl: 6, -0.857, 3.631
tag: byol_l_j5_aug above tag: byol_l_j5_aug_no_t0: 3, -0.806, 2.208
tag: byol_l_j5_aug above Base: 11, -0.046, 4.644
tag: byol_l_j5_aug above UAT: 8, -0.321, 2.788
tag: byol_l_j5_aug above tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: bilinear  targe: 1: 9, -0.326, 5.195
tag: byol_l_j5_aug above tag: byol_ql1_rew_test: 8, -0.854, 2.666
tag: byol_l_j5_aug above tag: byol_g: 12, 0.065, 1.496
tag: byol_l_j5_aug above tag: byol_ql1_ema_test: 8, -0.650, 3.854
tag: byol_l_j5_aug above UAT no-dist pri aug: 5, -0.612, 1.791
tag: byol_l_j5_aug above tag: byol_aug_l: 9, -0.604, 2.067
tag: byol_l_j5_aug above Base no-dist NoPri 64/32: 10, -0.298, 4.773
tag: byol_l_j5_aug above tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: none  targe: 1: 9, -0.443, 6.973
tag: byol_l_j5_aug above our_der: 14, 0.150, 5.204
tag: byol_l_j5_aug above Base no-dist Pri 64/32: 13, 0.060, 3.780
tag: byol_l_j5_aug above Base w/ hard negs: 10, -0.242, 4

In [61]:
# Show the per-game mean of `model_rl_weight` for the top-ranked frame.
# NOTE(review): depends on whichever `games_dfs` is currently in the kernel.
games_dfs[0]["model_rl_weight"]

game
alien              0.0
amidar             0.0
assault            0.0
asterix            0.0
bank_heist         0.0
battle_zone        0.0
boxing             0.0
breakout           0.0
chopper_command    0.0
crazy_climber      0.0
demon_attack       0.0
freeway            0.0
frostbite          0.0
gopher             0.0
hero               0.0
jamesbond          0.0
kangaroo           0.0
krull              0.0
kung_fu_master     0.0
ms_pacman          0.0
pong               0.0
private_eye        0.0
qbert              0.0
road_runner        0.0
seaquest           0.0
up_n_down          0.0
Name: model_rl_weight, dtype: float64

In [169]:
# For every compared configuration, print its per-game ranking by
# Nature-normalized score, followed by two blank lines.
for game_df, name in zip(games_dfs, names):
    print(name)
    sort_games(game_df, key="GameScoreNatureNormalized")
    print("\n")
    

Base
1 private_eye : -0.015 -1.464 
2 demon_attack : 0.010 248.660 
3 breakout : 0.016 8.080 
4 gopher : 0.019 415.960 
5 chopper_command : 0.030 986.500 
6 seaquest : 0.044 295.560 
7 qbert : 0.063 816.400 
8 boxing : 0.064 4.656 
9 assault : 0.064 421.715 
10 crazy_climber : 0.079 18923.000 
11 asterix : 0.086 706.900 
12 kangaroo : 0.135 952.800 
13 alien : 0.170 711.180 median
14 amidar : 0.174 133.310 median
15 pong : 0.308 -8.502 
16 hero : 0.342 7490.510 
17 battle_zone : 0.385 11566.000 
18 jamesbond : 0.399 247.800 
19 bank_heist : 0.480 213.640 
20 road_runner : 0.495 9049.000 
21 freeway : 0.510 15.445 
22 ms_pacman : 0.535 1378.660 
23 kung_fu_master : 0.539 12651.800 
24 up_n_down : 0.571 5054.060 
25 krull : 0.605 2932.486 
26 frostbite : 0.660 238.760 


New Base 32/32
1 gopher : 0.005 300.200 
2 breakout : 0.013 6.842 
3 chopper_command : 0.015 901.800 
4 demon_attack : 0.021 352.250 
5 private_eye : 0.029 76.046 
6 qbert : 0.033 508.850 
7 seaquest : 0.056 358.400 
8 a

In [170]:
# For every game, correlate (across configurations) the human-normalized
# score on that game with each configuration's median human-normalized score.
corrs = []


def pearson_r(x, y):
    """Return the signed Pearson correlation coefficient of ``x`` and ``y``.

    Deliberately not named ``r2``: the notebook's ``r2`` helper returns the
    SQUARED coefficient, and redefining it here would silently change its
    meaning for every cell run afterwards.
    """
    return stats.pearsonr(x, y)[0]


# The medians do not depend on the game, so compute them once.
hn_medians = [df["GameScoreNormalized"].median() for df in games_dfs]

for game in games_dfs[0].index:

    game_scores = [df.loc[game, "GameScoreNormalized"] for df in games_dfs]

    corr = pearson_r(game_scores, hn_medians)
    corrs.append(corr)
    
#     sns.jointplot(game_scores, hn_medians, kind="reg", stat_func=r2)
#     plt.xlabel("Human-normalized score on {}".format(game))
#     plt.ylabel("Median human-normalized score on Atari26")
#     plt.show()
corrs = np.array(corrs)
# Number of games whose squared correlation exceeds 0.1.
print(np.sum(corrs**2 > 0.1))

# List the games from most positively to most negatively correlated.
indices = np.argsort(-corrs)
for i in indices:
    print(games_dfs[0].index[i], corrs[i])

8
pong 0.5090847265632586
battle_zone 0.48143334563990037
demon_attack 0.4488147286349544
frostbite 0.4009904110982694
bank_heist 0.3432572312196401
alien 0.31316590840684544
crazy_climber 0.28582976445425007
qbert 0.2545447611100411
kung_fu_master 0.24984354270758277
chopper_command 0.19239128623706553
breakout 0.18010068216181882
gopher 0.18002257803013152
ms_pacman 0.17400878847729184
hero 0.15819498408678173
kangaroo 0.11205482847573886
asterix 0.10567109932367197
amidar 0.007323008690871575
assault -0.037557582215034004
krull -0.1028654912410975
private_eye -0.19246540534261225
freeway -0.19703076638787
road_runner -0.2690732053642612
jamesbond -0.30038198431265994
up_n_down -0.3724127032576494
boxing -0.4218905371283183
seaquest -0.4761991002148161


In [82]:
# Second comparison: BYOL sweep groups against a handful of fixed baselines.
uat_data = pandas.read_csv("uat_sweep.csv")
# NOTE(review): rebinds `byol_data`; another cell loads "byol_results_v7.csv"
# under the same name, so the result depends on execution order.
byol_data = pandas.read_csv("byol_results_v6.csv")

# The positional 25 is `min_len`: groups need more than 25 rows and at least
# 8 distinct games.
byol_sweeps, byol_names = split_on_fields(byol_data, ["tag", "noisy_nets", "distributional", 
                                                      "replay_ratio", "jumps", "classifier",
                                                      "target_update_interval"], 25,
                                         min_games=8)

# NOTE(review): `hnf_1_sweeps` / `hnf_1_names` are not used again in this cell.
hnf_1 = pandas.read_csv("uat_hnf_1.csv")
hnf_1_sweeps, hnf_1_names = split_on_fields(hnf_1, ["tag"], 30)

# NOTE(review): the `sweeps` / `names` returned here are overwritten by the
# assignments directly below, so this split of `uat_data` has no effect on
# the comparison.
sweeps, names = split_on_fields(uat_data, ["tag"], 30)
# `sweeps` and `names` are paired by position.
sweeps = byol_sweeps + \
        [
       fixed_jat_gl,
       uat_aug_bn,
       our_drq_dqn,
       atari_ilya_scores]
names =  byol_names + [
          "UAT", 
          "UAT no-dist pri aug",
          "Our DRQ",
          "old_drq"]

# Only the 8 top-ranked configurations enter the pairwise printout.
games_dfs, names = compare_dfs(sweeps, names, min_games=8, pairwise_top_n=8)


Nature Scores:
tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: none  targe: 1: Median: 0.558, Mean: 0.458
old_drq: Median: 0.267, Mean: 0.276
tag: byol_ematarget_test: Median: 0.423, Mean: 0.377
tag: nan  noisy: 1  distr: 0  repla: 64  jumps: 1: Median: 0.367, Mean: 0.372
tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: bilinear  targe: 1000: Median: 0.423, Mean: 0.335
tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: bilinear  targe: 1: Median: 0.335, Mean: 0.302
tag: byol_ql1_aug: Median: 0.328, Mean: 0.358
tag: byol_ql1_no_aug: Median: 0.366, Mean: 0.370
tag: nan  noisy: 1  distr: 0  repla: 32  jumps: 1: Median: 0.350, Mean: 0.347
tag: nan  noisy: 0  repla: 32: Median: 0.366, Mean: 0.312
tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 5  class: bilinear: Median: 0.336, Mean: 0.310
tag: byol_g: Median: 0.315, Mean: 0.356
tag: bignature_test_none_l: Median: 0.312, Mean: 0.354
tag: byol_l_j5_aug_no_t0: Median: 0.269, Mean: 0.302
tag: nan  noisy: 1  distr: 1 

tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: bilinear  targe: 1000 above tag: byol_ql1_aug: 4, -0.266, -0.325
tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: bilinear  targe: 1000 above tag: byol_ql1_no_aug: 4, -0.145, 0.476

tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: bilinear  targe: 1 above tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: none  targe: 1: 1, -0.741, -0.598
tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: bilinear  targe: 1 above old_drq: 4, 0.011, -0.946
tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: bilinear  targe: 1 above tag: byol_ematarget_test: 3, -0.209, -0.204
tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: bilinear  targe: 1 above tag: nan  noisy: 1  distr: 0  repla: 64  jumps: 1: 4, -0.005, 0.416
tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: bilinear  targe: 1 above tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: bilinear  targe: 1000: 3, -0.123, -0.111
tag: na

In [97]:
# For every compared configuration, print its games sorted by descending
# human-normalized score, showing the raw and the normalized score.
for i in range(len(names)):
    print(names[i])
    # argsort of the negated column gives positions in descending order.
    indices = np.array(list(np.argsort(-games_dfs[i]["GameScoreNormalized"])))
    print(games_dfs[i].iloc[indices][["GameScoreAverage", "GameScoreNormalized"]])
    print()

tag: byol_ql1_aug
                 GameScoreAverage  GameScoreNormalized
game                                                  
krull                    3520.596             1.801027
boxing                     16.118             1.334833
kangaroo                 3693.400             1.220717
road_runner              9345.000             1.191485
jamesbond                 237.900             0.762966
assault                   605.976             0.738214
kung_fu_master          16780.000             0.735014
freeway                    19.806             0.669122
breakout                   17.998             0.565903
frostbite                2032.500             0.460780
bank_heist                311.520             0.402382
battle_zone             14948.000             0.361439
crazy_climber           18945.400             0.325958
up_n_down                3605.680             0.275299
pong                      -12.110             0.243343
gopher                    748.240             0

In [294]:

from sklearn import linear_model
import sklearn

# For each BYOL sweep group, regress GameScoreNatureNormalized on one-hot
# game indicators plus the ModelNCELoss column and print the fit summary.
# NOTE(review): in this notebook GameScoreNatureNormalized is only added by
# compare_dfs to its per-game frames; confirm the frames in `byol_sweeps`
# actually carry that column.
# NOTE(review): the loop variable rebinds the module-level name `df`.
for df, name in zip(byol_sweeps, byol_names):

    # sns.jointplot(df.ModelNCELoss,
    #               df.GameScoreDERNormalized,
    #               kind="reg", stat_func=r2)
    import statsmodels.api as sm
    # NOTE(review): `sklearn.preprocessing` is reached through the bare
    # `sklearn` package; this presumably works only because another import
    # has already loaded the submodule — confirm.
    preprocessing = sklearn.preprocessing.OneHotEncoder()
    # One indicator column per distinct game.
    game_vars = np.array(list(df.game)).reshape(-1, 1)
    game_vars = preprocessing.fit_transform(game_vars).todense()
    byol_losses = np.array(list(df.ModelNCELoss)).reshape(-1, 1)
#     byol_losses = sklearn.preprocessing.scale(byol_losses)
    # Design matrix: game indicators followed by the loss column (no
    # separate intercept column is added).
    variables = np.concatenate([game_vars, byol_losses], axis=1)
    targets = np.array(list(df.GameScoreNatureNormalized)).reshape(-1, 1)

    # NOTE(review): GLS is called without a `sigma` argument — confirm this
    # is meant to be an ordinary least-squares fit.
    regression = sm.GLS(targets, variables).fit()
#     regression.fit(variables, targets)

    print(name)
    print("Regression Coefficients:")
    print(regression.summary())
    print()

tag: nan  noisy: 1  distr: 1  repla: 32  jumps: 1  class: bilinear  targe: 1
Regression Coefficients:
                            GLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.366
Model:                            GLS   Adj. R-squared:                  0.206
Method:                 Least Squares   F-statistic:                     2.287
Date:                Fri, 19 Jun 2020   Prob (F-statistic):            0.00176
Time:                        11:00:55   Log-Likelihood:                -110.82
No. Observations:                 130   AIC:                             275.6
Df Residuals:                     103   BIC:                             353.1
Df Model:                          26                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------

In [135]:
# Display whatever `byol_losses` currently holds in the kernel.
# NOTE(review): in this notebook the name is only assigned inside the
# regression loop, so this shows leftover state from that loop — confirm.
byol_losses

array([[ 1.46184365],
       [ 1.28201339],
       [ 0.03169247],
       [ 0.46597327],
       [ 1.22099737],
       [ 0.20654752],
       [ 1.04586147],
       [ 1.53582798],
       [-0.33333608],
       [ 1.81306034],
       [ 1.55166359],
       [-1.07056921],
       [ 1.32591071],
       [ 2.29228583],
       [-0.58003067],
       [-1.03890424],
       [ 1.59842194],
       [-0.92972404],
       [-1.03590448],
       [-0.85290183],
       [-0.59149245],
       [-0.83804447],
       [ 0.50157612],
       [ 0.71582237],
       [-0.69820863],
       [ 1.01762519],
       [ 0.47943795],
       [-0.07601039],
       [-0.16562844],
       [-0.0549522 ],
       [ 0.58269311],
       [-0.12611077],
       [ 1.62707322],
       [-0.54162362],
       [-0.32499082],
       [ 1.40936847],
       [-0.04238934],
       [-1.11359713],
       [-0.10068766],
       [-0.28507367],
       [ 1.4454332 ],
       [ 0.57314194],
       [-0.59304822],
       [ 1.01932884],
       [-0.42944102],
       [ 0

In [84]:
# Print the integers 8995820..8996055 as one space-separated line.
# NOTE(review): the purpose of these numbers is not evident from the
# notebook (possibly job ids) — confirm or remove.
print(" ".join(list(map(str, range(8995820, 8996056)))))

8995820 8995821 8995822 8995823 8995824 8995825 8995826 8995827 8995828 8995829 8995830 8995831 8995832 8995833 8995834 8995835 8995836 8995837 8995838 8995839 8995840 8995841 8995842 8995843 8995844 8995845 8995846 8995847 8995848 8995849 8995850 8995851 8995852 8995853 8995854 8995855 8995856 8995857 8995858 8995859 8995860 8995861 8995862 8995863 8995864 8995865 8995866 8995867 8995868 8995869 8995870 8995871 8995872 8995873 8995874 8995875 8995876 8995877 8995878 8995879 8995880 8995881 8995882 8995883 8995884 8995885 8995886 8995887 8995888 8995889 8995890 8995891 8995892 8995893 8995894 8995895 8995896 8995897 8995898 8995899 8995900 8995901 8995902 8995903 8995904 8995905 8995906 8995907 8995908 8995909 8995910 8995911 8995912 8995913 8995914 8995915 8995916 8995917 8995918 8995919 8995920 8995921 8995922 8995923 8995924 8995925 8995926 8995927 8995928 8995929 8995930 8995931 8995932 8995933 8995934 8995935 8995936 8995937 8995938 8995939 8995940 8995941 8995942 8995943 8995944 