In [1]:
import pandas
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter

from scipy import stats
def r2(x, y):
    return stats.pearsonr(x, y)[0] ** 2

In [2]:
der_scores = dict(
    alien=739.9, 
    amidar=188.6,
    assault=431.2,
    asterix=470.8,
    bank_heist=51.0,
    battle_zone=10124.6,
    boxing=0.2,
    breakout=1.9, 
    chopper_command=861.8,
    crazy_climber=16185.3,
    demon_attack=508, 
    freeway=27.9, 
    frostbite=866.8,
    gopher=349.5, 
    hero=6857.0, 
    jamesbond=301.6,
    kangaroo=779.3,
    krull=2851.5,
    kung_fu_master=14346.1,
    ms_pacman=1204.1, 
    pong=-19.3,
    private_eye=97.8,
    qbert=1152.9,
    road_runner=9600.0,
    seaquest=354.1, 
    up_n_down=2877.4,
)

simple_scores = dict(
    alien=616.9, 
    amidar=88,
    assault=527.2,
    asterix=1128.3,
    bank_heist=34.2,
    battle_zone=5184.4,
    boxing=9.1,
    breakout=16.4, 
    chopper_command=1246.9,
    crazy_climber=62583.6,
    demon_attack=208.1, 
    freeway=20.3, 
    frostbite=254.7,
    gopher=771.0, 
    hero=2656.6, 
    jamesbond=125.3,
    kangaroo=323.1,
    krull=4539.9,
    kung_fu_master=17257.2,
    ms_pacman=1480.0, 
    pong=12.8,
    private_eye=58.3,
    qbert=1288.8,
    road_runner=5640.6,
    seaquest=683.3, 
    up_n_down=3350.3,
)

sunrise_scores = dict(
    alien=872, 
    amidar=122.6,
    assault=594.8,
    asterix=755.0,
    bank_heist=266.7,
    battle_zone=15700,
    boxing=6.7,
    breakout=1.8, 
    chopper_command=1040,
    crazy_climber=22230,
    demon_attack=919.8, 
    freeway=30.2, 
    frostbite=2026.7,
    gopher=654.7, 
    hero=8072.5, 
    jamesbond=425.0,
    kangaroo=2726.7,
    krull=3171.9,
    kung_fu_master=9896.7,
    ms_pacman=1482.3,
    pong=-13.8,
    private_eye=100,
    qbert=1830.8,
    road_runner=11913.3,
    seaquest=570.7,
    up_n_down=3522.0,
)

random_scores = dict(
    alien=227.8,
    amidar=5.8,
    assault=222.4,
    asterix=210.0,
    bank_heist=14.2,
    battle_zone=2360,
    boxing=0.1,
    breakout=1.7, 
    chopper_command=811.0,
    crazy_climber=10780.5, 
    demon_attack=152.1,
    freeway=0.0, 
    frostbite=65.2,
    gopher=257.6, 
    hero=1027.0, 
    jamesbond=29.0,
    kangaroo=52.0,
    krull=1598.0,
    kung_fu_master=258.5,
    ms_pacman=307.3, 
    pong=-20.7,
    private_eye=24.9,
    qbert=163.9,
    road_runner=11.5, 
    seaquest=68.4,
    up_n_down=533.4
)


curl_scores = dict(
    alien=558.2,
    amidar=142.1,
    assault=600.6,
    asterix=734.5,
    bank_heist=131.6,
    battle_zone=14870.,
    boxing=1.2,
    breakout=4.9,
    chopper_command=1058.5,
    crazy_climber=12146.5, 
    demon_attack=817.6,
    freeway=26.7, 
    frostbite=1181.3,
    gopher=669.3, 
    hero=6279.3, 
    jamesbond=471.0,
    kangaroo=872.5,
    krull=4229.6,
    kung_fu_master=14307.8,
    ms_pacman=1465.5, 
    pong=-16.5,
    private_eye=218.4,
    qbert=1042.4,
    road_runner=5661., 
    seaquest=384.5,
    up_n_down=2955.2
)

human_scores = dict(
    alien=7127.7,
    amidar=1719.5,
    assault=742,
    asterix=8503.3,
    bank_heist=753.1,
    battle_zone=37187.5,
    boxing=12.1,
    breakout=30.5,
    chopper_command=7387.8,
    crazy_climber=35829.4,
    demon_attack=1971,
    freeway=29.6,
    frostbite=4334.7,
    gopher=2412.5,
    hero=30826.4,
    jamesbond=302.8,
    kangaroo=3035.0, 
    krull=2665.5, 
    kung_fu_master=22736.3,
    ms_pacman=6951.6,
    pong=14.6,
    private_eye=69571.3,
    qbert=13455.0, 
    road_runner=7845.0,
    seaquest=42054.7,
    up_n_down=11693.2
)

otrainbow_scores = dict(
    alien=824.7,
    amidar=82.8,
    assault=351.9,
    asterix=628.5,
    bank_heist=182.1,
    battle_zone=4060.6,
    boxing=2.5,
    breakout=9.8,
    chopper_command=1033.3,
    crazy_climber=21327.8,
    demon_attack=711.8,
    freeway=25.0,
    frostbite=231.6,
    gopher=778,
    hero=6458.8,
    jamesbond=112.3,
    kangaroo=605.4, 
    krull=3277.9, 
    kung_fu_master=5722.2,
    ms_pacman=941.9,
    pong=1.3,
    private_eye=100,
    qbert=509.3, 
    road_runner=2696.7,
    seaquest=286.9,
    up_n_down=2847.6,
)


nature_scores = dict(
    alien=1620, amidar=978, assault=4280.4,
    asterix=4359.0, bank_heist=455, battle_zone=29900.,
    boxing=88, breakout=385.5, chopper_command=6126.,
    crazy_climber=110763, demon_attack=12149.4, freeway=30.8,
    frostbite=797.4, gopher=8777.4, hero=20437.8, jamesbond=768.5,
    kangaroo=7259., krull=8422.3, kung_fu_master=26059.,
    ms_pacman=3085.6, pong=19.5, private_eye=146.7,
    qbert=13117.3, road_runner=39544.0, seaquest=5860.6, up_n_down=9989.9
)

ilya_scores = dict([
 ['alien', 771.2],
 ['amidar', 102.8],
 ['assault', 452.4],
 ['asterix', 603.5],
 ['bank_heist', 168.9],
 ['battle_zone', 12954.0],
 ['boxing', 6.0],
 ['breakout', 16.1],
 ['chopper_command', 780.3],
 ['crazy_climber', 20516.5],
 ['demon_attack', 1113.4],
 ['freeway', 9.8],
 ['frostbite', 331.1],
 ['gopher', 636.3],
 ['hero', 3736.3],
 ['jamesbond', 236.0],
 ['kangaroo', 940.6],
 ['krull', 4018.1],
 ['kung_fu_master', 9111.0],
 ['ms_pacman', 960.5],
 ['pong', -8.5],
 ['private_eye', -13.6],
 ['qbert', 854.4],
 ['road_runner', 8895.1],
 ['seaquest', 301.2],
 ['up_n_down', 3180.8]])

def to_df(score_dict):
    df = pandas.DataFrame(np.array(list(score_dict.values())).reshape(-1, 1),
                                     index=list(score_dict.keys()),
                                     columns=["GameScoreAverage"])
    return df

atari_der_scores = to_df(der_scores)
atari_otrainbow_scores = to_df(otrainbow_scores)
atari_human_scores = to_df(human_scores)
atari_random_scores = to_df(random_scores)
atari_nature_scores = to_df(nature_scores)
atari_ilya_scores = to_df(ilya_scores)
atari_curl_scores = to_df(curl_scores)
atari_simple_scores = to_df(simple_scores)
atari_sunrise_scores = to_df(sunrise_scores)



atari_nature_scores["game"] = list(nature_scores.keys())
atari_der_scores["game"] = list(der_scores.keys())
atari_otrainbow_scores["game"] = list(otrainbow_scores.keys())
atari_human_scores["game"] = list(human_scores.keys())
atari_random_scores["game"] = list(random_scores.keys())
atari_ilya_scores["game"] = list(ilya_scores.keys())
atari_curl_scores["game"] = list(curl_scores.keys())
atari_simple_scores["game"] = list(simple_scores.keys())
atari_sunrise_scores["game"] = list(sunrise_scores.keys())

In [507]:
def split_on_fields(df, fields=["n_step", "encoder", "tag",
                                 "batch_size", "replay_ratio",
                                 "batch_t", "jumps", "nce",
                                 "use_all_targets", "time_contrastive",
                                 "model_rl_weight", "detach_model"],
                   min_len=0,
                   min_games=0,
                   show_all_fields=False):
    strings = [""]
    df.tag = [str(t).replace("q_l1", "ql1") for t in df.tag]
    dfs = [df]
    
    for field in fields:
        new_dfs = []
        new_strs = []
        for df, string in zip(dfs, strings):
            if not show_all_fields and len(set(df[field].astype(str))) == 1:
                new_dfs.append(df)
                new_strs.append(string)
            else:
                for value in set(df[field].astype(str)):
                    new_str = string + "{}:{} ".format(field.replace("algo.", "")[:5], value)
                    new_df = df[np.array([str(x) == str(value) for x in df[field]])]
                    if len(new_df) > min_len and len(set(new_df.game)) >= min_games:
                        new_dfs.append(new_df)
                        new_strs.append(new_str)
    
        dfs = new_dfs
        strings = new_strs
        
    return dfs, [string[:-1] for string in strings]

def find_missing_pairs(dfs, names):
    games = []
    for df in dfs:
        games += list(df.game)
    games = set(games)
    
    for df, name in zip(dfs, names):
        seeds = set(df.seed)
        found_pairs = set([(game, seed) for game, seed in zip(df.game, df.seed)])

        missing_pairs = []
        for seed in seeds:
            for game in games:
                if (game, seed) not in found_pairs:
                    missing_pairs.append(game, seed)
                    "\"--game {} --seed {}\""

        missing_pairs_strs = ["\"--game {} --seed {}\"".format(game, seed) for game, seed in missing_pairs]
        missing_pairs = " ".join(missing_pair_strs)

        print(name, len(df))
        print("PAIRS=( {} )".format(missing_pairs))
        print()
        
        
def produce_missing_pair_runs(dfs,  games, seeds=range(1, 11), wandb_dir="\"/home/schwarzm/Github/mpr-release/\""):
    i = 1
    for df in dfs:
        found_pairs = set([(game, seed) for game, seed in zip(df.game, df.seed)])

        missing_pairs = []
        for game in games:
            for seed in seeds:
                if (game, seed) not in found_pairs:
                    missing_pairs.append((game, seed))
    
        arguments = ["--target-update-tau {}".format(list(df.target_update_tau)[0]),
                     "--jumps {}".format(list(df.jumps)[0]),
                     "--noisy-nets-std {}".format(list(df.noisy_nets_std)[0]),
                     "--eps-init {}".format(list(df.eps_init)[0]),
                     "--eps-steps {}".format(int(list(df["algo.eps_steps"])[0])),
                     "--min-steps-learn {}".format(int(list(df["algo.min_steps_learn"])[0])),
                     "--tag {}".format(list(df.tag)[0]),
                     "--augmentation {}".format(
                         list(df.augmentation)[0].replace("[", "").replace("]", "").replace('"', '').replace(",", " ")),
                     "--target-augmentation {}".format(int(list(df.target_augmentation)[0])),
                     "--dropout {}".format(list(df.dropout)[0]),
                     "--momentum-tau {}".format(list(df.momentum_tau)[0]),
                     "--shared-encoder {}".format(list(df.shared_encoder)[0]),
                     "--momentum-encoder {}".format(list(df.momentum_encoder)[0]),
                     "--model-mpr-weight {}".format(list(df.model_mpr_weight)[0]),
                     "--t0-mpr-loss-weight {}".format(list(df.t0_mpr_loss_weight)[0]),
                     "--mpr {}".format(int(list(df.mpr)[0])),
                     "--residual-tm {}".format(int(list(df.residual_tm)[0]))]
        command = "./rlpyt_submit.sh 5 1 rerun_{} --game {} --num-logs 1 --seed {} --wandb-dir {} "
        
        for game, seed in missing_pairs:
            string = command.format(i, game, seed, wandb_dir)
            i += 1
            print(string + " ".join(arguments))

In [565]:
def mean_pairwise_score(df1, df2, random_scores):
    s1 = (df1["GameScoreAverage"] - random_scores["GameScoreAverage"])/\
          (np.clip(df2["GameScoreAverage"] - random_scores["GameScoreAverage"], 
                   0.1*random_scores["GameScoreAverage"], None))
    s2 = (df2["GameScoreAverage"] - random_scores["GameScoreAverage"])/\
          (np.clip(df1["GameScoreAverage"] - random_scores["GameScoreAverage"], 
                   0.1*random_scores["GameScoreAverage"], None))
    score = (s1.mean() - s2.mean())
    return score

def median_pairwise_score(df1, df2, random_scores, quantile=0.5):
    s1 = (df1["GameScoreAverage"] - random_scores["GameScoreAverage"])/\
          (df2["GameScoreAverage"] - random_scores["GameScoreAverage"])
    s2 = (df2["GameScoreAverage"] - random_scores["GameScoreAverage"])/\
          (df1["GameScoreAverage"] - random_scores["GameScoreAverage"])
    return (np.quantile(s1 - s2, 0.5, interpolation="midpoint"))

def group_dfs(dfs):
    proc_dfs = []
    for df in dfs:
        if "seed" in df.keys():
            proc_dfs.append(df.groupby(["game", "seed"]).mean().groupby("game").mean())
        else:
            proc_dfs.append(df.groupby("game").mean())
            
    return proc_dfs

def compare_dfs(dfs, names, min_games=0, pairwise_top_n=-1, quantile=0.5, sort_key=np.median):
    if min_games > 0:
        games = Counter()
        games_count = [Counter(df.game) for df in dfs]
        for count in games_count:
            games = games + count
        mandatory_games = set([g[0] for g in games.most_common()[:min_games]])
        usable_dfs = []
        usable_names = []
        for df, name in zip(dfs, names):
            if set(df.game) >= mandatory_games:
                usable_dfs.append(df)
                usable_names.append(name)
        dfs = usable_dfs
        names = usable_names
    games = set(dfs[0].game)
    for df in dfs[1:]:
        games = games & set(df.game)
    
    games_dfs = group_dfs(dfs)
    
    games_dfs = [df[[g in games for g in df.index]] for df in games_dfs]
    for df in games_dfs:
        gsn = []
        for game, score in zip(df.index, df["GameScoreAverage"]):
            gsn.append((score - random_scores[game])/(human_scores[game] - random_scores[game]))

        df["GameScoreNormalized"] = np.array(gsn)
    
    for df in games_dfs:
        gsn = []
        for game, score in zip(df.index, df["GameScoreAverage"]):
            gsn.append((score - random_scores[game])/(nature_scores[game] - random_scores[game]))

        df["GameScoreNatureNormalized"] = np.array(gsn)
        
    for df in games_dfs:
        gsn = []
        for game, score in zip(df.index, df["GameScoreAverage"]):
            gsn.append((score - random_scores[game])/(der_scores[game] - random_scores[game]))

        df["GameScoreDERNormalized"] = np.array(gsn)
        
    scores = [-sort_key(df["GameScoreNormalized"]) for df in games_dfs]
    indices = np.argsort(scores)
    games_dfs = np.array(games_dfs)[indices]
    names = np.array(names)[indices]
    
    print()    
    print("Human Scores:")
    for df, name in zip(games_dfs, names):
        print("{0}: Median: {1:.3f}, Mean: {2:.3f}".format(name,
                                                           np.quantile(df["GameScoreNormalized"],
                                                                       quantile,
                                                                       interpolation="midpoint"),
                                                           df["GameScoreNormalized"].mean()))
    
    print()
    print("Nature Scores:")
    for df, name in zip(games_dfs, names):
        print("{0}: Median: {1:.3f}, Mean: {2:.3f}".format(name, 
                                                           np.quantile(df["GameScoreNatureNormalized"],
                                                                       quantile, 
                                                                       interpolation="midpoint"),
                                                           df["GameScoreNatureNormalized"].mean()))
        
    print()
    print("Comparison over {} games:".format(len(games)))
    if pairwise_top_n < 0:
        pairwise_top_n = len(dfs)
    pairwise_means = np.zeros((pairwise_top_n, pairwise_top_n))
    pairwise_medians = np.zeros((pairwise_top_n, pairwise_top_n))
    pairwise_games = np.zeros((pairwise_top_n, pairwise_top_n))
    game_random_scores = atari_random_scores[np.array([g in games for g in atari_random_scores.game])]
    for i in range(pairwise_top_n):
        for j in range(pairwise_top_n):
            if i == j:
                continue
            better_games = np.sum(games_dfs[i]["GameScoreAverage"] > games_dfs[j]["GameScoreAverage"])
            mean_comp_score = mean_pairwise_score(games_dfs[i], games_dfs[j], game_random_scores,)
            pairwise_means[i, j] = mean_comp_score
            median_comp_score = median_pairwise_score(games_dfs[i], games_dfs[j], game_random_scores, quantile=quantile)
            pairwise_medians[i, j] = median_comp_score
            pairwise_games[i, j] = better_games
            
    mean_condorcet_ranks = condorcet(pairwise_means, [df.GameScoreNormalized.mean() for df in games_dfs])
    median_condorcet_ranks = condorcet(pairwise_medians, 
                                       [np.quantile(df.GameScoreNormalized, quantile)
                                        for df in games_dfs])
    
    print("Mean Condorcet ranks:")
    for i, name in enumerate(names[mean_condorcet_ranks]):
        print("    ", i, name)
    print("Median Condorcet ranks:")
    for i, name in enumerate(names[median_condorcet_ranks]):
        print("    ", i, name)

    for i in range(pairwise_top_n):
        print()
        for j in range(pairwise_top_n):
            if i == j:
                continue
            print("{} above {}: {}, {:.3f}, {:.3f}".format(names[i], names[j],
                                                       pairwise_games[i, j], 
                                                       pairwise_medians[i, j],
                                                       pairwise_means[i, j]))

    return games_dfs, names


def condorcet(pairwise, tiebreaker):
    beats = pairwise > 0
    n_wins = np.sum(beats, -1)
    if max(n_wins) == len(pairwise) - 1:
        return np.argsort(-n_wins)
    else:
        print("Using tiebreaker: {}".format(tiebreaker))
        return np.argsort(-np.array(tiebreaker))
    

def sort_games(game_df, key="GameScoreNormalized"):
    indices = np.argsort(game_df[key])
    games_dfs[-1][key][indices]
    for i, game in enumerate(indices):
        i = i+1
        median = (i == len(game_df)//2 or i == len(game_df)//2+1)
        print("{} {} : {:.3f} {:.3f} {}".format(i, 
                                                game_df.index[game],
                                                game_df[key][game],
                                                game_df["GameScoreAverage"][game],
                                                "median" if median else ""))

In [203]:
def add_bootstrap(df, num_runs=5):
    games = set(df.game)
    games_dfs = [df]
    
    for game in games:
        game_df = df[df.game == game]
        
        indices = np.random.randint(0, len(game_df), (num_runs,))
        
        new_df = game_df.iloc[indices]
        games_dfs.append(new_df)
        
    return pandas.concat(games_dfs)

def bootstrap(df, num_runs=5):
    games = set(df.game)
    games_dfs = []
    
    for game in games:
        game_df = df[df.game == game]
        
        indices = np.random.randint(0, len(game_df), (num_runs,))
        
        new_df = game_df.iloc[indices]
        games_dfs.append(new_df)
        
    return pandas.concat(games_dfs)

def seed_bootstrap(df, num_runs=5):
    seed_dfs = []
    seeds_used = []
    
    seeds = list(set(df.seed))
    for i in range(num_runs):
        seed = seeds[np.random.randint(len(seeds))]
        seed_df = df[df.seed == seed]
        
        seed_dfs.append(seed_df)
        seeds_used.append(seed)

    return pandas.concat(seed_dfs)

def add_norm_scores(df):
    games = set(df.game)
    games_dfs = [df[df.game == game] for game in games]
    for game, games_df in zip(games, games_dfs):
        games_df["GameScoreNormalized"] = (np.array(games_df["GameScoreAverage"]) - random_scores[game])\
                                          /(human_scores[game] - random_scores[game])
        games_df["GameScoreNatureNormalized"] = (np.array(games_df["GameScoreAverage"]) - random_scores[game])\
                                          /(nature_scores[game] - random_scores[game])
        
    df = pandas.concat(games_dfs)
    return df
    
def bootstrap_distribution(df, samples=100, runs=5, bs_type="normal"):
    medians = []
    means = []
    seeds = []
    
    df = add_norm_scores(df)
    
    if bs_type == "add":
        bootstrap_fn = add_bootstrap
    elif bs_type == "seed":
        bootstrap_fn = seed_bootstrap
    else:
        bootstrap_fn = bootstrap
    
    for i in range(samples):
        b_df = bootstrap_fn(df, runs)
        b_df = b_df.groupby("game").mean()
        medians.append(b_df.GameScoreNatureNormalized.median())
        means.append(b_df.GameScoreNatureNormalized.mean())
                
    sns.distplot(medians)
    plt.title("Median Distribution")
    plt.ylabel("Count")
    plt.xlabel("Median HNS")
    plt.show()
    sns.distplot(means)
    plt.title("Mean Distribution")
    plt.ylabel("Count")
    plt.xlabel("Mean HNS")
    plt.show()
    
    return np.array(medians), np.array(means)

In [6]:
medians, means = bootstrap_distribution(byol_sweeps[2], samples=100, runs=10, bs_type="normal")
print(np.quantile(medians, [0.025, 0.25, 0.5, 0.75, 0.975]))
print(np.quantile(means, [0.025, 0.25, 0.5, 0.75, 0.975]))

NameError: name 'byol_sweeps' is not defined

In [582]:
byol_data = pandas.read_csv("retuning.csv")
# byol_data = byol_data[np.array([augmentation != '["none"]' for augmentation in byol_data.augmentation])]
byol_data = byol_data[np.array([tm != 1 for tm in byol_data.residual_tm])]
# byol_data = byol_data[np.array([ta == 0 for ta in byol_data.target_augmentation])]
# byol_data = byol_data[np.array([g == "jamesbond" for g in byol_data.game])]
# byol_data = byol_data[byol_data.tag != "release_search_fixed"]
# byol_data = byol_data[np.array([seed in (1,2,3,4,5) for seed in byol_data.seed])]
byol_sweeps, byol_names = split_on_fields(byol_data, 
                                          ["augmentation",
                                           "tag",
                                           "jumps",
                                           "target_update_tau",
                                           "target_update_interval",
                                           "t0_mpr_loss_weight",
                                           "model_mpr_weight",
                                           "noisy_nets_std",
                                           "momentum_tau",
                                           "eps_init",
                                           "mpr",
                                           "algo.min_steps_learn",
                                           "algo.eps_steps",
#                                            "residual_tm",
                                           "dropout",
                                          ],
                                         min_games=26,show_all_fields=True)

# produce_missing_pair_runs(byol_sweeps, games=set(byol_data.game), wandb_dir="\"./\"")
for sweep, name in zip(byol_sweeps, byol_names):
    print(name, len(sweep), len(sweep.groupby(["game", "seed"]).mean().reset_index()))

augme:["none"] tag:release_search_no_aug jumps:5 targe:1.0 targe:1 t0_mp:0 model:5 noisy:0.5 momen:0.01 eps_i:1 mpr:1 min_s:2000 eps_s:2001 dropo:0.5 260 260
augme:["none"] tag:release_search_no_aug jumps:0 targe:1.0 targe:1 t0_mp:0 model:5 noisy:0.5 momen:0.01 eps_i:1 mpr:0 min_s:2000 eps_s:2001 dropo:0.0 260 260
augme:["none"] tag:release_search_no_aug jumps:0 targe:1.0 targe:1 t0_mp:0 model:5 noisy:0.5 momen:0.01 eps_i:1 mpr:0 min_s:2000 eps_s:2001 dropo:0.5 260 260
augme:["shift","intensity"] tag:mpr_tau1_control jumps:5 targe:1.0 targe:1 t0_mp:0 model:5 noisy:0.5 momen:1.0 eps_i:1 mpr:1 min_s:2000 eps_s:2001 dropo:0.0 360 259
augme:["shift","intensity"] tag:mpr_shared_control jumps:5 targe:1.0 targe:1 t0_mp:0 model:5 noisy:0.5 momen:1.0 eps_i:1 mpr:1 min_s:2000 eps_s:2001 dropo:0.0 350 260
augme:["shift","intensity"] tag:release_search_fixed jumps:5 targe:1.0 targe:1 t0_mp:0 model:5 noisy:0.5 momen:0.01 eps_i:1 mpr:1 min_s:2000 eps_s:2001 dropo:0.0 269 260
augme:["shift","intensit

In [583]:
names =  [
          *byol_names,
          "der", 
          "curl",
          "drq",
          "simple",
          "random",
          "otrainbow",
          "sunrise",
             ]
dfs = [
       *byol_sweeps,
       atari_der_scores, 
       atari_curl_scores,
       atari_ilya_scores,
       atari_simple_scores,
       atari_random_scores,
       atari_otrainbow_scores,
       atari_sunrise_scores,
                         ]
games_dfs, names = compare_dfs(dfs, names, min_games=26, sort_key=np.median, pairwise_top_n=-1)


Human Scores:
augme:["shift","intensity"] tag:release_search_fixed jumps:5 targe:1.0 targe:1 t0_mp:0 model:5 noisy:0.5 momen:0.01 eps_i:1 mpr:1 min_s:2000 eps_s:2001 dropo:0.0: Median: 0.396, Mean: 0.622
augme:["shift","intensity"] tag:mpr_tau1_control jumps:5 targe:1.0 targe:1 t0_mp:0 model:5 noisy:0.5 momen:1.0 eps_i:1 mpr:1 min_s:2000 eps_s:2001 dropo:0.0: Median: 0.380, Mean: 0.612
augme:["shift","intensity"] tag:release_search_fixed jumps:0 targe:1.0 targe:1 t0_mp:0 model:5 noisy:0.5 momen:0.01 eps_i:1 mpr:0 min_s:2000 eps_s:2001 dropo:0.0: Median: 0.346, Mean: 0.480
augme:["none"] tag:release_search_no_aug jumps:5 targe:1.0 targe:1 t0_mp:0 model:5 noisy:0.5 momen:0.01 eps_i:1 mpr:1 min_s:2000 eps_s:2001 dropo:0.5: Median: 0.307, Mean: 0.463
sunrise: Median: 0.305, Mean: 0.445
augme:["shift","intensity"] tag:mpr_shared_control jumps:5 targe:1.0 targe:1 t0_mp:0 model:5 noisy:0.5 momen:1.0 eps_i:1 mpr:1 min_s:2000 eps_s:2001 dropo:0.0: Median: 0.278, Mean: 0.515
augme:["shift","int

In [584]:
for i in range(len(names)):
    print(names[i])
    indices = np.array(list(np.argsort(-games_dfs[i]["GameScoreNormalized"])))
    games_dfs[i]["gshn"] = games_dfs[i].GameScoreNormalized
    games_dfs[i]["gsnn"] = games_dfs[i].GameScoreNatureNormalized
    games_dfs[i]["gsa"] = games_dfs[i].GameScoreAverage
    print(games_dfs[i].iloc[indices][["gsa", "gshn", "gsnn"]])
    print()

augme:["shift","intensity"] tag:release_search_fixed jumps:5 targe:1.0 targe:1 t0_mp:0 model:5 noisy:0.5 momen:0.01 eps_i:1 mpr:1 min_s:2000 eps_s:2001 dropo:0.0
                       gsa      gshn      gsnn
game                                          
boxing              42.670  3.547500  0.484300
krull             3518.366  1.798938  0.281401
road_runner      11583.600  1.477258  0.292724
kangaroo          3796.600  1.255313  0.519578
jamesbond          372.300  1.253835  0.464233
breakout            25.525  0.827257  0.062077
crazy_climber    30818.800  0.799967  0.200418
freeway             23.357  0.789088  0.758344
up_n_down         8253.640  0.691790  0.816395
assault            558.810  0.647440  0.082900
kung_fu_master   13261.100  0.578464  0.503967
pong                -3.712  0.481246  0.422587
battle_zone      16296.000  0.400144  0.506028
bank_heist         304.420  0.392773  0.658394
frostbite         1049.660  0.230580  1.344523
demon_attack       498.360  0.190368  0

In [529]:
for i, n in enumerate(names):
    print(i, n)

0 augme:["none"] tag:release_search_no_aug jumps:5 targe:1.0 targe:1 noisy:0.5 eps_i:1 mpr:1 min_s:2000 eps_s:2001 resid:0.0 dropo:0.5
1 sunrise
2 drq
3 otrainbow
4 curl
5 der
6 simple
7 random


In [530]:
simple = games_dfs[list(names).index("simple")]
random = games_dfs[list(names).index("random")]
human = games_dfs[list(names).index("human")]
der = games_dfs[list(names).index("der")]
otrainbow = games_dfs[list(names).index("otrainbow")]
sunrise = games_dfs[list(names).index("sunrise")]
curl = games_dfs[list(names).index("curl")]
drq = games_dfs[list(names).index("drq")]
no_aug = games_dfs[list(names).index("tag: nd_1k_no_norm_naug_dist_byol_ql1_squared_nogs_m5")]
aug = games_dfs[list(names).index("tag: dist_byol_ql1_squared_nogs_m5_norm_ql1_va")]


table_names = ["Random", "Human", "SimPLe", "DER", "OTRainbow", "CuRL", "DrQ", "Sunrise", "MPM", "MPM+Aug"]
table_dfs = [random, 
             human, 
             simple,
             der, 
             otrainbow,
             curl,
             drq,
             sunrise,
             no_aug,
             aug]
results_df = pandas.DataFrame({name:df.GameScoreAverage for df, name in zip(table_dfs, table_names)},
                              index=games_dfs[0].index)
for i in range(len(results_df)):
    results_df.iloc[i] = ["{:0.1f}".format(v) for v in results_df.iloc[i]]

results_df.loc["Median Human-Norm'd"] = ["{:0.3f}".format(float(df.GameScoreNormalized.median())) for df in table_dfs]
results_df.loc["Mean Human-Norm'd"] = ["{:0.3f}".format(float(df.GameScoreNormalized.mean())) for df in table_dfs]
results_df.loc["Median DQN-Norma'd"] = ["{:0.3f}".format(float(df.GameScoreNatureNormalized.median())) for df in table_dfs]
results_df.loc["Mean DQN-Norm'd"] = ["{:0.3f}".format(float(df.GameScoreNatureNormalized.mean())) for df in table_dfs]
results_df.loc["# Superhuman"] = ["{}".format(int((df.GameScoreNormalized >= 1.).sum())) for df in table_dfs]

results_df.index = [g.replace("_", " ").title() for g in results_df.index]
print(results_df.to_latex(float_format="%.2f"))

ValueError: 'human' is not in list

In [531]:
simple = games_dfs[list(names).index("simple")]
random = games_dfs[list(names).index("random")]
human = games_dfs[list(names).index("human")]
der = games_dfs[list(names).index("drq")]
curl = games_dfs[list(names).index("curl")]
drq = games_dfs[list(names).index("drq")]


table_names = ["Random", "Human", "SimPLe", "DER", "CuRL", "DrQ", "MPM", "MPM+Aug"]
table_dfs = [random, 
             human, 
             simple,
             der, 
             curl,
             drq,
             games_dfs[8],
             games_dfs[1]]
results_df = pandas.DataFrame({name:df.GameScoreAverage for df, name in zip(table_dfs, table_names)},
                              index=games_dfs[0].index)
for i in range(len(results_df)):
    results_df.iloc[i] = ["{:0.1f}".format(v) for v in results_df.iloc[i]]

results_df.loc["Median Human-Normalized"] = ["{:0.3f}".format(float(df.GameScoreNormalized.median())) for df in table_dfs]
results_df.loc["Mean Human-Normalized"] = ["{:0.3f}".format(float(df.GameScoreNormalized.mean())) for df in table_dfs]
results_df.loc["Median DQN-Normalized"] = ["{:0.3f}".format(float(df.GameScoreNatureNormalized.median())) for df in table_dfs]
results_df.loc["Mean DQN-Normalized"] = ["{:0.3f}".format(float(df.GameScoreNatureNormalized.mean())) for df in table_dfs]

results_df.index = [g.replace("_", " ").title() for g in results_df.index]
print(results_df.to_latex(float_format="%.2f"))

ValueError: 'human' is not in list

In [None]:
corrs = []
import seaborn as sns

from scipy import stats
import matplotlib.pyplot as plt
def r2(x, y):
    return stats.pearsonr(x, y)[0]

for game in games_dfs[0].index:

    game_scores = [df.loc[game]["GameScoreAverage"] for df in games_dfs[2:]]
    hn_medians = [df["GameScoreNormalized"].median() for df in games_dfs[2:]]

    corr = r2(game_scores, hn_medians)
    corrs.append(corr)
    
#     sns.jointplot(game_scores, hn_medians, kind="reg", stat_func=r2)
#     plt.xlabel("Human-normalized score on {}".format(game))
#     plt.ylabel("Median human-normalized score on Atari26")
#     plt.show()
corrs = np.array(corrs)
print(np.sum(corrs**2 > 0.1))

indices = np.argsort(-corrs)
for i in indices:
    print(games_dfs[0].index[i], corrs[i])

In [None]:
produce_missing_pair_runs([df for df in byol_sweeps if len(df) > 0], set(byol_data.game))

In [580]:
byol_data = pandas.read_csv("retuning.csv")
byol_data = byol_data[np.array([str(game) in ("pong", 
                                         "kangaroo",
                                         "breakout", 
                                         "bank_heist",
                                         "crazy_climber", 
                                         "boxing", 
                                         "frostbite",
                                         "up_n_down",
                                         "battle_zone",
                                         "assault") for game in byol_data.game])]
# byol_data = byol_data[np.array([augmentation == '["none"]' for augmentation in byol_data.augmentation])]
byol_data = byol_data[np.array([tm != 1 for tm in byol_data.residual_tm])]
byol_sweeps, byol_names = split_on_fields(byol_data, 
                                          ["tag",
                                           "augmentation",
                                           "jumps",
                                           "target_update_tau",
                                           "target_update_interval",
                                           "noisy_nets_std",
                                           "eps_init",
                                           "algo.min_steps_learn",
                                           "algo.eps_steps",
                                           "residual_tm",
                                           "dropout",
                                           "mpr",
                                           "momentum_tau",
                                           "momentum_encoder",
                                          ],
                                          0,
                                         min_games=10,show_all_fields=False)


for sweep, name in zip(byol_sweeps, byol_names):
    print(name, len(sweep))
# find_missing_pairs(byol_sweeps, byol_names)

tag:mpr_tau1_control 144
tag:mpr_shared_control 135
tag:release_search_fixed jumps:5 targe:1.0 noisy:0.1 eps_i:0 min_s:500 100
tag:release_search_fixed jumps:5 targe:1.0 noisy:0.1 eps_i:0 min_s:2000 300
tag:release_search_fixed jumps:5 targe:1.0 noisy:0.1 eps_i:1 min_s:500 100
tag:release_search_fixed jumps:5 targe:1.0 noisy:0.1 eps_i:1 min_s:2000 eps_s:2001 113
tag:release_search_fixed jumps:5 targe:1.0 noisy:0.1 eps_i:1 min_s:2000 eps_s:5000 100
tag:release_search_fixed jumps:5 targe:1.0 noisy:0.3 min_s:2000 100
tag:release_search_fixed jumps:5 targe:1.0 noisy:1.0 eps_i:0 min_s:500 100
tag:release_search_fixed jumps:5 targe:1.0 noisy:1.0 eps_i:0 min_s:2000 100
tag:release_search_fixed jumps:5 targe:1.0 noisy:1.0 eps_i:1 min_s:500 100
tag:release_search_fixed jumps:5 targe:1.0 noisy:1.0 eps_i:1 min_s:2000 125
tag:release_search_fixed jumps:5 targe:1.0 noisy:0.5 eps_i:0 min_s:500 100
tag:release_search_fixed jumps:5 targe:1.0 noisy:0.5 eps_i:0 min_s:2000 100
tag:release_search_fixed ju

In [581]:
sweeps = byol_sweeps + \
        [
       atari_ilya_scores,
       atari_sunrise_scores,
       atari_curl_scores, atari_der_scores]
names =  byol_names + [
          "drq", "sunrise", "curl", "der"]

games_dfs, names = compare_dfs(sweeps,
                               names, 
                               min_games=10, 
                               pairwise_top_n=-1,
                               sort_key=lambda x: np.mean(sorted(x)[1:3]),
                               quantile=0.15)


Human Scores:
tag:release_search_fixed jumps:1 targe:0.01 noisy:0.5: Median: 0.409, Mean: 0.740
tag:release_search_fixed jumps:5 targe:1.0 noisy:0.1 eps_i:0 min_s:2000: Median: 0.402, Mean: 0.762
tag:release_search_fixed jumps:5 targe:1.0 noisy:0.5 eps_i:1 min_s:2000 eps_s:2001: Median: 0.396, Mean: 0.927
tag:release_search_fixed jumps:5 targe:1.0 noisy:0.5 eps_i:1 min_s:2000 eps_s:5000: Median: 0.381, Mean: 1.009
tag:mpr_tau1_control: Median: 0.380, Mean: 0.855
tag:release_search_fixed jumps:5 targe:1.0 noisy:0.5 eps_i:1 min_s:100: Median: 0.376, Mean: 0.761
tag:release_search_fixed jumps:5 targe:0.01 noisy:0.5: Median: 0.373, Mean: 0.919
tag:release_search_fixed jumps:5 targe:1.0 noisy:0.5 eps_i:1 min_s:500: Median: 0.373, Mean: 0.860
tag:release_search_fixed jumps:0 mpr:0: Median: 0.346, Mean: 0.603
tag:release_search_fixed jumps:5 targe:0.01 noisy:0.1: Median: 0.344, Mean: 0.755
tag:release_search_fixed jumps:5 targe:1.0 noisy:0.1 eps_i:0 min_s:500: Median: 0.340, Mean: 0.766
tag:

In [431]:
games_dfs = [s.groupby(["game", "seed"]).mean().groupby("game").mean() for s in byol_sweeps]

In [432]:
indices = np.argsort([-np.mean(sorted(df.GameScoreNormalized)[1:3]) for df in games_dfs])

In [433]:
for i in indices:
    print(byol_names[i], len(byol_sweeps[i]))
    indices = np.array(list(np.argsort(-games_dfs[i]["GameScoreNormalized"])))
    games_dfs[i]["gsn"] = games_dfs[i].GameScoreNormalized
    games_dfs[i]["gsnn"] = games_dfs[i].GameScoreNatureNormalized
    games_dfs[i]["gsa"] = games_dfs[i].GameScoreAverage
    print(games_dfs[i].iloc[indices][["gsa", "gsn", "gsnn"]])
    print()

jumps:1 targe:0.01 noisy:0.5 183
                     gsa       gsn      gsnn
game                                        
boxing            27.761  2.305083  0.385788
crazy_climber  40980.100  1.205626  0.292285
kangaroo        2481.800  0.814549  0.363307
assault          501.686  0.537502  0.089041
up_n_down       6256.490  0.512831  0.722375
breakout          16.102  0.500069  0.036050
pong              -3.803  0.478669  0.426692
battle_zone    16713.000  0.412117  0.599541
bank_heist       314.080  0.405847  0.721733
frostbite       1023.260  0.224396  3.641429

jumps:5 targe:1.0 noisy:0.1 eps_i:0 min_s:2000 300
                     gsa       gsn      gsnn
game                                        
boxing            31.069  2.580750  0.431925
up_n_down       9813.280  0.831545  1.171317
crazy_climber  29379.800  0.742520  0.180012
breakout          19.672  0.624028  0.044986
pong               1.211  0.620708  0.553308
kangaroo        1821.700  0.593262  0.264608
assault        

In [558]:
import sklearn
def make_variables(df, fields=[("target_update_interval", "target_update_tau"), 
                               "jumps", 
                               "target_augmentation",
                               "noisy_nets_std",
                               "eps_init", 
                               "algo.eps_steps",
                               "algo.min_steps_learn",
                               "residual_tm",
                               "momentum_tau",
                               "momentum_encoder",
                               "mpr",
                               "dropout",
                               ("game", "seed")],
                                drop=[0]):
    var_names = []
    variables = []
    
    for field in fields:
        if isinstance(field, tuple):
            new_name = "_".join(field)
            data = [list(map(str, df[f])) for f in field]
            df[new_name] = ["_".join(entry) for entry in zip(*data)]
            field = new_name
        var = np.array(list(map(str, df[field]))).reshape(-1, 1)
        var, names = to_one_hot(var, drop=drop)
        variables.append(var)
        names = [field + "_" + n for n in names]
        var_names.append(names)
        
    var_names.append(["const"])
    variables.append(np.ones((variables[0].shape[0], 1)))
    names = [name.replace("_x0", "") for name_list in var_names for name in name_list]
    variables = np.concatenate(variables, axis=1)
    variables = pandas.DataFrame(variables, columns=names)
    
    return variables   

def to_one_hot(data, drop=[0]):
    data = list(map(str, data))
    data = [val.replace("nan", "0.0") for val in data]
    values = set(data)
    value_dict = {v:i for i, v in enumerate(list(values))}
    
    np_values = np.zeros((len(data), len(values)))
    
    indices = [value_dict[v] for v in data]
    np_values[np.arange(len(data)), indices] = 1.
    
    for ind in drop:
        np_values[:, ind] = 0
        
    names = list(value_dict.keys())
    names = [n.replace("['", "").replace("']", "") for n in names]
        
    return np_values, names


def add_norm_scores(df, scores, name):
    games = set(df.game)
    games_dfs = [df[df.game == game] for game in games]
    for game, games_df in zip(games, games_dfs):
        games_df[name] = (np.array(games_df["GameScoreAverage"]) - random_scores[game])\
                                          /(scores[game] - random_scores[game])
        
    df = pandas.concat(games_dfs)
    return df


In [560]:
# for game in set(byol_data.game):

#     df = byol_data[byol_data.game == game]
df = byol_data#[byol_data.game == "battle_zone"]
dfs = [df.groupby(["game", "seed"]).mean().reset_index(drop=False) for df in byol_sweeps]
df = pandas.concat(dfs)
df.noisy_nets_std = np.around(df.noisy_nets_std, 3)
# df.noisy_nets_std = np.around(df.target_update_tau, 3)
# df.noisy_nets_std = np.around(df.target_update_interval, 3)

df = add_norm_scores(df, dict(df.groupby("game").mean().GameScoreAverage), "GameScoreSelfNormalized")

    
name = "All Sweeps"

# sns.jointplot(df.ModelNCELoss,
#               df.GameScoreDERNormalized,
#               kind="reg", stat_func=r2)
import statsmodels.api as sm

for drop in ([0], [-1]):
    variables = make_variables(df, drop=drop)
    targets = df.GameScoreNormalized
    targets = targets - targets.mean()
    targets.index = variables.index

    regression = sm.GLSAR(targets, variables, hasconst=True, family=sm.families.Gaussian())
    results = regression.fit()#_regularized(alpha=0.001, refit=True)
    #     regression.exog_names = names
    #     regression.fit(variables, targets)

    print("Regression Coefficients:")
    print("\n".join([line for line in str(results.summary()).split("\n") if "game" not in line]))
    print()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Regression Coefficients:
                            GLSAR Regression Results                           
Dep. Variable:     GameScoreNormalized   R-squared:                       0.468
Model:                           GLSAR   Adj. R-squared:                  0.448
Method:                  Least Squares   F-statistic:                     22.98
Date:                 Fri, 31 Jul 2020   Prob (F-statistic):               0.00
Time:                         11:06:47   Log-Likelihood:                -3214.0
No. Observations:                 3148   AIC:                             6662.
Df Residuals:                     3031   BIC:                             7370.
Df Model:                          116                                         
Covariance Type:             nonrobust                                         
                                                          coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------

  return self.params / self.bse
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


In [335]:
df.groupby("game").mean().GameScoreSelfNormalized

game
assault          1.0
bank_heist       1.0
battle_zone      1.0
boxing           1.0
breakout         1.0
crazy_climber    1.0
frostbite        1.0
kangaroo         1.0
pong             1.0
up_n_down        1.0
Name: GameScoreSelfNormalized, dtype: float64

In [None]:
byol_sweeps[6].seed

In [492]:
print(" ".join(list(map(str, range(10231160, 10231191)))))
    

10231160 10231161 10231162 10231163 10231164 10231165 10231166 10231167 10231168 10231169 10231170 10231171 10231172 10231173 10231174 10231175 10231176 10231177 10231178 10231179 10231180 10231181 10231182 10231183 10231184 10231185 10231186 10231187 10231188 10231189 10231190
