In [197]:
import pandas as pd
import plotly.express as px

from pathlib import Path
from sklearn.preprocessing import normalize
from scipy.stats import zscore

# Root directory that is searched recursively for evaluation parquet files.
# NOTE(review): absolute, machine-specific path; the notebook only runs as-is
# on a machine that has this directory. Consider making it configurable.
DATA_PATH = "/home/sequenzia/dev/repos/atari-rl/data/evals/"

# Maps run_key -> the DataFrame read from one parquet file.
all_infer_data = {}

# Sort the matches so the load order (and therefore the dict insertion order
# and the order of `unique()` values derived from it later) is reproducible;
# glob itself does not promise any particular order.
for file_path in sorted(Path(DATA_PATH).glob("**/*.parquet")):

    data = pd.read_parquet(file_path)

    # A file with no rows has no run_key value to index it by.
    if len(data) == 0:
        continue

    # The run_key of the first row identifies the whole file. If two files
    # carry the same run_key, the one loaded last replaces the earlier one.
    run_key = data["run_key"].iloc[0]

    all_infer_data[run_key] = data


# ---- stats ----

# Per-run metadata, one entry per loaded run, in load order.
all_algos = []
all_games = []
all_run_keys = []

# Column layout of the long-format table built below.
all_data_columns = ["run_key", "algo", "game", "length", "score"]

# One long-format frame per run; concatenated once after the loop instead of
# appending Python rows one at a time.
run_frames = []

for key, data in all_infer_data.items():

    # Run-level attributes are taken from the first row of each run's frame.
    algo = data["algo"].iloc[0].upper()
    game = data["game"].iloc[0]
    run_key = data["run_key"].iloc[0]

    all_algos.append(algo)
    all_games.append(game)
    all_run_keys.append(run_key)

    # The scalar run attributes are broadcast against the per-row
    # `lengths` / `scores` columns of this run.
    run_frames.append(
        pd.DataFrame(
            {
                "run_key": key,
                "algo": algo,
                "game": game,
                "length": data["lengths"].to_numpy(),
                "score": data["scores"].to_numpy(),
            }
        )
    )

if run_frames:
    all_data = pd.concat(run_frames, ignore_index=True)[all_data_columns]
else:
    # pd.concat rejects an empty list; keep the expected columns so the
    # downstream column lookups still resolve when nothing was loaded.
    all_data = pd.DataFrame(columns=all_data_columns)

# Add a z-score column for episode length and for score. Each z-score is
# computed over the entire column of all_data, i.e. across every run at once.
# NOTE(review): scores from different games are pooled into one distribution
# here; confirm that this is intended before comparing the z-scores.
for metric in ("length", "score"):
    all_data[f"{metric}_zscore"] = zscore(all_data[metric])

# --- by algo / game --- #

# Summary statistics computed for both `score` and `length`.
summary_stats = ["mean", "std", "min", "max", "median", "sum"]

# Named aggregation ties every output column name to its (source column,
# statistic) pair, so a label can never drift out of step with the values it
# describes. Resulting order: score_*, then length_*, then count.
named_aggs = {
    f"{metric}_{stat}": (metric, stat)
    for metric in ("score", "length")
    for stat in summary_stats
}

# `count` is the number of non-null `length` values per (algo, game) group.
named_aggs["count"] = ("length", "count")

# One row per (algo, game); `game` is then moved from the index into a regular
# column so that the frame is indexed by `algo` only.
games_data = (
    all_data
    .groupby(["algo", "game"])
    .agg(**named_aggs)
    .reset_index(level="game")
)


In [198]:
# Display the per-(algo, game) summary statistics table.
games_data

Unnamed: 0_level_0,game,score_mean,score_std,score_min,score_max,score_median,score_sum,length_mean,length_std,length_min,length_max,length_median,length_sum,count
algo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
A2C,Assault,2504.230769,1708.867713,693.0,6256.0,2642.0,32555.0,2940.538462,1870.102966,1027.0,7058.0,3235.0,38227.0,13
A2C,Asterix,10486.111111,8457.920158,1050.0,32900.0,6750.0,188750.0,2223.055556,997.980695,533.0,4652.0,2009.0,40015.0,18
A2C,Asteroids,1677.5,506.83755,830.0,2550.0,1650.0,73810.0,959.590909,260.709258,462.0,1603.0,984.0,42222.0,44
A2C,Breakout,51.142857,41.01875,14.0,272.0,43.0,1790.0,1198.942857,231.817365,639.0,1655.0,1174.0,41963.0,35
A2C,Centipede,33094.0,20441.60346,6827.0,71429.0,37231.0,297846.0,4310.444444,2524.006196,1090.0,9235.0,4380.0,38794.0,9
A2C,MsPacman,4122.272727,1236.311456,2130.0,6110.0,4510.0,90690.0,1763.636364,340.347938,1143.0,2457.0,1875.0,38800.0,22
A2C,Pong,-1.0,5.802298,-10.0,7.0,-3.0,-7.0,5535.571429,700.141856,4463.0,6536.0,5765.0,38749.0,7
A2C,Qbert,26895.833333,2016.433433,24100.0,29325.0,27962.5,322750.0,3377.166667,400.928884,2896.0,4040.0,3257.5,40526.0,12
A2C,Seaquest,1652.222222,93.843145,1280.0,1700.0,1680.0,29740.0,2245.833333,106.773124,1818.0,2271.0,2271.0,40425.0,18
A2C,SpaceInvaders,1018.548387,432.142325,515.0,1855.0,970.0,31575.0,1278.516129,449.651633,704.0,2124.0,1126.0,39634.0,31


In [203]:
# Distinct algorithms and games, in order of first appearance in all_data.
algos = all_data['algo'].unique().tolist()
games = all_data['game'].unique().tolist()

# Wide table of mean scores: one row per algo, one column per game.
# Unstacking keeps every algo that appears for any game and leaves NaN where
# an (algo, game) pair has no data. Filling an empty frame one column at a
# time would instead pin the index to the algos of the first game and drop
# the others.
# NOTE: this rebinds `all_games`, which an earlier cell uses for a plain list
# of game names.
all_games = (
    games_data
    .set_index('game', append=True)['score_mean']
    .unstack('game')
    .reindex(columns=games)       # keep the first-appearance column order
    .rename_axis(columns=None)    # plain, unnamed column index
)

In [204]:
# Display the algo x game table of mean scores.
all_games

Unnamed: 0_level_0,Pong,SpaceInvaders,Asterix,Breakout,Centipede,Asteroids,MsPacman,Assault,Qbert,Seaquest
algo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
A2C,-1.0,1018.548387,10486.111111,51.142857,33094.0,1677.5,4122.272727,2504.230769,26895.833333,1652.222222
DQN,10.909091,465.0,4439.0625,30.395349,4170.238095,560.0,2824.358974,4689.75,13105.357143,1841.176471
PPO,0.428571,1064.53125,3645.0,35.804878,11692.125,1766.595745,4382.083333,4257.0,25831.818182,3180.0


In [88]:
# Figure size as (width, height) and the colour sequence used for the plots.
plots_shape = (800, 600)
plots_colors = px.colors.qualitative.Dark24

# Distinct algorithms and games, in order of first appearance in all_data.
algos = all_data['algo'].unique().tolist()
games = all_data['game'].unique().tolist()

# Empty placeholder frames, one per (grouping, statistic, metric) combination.
# Each name is bound to its own, separate DataFrame instance.

# --- per-game tables ---
game_mean_lenths, game_mean_scores, game_mean_scores_norm = (pd.DataFrame() for _ in range(3))
game_std_lenths, game_std_scores, game_std_scores_norm = (pd.DataFrame() for _ in range(3))
game_min_lenths, game_min_scores, game_min_scores_norm = (pd.DataFrame() for _ in range(3))
game_max_lenths, game_max_scores, game_max_scores_norm = (pd.DataFrame() for _ in range(3))
game_zscore_lenths, game_zscore_scores, game_zscore_scores_norm = (pd.DataFrame() for _ in range(3))

# --- per-algo tables ---
algo_mean_lenths, algo_mean_scores, algo_mean_scores_norm = (pd.DataFrame() for _ in range(3))
algo_std_lenths, algo_std_scores, algo_std_scores_norm = (pd.DataFrame() for _ in range(3))
algo_min_lenths, algo_min_scores, algo_min_scores_norm = (pd.DataFrame() for _ in range(3))
algo_max_lenths, algo_max_scores, algo_max_scores_norm = (pd.DataFrame() for _ in range(3))
algo_zscore_lenths, algo_zscore_scores, algo_zscore_scores_norm = (pd.DataFrame() for _ in range(3))


# Mean score per algo (rows) and game (columns), computed in one grouped pass
# over all_data rather than one filtered scan per game.
# Unstacking keeps every algo that has data for any game and leaves NaN for
# missing (algo, game) pairs. Assigning one Series per game into an initially
# empty frame would instead align every later game to the algos of the first
# game and drop the rest.
game_mean_scores = (
    all_data
    .groupby(['algo', 'game'])["score"]
    .mean()
    .unstack('game')
    .reindex(columns=games)       # same column order as `games`
    .rename_axis(columns=None)    # plain, unnamed column index
)
    

In [112]:
# Games as rows, algos as columns. The explicit copy makes the new frame
# independent of game_mean_scores before columns are added to it.
algo_mean_scores = game_mean_scores.T.copy()

# For every algo column add `<ALGO>_NORM`: that algo's mean scores across the
# games divided by the L2 norm of the column. Iterating over a snapshot of the
# columns covers whichever algos are present instead of a fixed set of names.
# NOTE(review): sklearn's normalize is expected to reject NaN input, so an
# algo with no data for some game would fail here; confirm and fill or drop
# such entries first if that can happen.
for algo in list(algo_mean_scores.columns):
    column = algo_mean_scores[algo].values.reshape(-1, 1)
    algo_mean_scores[f"{algo}_NORM"] = normalize(column, axis=0, norm='l2').ravel()




In [116]:
# Display the mean score per algo (rows) and game (columns).
game_mean_scores

Unnamed: 0_level_0,Pong,SpaceInvaders,Asterix,Breakout,Centipede,Asteroids,MsPacman,Assault,Qbert,Seaquest
algo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
A2C,-1.0,1018.548387,10486.111111,51.142857,33094.0,1677.5,4122.272727,2504.230769,26895.833333,1652.222222
DQN,10.909091,465.0,4439.0625,30.395349,4170.238095,560.0,2824.358974,4689.75,13105.357143,1841.176471
PPO,0.428571,1064.53125,3645.0,35.804878,11692.125,1766.595745,4382.083333,4257.0,25831.818182,3180.0


In [205]:
# Grouped bar chart of the raw mean episode score: one cluster per algo on the
# x axis (the index of all_games) and one bar per game (its columns).
# `plots_shape` and `plots_colors` come from the plot-configuration cell and
# must be defined before this cell runs.
df = all_games

# The plotted values are un-normalized means, so the title does not claim
# otherwise.
title = "Evaluation: Mean Episode Score by Algo"

# In wide-form input the x label is the index name ('algo'), the legend is
# 'variable' (here: the game columns) and the bar height is 'value'.
# The 'game' entry only applies if the column index is named 'game'.
labels = {'algo':'Algo', 'variable':'Game', 'game':'Game', 'value':'Mean Episode Score'}

fig = px.bar(df, 
             x=df.index,
             y=df.columns,
             barmode="group",
             width=plots_shape[0],
             height=plots_shape[1],
             color_discrete_sequence=plots_colors,
             labels=labels,
             title=title)
fig.show()

In [12]:
# Display the frame that was passed to the bar chart.
# NOTE(review): the recorded output below is indexed by game with one column
# per algo and carries an older execution count, while the plotting cell now
# sets `df = all_games` (indexed by algo). The output looks stale; re-run it.
df

Unnamed: 0_level_0,PPO,DQN,A2C
game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Assault,4257.0,4689.75,2504.230769
Asterix,3645.0,4439.0625,10486.111111
Asteroids,1766.595745,560.0,1677.5
Breakout,35.804878,30.395349,51.142857
Centipede,11692.125,4170.238095,33094.0
MsPacman,4382.083333,2824.358974,4122.272727
Pong,0.428571,10.909091,-1.0
Qbert,25831.818182,13105.357143,26895.833333
Seaquest,3180.0,1841.176471,1652.222222
SpaceInvaders,1064.53125,465.0,1018.548387
