In [115]:
import pandas as pd
import plotly.express as px

from pathlib import Path
from sklearn.preprocessing import normalize
from scipy.stats import zscore

# Root directory searched recursively for evaluation parquet files.
# NOTE(review): machine-specific absolute path — adjust before running elsewhere.
DATA_PATH = "/home/sequenzia/dev/repos/atari-rl/data/evals/"

# Maps run_key -> DataFrame loaded from the parquet file of that run.
all_infer_data = {}

# Sort the matches so the load order is reproducible: glob order depends on the
# filesystem, and it decides both which file wins when two files share a
# run_key and the order in which runs are seen by later cells.
for file_path in sorted(Path(DATA_PATH).glob("**/*.parquet")):

    data = pd.read_parquet(file_path)

    # A file without rows has no run_key value to index it by; skip it.
    if len(data) == 0:
        continue

    # The run key is taken from the first row of the file.
    # NOTE(review): assumes one run per file — confirm against the eval writer.
    run_key = data["run_key"].values[0]

    all_infer_data[run_key] = data


# ---- stats ----

# Run-level metadata, one entry per loaded run, in all_infer_data order.
all_algos, all_games, all_run_keys = [], [], []

# One [run_key, algo, game, length, score, frame_num] row per recorded episode.
all_data_list = []

for key, data in all_infer_data.items():

    # Run-level fields are read from the first row of each frame.
    algo = data['algo'].values[0].upper()
    game = data['game'].values[0]
    run_key = data['run_key'].values[0]

    all_algos.append(algo)
    all_games.append(game)
    all_run_keys.append(run_key)

    scores = data['scores'].values.tolist()
    lengths = data['lengths'].values.tolist()
    frame_nums = data['run_frame_numbers'].values.tolist()

    # Walk the three per-episode columns in lockstep and tag every episode
    # with the key, algo and game of the run it belongs to.
    all_data_list.extend(
        [key, algo, game, length, score, frame_num]
        for score, length, frame_num in zip(scores, lengths, frame_nums)
    )

episode_columns = ["run_key", "algo", "game", "length", "score", 'frame_num']
all_data = pd.DataFrame(all_data_list, columns=episode_columns)

# Scale the score column by its L1 norm and express it as a percentage.
# NOTE(review): the norm is taken over the whole column, i.e. all games and
# algos pooled together, not per game — confirm this is the intended scaling.
score_column = all_data['score'].values.reshape(-1, 1)
all_data['score_norm'] = normalize(score_column, axis=0, norm='l1') * 100

In [129]:
# Shared figure settings used by the plotting cells: (width, height) in pixels
# and the discrete colour sequence.
plots_shape = (800, 600)
plots_colors = px.colors.qualitative.Dark24

# Algorithms and games in order of first appearance in all_data.
algos = all_data['algo'].unique().tolist()
games = all_data['game'].unique().tolist()

# Columns carried into the per-game and per-(game, algo) frames.
stat_columns = ['frame_num', 'game', 'algo', 'score', 'length']


def _algo_by_game(column, how):
    """Aggregate ``column`` of all_data into an algo (rows) x game (columns) table.

    Grouping on both keys at once keeps every algo that appears in any game;
    combinations with no episodes become NaN. (Assigning one per-game Series
    after another into an empty DataFrame would instead pin the index to the
    algos of the first game and silently drop the rest.)

    The columns are put back into ``games`` order and the column-axis name is
    cleared so plotly's wide-form mode keeps using the default 'variable' key
    that the plotting cells' ``labels`` dicts refer to.
    """
    return (all_data.groupby(['algo', 'game'])[column]
            .agg(how)
            .unstack('game')
            .reindex(columns=games)
            .rename_axis(columns=None))


mean_scores = _algo_by_game('score', 'mean')
mean_scores_norm = _algo_by_game('score_norm', 'mean')
mean_lengths = _algo_by_game('length', 'mean')

# Number of episodes recorded per algo and game.
n_episodes = _algo_by_game('score', 'count')

# game -> all episodes of that game
game_data = {}

# game -> algo -> episodes of that pair, including the z-score columns
# (this was previously initialised per game but never filled in).
game_algo_data = {}

# Collected per-(game, algo) frames; concatenated once after the loop instead
# of growing stats_data on every iteration.
zscore_frames = []

for game in games:

    game_rows = all_data.loc[all_data['game'] == game, stat_columns]
    game_data[game] = game_rows

    game_algo_data[game] = {}

    for algo in algos:

        # Work on an explicit copy so adding columns does not write into a
        # slice of all_data (avoids SettingWithCopyWarning).
        data = game_rows[game_rows['algo'] == algo].copy()

        # Not every algo has been evaluated on every game; there is nothing
        # to standardise for a missing combination.
        if data.empty:
            continue

        # Standardise within this (game, algo) group only.
        data['score_zscore'] = zscore(data['score'])
        data['length_zscore'] = zscore(data['length'])

        game_algo_data[game][algo] = data
        zscore_frames.append(data)

# Rows keep their original all_data index labels, as before.
stats_data = pd.concat(zscore_frames) if zscore_frames else pd.DataFrame()



In [130]:
# Display the per-episode frame with the score/length z-score columns added.
stats_data

Unnamed: 0,frame_num,game,algo,score,length,score_zscore,length_zscore
0,22322.0,Pong,PPO,1.0,5581.0,0.079714,-0.175406
1,41504.0,Pong,PPO,11.0,4796.0,1.474701,-1.603338
2,67280.0,Pong,PPO,-3.0,6444.0,-0.478281,1.394411
3,87323.0,Pong,PPO,-11.0,5011.0,-1.594271,-1.212248
4,111097.0,Pong,PPO,3.0,5944.0,0.358711,0.484899
...,...,...,...,...,...,...,...
760,125331.0,Seaquest,A2C,1680.0,2271.0,0.304584,0.242536
761,134413.0,Seaquest,A2C,1680.0,2271.0,0.304584,0.242536
762,143494.0,Seaquest,A2C,1680.0,2271.0,0.304584,0.242536
763,152575.0,Seaquest,A2C,1680.0,2271.0,0.304584,0.242536


In [68]:
# Grouped bar chart of mean_scores_norm: algorithms (the frame's index) on the
# x axis, one bar per game column.
df = mean_scores_norm
title = "Evaluation: Average Episode Score (Normalized)"
labels = dict(variable='Game', algo='Algo', value='Average Episode Score')

plot_width, plot_height = plots_shape

fig = px.bar(
    df,
    x=df.index,
    y=df.columns,
    barmode="group",
    width=plot_width,
    height=plot_height,
    color_discrete_sequence=plots_colors,
    labels=labels,
    title=title,
)
fig.show()

In [137]:
# Line chart of per-episode score z-scores for Pong, one line per algorithm,
# plotted against the frame number recorded for each episode.
df = stats_data[stats_data['game'] == 'Pong']

# Give the figure a title and readable axis/legend names, like the other
# figures in this notebook, so it can be understood on its own.
title = "Evaluation: Episode Score Z-Score (Pong)"
labels = {'frame_num': 'Frame Number',
          'score_zscore': 'Episode Score (z-score)',
          'algo': 'Algo'}

fig = px.line(df,
              x='frame_num',
              y='score_zscore',
              color='algo',
              width=plots_shape[0],
              height=plots_shape[1],
              labels=labels,
              title=title)
fig.show()

In [51]:
# Bar chart of mean_lengths with the axes swapped relative to the score chart:
# algorithms (the frame's index) on the y axis, game columns on the x axis,
# drawn with barmode "relative".
df = mean_lengths
title = "Evaluation: Average Episode Length"
labels = {'variable': 'Game', 'algo': 'Algo', 'value': 'Episode Length'}

bar_options = dict(
    y=df.index,
    x=df.columns,
    barmode="relative",
    width=plots_shape[0],
    height=plots_shape[1],
    color_discrete_sequence=plots_colors,
    labels=labels,
    title=title,
)

fig = px.bar(df, **bar_options)
fig.show()

In [60]:
# Grouped bar chart of n_episodes: algorithms (the frame's index) on the
# x axis, one bar per game column.
df = n_episodes
title = "Evaluation: Number of Episodes Played"
labels = dict(variable='Game', algo='Algo', value='Number of Episodes')

fig_width, fig_height = plots_shape

fig = px.bar(
    df,
    x=df.index,
    y=df.columns,
    barmode="group",
    width=fig_width,
    height=fig_height,
    color_discrete_sequence=plots_colors,
    labels=labels,
    title=title,
)
fig.show()