In [16]:
import pandas as pd
import plotly.express as px

from pathlib import Path
from sklearn.preprocessing import normalize

# Directory that is scanned (non-recursively) for "*.parquet" files.
DATA_DIR = "/home/sequenzia/dev/solen-rl-project/data"

# Map each run key (the parquet file name without its extension) to the
# DataFrame read from that file.
all_infer_data = {
    parquet_file.stem: pd.read_parquet(parquet_file)
    for parquet_file in Path(DATA_DIR).glob("*.parquet")
}

In [19]:
# Flatten the per-run 'scores' columns into one long table with one row per
# score. The first two underscore-separated tokens of each run key are used
# as the algorithm and the ROM name respectively.
all_scores_list = []

for key, data in all_infer_data.items():

    # The key is the same for every score of a run, so parse it once per run
    # rather than once per score.
    key_split = key.split("_")
    algo = key_split[0]
    rom = key_split[1]

    all_scores_list.extend([key, algo, rom, score] for score in data['scores'].values.tolist())

all_scores = pd.DataFrame(all_scores_list, columns=["run_key", "algo", "rom", "score"])

# NOTE(review): the scores are reshaped into a single column and normalized
# along axis=0, so every score is divided by the same L2 norm taken over all
# rows (all runs, all games). This is a constant rescaling of the raw scores,
# not a per-game normalization -- confirm that this is what the per-algorithm
# comparison of 'score_norm' is meant to use.
all_scores['score_norm'] = normalize(all_scores['score'].values.reshape(-1, 1), axis=0, norm='l2') * 1000

# Statistics reported in every summary table below.
summary_stats = ['count', 'mean', 'std', 'min', 'max']


def _summarize_by_run(scores):
    """Return count/mean/std/min/max of the raw 'score' column per run_key."""
    return scores[['run_key', 'score']].groupby('run_key').agg(summary_stats)


def _summarize_norm(scores):
    """Return count/mean/std/min/max of the 'score_norm' column."""
    return scores[['score_norm']].agg(summary_stats)


# Per-game slices (ROM names are matched exactly, upper case).
breakout_scores = all_scores[all_scores['rom'] == 'BREAKOUT']
pong_scores = all_scores[all_scores['rom'] == 'PONG']
spaceinvaders_scores = all_scores[all_scores['rom'] == 'SPACEINVADERS']
mspacman_scores = all_scores[all_scores['rom'] == 'MSPACMAN']

# Per-algorithm slices.
a2c_scores = all_scores[all_scores['algo'] == 'A2C']
ppo_scores = all_scores[all_scores['algo'] == 'PPO']
dqn_scores = all_scores[all_scores['algo'] == 'DQN']

# Raw-score statistics for each run, one table per game.
breakout_scores_summary = _summarize_by_run(breakout_scores)
pong_scores_summary = _summarize_by_run(pong_scores)
spaceinvaders_scores_summary = _summarize_by_run(spaceinvaders_scores)
mspacman_scores_summary = _summarize_by_run(mspacman_scores)

# Scaled-score statistics pooled over all runs of each algorithm.
a2c_scores_summary = _summarize_norm(a2c_scores)
ppo_scores_summary = _summarize_norm(ppo_scores)
dqn_scores_summary = _summarize_norm(dqn_scores)




In [20]:
def _breakdown_row(label, scores):
    """Return [label, min, max, mean, std] for the frame's 'score_norm' column."""
    norm = scores['score_norm']
    return [label, norm.min(), norm.max(), norm.mean(), norm.std()]


# Column order must match the order of values produced by _breakdown_row.
algo_breakdown_cols = ['Algorithm', 'Min', 'Max', 'Mean', 'STD']

ppo_breakdown_vals = _breakdown_row('PPO', ppo_scores)
dqn_breakdown_vals = _breakdown_row('DQN', dqn_scores)
a2c_breakdown_vals = _breakdown_row('A2C', a2c_scores)

# One row per algorithm, in the order PPO, DQN, A2C.
algo_breakdown = pd.DataFrame(
    [ppo_breakdown_vals, dqn_breakdown_vals, a2c_breakdown_vals],
    columns=algo_breakdown_cols,
)

In [80]:
# Bar chart of the "Mean" column of algo_breakdown, one colored bar per algorithm.
mean_bar_options = dict(
    x="Algorithm",
    y="Mean",
    color="Algorithm",
    color_discrete_sequence=px.colors.qualitative.Dark24,
    width=800,
    height=600,
    title="Normalized Scores by Algorithm: Mean",
)
fig = px.bar(algo_breakdown, **mean_bar_options)
fig.show()

In [81]:
# Bar chart of the "Max" column of algo_breakdown, one colored bar per algorithm.
max_bar_options = dict(
    x="Algorithm",
    y="Max",
    color="Algorithm",
    color_discrete_sequence=px.colors.qualitative.Dark24,
    width=800,
    height=600,
    title="Normalized Scores by Algorithm: Max",
)
fig = px.bar(algo_breakdown, **max_bar_options)
fig.show()

In [87]:
# Bar chart of the "STD" column of algo_breakdown, one colored bar per algorithm.
std_bar_options = dict(
    x="Algorithm",
    y="STD",
    color="Algorithm",
    color_discrete_sequence=px.colors.qualitative.Dark24,
    width=800,
    height=600,
    title="Normalized Scores by Algorithm: Standard Deviation",
)
fig = px.bar(algo_breakdown, **std_bar_options)
fig.show()

In [83]:
# Mean raw score per algorithm (rows) for each game (columns).
#
# The frame is built from a dict of Series in a single step so that the rows
# are the union of the algorithms present in any game. Assigning the columns
# one at a time to an empty DataFrame takes the index from the first column
# (Breakout) and silently drops any algorithm that has no Breakout scores.
# An algorithm missing from a game shows up as NaN in that game's column.
games_breakdown = pd.DataFrame({
    'Breakout': breakout_scores.groupby('algo')["score"].mean(),
    'Pong': pong_scores.groupby('algo')["score"].mean(),
    'SpaceInvaders': spaceinvaders_scores.groupby('algo')["score"].mean(),
    'MsPacman': mspacman_scores.groupby('algo')["score"].mean(),
}).rename_axis('algo')  # keep the index name that the plot labels refer to

games_breakdown

Unnamed: 0_level_0,Breakout,Pong,SpaceInvaders,MsPacman
algo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A2C,475.333333,19.380952,674.907975,1801.932367
DQN,98.189974,20.199313,486.14841,1855.950096
PPO,332.544186,20.287273,719.055046,2201.456835


In [88]:
# Wide-form bar chart: algorithms (the frame's index) on the y axis and one
# bar series per game column on the x axis.
games_bar_options = dict(
    y=games_breakdown.index,
    x=games_breakdown.columns,
    labels={'variable':'Games', 'algo':'Algorithm', 'value':'Mean Score'},
    color_discrete_sequence=px.colors.qualitative.Vivid,
    width=1600,
    height=600,
    title="Mean Scores of Algorithms by Game",
)
fig = px.bar(games_breakdown, **games_bar_options)
fig.show()