In [1]:
import importlib
import importlib.util
import os
import sys
from pathlib import Path

import numpy as np
import pandas as pd

# --- Evaluation configuration ------------------------------------------- #

# Name under which the helper module is registered in sys.modules, and the
# stem of the file loaded from "<MAIN_PATH>/utils/<MODULE_NAME>.py".
MODULE_NAME = "infer"

# Repository root. Overridable through the ATARI_RL_MAIN_PATH environment
# variable so the notebook is not tied to one machine; the default is the
# original hard-coded location.
MAIN_PATH = os.environ.get("ATARI_RL_MAIN_PATH",
                           "/home/sequenzia/dev/repos/atari-rl")

# Weights & Biases project name (forwarded to infer.infer and wandb.init).
PROJECT = "solen-rl-project-eval-2"

# Forwarded to infer.infer as `no_render`.
NO_RENDER = True

# Forwarded to infer.infer as `n_envs` / `n_steps`.
# NOTE(review): presumably the number of parallel environments and the step
# budget per run -- confirm against utils/infer.py.
N_ENVS = 2
N_STEPS = 10000

module_path = f"{MAIN_PATH}/utils/{MODULE_NAME}.py"
agents_path = f"{MAIN_PATH}/agents"
data_path = f"{MAIN_PATH}/data"

# --- Load utils/infer.py directly from its file path --------------------- #

spec = importlib.util.spec_from_file_location(MODULE_NAME, module_path)

# spec_from_file_location may return None (or a spec without a loader) when
# it cannot work out how to load the path; fail with a clear message instead
# of an AttributeError further down.
if spec is None or spec.loader is None:
    raise ImportError(f"Cannot build an import spec for {module_path!r}")

infer = importlib.util.module_from_spec(spec)

# Register before executing so imports of MODULE_NAME during execution
# resolve to this module object.
sys.modules[MODULE_NAME] = infer

try:
    spec.loader.exec_module(infer)
except BaseException:
    # Do not leave a half-initialised module behind for later imports.
    sys.modules.pop(MODULE_NAME, None)
    raise

# Result containers filled by the evaluation loop, keyed by run key.
all_infer_logs = {}
all_infer_data = {}


# ------------------------------------------------------------------------ #
# Run inference for every (algorithm, game) pair and collect the results
# into `all_infer_logs` (raw log objects) and `all_infer_data` (DataFrames).

ALGOS = ["ppo", "a2c"]

GAMES = ["Breakout",
         "Pong",
         "SpaceInvaders"]
# Currently disabled games: "Qbert", "Seaquest", "Centipede", "MsPacman",
# "Asterix", "Asteroids", "Assault"

# Column order of the per-episode DataFrame; it mirrors the attributes read
# from each log object below.
INFER_COLUMNS = ["scores",
                 "times",
                 "lengths",
                 "frame_numbers",
                 "run_frame_numbers"]

for algo in ALGOS:

    for game in GAMES:

        ENV_ID = f"ALE/{game}-v5"

        # e.g. "PPO_Breakout"; this is the key used by the inspection cells.
        RUN_KEY = f"{algo.upper()}_{game}"

        infer_logs = infer.infer(run_key=RUN_KEY,
                                 env_id=ENV_ID,
                                 algo=algo,
                                 game=game,
                                 agents_path=agents_path,
                                 n_envs=N_ENVS,
                                 n_steps=N_STEPS,
                                 no_render=NO_RENDER,
                                 project=PROJECT,
                                 debug_on=False)

        # Keep the raw logs: later cells read all_infer_logs[RUN_KEY].
        all_infer_logs[RUN_KEY] = infer_logs

        # One (n_episodes, 5) array per log object.
        per_log_rows = [np.array([log.scores,
                                  log.times,
                                  log.lengths,
                                  log.frame_numbers,
                                  log.run_frame_numbers]).T
                        for log in infer_logs]

        # Stack everything once instead of growing the array inside a loop.
        # The leading empty (0, 5) block keeps the result a well-formed float
        # array even when `infer_logs` is empty.
        infer_data_np = np.vstack([np.empty((0, len(INFER_COLUMNS))),
                                   *per_log_rows])

        infer_data = pd.DataFrame(infer_data_np, columns=INFER_COLUMNS)

        all_infer_data[RUN_KEY] = infer_data



Loading /home/sequenzia/dev/repos/atari-rl/agents/ppo/ALE-Breakout-v5_1/best_model.zip



A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]
A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]


Stacking 4 frames


PPO_Breakout: 1 EPISODE: 1
Episode Score: 38.00
Episode Length: 1148


PPO_Breakout: 2 EPISODE: 1
Episode Score: 54.00
Episode Length: 1263


PPO_Breakout: 1 EPISODE: 2
Episode Score: 22.00
Episode Length: 934


PPO_Breakout: 2 EPISODE: 2
Episode Score: 36.00
Episode Length: 977


PPO_Breakout: 2 EPISODE: 3
Episode Score: 20.00
Episode Length: 673


PPO_Breakout: 1 EPISODE: 3
Episode Score: 57.00
Episode Length: 1242


PPO_Breakout: 2 EPISODE: 4
Episode Score: 24.00
Episode Length: 844


PPO_Breakout: 1 EPISODE: 4
Episode Score: 28.00
Episode Length: 1036


PPO_Breakout: 2 EPISODE: 5
Episode Score: 21.00
Episode Length: 767


PPO_Breakout: 2 EPISODE: 6
Episode Score: 16.00
Episode Length: 709


PPO_Breakout: 1 EPISODE: 5
Episode Score: 51.00
Episode Length: 1099


PPO_Breakout: 2 EPISODE: 7
Episode Score: 22.00
Episode Length: 865


PPO_Breakout: 1 EPISODE: 6
Episode Score: 31.00
Episode Length: 1061


PPO_Breakout: 2 EPISODE: 8
Episode Score: 37.00
Episode Length: 

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mappliedtheta[0m. Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='0.018 MB of 0.018 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
PPO_Breakout/episode_length,▅▃▁▃▂▄▄▅▄▆▅▅▄▄▃▃▂▂▅▁▃▆▅▄▂▇▅▃▅▃▆▅▃█▅▆▂▄▂▆
PPO_Breakout/episode_lives,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
PPO_Breakout/episode_score,▃▂▂▂▂▅▃▅▃▆▄▄▃▄▂▃▂▂▅▁▂▅▃▃▂▅▄▃▅▂▄▄▂█▄▅▁▃▁▆
PPO_Breakout/episode_time,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
PPO_Breakout/run_frame_number,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███

0,1
PPO_Breakout/episode_length,1257.0
PPO_Breakout/episode_lives,0.0
PPO_Breakout/episode_score,65.0
PPO_Breakout/episode_time,68.99475
PPO_Breakout/run_frame_number,171109.0



Loading /home/sequenzia/dev/repos/atari-rl/agents/ppo/ALE-Pong-v5_1/best_model.zip



A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]
A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]


Stacking 4 frames


PPO_Pong: 2 EPISODE: 1
Episode Score: 2.00
Episode Length: 6218


PPO_Pong: 1 EPISODE: 1
Episode Score: -4.00
Episode Length: 6292


PPO_Pong: 1 EPISODE: 2
Episode Score: -11.00
Episode Length: 4009


PPO_Pong: 2 EPISODE: 2
Episode Score: 9.00
Episode Length: 4771


PPO_Pong: 2 EPISODE: 3
Episode Score: 2.00
Episode Length: 5837


PPO_Pong: 1 EPISODE: 3
Episode Score: 4.00
Episode Length: 7502


PPO_Pong: 2 EPISODE: 4
Episode Score: 4.00
Episode Length: 4935


PPO_Pong: 1 EPISODE: 4
Episode Score: 5.00
Episode Length: 6082


PPO_Pong: 2 EPISODE: 5
Episode Score: 1.00
Episode Length: 5984


PPO_Pong: 1 EPISODE: 5
Episode Score: 8.00
Episode Length: 4700


PPO_Pong: 2 EPISODE: 6
Episode Score: 10.00
Episode Length: 5836


PPO_Pong: 1 EPISODE: 6
Episode Score: 8.00
Episode Length: 6186


PPO_Pong: 2 EPISODE: 7
Episode Score: 11.00
Episode Length: 5042


VBox(children=(Label(value='0.018 MB of 0.018 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
PPO_Pong/episode_length,▅▆▁▃▅█▃▅▅▂▅▅▃
PPO_Pong/episode_lives,▁▁▁▁▁▁▁▁▁▁▁▁▁
PPO_Pong/episode_score,▅▃▁▇▅▆▆▆▅▇█▇█
PPO_Pong/episode_time,▁▁▂▂▃▄▄▅▆▆▇▇█
PPO_Pong/run_frame_number,▁▁▂▂▃▄▄▅▆▆▇▇█

0,1
PPO_Pong/episode_length,5042.0
PPO_Pong/episode_lives,0.0
PPO_Pong/episode_score,11.0
PPO_Pong/episode_time,58.03403
PPO_Pong/run_frame_number,154484.0



Loading /home/sequenzia/dev/repos/atari-rl/agents/ppo/ALE-SpaceInvaders-v5_1/best_model.zip



A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]
A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]


Stacking 4 frames


PPO_SpaceInvaders: 1 EPISODE: 1
Episode Score: 905.00
Episode Length: 989


PPO_SpaceInvaders: 2 EPISODE: 1
Episode Score: 1140.00
Episode Length: 1574


PPO_SpaceInvaders: 1 EPISODE: 2
Episode Score: 980.00
Episode Length: 1280


PPO_SpaceInvaders: 2 EPISODE: 2
Episode Score: 1110.00
Episode Length: 1142


PPO_SpaceInvaders: 1 EPISODE: 3
Episode Score: 1430.00
Episode Length: 1510


PPO_SpaceInvaders: 2 EPISODE: 3
Episode Score: 1215.00
Episode Length: 1353


PPO_SpaceInvaders: 2 EPISODE: 4
Episode Score: 600.00
Episode Length: 1002


PPO_SpaceInvaders: 1 EPISODE: 4
Episode Score: 1100.00
Episode Length: 1459


PPO_SpaceInvaders: 1 EPISODE: 5
Episode Score: 455.00
Episode Length: 710


PPO_SpaceInvaders: 2 EPISODE: 5
Episode Score: 1345.00
Episode Length: 1642


PPO_SpaceInvaders: 1 EPISODE: 6
Episode Score: 1320.00
Episode Length: 1303


PPO_SpaceInvaders: 2 EPISODE: 6
Episode Score: 1140.00
Episode Length: 1079


PPO_SpaceInvaders: 1 EPISODE: 7
Episode Score: 115

VBox(children=(Label(value='0.018 MB of 0.018 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
PPO_SpaceInvaders/episode_length,▃▇▄▆▃▁▅▄▇▆▃▇▅▃▆▄█▃▃▂▄▁▇▇▆▃█▁▂▅▇▇▆▆▆▆▅▄█▃
PPO_SpaceInvaders/episode_lives,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
PPO_SpaceInvaders/episode_score,▄▆▆▇▂▁█▆▆▅▃▆▅▂▅▄▇▂▂▄▅▃█▆▆▃▇▂▄▆█▆▅▇▅▅▅▄▇▄
PPO_SpaceInvaders/episode_time,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
PPO_SpaceInvaders/run_frame_number,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████

0,1
PPO_SpaceInvaders/episode_length,891.0
PPO_SpaceInvaders/episode_lives,0.0
PPO_SpaceInvaders/episode_score,810.0
PPO_SpaceInvaders/episode_time,61.50854
PPO_SpaceInvaders/run_frame_number,165070.0



Loading /home/sequenzia/dev/repos/atari-rl/agents/a2c/ALE-Breakout-v5_1/best_model.zip



A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]
A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]


Stacking 4 frames


A2C_Breakout: 2 EPISODE: 1
Episode Score: 43.00
Episode Length: 1228


A2C_Breakout: 1 EPISODE: 1
Episode Score: 40.00
Episode Length: 1300


A2C_Breakout: 1 EPISODE: 2
Episode Score: 41.00
Episode Length: 1136


A2C_Breakout: 2 EPISODE: 2
Episode Score: 59.00
Episode Length: 1317


A2C_Breakout: 1 EPISODE: 3
Episode Score: 29.00
Episode Length: 1014


A2C_Breakout: 2 EPISODE: 3
Episode Score: 44.00
Episode Length: 1258


A2C_Breakout: 1 EPISODE: 4
Episode Score: 31.00
Episode Length: 988


A2C_Breakout: 2 EPISODE: 4
Episode Score: 30.00
Episode Length: 1088


A2C_Breakout: 1 EPISODE: 5
Episode Score: 20.00
Episode Length: 790


A2C_Breakout: 2 EPISODE: 5
Episode Score: 62.00
Episode Length: 1064


A2C_Breakout: 1 EPISODE: 6
Episode Score: 30.00
Episode Length: 1008


A2C_Breakout: 2 EPISODE: 6
Episode Score: 33.00
Episode Length: 1061


A2C_Breakout: 1 EPISODE: 7
Episode Score: 59.00
Episode Length: 1372


A2C_Breakout: 2 EPISODE: 7
Episode Score: 43.00
Episode Len

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
A2C_Breakout/episode_length,▆▆▆▆▅▅▅▅▇▃▆▅██▄▆▂▄▄▆▇▂▅▇▅▆▆▃▅▄▅▇▅▅█▅▁▅▃▅
A2C_Breakout/episode_lives,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
A2C_Breakout/episode_score,▄▄▆▄▃▆▃▄▅▂▅▄█▇▃▄▂▄▄▅█▂▅▅▃▅▆▃▃▃▄▆▄▄▆▄▁▅▂▅
A2C_Breakout/episode_time,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
A2C_Breakout/run_frame_number,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███

0,1
A2C_Breakout/episode_length,1113.0
A2C_Breakout/episode_lives,0.0
A2C_Breakout/episode_score,47.0
A2C_Breakout/episode_time,67.89857
A2C_Breakout/run_frame_number,168619.0



Loading /home/sequenzia/dev/repos/atari-rl/agents/a2c/ALE-Pong-v5_1/best_model.zip



A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]
A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]


Stacking 4 frames


A2C_Pong: 2 EPISODE: 1
Episode Score: -8.00
Episode Length: 5074


A2C_Pong: 1 EPISODE: 1
Episode Score: -3.00
Episode Length: 6027


A2C_Pong: 2 EPISODE: 2
Episode Score: -1.00
Episode Length: 6133


A2C_Pong: 1 EPISODE: 2
Episode Score: 3.00
Episode Length: 6137


A2C_Pong: 1 EPISODE: 3
Episode Score: -13.00
Episode Length: 3475


A2C_Pong: 2 EPISODE: 3
Episode Score: 2.00
Episode Length: 5917


A2C_Pong: 1 EPISODE: 4
Episode Score: -7.00
Episode Length: 5572


A2C_Pong: 2 EPISODE: 4
Episode Score: 8.00
Episode Length: 5775


A2C_Pong: 2 EPISODE: 5
Episode Score: -15.00
Episode Length: 3342


A2C_Pong: 1 EPISODE: 5
Episode Score: -4.00
Episode Length: 6152


A2C_Pong: 1 EPISODE: 6
Episode Score: -9.00
Episode Length: 4754


A2C_Pong: 2 EPISODE: 6
Episode Score: 2.00
Episode Length: 6590


A2C_Pong: 1 EPISODE: 7
Episode Score: -4.00
Episode Length: 5629


A2C_Pong: 2 EPISODE: 7
Episode Score: 4.00
Episode Length: 6325


VBox(children=(Label(value='0.018 MB of 0.018 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
A2C_Pong/episode_length,▅▇▇▇▁▇▆▆▁▇▄█▆▇
A2C_Pong/episode_lives,▁▁▁▁▁▁▁▁▁▁▁▁▁▁
A2C_Pong/episode_score,▃▅▅▆▂▆▃█▁▄▃▆▄▇
A2C_Pong/episode_time,▁▁▂▂▃▃▄▅▅▆▇▇██
A2C_Pong/run_frame_number,▁▁▂▂▃▃▄▅▅▆▇▇██

0,1
A2C_Pong/episode_length,6325.0
A2C_Pong/episode_lives,0.0
A2C_Pong/episode_score,4.0
A2C_Pong/episode_time,60.37914
A2C_Pong/run_frame_number,156612.0



Loading /home/sequenzia/dev/repos/atari-rl/agents/a2c/ALE-SpaceInvaders-v5_1/best_model.zip



A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]
A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]


Stacking 4 frames


A2C_SpaceInvaders: 1 EPISODE: 1
Episode Score: 1295.00
Episode Length: 1293


A2C_SpaceInvaders: 2 EPISODE: 1
Episode Score: 1715.00
Episode Length: 1917


A2C_SpaceInvaders: 1 EPISODE: 2
Episode Score: 895.00
Episode Length: 1081


A2C_SpaceInvaders: 2 EPISODE: 2
Episode Score: 1385.00
Episode Length: 1449


A2C_SpaceInvaders: 1 EPISODE: 3
Episode Score: 1450.00
Episode Length: 1744


A2C_SpaceInvaders: 2 EPISODE: 3
Episode Score: 1235.00
Episode Length: 1405


A2C_SpaceInvaders: 1 EPISODE: 4
Episode Score: 680.00
Episode Length: 1038


A2C_SpaceInvaders: 1 EPISODE: 5
Episode Score: 935.00
Episode Length: 1127


A2C_SpaceInvaders: 2 EPISODE: 4
Episode Score: 1605.00
Episode Length: 1879


A2C_SpaceInvaders: 1 EPISODE: 6
Episode Score: 610.00
Episode Length: 748


A2C_SpaceInvaders: 2 EPISODE: 5
Episode Score: 1095.00
Episode Length: 1327


A2C_SpaceInvaders: 1 EPISODE: 7
Episode Score: 1555.00
Episode Length: 1929


A2C_SpaceInvaders: 2 EPISODE: 6
Episode Score: 98

VBox(children=(Label(value='0.002 MB of 0.018 MB uploaded\r'), FloatProgress(value=0.12730093071354706, max=1.…

0,1
A2C_SpaceInvaders/episode_length,▄▇▅▆▃▃▁▄▄▄▂▆▄▆▂▄▅▇▇▅▅▁▅▂▂█▁▄▁▇▄▅▁▂▁▃▂▁▅▂
A2C_SpaceInvaders/episode_lives,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
A2C_SpaceInvaders/episode_score,▄▆▅▅▂▃▂▄▃▄▂▄▃▅▂▄▄▆▆▄▄▂▄▂▂█▂▄▁▆▃▄▁▂▁▂▁▁▄▂
A2C_SpaceInvaders/episode_time,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
A2C_SpaceInvaders/run_frame_number,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████

0,1
A2C_SpaceInvaders/episode_length,942.0
A2C_SpaceInvaders/episode_lives,0.0
A2C_SpaceInvaders/episode_score,805.0
A2C_SpaceInvaders/episode_time,56.65741
A2C_SpaceInvaders/run_frame_number,161258.0


In [None]:
# Display the per-episode results DataFrame stored by the evaluation loop
# for the A2C agent on Pong.
all_infer_data['A2C_Pong']

In [None]:
# Scratch example: turn a flat list of values into a dict with one
# "losses/loss-<index>" key per entry.
losses = [223, 2232, *range(4, 21)]

dict((f"losses/loss-{position}", value) for position, value in enumerate(losses))

In [None]:
import wandb

# Flatten every episode of the PPO/Breakout run and order them by the global
# frame counter. This happens BEFORE a wandb run is opened so a missing key
# or attribute fails fast without leaving an empty run behind.
# `sorted` is stable, so episodes sharing a run_frame_number keep their
# original relative order.
episodes = sorted((episode
                   for infer_log in all_infer_logs['PPO_Breakout']
                   for episode in infer_log.episode_logs),
                  key=lambda episode: episode.run_frame_number)

# (run_frame_number, value) pairs, kept as module-level lists because later
# cells inspect them.
run_frame_numbers = [(ep.run_frame_number, ep.run_frame_number) for ep in episodes]

episode_scores = [(ep.run_frame_number, ep.episode_score) for ep in episodes]
episode_times = [(ep.run_frame_number, ep.episode_time) for ep in episodes]
episode_lengths = [(ep.run_frame_number, ep.episode_length) for ep in episodes]
episode_lives = [(ep.run_frame_number, ep.episode_lives) for ep in episodes]
episode_frame_numbers = [(ep.run_frame_number, ep.episode_frame_number) for ep in episodes]

# The context manager finishes the run even if logging raises, so a failed
# cell does not leave a dangling active run.
with wandb.init(project=PROJECT,
                name="ppo_breakout_eval_14",
                group="ppo",
                job_type="eval",
                settings=wandb.Settings(disable_job_creation=True)) as run:

    for ep in episodes:

        # The global frame counter is used as the wandb step; episodes are
        # already sorted by it, so the steps are non-decreasing.
        run.log(data={"episode_score": ep.episode_score,
                      "episode_time": ep.episode_time,
                      "episode_length": ep.episode_length,
                      "episode_lives": ep.episode_lives,
                      "run_frame_number": ep.run_frame_number},
                step=ep.run_frame_number)




In [None]:
# Display the (run_frame_number, episode_frame_number) pairs collected by the
# wandb logging cell.
episode_frame_numbers

In [None]:
# Display the (run_frame_number, run_frame_number) pairs collected by the
# wandb logging cell. The previous name `_run_frame_numbers` is not assigned
# in any cell and raised NameError on a fresh kernel.
run_frame_numbers

In [None]:
    # Disabled experiment: log every collected value under its own indexed key
    # (e.g. "episode_times/time-<idx>") in a single wandb.log call per metric.
    # NOTE(review): this cell contains no live code -- consider deleting it.
    # wandb.log(data={f"episode_times/time-{idx}": time for idx, time in enumerate(episode_times)})
    # wandb.log(data={f"episode_lengths/length-{idx}": length for idx, length in enumerate(episode_lengths)})
    # wandb.log(data={f"episode_lives/lives-{idx}": lives for idx, lives in enumerate(episode_lives)})
    # wandb.log(data={f"episode_frame_numbers/frame_number-{idx}": frame_number for idx, frame_number in enumerate(episode_frame_numbers)})
    # wandb.log(data={f"run_frame_numbers/frame_number-{idx}": frame_number for idx, frame_number in enumerate(run_frame_numbers)})


In [None]:
# NOTE(review): `wandb_log` is not assigned in any visible cell, so this
# raises NameError on a fresh kernel unless it is defined elsewhere --
# confirm the intended variable or remove this cell.
wandb_log

In [None]:
import wandb

# Look the DataFrame up first so a missing key fails before a run is opened.
ppo_breakout_data = all_infer_data['PPO_Breakout']

# The context manager finishes the run even if the upload raises.
with wandb.init(project=PROJECT,
                name="ppo_breakout_eval",
                group="ppo",
                job_type="eval") as run:

    # Upload the per-episode results as a wandb Table.
    wandb_tbl = wandb.Table(dataframe=ppo_breakout_data)

    run.log({"ppo_breakout_eval": wandb_tbl})

In [None]:
# Display the dict meant to hold the raw inference logs per run key.
# NOTE(review): this shows the whole dict; index a single run key if the
# output gets large.
all_infer_logs

In [None]:
# Run keys are built as f"{algo.upper()}_{game}", so the Breakout key is
# "PPO_Breakout" (only the algorithm part is upper-cased).
all_infer_data['PPO_Breakout']

In [None]:
# Run keys are built as f"{algo.upper()}_{game}", so the Breakout key is
# "A2C_Breakout" (only the algorithm part is upper-cased).
all_infer_data['A2C_Breakout']