<a href="https://colab.research.google.com/github/laurelkeys/machine-learning/blob/master/assignment-4/Trajectories.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# from google.colab import drive
# drive.mount('/content/drive', force_remount=True)
# PATH_TO_DATA = os.path.join("drive", "My Drive", "unicamp", "MC886", "atari")

import os
PATH_TO_DATA = ""

In [4]:
SAVE_DIR = os.path.join(PATH_TO_DATA, "data")
os.makedirs(SAVE_DIR, exist_ok=True)

SAVE_DIR # where the trajectories for each game will be saved to

'data'

In [5]:
LOG_DIR = os.path.join(PATH_TO_DATA, "data", "results")
os.makedirs(LOG_DIR, exist_ok=True)

LOG_DIR # where the stats for each game will be saved to

'data/results'

## Generate a dataset of trajectories from pre-trained RL agents on [Atari](https://gym.openai.com/envs/#atari) [environments](https://github.com/openai/gym/wiki/Table-of-environments).
That is, by the end of this notebook we will have $observation \rightarrow action$ mappings, where $observation$s are images of shape `IMG_SHAPE` and $action$s are integer values in the range $[0, 18)$, meaning:

| 0 | 1 | 2 | 3 | 4 | 5 |
| --- | --- | --- | --- | --- | --- |
| NOOP | FIRE | UP | RIGHT | LEFT | DOWN |


| 6 | 7 | 8 | 9 |
| --- | --- | --- | --- |
| UPRIGHT | UPLEFT | DOWNRIGHT | DOWNLEFT |


| 10 | 11 | 12 | 13 |
| --- | --- | --- | --- |
| UPFIRE | RIGHTFIRE | LEFTFIRE | DOWNFIRE |


| 14 | 15 | 16 | 17 |
| --- | --- | --- | --- |
| UPRIGHTFIRE | UPLEFTFIRE | DOWNRIGHTFIRE | DOWNLEFTFIRE |

In [0]:
# number of trajectories to generate
N_OF_TRAJECTORIES = 5

# number of steps per trajectory
N_OF_STEPS = 500

# list of string tuples in the format (RL Algorithm, Game Environment)
GAMES = [
    ("PPO2", "BreakoutNoFrameskip-v4"),
    ("PPO2", "PongNoFrameskip-v4"),
]

In [7]:
[env_id for algo, env_id in GAMES]

['BreakoutNoFrameskip-v4', 'PongNoFrameskip-v4']

## Install dependencies

Note that we're not installing [MPI](https://mpi4py.readthedocs.io/en/stable/), so the following algorithms will probably not work: `DDPG`, `GAIL`, `PPO1`, `TRPO`.

In [8]:
!apt-get update                                                  > /dev/null 2>&1
!apt-get install swig cmake zlib1g-dev ffmpeg freeglut3-dev xvfb > /dev/null 2>&1
!pip install pytablewriter #pyyaml optuna scikit-optimize         > /dev/null 2>&1



In [9]:
#### Stable Baselines only supports TF 1.x for now ####
try:
    # Colab only
    # %tensorflow_version 2.x
    %tensorflow_version 1.x
except Exception:
    pass

import tensorflow as tf
from tensorflow import keras
print(tf.__version__)

1.15.0


In [0]:
import os
from time import time

import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# NOTE use tqdm.write() instead of print() inside tqdm wrapped loops
from tqdm import tqdm

import gym
from gym.envs.atari.atari_env import ACTION_MEANING

### Update [Stable Baselines](https://github.com/hill-a/stable-baselines) and clone [RL Zoo Baselines](https://github.com/araffin/rl-baselines-zoo)

In [11]:
!pip list | grep baselines

stable-baselines         2.9.0a0    


In [0]:
!yes | pip uninstall stable-baselines                           > /dev/null 2>&1
!pip install git+https://github.com/hill-a/stable-baselines.git > /dev/null 2>&1

In [13]:
!pip list | grep baselines

stable-baselines         2.9.0a0    


In [0]:
!git clone https://github.com/araffin/rl-baselines-zoo.git      > /dev/null 2>&1

In [15]:
from stable_baselines.common.cmd_util import make_atari_env

from stable_baselines.common.vec_env import VecFrameStack, DummyVecEnv

# NOTE add more algorithms here
from stable_baselines import PPO2, ACER, ACKTR
ALGO_IMPL = {
    'PPO2': PPO2,
    'ACER': ACER,
    'ACKTR': ACKTR,
}

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



## Load the pre-trained agents

In [16]:
!ls rl-baselines-zoo/trained_agents/

a2c  acer  acktr  ddpg	dqn  her  ppo2	sac  td3  trpo


In [0]:
PATH_TO_AGENTS = os.path.join("rl-baselines-zoo", "trained_agents")

In [18]:
# check the available pre-trained models
algorithms = ["PPO2"]
for algo in algorithms:
    algo_path = os.path.join(PATH_TO_AGENTS, algo.lower())
    print(algo_path + '/')
    for f in sorted(os.listdir(algo_path), key=lambda x: x[::-1]):
        # sort by the reverse filename, so env types get grouped together
        if f.endswith(".pkl"):
            print("|___", f)
            # uncomment to print the model's observation and action spaces
            # try:
            #     model = ALGO_IMPL[algo].load(os.path.join(algo_path, f), verbose=0)
            #     print("     observation_space:", model.observation_space)
            #     print("     action_space:", model.action_space)
            # except:
            #     print("     ERROR: couldn't load model")

rl-baselines-zoo/trained_agents/ppo2/
|___ Pendulum-v0.pkl
|___ MountainCar-v0.pkl
|___ MountainCarContinuous-v0.pkl
|___ MinitaurBulletDuckEnv-v0.pkl
|___ Walker2DBulletEnv-v0.pkl
|___ HumanoidBulletEnv-v0.pkl
|___ HalfCheetahBulletEnv-v0.pkl
|___ InvertedDoublePendulumBulletEnv-v0.pkl
|___ InvertedPendulumSwingupBulletEnv-v0.pkl
|___ ReacherBulletEnv-v0.pkl
|___ HopperBulletEnv-v0.pkl
|___ MinitaurBulletEnv-v0.pkl
|___ AntBulletEnv-v0.pkl
|___ CartPole-v1.pkl
|___ Acrobot-v1.pkl
|___ BipedalWalkerHardcore-v2.pkl
|___ LunarLander-v2.pkl
|___ BipedalWalker-v2.pkl
|___ LunarLanderContinuous-v2.pkl
|___ PongNoFrameskip-v4.pkl
|___ MsPacmanNoFrameskip-v4.pkl
|___ EnduroNoFrameskip-v4.pkl
|___ BeamRiderNoFrameskip-v4.pkl
|___ SpaceInvadersNoFrameskip-v4.pkl
|___ QbertNoFrameskip-v4.pkl
|___ SeaquestNoFrameskip-v4.pkl
|___ BreakoutNoFrameskip-v4.pkl


In [19]:
for algo, env_id in GAMES:
    print(f"('{algo}', '{env_id}')")
    agent_path = os.path.join(PATH_TO_AGENTS, algo.lower(), env_id + '.pkl')
    model = ALGO_IMPL[algo].load(agent_path, verbose=0)
    print("observation_space:", model.observation_space)
    print("action_space:", model.action_space)
    print()

('PPO2', 'BreakoutNoFrameskip-v4')









Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



observation_space: Box(84, 84, 4)
action_space: Discrete(4)

('PPO2', 'PongNoFrameskip-v4')
observation_space: Box(84, 84, 4)
action_space: Discrete(6)



In [0]:
VERBOSE = 2 # 0, 1 or 2

## Evaluate agents
Let's evaluate a few trajectories of each game from `GAMES` to get a sense of how the agents are performing

In [0]:
# number of trajectories to evaluate each agent on
N_OF_EVAL_TRAJECTORIES = 2

results = {
    'algo': [],
    'env_id': [],
    'mean_reward': [],
    'std_reward': [],
    'n_timesteps': [],
    'n_episodes': []
}

### Import `create_test_env` from inside the `rl-baselines-zoo/` folder

In [22]:
cd rl-baselines-zoo/

/content/rl-baselines-zoo


In [0]:
from utils import create_test_env

In [24]:
cd ..

/content


In [35]:
time_start = time()
print("================")
for algo, env_id in GAMES:
    time_start_env = time()

    env = create_test_env(env_id, n_envs=1, seed=0, 
                          is_atari=True, should_render=False, 
                          log_dir=LOG_DIR, hyperparams={})
    agent_path = os.path.join(PATH_TO_AGENTS, algo.lower(), env_id + '.pkl')
    
    print(f"('{algo}', '{env_id}')")
    print(f"Getting pre-trained agent from: '{agent_path}'")
    if VERBOSE > 1:
        print(f"env.envs: {env.envs}")
    print()
    
    model = ALGO_IMPL[algo].load(agent_path, env)
    
    for trajectory in tqdm(range(N_OF_EVAL_TRAJECTORIES), position=0, leave=True):
        # episode stats
        ep_len, ep_reward, ep_rewards = 0, 0.0, []

        obs = env.reset() # (84, 84, 4)
        for step in range(N_OF_STEPS):
            action = model.predict(obs)
            # clip action to avoid out of bound errors
            if isinstance(env.action_space, gym.spaces.Box):
                action = np.clip(action, env.action_space.low, env.action_space.high)
            
            obs, reward, done, infos = env.step(action)

            # NOTE the return reward is not the Atari score
            #      so we have to get it from the infos dict
            ep_infos = infos[0].get('episode')
            if ep_infos is not None:
                tqdm.write(f"\nAtari Episode Score: {ep_infos['r']:.2f}")
                tqdm.write(f"Atari Episode Length: {ep_infos['l']}")
            
            # FIXME ep_infos is always None 
            # try checking stable-baselines' Monitor.step()
            if infos is not None: # debug
                # if step == 0:
                #     tqdm.write(f"(DEBUG) step == 0: {infos}")
                if len(infos) > 1:
                    tqdm.write(f"(DEBUG) infos: {infos}")
                elif len(infos[0].keys()) > 1:
                    if 'terminal_observation' not in infos[0].keys() or len(infos[0].keys()) > 2:
                        tqdm.write(f"(DEBUG) infos[0]: {infos[0]}")

            ep_len += 1
            ep_reward += reward[0]
            if done:
                obs = env.reset()
                ep_rewards.append(ep_reward)
                if VERBOSE > 1:
                    tqdm.write(f"\nEpisode Reward: {ep_reward:.2f}")
                    tqdm.write(f"Episode Length: {ep_len}")
                ep_reward = 0.0
                ep_len = 0
        
        if VERBOSE > 0:
            tqdm.write("\nMean reward: {:.2f}, len(ep_rewards) == {}".format(
                       np.mean(ep_rewards) if len(ep_rewards) > 0 else 0.0, 
                       len(ep_rewards)))

    env.close()
    print(f"Δt = {(time() - time_start_env):.2f}s")
    print("================")

print(f"Total Δt = {(time() - time_start):.2f}s")

Using Atari wrapper
('PPO2', 'BreakoutNoFrameskip-v4')
Getting pre-trained agent from: 'rl-baselines-zoo/trained_agents/ppo2/BreakoutNoFrameskip-v4.pkl'
env.envs: [<ClipRewardEnv<WarpFrame<FireResetEnv<EpisodicLifeEnv<Monitor<MaxAndSkipEnv<NoopResetEnv<TimeLimit<AtariEnv<BreakoutNoFrameskip-v4>>>>>>>>>>]



 50%|█████     | 1/2 [00:02<00:02,  2.18s/it]


Mean reward: 0.00, len(ep_rewards) == 0


100%|██████████| 2/2 [00:04<00:00,  2.16s/it]



Mean reward: 0.00, len(ep_rewards) == 0
Δt = 6.35s
Using Atari wrapper
('PPO2', 'PongNoFrameskip-v4')
Getting pre-trained agent from: 'rl-baselines-zoo/trained_agents/ppo2/PongNoFrameskip-v4.pkl'
env.envs: [<ClipRewardEnv<WarpFrame<FireResetEnv<EpisodicLifeEnv<Monitor<MaxAndSkipEnv<NoopResetEnv<TimeLimit<AtariEnv<PongNoFrameskip-v4>>>>>>>>>>]



 50%|█████     | 1/2 [00:02<00:02,  2.15s/it]


Mean reward: 0.00, len(ep_rewards) == 0


100%|██████████| 2/2 [00:04<00:00,  2.14s/it]


Mean reward: 0.00, len(ep_rewards) == 0
Δt = 6.31s
Total Δt = 12.66s





In [36]:
!ls data/results/

0.monitor.csv  progress.csv


## Generate trajectories

Note that we use `make_atari_env` + `VecFrameStack` for `NoFrameskip-v4` environments, so each frame is converted to grayscale and downscaled from 210x160 to 84x84. Therefore, the $observation$ shape is `(84, 84, 4)` (four stacked frames), and **not** `(210, 160, 3)`, nor `(84, 84, 1)`.

In [0]:
PRINT_EARLY_DONE = False
PRINT_ACTIONS_TAKEN = False

PRINT_EVERY_N_TRAJECTORIES = N_OF_TRAJECTORIES // 10
# uncomment below not to print
# PRINT_EVERY_N_TRAJECTORIES = N_OF_TRAJECTORIES + 1

**TODO: evaluate the trajectories before saving the final datasets**  
**TODO: add button to load from data/ or save to drive**

In [40]:
time_start = time()
print("PRINT_EVERY_N_TRAJECTORIES:", PRINT_EVERY_N_TRAJECTORIES)
print("N_OF_TRAJECTORIES:", N_OF_TRAJECTORIES)
print("N_OF_STEPS:", N_OF_STEPS)
print("================")
for algo, env_id in GAMES:
    time_start_env = time()

    env = make_atari_env(env_id, num_env=1, seed=0)
    env = VecFrameStack(env, n_stack=4) # Frame-stacking with 4 frames
    agent_path = os.path.join(PATH_TO_AGENTS, algo.lower(), env_id + '.pkl')
    
    print(f"('{algo}', '{env_id}')")
    print(f"Getting pre-trained agent from: '{agent_path}'\n")
    
    model = ALGO_IMPL[algo].load(agent_path, env)
    
    for trajectory in tqdm(range(N_OF_TRAJECTORIES), position=0, leave=True):
        # store the "obs -> action" mapping
        observed_states, actions_taken = [], []

        obs = env.reset() # (84, 84, 4)
        for step in range(N_OF_STEPS):
            action = model.predict(obs)
            observed_states.append(obs)
            actions_taken.append(action)
            obs, reward, done, infos = env.step(action)
            if done:
                obs = env.reset()
                if PRINT_EARLY_DONE:
                    print(f"done at step {step + 1} (reseting env)")
        
        # NOTE action, reward and done are arrays since we're using a vectorized env
        observed_states = [obs[0] for obs in observed_states]
        actions_taken = [action[0][0] for action in actions_taken]
        
        np.savez_compressed(file=os.path.join(SAVE_DIR, f"{env_id}_{algo}_t{trajectory+1}_{N_OF_STEPS}s"), 
                            observations=observed_states, actions=actions_taken)
        
        if (trajectory + 1) % 10 == 0:
            print(f" Saved trajectory {trajectory+1} (of {N_OF_TRAJECTORIES})")

        if PRINT_ACTIONS_TAKEN and trajectory == N_OF_TRAJECTORIES - 1:
            print("\nActions taken:", ", ".join([ACTION_MEANING[action] for action in set(actions_taken)]))

    del observed_states
    del actions_taken
    env.close()
    print(f"Δt = {(time() - time_start_env):.2f}s")
    print("================")

print(f"Total Δt = {(time() - time_start):.2f}s")

PRINT_EVERY_N_TRAJECTORIES: 0
N_OF_TRAJECTORIES: 5
N_OF_STEPS: 100
('PPO2', 'PongNoFrameskip-v4')
Getting pre-trained agent from: 'rl-baselines-zoo/trained_agents/ppo2/PongNoFrameskip-v4.pkl'



  0%|          | 0/5 [00:00<?, ?it/s]

[step 1] infos: [{'ale.lives': 0}]
[step 2] infos: [{'ale.lives': 0}]
[step 3] infos: [{'ale.lives': 0}]
[step 4] infos: [{'ale.lives': 0}]
[step 5] infos: [{'ale.lives': 0}]
[step 6] infos: [{'ale.lives': 0}]
[step 7] infos: [{'ale.lives': 0}]
[step 8] infos: [{'ale.lives': 0}]
[step 9] infos: [{'ale.lives': 0}]
[step 10] infos: [{'ale.lives': 0}]
[step 11] infos: [{'ale.lives': 0}]
[step 12] infos: [{'ale.lives': 0}]
[step 13] infos: [{'ale.lives': 0}]
[step 14] infos: [{'ale.lives': 0}]
[step 15] infos: [{'ale.lives': 0}]
[step 16] infos: [{'ale.lives': 0}]
[step 17] infos: [{'ale.lives': 0}]
[step 18] infos: [{'ale.lives': 0}]
[step 19] infos: [{'ale.lives': 0}]
[step 20] infos: [{'ale.lives': 0}]
[step 21] infos: [{'ale.lives': 0}]
[step 22] infos: [{'ale.lives': 0}]
[step 23] infos: [{'ale.lives': 0}]
[step 24] infos: [{'ale.lives': 0}]
[step 25] infos: [{'ale.lives': 0}]
[step 26] infos: [{'ale.lives': 0}]
[step 27] infos: [{'ale.lives': 0}]
[step 28] infos: [{'ale.lives': 0}]
[

 20%|██        | 1/5 [00:00<00:02,  1.76it/s]

[step 86] infos: [{'ale.lives': 0}]
[step 87] infos: [{'ale.lives': 0}]
[step 88] infos: [{'ale.lives': 0}]
[step 89] infos: [{'ale.lives': 0}]
[step 90] infos: [{'ale.lives': 0}]
[step 91] infos: [{'ale.lives': 0}]
[step 92] infos: [{'ale.lives': 0}]
[step 93] infos: [{'ale.lives': 0}]
[step 94] infos: [{'ale.lives': 0}]
[step 95] infos: [{'ale.lives': 0}]
[step 96] infos: [{'ale.lives': 0}]
[step 97] infos: [{'ale.lives': 0}]
[step 98] infos: [{'ale.lives': 0}]
[step 99] infos: [{'ale.lives': 0}]
[step 100] infos: [{'ale.lives': 0}]
[step 1] infos: [{'ale.lives': 0}]
[step 2] infos: [{'ale.lives': 0}]
[step 3] infos: [{'ale.lives': 0}]
[step 4] infos: [{'ale.lives': 0}]
[step 5] infos: [{'ale.lives': 0}]
[step 6] infos: [{'ale.lives': 0}]
[step 7] infos: [{'ale.lives': 0}]
[step 8] infos: [{'ale.lives': 0}]
[step 9] infos: [{'ale.lives': 0}]
[step 10] infos: [{'ale.lives': 0}]
[step 11] infos: [{'ale.lives': 0}]
[step 12] infos: [{'ale.lives': 0}]
[step 13] infos: [{'ale.lives': 0}]


 40%|████      | 2/5 [00:01<00:01,  1.82it/s]

[step 65] infos: [{'ale.lives': 0}]
[step 66] infos: [{'ale.lives': 0}]
[step 67] infos: [{'ale.lives': 0}]
[step 68] infos: [{'ale.lives': 0}]
[step 69] infos: [{'ale.lives': 0}]
[step 70] infos: [{'ale.lives': 0}]
[step 71] infos: [{'ale.lives': 0}]
[step 72] infos: [{'ale.lives': 0}]
[step 73] infos: [{'ale.lives': 0}]
[step 74] infos: [{'ale.lives': 0}]
[step 75] infos: [{'ale.lives': 0}]
[step 76] infos: [{'ale.lives': 0}]
[step 77] infos: [{'ale.lives': 0}]
[step 78] infos: [{'ale.lives': 0}]
[step 79] infos: [{'ale.lives': 0}]
[step 80] infos: [{'ale.lives': 0}]
[step 81] infos: [{'ale.lives': 0}]
[step 82] infos: [{'ale.lives': 0}]
[step 83] infos: [{'ale.lives': 0}]
[step 84] infos: [{'ale.lives': 0}]
[step 85] infos: [{'ale.lives': 0}]
[step 86] infos: [{'ale.lives': 0}]
[step 87] infos: [{'ale.lives': 0}]
[step 88] infos: [{'ale.lives': 0}]
[step 89] infos: [{'ale.lives': 0}]
[step 90] infos: [{'ale.lives': 0}]
[step 91] infos: [{'ale.lives': 0}]
[step 92] infos: [{'ale.live

 60%|██████    | 3/5 [00:01<00:01,  1.87it/s]

[step 91] infos: [{'ale.lives': 0}]
[step 92] infos: [{'ale.lives': 0}]
[step 93] infos: [{'ale.lives': 0}]
[step 94] infos: [{'ale.lives': 0}]
[step 95] infos: [{'ale.lives': 0}]
[step 96] infos: [{'ale.lives': 0}]
[step 97] infos: [{'ale.lives': 0}]
[step 98] infos: [{'ale.lives': 0}]
[step 99] infos: [{'ale.lives': 0}]
[step 100] infos: [{'ale.lives': 0}]
[step 1] infos: [{'ale.lives': 0}]
[step 2] infos: [{'ale.lives': 0}]
[step 3] infos: [{'ale.lives': 0}]
[step 4] infos: [{'ale.lives': 0}]
[step 5] infos: [{'ale.lives': 0}]
[step 6] infos: [{'ale.lives': 0}]
[step 7] infos: [{'ale.lives': 0}]
[step 8] infos: [{'ale.lives': 0}]
[step 9] infos: [{'ale.lives': 0}]
[step 10] infos: [{'ale.lives': 0}]
[step 11] infos: [{'ale.lives': 0}]
[step 12] infos: [{'ale.lives': 0}]
[step 13] infos: [{'ale.lives': 0}]
[step 14] infos: [{'ale.lives': 0}]
[step 15] infos: [{'ale.lives': 0}]
[step 16] infos: [{'ale.lives': 0}]
[step 17] infos: [{'ale.lives': 0}]
[step 18] infos: [{'ale.lives': 0}]


 80%|████████  | 4/5 [00:02<00:00,  1.90it/s]

[step 72] infos: [{'ale.lives': 0}]
[step 73] infos: [{'ale.lives': 0}]
[step 74] infos: [{'ale.lives': 0}]
[step 75] infos: [{'ale.lives': 0}]
[step 76] infos: [{'ale.lives': 0}]
[step 77] infos: [{'ale.lives': 0}]
[step 78] infos: [{'ale.lives': 0}]
[step 79] infos: [{'ale.lives': 0}]
[step 80] infos: [{'ale.lives': 0}]
[step 81] infos: [{'ale.lives': 0}]
[step 82] infos: [{'ale.lives': 0}]
[step 83] infos: [{'ale.lives': 0}]
[step 84] infos: [{'ale.lives': 0}]
[step 85] infos: [{'ale.lives': 0}]
[step 86] infos: [{'ale.lives': 0}]
[step 87] infos: [{'ale.lives': 0}]
[step 88] infos: [{'ale.lives': 0}]
[step 89] infos: [{'ale.lives': 0}]
[step 90] infos: [{'ale.lives': 0}]
[step 91] infos: [{'ale.lives': 0}]
[step 92] infos: [{'ale.lives': 0}]
[step 93] infos: [{'ale.lives': 0}]
[step 94] infos: [{'ale.lives': 0}]
[step 95] infos: [{'ale.lives': 0}]
[step 96] infos: [{'ale.lives': 0}]
[step 97] infos: [{'ale.lives': 0}]
[step 98] infos: [{'ale.lives': 0}]
[step 99] infos: [{'ale.live

100%|██████████| 5/5 [00:02<00:00,  1.92it/s]

[step 95] infos: [{'ale.lives': 0}]
[step 96] infos: [{'ale.lives': 0}]
[step 97] infos: [{'ale.lives': 0}]
[step 98] infos: [{'ale.lives': 0}]
[step 99] infos: [{'ale.lives': 0}]
[step 100] infos: [{'ale.lives': 0}]
Δt = 4.61s
Total Δt = 4.62s





In [0]:
trajectory_filenames = []
for r, ds, fs in os.walk(SAVE_DIR): # r=root, d=directories, f=files
    print(r + '/')
    for f in fs:
        print("|___", f)
        trajectory_filenames.append(f)

data/
|___ PongNoFrameskip-v4_PPO2_t54_1000s.npz
|___ PongNoFrameskip-v4_PPO2_t72_1000s.npz
|___ PongNoFrameskip-v4_PPO2_t40_1000s.npz
|___ BreakoutNoFrameskip-v4_PPO2_t40_1000s.npz
|___ BreakoutNoFrameskip-v4_PPO2_t35_1000s.npz
|___ BreakoutNoFrameskip-v4_PPO2_t62_1000s.npz
|___ BreakoutNoFrameskip-v4_PPO2_t52_1000s.npz
|___ BreakoutNoFrameskip-v4_PPO2_t38_1000s.npz
|___ BreakoutNoFrameskip-v4_PPO2_t89_1000s.npz
|___ PongNoFrameskip-v4_PPO2_t48_1000s.npz
|___ PongNoFrameskip-v4_PPO2_t3_1000s.npz
|___ BreakoutNoFrameskip-v4_PPO2_t29_1000s.npz
|___ BreakoutNoFrameskip-v4_PPO2_t11_1000s.npz
|___ BreakoutNoFrameskip-v4_PPO2_t48_1000s.npz
|___ PongNoFrameskip-v4_PPO2_t16_1000s.npz
|___ BreakoutNoFrameskip-v4_PPO2_t18_1000s.npz
|___ PongNoFrameskip-v4_PPO2_t21_1000s.npz
|___ BreakoutNoFrameskip-v4_PPO2_t17_1000s.npz
|___ BreakoutNoFrameskip-v4_PPO2_t53_1000s.npz
|___ PongNoFrameskip-v4_PPO2_t82_1000s.npz
|___ PongNoFrameskip-v4_PPO2_t89_1000s.npz
|___ PongNoFrameskip-v4_PPO2_t47_1000s.npz
|

In [0]:
test_trajectory_filename = trajectory_filenames[0]
print(f"Loading from '{test_trajectory_filename}'\n")

test_trajectory_load = np.load(os.path.join(SAVE_DIR, test_trajectory_filename), 
                               allow_pickle=True)

print("observations shape:", test_trajectory_load['observations'].shape)
print("actions shape:", test_trajectory_load['actions'].shape)

Loading from 'PongNoFrameskip-v4_PPO2_t54_1000s.npz'

observations shape: (1000, 84, 84, 4)
actions shape: (1000,)


In [0]:
# https://github.com/araffin/rl-baselines-zoo/blob/master/utils/record_video.py
# https://github.com/araffin/rl-baselines-zoo/blob/master/enjoy.py
# https://github.com/hill-a/stable-baselines#try-it-online-with-colab-notebooks-

## Old

In [0]:
# def save_as_image(observation, save_dir, img_name, prefix="img_", downscale=False):
#     # downscaling the image
#     if downscale:
#         im_array = cv2.resize(observation, INP_IMAGE_SHAPE) # TODO test tf.image.resize
#         im_array = np.array(im_array, dtype='float32')
#         im_array = (im_array/127.5) - 1
#         im = PIL.Image.fromarray(im_array, 'RGB')
#     else:
#         try:
#             im = PIL.Image.fromarray(observation, 'RGB')
#         except:
#             print(type(observation))
#     imname = "{}{}.png".format(prefix, img_name)
#     im.save(os.path.join(save_dir, imname))

In [0]:
# # you can change the default values here
# save_dir = SAVE_DIR
# num_images = IMAGES_TO_GENERATE

In [0]:
# os.makedirs(save_dir, exist_ok=True)

In [0]:
# envs = [gym.make(env_id) for env_id in ENV_IDS]

In [0]:
# for env_id, env in zip(ENV_IDS, envs):
#     print(env_id)
#     env_dir = os.path.join(save_dir, f"{env_id}_{IMAGES_TO_GENERATE}")
#     os.makedirs(env_dir, exist_ok=True)
    
#     env.reset()
#     i, current_env_images = 0, 0
    
#     actions_taken = []
#     while i < num_images:
#         # take a random action (sampled from the action space)
#         action = env.action_space.sample()
#         actions_taken.append(action)
#         assert 0 <= action < 18, f"action = {action}"
#         obs, _, done, _ = env.step(action)
#         if np.mean(obs) > 0.01:
#             save_as_image(obs, env_dir, str(i))
#             i += 1
#         else:
#             print("should I have been reached?")
#             continue
#         if done:
#             print(f"reseting {env_id} at i={i}")
#             env.reset()
    
#     actions_taken = np.asarray(actions_taken, dtype='int8')
#     print(actions_taken.shape, actions_taken.size, actions_taken.dtype)
#     np.save(os.path.join(save_dir, f"{env_id}_{IMAGES_TO_GENERATE}_actions"), actions_taken)

In [0]:
# IMG_SIZE = 160 # All images will be resized to 160x160

# def load_image(image_path):
#     image = tf.io.read_file(image_path)
#     image = tf.image.decode_png(image, channels=3)
#     image = tf.cast(image, tf.float32)
#     image = (image/127.5) - 1
#     image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
#     return image, image_path

# IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)

# # Create the base model from the pre-trained model MobileNet V2
# base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
#                                                include_top=False,
#                                                weights='imagenet')

# s = time()
# # Get unique images
# encode_train = img_name_vector

# # Feel free to change batch_size according to your system configuration
# image_dataset = tf.data.Dataset.from_tensor_slices(encode_train)
# image_dataset = image_dataset.map(
#   load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE).batch(16)

# print((time()-s)/1000)

# for img, path in image_dataset:
#   batch_features = image_features_extract_model(img)
#   batch_features = tf.reshape(batch_features,
#                               (batch_features.shape[0], -1, batch_features.shape[3]))

#   for bf, p in zip(batch_features, path):
#     path_of_feature = p.numpy().decode("utf-8")
#     np.save(path_of_feature, bf.numpy())