<a href="https://colab.research.google.com/github/laurelkeys/machine-learning/blob/master/assignment-4/Trajectories.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import os

# from google.colab import drive
# drive.mount('/content/drive', force_remount=True)
# PATH_TO_DATA = os.path.join("drive", "My Drive", "unicamp", "MC886", "atari")

PATH_TO_DATA = ""

In [29]:
SAVE_DIR = os.path.join(PATH_TO_DATA, "data")
os.makedirs(SAVE_DIR, exist_ok=True)

SAVE_DIR # where the trajectories for each game will be saved to

'data'

In [30]:
LOG_DIR = os.path.join(PATH_TO_DATA, "data", "results")
os.makedirs(LOG_DIR, exist_ok=True)

LOG_DIR # where the stats for each game will be saved to

'data/results'

In [0]:
# number of trajectories to generate
N_OF_TRAJECTORIES = 45

# number of steps per trajectory
N_OF_STEPS = 1000

# list of string tuples in the format (RL Algorithm, Game Environment)
GAMES = [
    ("PPO2", "BreakoutNoFrameskip-v4"),
    ("PPO2", "PongNoFrameskip-v4"),
]

## Install dependencies
Note that we're not installing [MPI](https://mpi4py.readthedocs.io/en/stable/), so these algorithms will probably not work: `DDPG`, `GAIL`, `PPO1`, `TRPO`.

In [0]:
!apt-get update                                                  > /dev/null 2>&1
!apt-get install swig cmake zlib1g-dev ffmpeg freeglut3-dev xvfb > /dev/null 2>&1
!pip install pytablewriter                                       > /dev/null 2>&1

In [33]:
#### Stable Baselines only supports TF 1.x for now ####
try:
    # Colab only
    %tensorflow_version 1.x
except Exception:
    pass

import tensorflow as tf
from tensorflow import keras
print(tf.__version__)

1.15.0


In [0]:
import os
from time import time
from IPython.display import clear_output

import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# NOTE use tqdm.write() instead of print() inside of tqdm wrapped loops
from tqdm import tqdm

import gym
from gym.envs.atari.atari_env import ACTION_MEANING

### Update [Stable Baselines](https://github.com/hill-a/stable-baselines) and clone [RL Zoo Baselines](https://github.com/araffin/rl-baselines-zoo)

In [0]:
!yes | pip uninstall stable-baselines                           > /dev/null 2>&1
!pip install git+https://github.com/hill-a/stable-baselines.git > /dev/null 2>&1

In [36]:
from stable_baselines.common.cmd_util import make_atari_env
from stable_baselines.common.vec_env import VecFrameStack

# HACK to save logs
from stable_baselines import logger
os.environ["OPENAI_LOG_FORMAT"] = 'stdout,log,csv,tensorboard'
os.environ["OPENAI_LOGDIR"] = os.path.abspath(LOG_DIR)
logger.configure()

# NOTE add more algorithms here if you want to use them
from stable_baselines import PPO2, ACER, ACKTR
ALGO_IMPL = {
    'PPO2': PPO2,
    'ACER': ACER,
    'ACKTR': ACKTR,
}

Logging to /content/data/results


In [0]:
!git clone https://github.com/araffin/rl-baselines-zoo.git > /dev/null 2>&1

## Load pre-trained agents

In [38]:
PATH_TO_AGENTS = os.path.join("rl-baselines-zoo", "trained_agents")
!ls rl-baselines-zoo/trained_agents/

a2c  acer  acktr  ddpg	dqn  her  ppo2	sac  td3  trpo


In [39]:
ext = "NoFrameskip-v4.pkl"
# check the available pre-trained models
algorithms = ["PPO2"]
for algo in algorithms:
    algo_path = os.path.join(PATH_TO_AGENTS, algo.lower())
    print(algo_path + '/')
    for f in sorted(os.listdir(algo_path), key=lambda x: x[::-1]):
        # sort by the reverse filename, so env types get grouped together
        if f.endswith(ext):
            print("├──", f)

rl-baselines-zoo/trained_agents/ppo2/
├── PongNoFrameskip-v4.pkl
├── MsPacmanNoFrameskip-v4.pkl
├── EnduroNoFrameskip-v4.pkl
├── BeamRiderNoFrameskip-v4.pkl
├── SpaceInvadersNoFrameskip-v4.pkl
├── QbertNoFrameskip-v4.pkl
├── SeaquestNoFrameskip-v4.pkl
├── BreakoutNoFrameskip-v4.pkl


In [40]:
for i in range(2):
    clear_output() # HACK to remove TensorFlow warnings
    for algo, env_id in GAMES:
        print(f"('{algo}', '{env_id}')")
        agent_path = os.path.join(PATH_TO_AGENTS, algo.lower(), env_id + '.pkl')
        model = ALGO_IMPL[algo].load(agent_path, verbose=0)
        print("observation_space:", model.observation_space)
        print("action_space:", model.action_space)
        print()

('PPO2', 'BreakoutNoFrameskip-v4')
observation_space: Box(84, 84, 4)
action_space: Discrete(4)

('PPO2', 'PongNoFrameskip-v4')
observation_space: Box(84, 84, 4)
action_space: Discrete(6)



## Generate trajectories

In [41]:
VERBOSE = 2 # 0, 1 or 2

print("N_OF_STEPS:", N_OF_STEPS)
print("N_OF_TRAJECTORIES:", N_OF_TRAJECTORIES)
print(N_OF_STEPS, "*", N_OF_TRAJECTORIES, "=", N_OF_STEPS * N_OF_TRAJECTORIES)

N_OF_STEPS: 1000
N_OF_TRAJECTORIES: 45
1000 * 45 = 45000


In [0]:
IMAGES_FOLDER = "images" # name of the folder in which to save the observations
FILE_NAME = "trajectory" # name of the .npz trajectory file

# set to False to save observations as PNG and store their location path, 
# instead of saving them as numpy arrays (which end up taking more space)
SAVE_IMAGES_AS_NUMPY_ARRAYS = False

PRINT_EARLY_DONE = False # print env resets in a trajectory
PRINT_ACTIONS_TAKEN = True # print the meanings of actions

# set to N_OF_TRAJECTORIES + 1 not to print
PRINT_EVERY_N_TRAJECTORIES = N_OF_TRAJECTORIES // 10

In [48]:
print("N_OF_STEPS:", N_OF_STEPS)
print("N_OF_TRAJECTORIES:", N_OF_TRAJECTORIES)
print("PRINT_EVERY_N_TRAJECTORIES:", PRINT_EVERY_N_TRAJECTORIES)

time_start = time()
print("\n================")

for algo, env_id in GAMES:
    time_start_env = time()

    env = make_atari_env(env_id, num_env=1, seed=0)
    env = VecFrameStack(env, n_stack=4) # Frame-stacking with 4 frames
    agent_path = os.path.join(PATH_TO_AGENTS, algo.lower(), env_id + '.pkl')

    print(f"('{algo}', '{env_id}')")
    print(f"Getting pre-trained agent from: '{agent_path}'\n")
    
    # setup paths where data will be saved to
    dataset_folder = f"{env_id}_{algo}_{N_OF_STEPS}steps"
    if not SAVE_IMAGES_AS_NUMPY_ARRAYS:
        images_folder = os.path.join(SAVE_DIR, dataset_folder, IMAGES_FOLDER)
        os.makedirs(images_folder, exist_ok=True)
        if VERBOSE > 0:
            print(f"Images will be recorded to '{images_folder}/'\n")

    model = ALGO_IMPL[algo].load(agent_path, env)
    
    for trajectory in tqdm(range(N_OF_TRAJECTORIES), position=0, leave=True):
        # store the "obs -> action" mapping
        observed_states, actions_taken = [], []
        
        # episode stats
        ep_rewards, ep_starts = np.zeros((N_OF_STEPS,)), [True]
        
        # NOTE action, obs, reward, done and info are 
        #      arrays as we're using a vectorized env
        
        obs = env.reset() # (84, 84, 4)
        for step in range(N_OF_STEPS):
            observed_states.append(obs[0])
            action, _ = model.predict(obs)
            actions_taken.append(action[0])
            obs, reward, done, info = env.step(action)
            ep_starts.append(done[0])
            ep_rewards[step] = reward[0]
            if done[0]:
                obs = env.reset()
                if PRINT_EARLY_DONE:
                    tqdm.write(f" Done at step {step} (reseting env)")
        
        # left pad the trajectory number with 0's
        trajectory_number = str(trajectory).zfill(len(str(N_OF_TRAJECTORIES - 1)))

        if not SAVE_IMAGES_AS_NUMPY_ARRAYS:
            _observed_states = [] # store image paths
            for step, obs in enumerate(observed_states):
                image_path = os.path.join(images_folder, trajectory_number, f"{str(step).zfill(len(str(N_OF_STEPS)))}.png")
                os.makedirs(image_path, exist_ok=True)
                if obs.shape[-1] == 3:
                    obs = cv2.cvtColor(obs, cv2.COLOR_RGB2BGR)
                cv2.imwrite(image_path, obs) # , [cv2.IMWRITE_PNG_COMPRESSION, 9])
                _observed_states.append(image_path)
            observed_states = np.array(_observed_states)
        else:
            observed_states = np.concatenate(observed_states).reshape((-1,) + env.observation_space.shape)

        actions_taken = np.array(actions_taken)
        ep_starts = np.array(ep_starts[:-1])

        data = {
            'actions': actions_taken,
            'observations': observed_states,
            'rewards': ep_rewards,
            'episode_starts': ep_starts
        } # type: Dict[str, np.ndarray]

        save_path = os.path.join(SAVE_DIR, dataset_folder, trajectory_number, FILE_NAME)
        os.makedirs(save_path, exist_ok=True)
        np.savez_compressed(file=save_path, **data)
        
        if (trajectory + 1) % 10 == 0:
            tqdm.write(f" Saved trajectory {trajectory + 1} (of {N_OF_TRAJECTORIES})")
            if VERBOSE > 1:
                tqdm.write(f" Mean reward: {np.mean(ep_rewards):.2f}, ep_rewards.shape == {ep_rewards.shape}")

        if PRINT_ACTIONS_TAKEN and trajectory == N_OF_TRAJECTORIES - 1:
            tqdm.write(f"\n Actions taken: {', '.join([ACTION_MEANING[action] for action in set(actions_taken)])}")
    
    env.close()
    print(f" Δt = {(time() - time_start_env):.2f}s")
    print("================\n")

print(f"Total Δt = {(time() - time_start):.2f}s")

N_OF_STEPS: 1000
N_OF_TRAJECTORIES: 45
PRINT_EVERY_N_TRAJECTORIES: 4

('PPO2', 'BreakoutNoFrameskip-v4')
Getting pre-trained agent from: 'rl-baselines-zoo/trained_agents/ppo2/BreakoutNoFrameskip-v4.pkl'

Images will be recorded to 'data/BreakoutNoFrameskip-v4_PPO2_1000steps/images/'



 22%|██▏       | 10/45 [00:45<02:38,  4.54s/it]

 Saved trajectory 10 (of 45)
 Mean reward: 0.03, ep_rewards.shape == (1000,)


 44%|████▍     | 20/45 [01:31<01:52,  4.51s/it]

 Saved trajectory 20 (of 45)
 Mean reward: 0.06, ep_rewards.shape == (1000,)


 67%|██████▋   | 30/45 [02:16<01:06,  4.44s/it]

 Saved trajectory 30 (of 45)
 Mean reward: 0.06, ep_rewards.shape == (1000,)


 89%|████████▉ | 40/45 [03:01<00:22,  4.54s/it]

 Saved trajectory 40 (of 45)
 Mean reward: 0.05, ep_rewards.shape == (1000,)


100%|██████████| 45/45 [03:24<00:00,  4.60s/it]



 Actions taken: NOOP, FIRE, UP, RIGHT
 Δt = 206.14s

('PPO2', 'PongNoFrameskip-v4')
Getting pre-trained agent from: 'rl-baselines-zoo/trained_agents/ppo2/PongNoFrameskip-v4.pkl'

Images will be recorded to 'data/PongNoFrameskip-v4_PPO2_1000steps/images/'



 22%|██▏       | 10/45 [00:46<02:45,  4.72s/it]

 Saved trajectory 10 (of 45)
 Mean reward: 0.01, ep_rewards.shape == (1000,)


 44%|████▍     | 20/45 [01:37<02:04,  4.96s/it]

 Saved trajectory 20 (of 45)
 Mean reward: 0.01, ep_rewards.shape == (1000,)


 67%|██████▋   | 30/45 [02:35<01:28,  5.88s/it]

 Saved trajectory 30 (of 45)
 Mean reward: 0.01, ep_rewards.shape == (1000,)


 89%|████████▉ | 40/45 [03:32<00:26,  5.38s/it]

 Saved trajectory 40 (of 45)
 Mean reward: 0.01, ep_rewards.shape == (1000,)


100%|██████████| 45/45 [04:00<00:00,  5.51s/it]


 Actions taken: NOOP, FIRE, UP, RIGHT, LEFT, DOWN
 Δt = 242.02s

Total Δt = 448.16s





In [49]:
!ls /content/data

BreakoutNoFrameskip-v4_PPO2_1000steps  results
PongNoFrameskip-v4_PPO2_1000steps


In [0]:
!zip -r /content/data.zip /content/data > /dev/null 2>&1

In [60]:
!ls -la | grep .zip
!ls -lh | grep .zip

-rw-r--r--  1 root root 24072140 Dec  3 02:23 data.zip
-rw-r--r--  1 root root  23M Dec  3 02:23 data.zip


In [0]:
from google.colab import files

In [0]:
files.download("/content/data.zip")

In [0]:
# larger files may not work, try looking at:
# https://stackoverflow.com/questions/49428332/how-to-download-large-files-like-weights-of-a-model-from-colaboratory