<a href="https://colab.research.google.com/github/laurelkeys/machine-learning/blob/master/assignment-4/Trajectories.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import os

# from google.colab import drive
# drive.mount('/content/drive', force_remount=True)
# PATH_TO_DATA = os.path.join("drive", "My Drive", "unicamp", "MC886", "atari")

PATH_TO_DATA = ""

In [24]:
SAVE_DIR = os.path.join(PATH_TO_DATA, "data")
os.makedirs(SAVE_DIR, exist_ok=True)

SAVE_DIR # where the trajectories for each game will be saved to

'data'

In [25]:
LOG_DIR = os.path.join(PATH_TO_DATA, "data", "results")
os.makedirs(LOG_DIR, exist_ok=True)

LOG_DIR # where the stats for each game will be saved to

'data/results'

In [0]:
# How many trajectories are recorded for every entry of GAMES.
N_OF_TRAJECTORIES = 5

# Length of each trajectory, counted in environment steps.
N_OF_STEPS = 1000

# (RL algorithm, game environment id) string pairs to generate data for.
# The algorithm name selects the implementation and the folder of the
# pre-trained agent; the environment id selects the agent file.
GAMES = [
    ("PPO2", "BreakoutNoFrameskip-v4"),
    ("PPO2", "PongNoFrameskip-v4"),
]

## Install dependencies
Note that we're not installing [MPI](https://mpi4py.readthedocs.io/en/stable/), so the algorithms that depend on it (`DDPG`, `GAIL`, `PPO1`, `TRPO`) will probably not work.

In [0]:
!apt-get update                                                  > /dev/null 2>&1
!apt-get install swig cmake zlib1g-dev ffmpeg freeglut3-dev xvfb > /dev/null 2>&1
!pip install pytablewriter                                       > /dev/null 2>&1

In [28]:
#### Stable Baselines only supports TF 1.x for now ####
# The %tensorflow_version magic is Colab-specific; if it raises, the error is
# swallowed and whichever TensorFlow is installed gets imported below.
try:
    # Colab only
    %tensorflow_version 1.x
except Exception:
    pass

import tensorflow as tf
from tensorflow import keras
# show which TensorFlow version ended up being loaded
print(tf.__version__)

1.15.0


In [0]:
import os
from time import time
from IPython.display import clear_output

import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# NOTE use tqdm.write() instead of print() inside of tqdm wrapped loops
from tqdm import tqdm

import gym
from gym.envs.atari.atari_env import ACTION_MEANING

### Update [Stable Baselines](https://github.com/hill-a/stable-baselines) and clone [RL Zoo Baselines](https://github.com/araffin/rl-baselines-zoo)

In [0]:
!yes | pip uninstall stable-baselines                           > /dev/null 2>&1
!pip install git+https://github.com/hill-a/stable-baselines.git > /dev/null 2>&1

In [0]:
from stable_baselines.common.cmd_util import make_atari_env
from stable_baselines.common.vec_env import VecFrameStack

# HACK to save logs
# The log destination and formats are set twice: through the OPENAI_*
# environment variables and through an explicit logger.configure() call,
# both pointing at the absolute path of LOG_DIR.
from stable_baselines import logger
os.environ["OPENAI_LOG_FORMAT"] = 'stdout,log,csv,tensorboard'
os.environ["OPENAI_LOGDIR"] = os.path.abspath(LOG_DIR)
logger.configure(folder=os.path.abspath(LOG_DIR), format_strs=['stdout', 'log', 'csv', 'tensorboard'])

# NOTE add more algorithms here if you want to use them
# ALGO_IMPL maps an algorithm name (as written in GAMES) to the class used
# to load its pre-trained agent.
from stable_baselines import PPO2, ACER, ACKTR
ALGO_IMPL = {
    'PPO2': PPO2,
    'ACER': ACER,
    'ACKTR': ACKTR,
}

In [0]:
!git clone https://github.com/araffin/rl-baselines-zoo.git > /dev/null 2>&1

## Load pre-trained agents

In [0]:
PATH_TO_AGENTS = os.path.join("rl-baselines-zoo", "trained_agents")
!ls rl-baselines-zoo/trained_agents/

In [0]:
# Print, per algorithm, the pre-trained agent files whose name ends with `ext`.
ext = "NoFrameskip-v4.pkl"
algorithms = ["PPO2"]

for algo in algorithms:
    algo_path = os.path.join(PATH_TO_AGENTS, algo.lower())
    print(algo_path + '/')
    agent_files = [name for name in os.listdir(algo_path) if name.endswith(ext)]
    # order by the reversed filename, so env types get grouped together
    agent_files.sort(key=lambda name: name[::-1])
    for name in agent_files:
        print("├──", name)

In [0]:
# Load every agent listed in GAMES and show its observation/action spaces.
# HACK: the whole listing runs twice and the cell output is cleared at the
# start of each pass, which removes the TensorFlow warnings from the first pass.
for load_pass in range(2):
    clear_output()
    for algo, env_id in GAMES:
        agent_path = os.path.join(PATH_TO_AGENTS, algo.lower(), env_id + '.pkl')
        print(f"('{algo}', '{env_id}')")
        model = ALGO_IMPL[algo].load(agent_path, verbose=0)
        print("observation_space:", model.observation_space)
        print("action_space:", model.action_space)
        print()

## Monitor vectorized environments

In [0]:
import csv
import json
from collections import deque

from stable_baselines.common.vec_env import VecEnvWrapper

class VecMonitor(VecEnvWrapper):
    """Vectorized-env wrapper that tracks the return and length of each episode.

    For every sub-environment it accumulates the rewards and the number of
    steps since the last episode end. When a sub-environment reports `done`,
    an 'episode' entry with keys 'r' (return), 'l' (length) and 't' (seconds
    since this wrapper was created) is attached to a copy of that
    environment's info dict and, if a results writer is configured, appended
    to the monitor CSV file.

    :param venv: the vectorized environment to wrap
    :param filename: where to log episodes; when the logger has an existing
        directory the name is placed inside it. If falsy, nothing is written.
    :param keep_buf: if non-zero, keep the last `keep_buf` episode returns and
        lengths in `epret_buf` / `eplen_buf`
    :param info_keywords: keys copied from the info dict into the episode
        entry (and CSV row) whenever they are present
    """
    EXT = "monitor.csv"

    def __init__(self, venv, filename=None, keep_buf=0, info_keywords=()):
        VecEnvWrapper.__init__(self, venv)
        # per-env running totals; allocated by reset()
        self.eprets = None
        self.eplens = None
        self.epcount = 0
        self.tstart = time()
        if filename:
            log_dir = logger.get_dir()
            if log_dir is not None and os.path.isdir(log_dir):
                filename = os.path.join(log_dir, filename)
            self.results_writer = ResultsWriter(filename,
                                                header={'t_start': self.tstart},
                                                extra_keys=info_keywords)
        else:
            self.results_writer = None
        self.info_keywords = info_keywords
        self.keep_buf = keep_buf
        if self.keep_buf:
            self.epret_buf = deque([], maxlen=keep_buf)
            self.eplen_buf = deque([], maxlen=keep_buf)

    def reset(self):
        """Reset the wrapped env and zero the running return/length of every sub-env."""
        obs = self.venv.reset()
        self.eprets = np.zeros(self.num_envs, 'f')
        self.eplens = np.zeros(self.num_envs, 'i')
        return obs

    def step_wait(self):
        """Forward the step result, attaching 'episode' stats to finished sub-envs."""
        obs, rews, dones, infos = self.venv.step_wait()
        self.eprets += rews
        self.eplens += 1

        newinfos = list(infos[:])
        for i, done in enumerate(dones):
            if not done:
                continue
            # work on a copy, so the info dict owned by the wrapped env is left untouched
            info = infos[i].copy()
            ret = self.eprets[i]
            eplen = self.eplens[i]
            epinfo = {'r': ret, 'l': eplen, 't': round(time() - self.tstart, 6)}
            for k in self.info_keywords:
                # a requested keyword may be missing from this info dict
                # (e.g. 'episode' for Atari envs); skip it instead of hiding
                # every possible error behind a bare `except`
                if k in info:
                    epinfo[k] = info[k]
            info['episode'] = epinfo
            if self.keep_buf:
                self.epret_buf.append(ret)
                self.eplen_buf.append(eplen)
            self.epcount += 1
            self.eprets[i] = 0
            self.eplens[i] = 0
            if self.results_writer:
                self.results_writer.write_row(epinfo)
            newinfos[i] = info
        return obs, rews, dones, newinfos

    def close(self):
        """Close the monitor file (if one was opened), then the wrapped env."""
        if self.results_writer is not None:
            self.results_writer.f.close()
        return VecEnvWrapper.close(self)
        
class ResultsWriter(object):
    """Append-only CSV log with one row per finished episode.

    :param filename: destination. Unless it already ends with
        `VecMonitor.EXT`, it is treated as a folder: the folder is created
        and the log is written to `<filename>/<VecMonitor.EXT>`.
    :param header: text written before the CSV header; a dict is serialized
        as a single '# {json}' comment line
    :param extra_keys: additional column names after 'r', 'l' and 't'
    """
    def __init__(self, filename, header='', extra_keys=()):
        self.extra_keys = extra_keys
        assert filename is not None
        if not filename.endswith(VecMonitor.EXT):
            os.makedirs(filename, exist_ok=True)
            filename = os.path.join(filename, VecMonitor.EXT)
        # newline="" is what the csv module asks for: it writes its own line
        # terminators, which must not be translated again by the file object
        self.f = open(filename, "wt", newline="")
        if isinstance(header, dict):
            header = '# {} \n'.format(json.dumps(header))
        self.f.write(header)
        self.logger = csv.DictWriter(self.f, fieldnames=('r', 'l', 't')+tuple(extra_keys))
        self.logger.writeheader()
        self.f.flush()

    def write_row(self, epinfo):
        """Write one episode dict as a CSV row and flush it to disk right away."""
        if self.logger:
            self.logger.writerow(epinfo)
            self.f.flush()

    def close(self):
        """Close the log file; later write_row() calls become no-ops."""
        self.logger = None
        if not self.f.closed:
            self.f.close()

## Generate trajectories

In [0]:
# how much the trajectory-generation cell prints: 0, 1 or 2
VERBOSE = 2

for label, value in (("N_OF_STEPS:", N_OF_STEPS), ("N_OF_TRAJECTORIES:", N_OF_TRAJECTORIES)):
    print(label, value)
# total number of environment steps recorded for each game
print(N_OF_STEPS, "*", N_OF_TRAJECTORIES, "=", N_OF_STEPS * N_OF_TRAJECTORIES)

In [0]:
# set to False to save observations as PNG and store their location path,
# instead of saving them as numpy arrays (which end up taking more space)
SAVE_IMAGES_AS_NUMPY_ARRAYS = False

PRINT_EARLY_DONE = False # print env resets in a trajectory
PRINT_ACTIONS_TAKEN = True # print the meanings of actions
PRINT_ATARI_EPISODE = False # print 'episode' if available

# reporting period, in trajectories (about ten reports per game);
# clamped to 1 because `// 10` gives 0 for fewer than 10 trajectories,
# and 0 is not a valid period
# set to N_OF_TRAJECTORIES + 1 not to print
PRINT_EVERY_N_TRAJECTORIES = max(1, N_OF_TRAJECTORIES // 10)

In [0]:
# Generate N_OF_TRAJECTORIES trajectories of N_OF_STEPS steps for every
# (algorithm, game) pair in GAMES, using the pre-trained agent, and save each
# one as a compressed .npz file holding the actions, observations (arrays or
# PNG paths), rewards and episode-start flags.
print("N_OF_STEPS:", N_OF_STEPS)
print("N_OF_TRAJECTORIES:", N_OF_TRAJECTORIES)
print("PRINT_EVERY_N_TRAJECTORIES:", PRINT_EVERY_N_TRAJECTORIES)

# widths used to left pad trajectory / step numbers with 0's
trajectory_digits = len(str(N_OF_TRAJECTORIES - 1))
step_digits = len(str(N_OF_STEPS - 1))

time_start = time()
print("\n================")

for algo, env_id in GAMES:
    time_start_env = time()

    # setup paths where data will be saved to
    dataset_folder = f"{env_id}_{algo}_{N_OF_STEPS}steps"
    if not SAVE_IMAGES_AS_NUMPY_ARRAYS:
        images_folder = os.path.join(SAVE_DIR, dataset_folder, "images")
        os.makedirs(images_folder, exist_ok=True)
        if VERBOSE > 0:
            print(f"Images will be recorded to '{images_folder}/'\n")

    env = make_atari_env(env_id, num_env=1, seed=0)
    env = VecFrameStack(env, n_stack=4) # Frame-stacking with 4 frames
    env = VecMonitor(env, f"{algo}-{env_id}", info_keywords=('episode',))
    agent_path = os.path.join(PATH_TO_AGENTS, algo.lower(), env_id + '.pkl')

    # try/finally: the env is closed even if generation fails or is interrupted
    try:
        print(f"('{algo}', '{env_id}')")
        print(f"Getting pre-trained agent from: '{agent_path}'\n")

        model = ALGO_IMPL[algo].load(agent_path, env)

        for trajectory in tqdm(range(N_OF_TRAJECTORIES), position=0, leave=True):
            # store the "obs -> action" mapping
            observed_states, actions_taken = [], []

            # episode stats; the first step of a trajectory always starts an episode
            ep_rewards, ep_starts = np.zeros((N_OF_STEPS,)), [True]

            # NOTE action, obs, reward, done and info are
            #      arrays as we're using a vectorized env,
            #      so index 0 selects the single env in use

            obs = env.reset()
            for step in range(N_OF_STEPS):
                observed_states.append(obs[0])
                action, _states = model.predict(obs)
                actions_taken.append(action[0])
                obs, reward, done, info = env.step(action)
                ep_starts.append(done[0])
                ep_rewards[step] = reward[0]
                if info is not None and PRINT_ATARI_EPISODE:
                    ep_info = info[0].get('episode')
                    if ep_info is not None:
                        tqdm.write("\n ****************")
                        tqdm.write(f" Atari Episode Score: {ep_info['r']:.2f}")
                        tqdm.write(f" Atari Episode Length: {ep_info['l']}")
                        tqdm.write(" ****************")
                if done[0]:
                    # NOTE(review): vectorized envs presumably reset finished
                    # sub-envs on their own, which would make this a second
                    # reset - confirm before removing
                    obs = env.reset()
                    if PRINT_EARLY_DONE:
                        tqdm.write(f" Done at step {step} (reseting env)")

            # left pad the trajectory number with 0's
            trajectory_number = str(trajectory).zfill(trajectory_digits)

            if not SAVE_IMAGES_AS_NUMPY_ARRAYS:
                # one folder of PNG files per trajectory; only the paths are stored
                image_folder = os.path.join(images_folder, trajectory_number)
                os.makedirs(image_folder, exist_ok=True)
                image_paths = []
                for step, frame in enumerate(observed_states):
                    image_path = os.path.join(image_folder, f"{str(step).zfill(step_digits)}.png")
                    if frame.shape[-1] == 3:
                        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                    # cv2.imwrite reports failure through its return value
                    # only; fail loudly rather than save a trajectory that
                    # points to missing images
                    if not cv2.imwrite(image_path, frame):
                        raise IOError(f"Could not write image to '{image_path}'")
                    image_paths.append(image_path)
                observed_states = np.array(image_paths)
            else:
                observed_states = np.concatenate(observed_states).reshape((-1,) + env.observation_space.shape)

            actions_taken = np.array(actions_taken)
            # drop the flag appended after the last step, keeping one flag per step
            ep_starts = np.array(ep_starts[:-1])

            # every value is a numpy array with one entry per step
            data = {
                'actions': actions_taken,
                'observations': observed_states,
                'rewards': ep_rewards,
                'episode_starts': ep_starts
            }

            save_folder = os.path.join(SAVE_DIR, dataset_folder, trajectory_number)
            os.makedirs(save_folder, exist_ok=True)
            np.savez_compressed(file=os.path.join(save_folder, "trajectory"), **data)

            # honour the configured reporting period (a value of 0 or less disables it)
            if PRINT_EVERY_N_TRAJECTORIES > 0 and (trajectory + 1) % PRINT_EVERY_N_TRAJECTORIES == 0:
                tqdm.write(f" Saved trajectory {trajectory + 1} (of {N_OF_TRAJECTORIES})")
                if VERBOSE > 1:
                    tqdm.write(f" Mean reward: {np.mean(ep_rewards):.2f}, ep_rewards.shape == {ep_rewards.shape}")

            if PRINT_ACTIONS_TAKEN and trajectory == N_OF_TRAJECTORIES - 1:
                # NOTE(review): ACTION_MEANING looks to be keyed by the
                # emulator's action id, while the agent's actions may index the
                # game's own (smaller) action set - confirm the labels against
                # the env's get_action_meanings()
                distinct_actions = sorted(set(actions_taken.tolist()))
                meanings = ', '.join(ACTION_MEANING[action] for action in distinct_actions)
                tqdm.write(f"\n Actions taken: {meanings}")
    finally:
        env.close()

    print(f" Δt = {(time() - time_start_env):.2f}s")
    print("================\n")

print(f"Total Δt = {(time() - time_start):.2f}s")

## Load results

In [0]:
from stable_baselines.bench.monitor import get_monitor_files
from stable_baselines.results_plotter import load_results, ts2xy

!ls /content/data/results/

In [0]:
def monitor_info(monitor_file_path, print_info=True):
    """Load the monitor logs found in a folder and optionally summarize them.

    :param monitor_file_path: folder passed to `load_results`
    :param print_info: if True, print the mean/std of the episode rewards,
        the last timestep value and the number of logged episodes
    :return: whatever `load_results` returned for that folder
    """
    results = load_results(monitor_file_path)
    # x: 'timesteps' axis, y: episode rewards
    # (presumably x is the cumulative episode length - confirm in results_plotter)
    x, y = ts2xy(results, 'timesteps')
    if print_info:
        if len(y) == 0:
            # nothing was logged: there is no last timestep and no mean to report
            print("Number of episodes: 0")
        else:
            print(f"Mean reward: {np.mean(y):.2f} +- {np.std(y):.2f}")
            print(f"Number of timesteps: {x[-1]}")
            print(f"Number of episodes: {len(y)}") # trajectories
    return results

In [0]:
monitor_info(LOG_DIR) # data/results/0.monitor.csv

In [0]:
# Summarize the monitor log of each game; the folder name joins the
# algorithm and the environment id with a dash.
for algo, env_id in GAMES:
    monitor_folder_path = os.path.join(LOG_DIR, "-".join((algo, env_id)))
    monitor_files = get_monitor_files(monitor_folder_path)
    print(monitor_files)
    monitor_info(monitor_folder_path)

## Zip data

In [0]:
!ls /content/data/

In [0]:
# bundle the data folder into a single archive (zip's output is discarded)
# NOTE(review): both paths are hard-coded under /content - presumably Colab's
# working directory; confirm they match SAVE_DIR when PATH_TO_DATA is set
!zip -r /content/data.zip /content/data > /dev/null 2>&1

In [0]:
!ls -la | grep .zip
!ls -lh | grep .zip

In [0]:
from google.colab import files

In [0]:
# files.download("/content/data.zip")

In [0]:
# larger files may not work, try looking at:
# https://stackoverflow.com/questions/49428332/how-to-download-large-files-like-weights-of-a-model-from-colaboratory

In [0]:
# Try the direct browser download first; `downloaded_data` records whether it
# worked, so that a later cell can fall back to another way of exporting.
downloaded_data = False

try:
    files.download("/content/data.zip")
    downloaded_data = True
except Exception as download_error:
    # `except Exception` (not a bare `except`) lets a manual interrupt of the
    # cell through, and the reason for the failure is shown instead of hidden
    print(f"files.download failed: {download_error!r}")

    from google.colab import auth
    from googleapiclient.http import MediaFileUpload
    from googleapiclient.discovery import build

    auth.authenticate_user()

In [0]:
if not downloaded_data:
    