<a href="https://colab.research.google.com/github/laurelkeys/machine-learning/blob/master/assignment-4/Eval.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# # You might want to run this beforehand: it keeps allocating memory until the
# # runtime runs out of RAM, which makes Colab offer a High-RAM notebook option
# d = []
# while True:
#     d.append('1')

## Install dependencies and setup Drive

In [0]:
!pip install PyDrive > /dev/null 2>&1

In [0]:
import os
import zipfile

import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from random import randrange

from gym.envs.atari.atari_env import ACTION_MEANING

from google.colab import auth
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from oauth2client.client import GoogleCredentials

In [0]:
# Authenticate the Colab user, then hand the resulting application-default
# credentials to PyDrive; `gdrive` is the client used to fetch files from Drive
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
gdrive = GoogleDrive(gauth)

## Download and unzip data

In [0]:
# Enable link sharing for the dataset .zip file in your Drive and copy the link:
# https://drive.google.com/open?id=YOUR_FILE_ID
# The value after `id=` is the file ID to paste below.

YOUR_FILE_ID = '1OtrZycTx-VItUxzCEVo9lKaBdGk4CyIC'

In [0]:
# Handle to the shared dataset file on Drive (nothing is transferred yet)
download = gdrive.CreateFile({'id': YOUR_FILE_ID})

# Local destination: <save_folder>/atari8.zip, creating the folder if needed
save_folder = "data" # /content/data
os.makedirs(save_folder, exist_ok=True)
archive_path = os.path.join(save_folder, "atari8.zip")

# Fetch the file contents into the local archive
download.GetContentFile(archive_path)

In [6]:
# Size of the downloaded archive
!du -sh data/atari8.zip

1.6G	data/atari8.zip


In [0]:
# Extract the downloaded archive. The `with` block closes the file handle even
# if extraction fails part-way through.
with zipfile.ZipFile("/content/data/atari8.zip", 'r') as zip_ref:
    zip_ref.extractall("/content/data/")

In [0]:
# Delete the archive file from disk
!rm data/atari8.zip

In [11]:
# Total size of the data folder, then the size of each entry inside it
!du -sh data/
!du -sh data/*

3.1G	data/
395M	data/BeamRiderNoFrameskip-v4_PPO2_1000steps
395M	data/BreakoutNoFrameskip-v4_PPO2_1000steps
395M	data/EnduroNoFrameskip-v4_PPO2_1000steps
395M	data/MsPacmanNoFrameskip-v4_PPO2_1000steps
395M	data/PongNoFrameskip-v4_PPO2_1000steps
399M	data/QbertNoFrameskip-v4_PPO2_1000steps
395M	data/SeaquestNoFrameskip-v4_PPO2_1000steps
395M	data/SpaceInvadersNoFrameskip-v4_PPO2_1000steps


In [12]:
# Sanity-check the extracted dataset: for each game folder, report the number
# of trajectories and the number of steps (frames) in the last trajectory.
print("================")
for game_folder in os.listdir("data"):
    print(game_folder + '/')

    # data/<game>/images/ holds one sub-folder per trajectory, named by its
    # index. Sort numerically so the first/last entries do not depend on the
    # names being zero-padded.
    images_root = f"data/{game_folder}/images"
    trajectory_dirs = sorted(os.listdir(images_root), key=int)

    # Frames of the last trajectory are named "<step index><extension>";
    # splitext() drops the extension whatever its length.
    step_ids = sorted(
        int(os.path.splitext(frame_name)[0])
        for frame_name in os.listdir(f"{images_root}/{trajectory_dirs[-1]}")
    )

    print(f"Trajectories: {int(trajectory_dirs[-1]) - int(trajectory_dirs[0]) + 1}")
    print(f"Steps per trajectory: {step_ids[-1] - step_ids[0] + 1}")
    print("================")

BreakoutNoFrameskip-v4_PPO2_1000steps/
Trajectories: 100
Steps per trajectory: 1000
SpaceInvadersNoFrameskip-v4_PPO2_1000steps/
Trajectories: 100
Steps per trajectory: 1000
BeamRiderNoFrameskip-v4_PPO2_1000steps/
Trajectories: 100
Steps per trajectory: 1000
QbertNoFrameskip-v4_PPO2_1000steps/
Trajectories: 100
Steps per trajectory: 1000
EnduroNoFrameskip-v4_PPO2_1000steps/
Trajectories: 100
Steps per trajectory: 1000
SeaquestNoFrameskip-v4_PPO2_1000steps/
Trajectories: 100
Steps per trajectory: 1000
PongNoFrameskip-v4_PPO2_1000steps/
Trajectories: 100
Steps per trajectory: 1000
MsPacmanNoFrameskip-v4_PPO2_1000steps/
Trajectories: 100
Steps per trajectory: 1000


## Set up the dataset

In [0]:
# One entry per game folder inside data/, in alphabetical order
GAMES = os.listdir("data")
GAMES.sort() # len(GAMES) == 8

# Name of the file loaded from each trajectory folder
FILE_NAME = "trajectory.npz"

# NOTE set these values according to the downloaded dataset
N_OF_TRAJECTORIES, N_OF_STEPS = 100, 1000

In [14]:
# Collect the dtype of the 'observations' array of every trajectory file.
# They are fixed-width unicode strings, and the width is not the same in all
# files, so every distinct dtype is recorded.
obs_dtypes = set()
for game_folder in os.listdir("data"):
    for data_folder in os.listdir(f"data/{game_folder}"):
        if data_folder == 'images':
            continue
        # keys: 'actions', 'observations', 'rewards', 'episode_starts'
        # (the `with` block closes each .npz file instead of leaving it open)
        with np.load(f"data/{game_folder}/{data_folder}/{FILE_NAME}") as data:
            obs_dtypes.add(data['observations'].dtype)
obs_dtypes

{dtype('<U56'),
 dtype('<U57'),
 dtype('<U58'),
 dtype('<U60'),
 dtype('<U61'),
 dtype('<U65')}

In [0]:
# Every per-step array shares the leading (game, trajectory, step) axes
steps_shape = (len(GAMES), N_OF_TRAJECTORIES, N_OF_STEPS)

# one action id per step; actions go from 0 to 17 (see the ACTION_MEANING dict)
all_actions = np.empty(steps_shape, dtype='int8')

# 'observations' stores the path to the PNG image of the state; max() over the
# collected string dtypes is used as the storage dtype (presumably the widest
# one, so that no path gets truncated -- TODO confirm)
all_obs_paths = np.empty(steps_shape, dtype=max(obs_dtypes))

# the images themselves: 84 x 84 with 4 channels per step
all_obs = np.empty(steps_shape + (84, 84, 4), dtype='uint8')

In [16]:
# Fill `all_actions` and `all_obs_paths` from the trajectory file of every
# trajectory folder of every game.
for game_folder in os.listdir("data"):
    print(game_folder + '/', end=' ')
    game = GAMES.index(game_folder)
    for data_folder in os.listdir(f"data/{game_folder}"):
        if data_folder == 'images':
            continue
        # trajectory folders are named by their index
        trajectory = int(data_folder)
        # keys: 'actions', 'observations', 'rewards', 'episode_starts'
        # (the `with` block closes each .npz file instead of leaving it open)
        with np.load(f"data/{game_folder}/{data_folder}/{FILE_NAME}") as data:
            all_actions[game, trajectory] = data['actions']
            all_obs_paths[game, trajectory] = data['observations']
    print("done.")

BreakoutNoFrameskip-v4_PPO2_1000steps/ done.
SpaceInvadersNoFrameskip-v4_PPO2_1000steps/ done.
BeamRiderNoFrameskip-v4_PPO2_1000steps/ done.
QbertNoFrameskip-v4_PPO2_1000steps/ done.
EnduroNoFrameskip-v4_PPO2_1000steps/ done.
SeaquestNoFrameskip-v4_PPO2_1000steps/ done.
PongNoFrameskip-v4_PPO2_1000steps/ done.
MsPacmanNoFrameskip-v4_PPO2_1000steps/ done.


In [17]:
from tqdm import tqdm

# Decode every observation image into `all_obs`; this may take some time
for g in tqdm(range(len(GAMES))):
    for t in range(N_OF_TRAJECTORIES):
        for s in range(N_OF_STEPS):
            obs_path = all_obs_paths[g, t, s]
            obs = cv2.imread(obs_path,
                             cv2.IMREAD_UNCHANGED) # keep the 4 channels
            if obs is None:
                # cv2.imread does not raise on a missing or unreadable file, it
                # returns None; fail here with the offending path instead of
                # with an obscure error on the assignment below
                raise OSError(f"cv2.imread could not read the image at {obs_path!r}")
            all_obs[g, t, s] = obs
    tqdm.write(" done.")

 12%|█▎        | 1/8 [00:13<01:36, 13.76s/it]

 done.


 25%|██▌       | 2/8 [00:26<01:20, 13.43s/it]

 done.


 38%|███▊      | 3/8 [00:41<01:10, 14.04s/it]

 done.


 50%|█████     | 4/8 [01:01<01:02, 15.69s/it]

 done.


 62%|██████▎   | 5/8 [01:15<00:45, 15.26s/it]

 done.


 75%|███████▌  | 6/8 [01:38<00:35, 17.52s/it]

 done.


 88%|████████▊ | 7/8 [02:39<00:30, 30.64s/it]

 done.


100%|██████████| 8/8 [04:03<00:00, 46.60s/it]

 done.





In [20]:
# Sanity check: shapes of both arrays and the distinct action ids that occur
all_obs.shape, all_actions.shape, np.unique(all_actions)

((8, 100, 1000, 84, 84, 4),
 (8, 100, 1000),
 array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17], dtype=int8))

In [0]:
# Bundle the observations together with the actions array
dataset = (all_obs, all_actions)

In [0]:
def print_trajectory_info(x):
    """Print a short summary of one trajectory.

    Args:
        x: mapping with the keys 'actions', 'observations', 'rewards' and
            'episode_starts', each holding a NumPy array.

    For every key the array shape is printed, followed by: the names of the
    distinct actions, the folder of the first observation path, the distinct
    reward values, and the step indices at which 'episode_starts' is truthy.
    Distinct values are listed in ascending order so the output is stable.
    """
    # actions taken
    actions = x['actions']
    print("- actions:", actions.shape)
    # np.unique flattens and sorts; tolist() yields plain ints for the lookup
    print("  actions taken:", ', '.join([ACTION_MEANING[action] for action in np.unique(actions).tolist()]))

    # path to the observed images
    observations = x['observations']
    print("- observations:", observations.shape)
    if len(observations) > 0:
        # an empty trajectory has no first path to take the folder from
        print("  path:", os.path.dirname(observations[0]) + '/')

    # reward for each step
    rewards = x['rewards']
    print("- rewards:", rewards.shape)
    print("  reward values:", ', '.join([str(r) for r in np.unique(rewards)]))

    # `done` value returned by env.step(action)
    episode_starts = x['episode_starts']
    print("- episode_starts:", episode_starts.shape)
    print("  episode starts:", [i for i, ep_start in enumerate(episode_starts) if ep_start])

In [0]:
# `x` was not defined by any earlier cell, so this call failed on a fresh
# kernel. Load one trajectory explicitly instead: the lowest-numbered Breakout
# trajectory (presumably the one that produced the recorded output -- TODO confirm).
sample_game = "BreakoutNoFrameskip-v4_PPO2_1000steps"
sample_trajectory = min(
    (d for d in os.listdir(f"data/{sample_game}") if d != 'images'),
    key=int,
)
with np.load(f"data/{sample_game}/{sample_trajectory}/{FILE_NAME}") as x:
    print_trajectory_info(x)

- actions: (1000,)
  actions taken: NOOP, FIRE, UP, RIGHT
- observations: (1000,)
  path: data/BreakoutNoFrameskip-v4_PPO2_1000steps/images/00/
- rewards: (1000,)
  reward values: 0.0, 1.0
- episode_starts: (1000,)
  episode starts: [0, 245]
