In [16]:
import ale_py
import gym
import time
import torch as T
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import kornia as K
import numpy as np
import einops
from einops.layers.torch import Rearrange
from supersuit import frame_stack_v1, resize_v0, clip_reward_v0
import cv2


import skimage.transform as st



In [2]:
#from pretrain.environment import create_env
from ppg.agent import Agent

Apex Optimizers not installed, defaulting to PyTorch Optimizer


In [3]:
# Hyperparameters and run settings passed to the PPG `Agent` constructor.
# Keys, values and insertion order are unchanged; only the literal form differs.
config = dict(
    policy_clip=0.25,
    kl_max=None,  # 0.05 used previously
    kl_max_aux=None,  # stability in pretrain 0.01
    clip_reward=True,
    beta=1,
    val_coeff=1e-2,
    train_iterations=1,
    entropy_coeff=0.01,
    entropy_min=0.01,
    entropy_decay=0.9999,
    grad_norm=10,
    grad_norm_ppg=0.5,
    critic_lr=1e-3,
    actor_lr=3e-4,  # Paper val 1e-4 while pre-Training
    aux_freq=32,
    aux_iterations=3,
    gae_lambda=0.95,
    batch_size=32,  # 512 while pretraining, 32 after
    target_batch_size=32,
    use_wandb=False,
    discount_factor=0.99,
    height=84,
    width=84,
    action_dim=18,
    contrast_lr=1e-3,
    temperature=0.1,
    frames_to_skip=4,
    stacked_frames=4,
    is_pretrain=False,
    steps_before_repr_learning=1600,  # Paper value
    replay_buffer_size=10000,
    rollout_length=700,
    num_envs=16,  # Parallel Envs
    prefix='ATARI_VIZ',
)

In [4]:
# Frame-handling constants: frames per stacked observation, emulator frames
# per agent step (passed to gym as `frameskip`), and a padding size.
FRAMES_TO_STACK, FRAMES_TO_SKIP, PAD_SIZE = 4, 4, 4

# Observation size in pixels that environments are resized to.
X_DIM = Y_DIM = 84

# Unseeded NumPy generator: its draws differ between kernel sessions.
rng = np.random.default_rng()

In [5]:
def preprocess(img):
    """Turn a channel-last image array into a channel-first tensor scaled by 1/255.

    Args:
        img: NumPy array laid out as (h, w, c) for a single observation or
            (b, h, w, c) for a batch.

    Returns:
        A torch tensor of shape (1, c, h, w) for a 3-D input, otherwise
        (b, c, h, w). Any input that is not 3-D is treated as batched, so it
        must be 4-D for the rearrange pattern to match.
    """
    scaled = T.from_numpy(img) / 255

    # A rank-3 input has no batch axis, so a leading axis of size 1 is added.
    missing_batch_axis = scaled.ndim == 3
    pattern = 'h w c -> 1 c h w' if missing_batch_axis else 'b h w c -> b c h w'
    return einops.rearrange(scaled, pattern)

In [6]:
def plot_state(state, big=False):
    """Draw the frames of the first observation in `state` as grayscale images.

    Args:
        state: Array-like indexed as ``state[batch][frame]`` with a ``shape``
            attribute whose second entry is the number of frames; only batch
            element 0 is drawn.
        big: If true, draw only frame 0 on the whole figure. Otherwise draw
            every frame side by side in a single row.
    """
    fig = plt.figure(figsize=(36, 18))

    if big:
        fig.add_subplot(1, 1, 1).imshow(state[0][0], cmap='gray')
        return

    # Size the grid from the data instead of the global FRAMES_TO_STACK: with
    # the fixed width, a state holding more frames than that constant asked
    # add_subplot for an index outside the grid and raised ValueError.
    num_frames = state.shape[1]
    for frame_idx in range(num_frames):
        ax = fig.add_subplot(1, num_frames, frame_idx + 1)
        ax.imshow(state[0][frame_idx], cmap='gray')

In [23]:
def create_env(name='MsPacman', render=None):
    """Create an ALE v5 Atari environment with resized, frame-stacked grayscale frames.

    Args:
        name: Game name inserted into the gym id ``'ALE/<name>-v5'``.
        render: Value forwarded as gym's ``render_mode``
            (None | 'human' | 'rgb_array').

    Returns:
        The gym environment wrapped with a resize to (X_DIM, Y_DIM) and a
        stack of FRAMES_TO_STACK frames.
    """
    env = gym.make(
        'ALE/' + name + '-v5',
        obs_type='grayscale',             # ram | rgb | grayscale
        frameskip=FRAMES_TO_SKIP,         # frame skip
        mode=0,                           # game mode, see Machado et al. 2018
        difficulty=0,                     # game difficulty, see Machado et al. 2018
        repeat_action_probability=0.25,   # Sticky action probability
        full_action_space=True,           # Use all actions
        render_mode=render,               # None | human | rgb_array
    )

    # Reward clipping is left disabled in this notebook.
    #env = clip_reward_v0(env, lower_bound=-1, upper_bound=1)
    env = resize_v0(env, X_DIM, Y_DIM, linear_interp=True)
    # Use the shared constant rather than a literal 4 so the stack depth cannot
    # drift from the rest of the notebook.
    env = frame_stack_v1(env, FRAMES_TO_STACK)

    return env

In [24]:
# Build the default MsPacman environment (no rendering) and construct the agent
# from it and the config with load=False.
env = create_env(name='MsPacman', render=None)
agent = Agent(env, config, load=False)

In [25]:
# Load the latest saved agent weights, mapping every tensor onto the CPU.
# NOTE(review): T.load unpickles the file; only point PATH at trusted checkpoints.
device = T.device('cpu')
PATH = f"{agent.path}/agent_latest.pt"
agent.load_state_dict(T.load(PATH, map_location=device))

<All keys matched successfully>

In [34]:
def run_episode(env, agent, min_score=2500, max_episodes=None):
    """Play full episodes until one reaches `min_score`; return that episode's frames.

    Args:
        env: Environment whose ``reset()`` returns an observation and whose
            ``step(action)`` returns ``(obs, reward, done, info)``.
        agent: Object whose ``get_action(state)[0]`` is the action passed to
            ``env.step``.
        min_score: Summed episode reward at which retrying stops. Defaults to
            the previously hard-coded 2500.
        max_episodes: Optional cap on the number of episodes played. ``None``
            (the default) keeps retrying until `min_score` is reached.

    Returns:
        A list with one entry per step of the last episode played, each being
        ``s[0][0]`` of the preprocessed observation. The list is empty if no
        episode was played.

    Side effects:
        Prints the index of every finished episode and closes `env` before
        returning.
    """
    # Bound up front so the function still returns a list when the loop body
    # never runs (e.g. min_score <= 0 or max_episodes == 0).
    states = []
    episode_idx = 0
    episode_return = 0.0

    while episode_return < min_score and (
        max_episodes is None or episode_idx < max_episodes
    ):
        s = preprocess(env.reset())
        rewards = []
        states = []  # only the most recent episode is kept
        done = False

        while not done:
            action = agent.get_action(s)[0]
            s, r, done, _ = env.step(action)

            rewards.append(r)
            s = preprocess(s)
            states.append(s[0][0])

        episode_return = np.sum(rewards)
        print(episode_idx)
        episode_idx += 1

    env.close()
    return states

In [37]:
# Collect the frames of the episode that run_episode returns.
states = run_episode(env=env, agent=agent)

In [30]:
def states_to_vid(states, filename="./test_vid.mp4", fps=15):
    """Write a sequence of grayscale frames to an MP4 file.

    Args:
        states: Non-empty sequence of equally sized 2-D arrays of shape (h, w).
            Values are multiplied by 255 before writing, so they are expected
            to lie in [0, 1].
        filename: Output path. Defaults to the previously hard-coded
            "./test_vid.mp4".
        fps: Frames per second of the video. Defaults to the previous 15.

    Raises:
        ValueError: If `states` is empty.
        RuntimeError: If OpenCV cannot open the output file for writing.
    """
    if len(states) == 0:
        raise ValueError("states_to_vid needs at least one frame")

    video = np.stack(states)  # (b, h, w)
    # Round before the integer cast: a value that was divided by 255 and is
    # multiplied back can land slightly below the original integer, which a
    # plain cast would truncate to one grey level darker. Clip so out-of-range
    # inputs saturate instead of wrapping around.
    video = np.clip(np.rint(video * 255), 0, 255).astype(np.uint8)
    video = video[..., np.newaxis]  # (b, h, w, 1): single-channel frames

    _, height, width, _ = video.shape

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    # Last argument is isColor; False selects single-channel (grayscale) frames.
    out = cv2.VideoWriter(filename, fourcc, float(fps), (width, height), False)
    if not out.isOpened():
        out.release()
        raise RuntimeError("could not open video writer for " + str(filename))

    try:
        for frame in video:
            out.write(frame)
    finally:
        # Release the writer even if a write fails so the file is finalised.
        out.release()

In [31]:
# Write the collected episode frames to the default video file.
states_to_vid(states=states)