In [1]:
import ale_py
import gym
import time
import torch as T
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import kornia as K
import numpy as np
import einops
from einops.layers.torch import Rearrange
from supersuit import frame_stack_v1, resize_v0, clip_reward_v0
import cv2




In [2]:
# --- Notebook-wide configuration -------------------------------------------
FRAMES_TO_STACK = 4  # frames per stacked state
FRAMES_TO_SKIP = 4   # emulator frames advanced per agent step
PAD_SIZE = 4

# TODO Seed the environment and torch too (only the NumPy generator is seeded)
# TODO Terminal on loss of life
# TODO compare Adam with LARS optimizer

# Target frame size in pixels.
X_DIM = 84
Y_DIM = 84

# Fixed seed so that anything drawn from `rng` is reproducible after
# Restart Kernel -> Run All. Change SEED to get a different random stream.
SEED = 0
rng = np.random.default_rng(SEED)

In [3]:
def preprocess(img):
    """Convert a channels-last NumPy image into a channels-first tensor.

    The array is wrapped as a torch tensor, divided by 255, and its channel
    axis is moved in front of the spatial axes.

    Args:
        img: NumPy array laid out as (h, w, c), or (b, h, w, c) when batched.

    Returns:
        torch.Tensor laid out as (c, h, w) or (b, c, h, w) respectively.
    """
    scaled = T.from_numpy(img) / 255

    # A rank-3 input has no batch axis; every other rank is treated as batched.
    has_no_batch = scaled.ndim == 3
    pattern = 'h w c -> c h w' if has_no_batch else 'b h w c -> b c h w'
    return einops.rearrange(scaled, pattern)

In [4]:
def plot_state(state, big=False):
    """Draw the frames of the first state in a batch as grayscale images.

    Args:
        state: object indexed as ``state[0][i]`` to obtain a 2-D image, with
            ``state.shape[1]`` giving the number of frames
            (assumes a (batch, frames, h, w) layout -- TODO confirm).
        big: when True, draw only ``state[0][0]`` across the whole figure
            instead of one panel per frame.

    Returns:
        None. The figure is left open for the notebook to render.
    """
    fig = plt.figure(figsize=(36, 18))
    if big:
        fig.add_subplot(1, 1, 1).imshow(state[0][0], cmap='gray')
        return

    # Size the grid from the state itself: a fixed column count would make
    # add_subplot raise as soon as the state holds more frames than columns.
    n_frames = state.shape[1]
    for i in range(n_frames):
        ax = fig.add_subplot(1, n_frames, i + 1)
        ax.imshow(state[0][i], cmap='gray')

In [131]:
def create_env(name='MsPacman', render=None):
    """Create a frame-stacked, grayscale ALE environment.

    Args:
        name: game name; the environment id is built as ``ALE/<name>-v5``.
        render: forwarded as ``render_mode`` (None | 'human' | 'rgb_array').

    Returns:
        The environment wrapped with ``frame_stack_v1`` so that each
        observation carries ``FRAMES_TO_STACK`` frames.
    """
    env = gym.make(
        f'ALE/{name}-v5',
        obs_type='grayscale',             # ram | rgb | grayscale
        frameskip=FRAMES_TO_SKIP,         # frame skip
        mode=0,                           # game mode, see Machado et al. 2018
        difficulty=0,                     # game difficulty, see Machado et al. 2018
        repeat_action_probability=0.25,   # Sticky action probability
        full_action_space=True,           # Use all actions
        render_mode=render,               # None | human | rgb_array
    )

    # Optional wrappers, currently disabled:
    #env = clip_reward_v0(env, lower_bound=-1, upper_bound=1)
    #env = resize_v0(env, X_DIM, Y_DIM, linear_interp=True)

    # Use the shared constant rather than a literal so the stack depth is
    # configured in exactly one place.
    env = frame_stack_v1(env, FRAMES_TO_STACK)

    return env

In [156]:
def run_episode(env):
    """Play one episode with randomly sampled actions and collect frames.

    Args:
        env: environment providing ``reset``, ``step``,
            ``action_space.sample`` and ``close``. The environment is always
            closed before this function returns (even on error), so it
            cannot be reused afterwards.

    Returns:
        list: one entry per environment step, each being element 0 of the
        preprocessed observation. The observation returned by ``reset`` is
        not included.
    """
    frames = []
    try:
        env.reset()
        done = False
        while not done:
            action = env.action_space.sample()
            result = env.step(action)

            # Accept both step signatures: the classic 4-tuple
            # (obs, reward, done, info) and the newer 5-tuple
            # (obs, reward, terminated, truncated, info).
            if len(result) == 5:
                obs, _, terminated, truncated, _ = result
                done = terminated or truncated
            else:
                obs, _, done, _ = result

            # NOTE(review): index 0 is the first channel of the stacked
            # observation; if the stacking wrapper stores the newest frame
            # last, this is the oldest frame of the stack -- confirm.
            frames.append(preprocess(obs)[0])
    finally:
        # Release the emulator even if stepping or preprocessing raised.
        env.close()
    return frames

In [157]:
# Build the environment with create_env's defaults (MsPacman, no rendering).
env = create_env()

In [148]:
# Play one episode with randomly sampled actions, keeping one frame per step.
# NOTE: run_episode closes `env` before returning, so re-create the
# environment before running this cell again.
states = run_episode(env)

In [155]:
# Write the collected frames to ./test_vid.mp4.
# NOTE: states_to_vid is defined in a later cell; on a fresh kernel that cell
# must be executed before this one.
states_to_vid(states)

In [153]:
def states_to_vid(states, filename="./test_vid.mp4", fps=30.0):
    """Write a sequence of grayscale frames to an MP4 video file.

    Args:
        states: non-empty sequence of 2-D array-likes, all shaped (h, w).
            Values are multiplied by 255 and cast to uint8, so they are
            expected to lie in [0, 1]; anything outside is clipped.
        filename: path of the video file to write.
        fps: playback frame rate passed to the video writer.

    Raises:
        ValueError: if ``states`` is empty or the frames are not 2-D.
        RuntimeError: if the video writer cannot be opened for ``filename``.
    """
    if len(states) == 0:
        raise ValueError("states_to_vid needs at least one frame")

    video = np.stack(states)
    if video.ndim != 3:
        raise ValueError(
            f"expected frames of shape (h, w), got stacked shape {video.shape}"
        )

    # Add a trailing single-channel axis and convert [0, 1] floats to bytes.
    video = np.clip(video[..., np.newaxis] * 255, 0, 255).astype(np.uint8)
    _, height, width, _ = video.shape

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    # The final positional argument (isColor) is False: frames are grayscale.
    out = cv2.VideoWriter(filename, fourcc, float(fps), (width, height), False)
    try:
        if not out.isOpened():
            raise RuntimeError(f"could not open video writer for {filename!r}")
        for frame in video:
            out.write(frame)
    finally:
        # Always finalize the file handle, even if a write failed.
        out.release()