<a href="https://colab.research.google.com/github/cheliu01/RL_Super_Mario_Bros/blob/main/RL_Super_Mario_Bros.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install nes-py==0.2.6
!pip install gym-super-mario-bros
!apt-get update
!apt-get install ffmpeg libsm6 libxext6  -y
!apt install -y libgl1-mesa-glx
!pip install opencv-python

Collecting nes-py==0.2.6
  Downloading nes_py-0.2.6.tar.gz (75 kB)
[?25l[K     |████▎                           | 10 kB 14.0 MB/s eta 0:00:01[K     |████████▋                       | 20 kB 10.1 MB/s eta 0:00:01[K     |█████████████                   | 30 kB 12.1 MB/s eta 0:00:01[K     |█████████████████▎              | 40 kB 14.4 MB/s eta 0:00:01[K     |█████████████████████▋          | 51 kB 9.9 MB/s eta 0:00:01[K     |█████████████████████████▉      | 61 kB 10.2 MB/s eta 0:00:01[K     |██████████████████████████████▏ | 71 kB 7.9 MB/s eta 0:00:01[K     |████████████████████████████████| 75 kB 2.6 MB/s 
Collecting pygame>=1.9.3
  Downloading pygame-2.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
[K     |████████████████████████████████| 18.3 MB 46 kB/s 
Building wheels for collected packages: nes-py
  Building wheel for nes-py (setup.py) ... [?25l[?25hdone
  Created wheel for nes-py: filename=nes_py-0.2.6-cp37-cp37m-linux_x86_64.whl size=16

In [2]:
import torch
import torch.nn as nn
import random
import gym_super_mario_bros
from nes_py.wrappers import JoypadSpace
from tqdm import tqdm
import pickle 
from gym_super_mario_bros.actions import RIGHT_ONLY
import gym
import numpy as np
import collections 
import cv2
import matplotlib.pyplot as plt

Apply 6 different transformations to the environment:



1.   Every action the agent makes is repeated over 4 frames
2.   The size of each frame is reduced to 84x84
3.   The frames are converted to PyTorch tensors
4.   Only every fourth frame is collected by the buffer
5.   The frames are normalized so that pixel values are between 0 and 1
6.   The number of actions is reduced to 5 (such that the agent can only move right)


In [None]:
class MaxandSkipEnv(gym.Wrapper):
  """Repeat each action for `skip` emulator frames and return a max-pooled frame.

  The observation returned by `step` is the element-wise maximum over the
  frames currently held in a two-slot buffer, and the reward is the sum of
  the rewards of every frame that was actually executed.
  """

  def __init__(self, env=None, skip=4):
    """Return only every 'skip'-th frame

    Args:
      env: environment to wrap.
      skip: number of times each action is repeated per `step` call.
    """
    super(MaxandSkipEnv, self).__init__(env)
    # Only the two most recent raw frames are kept for max-pooling.
    self._obs_buffer = collections.deque(maxlen=2)
    self._skip = skip

  def step(self, action):
    """Apply `action` up to `skip` times, stopping early when the episode ends.

    Returns:
      (max_frame, total_reward, done, info) where `total_reward` is the sum
      of the per-frame rewards and `done`/`info` come from the last frame
      that was executed.
    """
    total_reward = 0.0
    done = None
    for _ in range(self._skip):
      obs, reward, done, info = self.env.step(action)
      self._obs_buffer.append(obs)
      # Accumulate the reward of this frame (the original added
      # total_reward to itself, which always yielded 0.0).
      total_reward += reward
      if done:
        break
    # Element-wise max over the buffered frames.
    max_frame = np.max(np.stack(self._obs_buffer), axis=0)
    return max_frame, total_reward, done, info

  def reset(self):
    """Clear past frame buffer and init to first ob"""
    self._obs_buffer.clear()
    obs = self.env.reset()
    self._obs_buffer.append(obs)
    return obs

class ProcessFrame84(gym.ObservationWrapper):
  """
  Downsamples image to 84x84
  Greyscales image

  Returns numpy array
  """
  def __init__(self, env=None):
    super(ProcessFrame84, self).__init__(env)
    # Processed frames are single-channel uint8 images of shape (84, 84, 1).
    self.observation_space = gym.spaces.Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)

  def observation(self, obs):
    """Convert one raw frame with `process`."""
    return ProcessFrame84.process(obs)

  @staticmethod
  def process(frame):
    """Convert a 240x256x3 frame into an (84, 84, 1) uint8 greyscale array.

    Raises:
      AssertionError: if `frame` does not contain 240*256*3 elements.
    """
    assert frame.size == 240*256*3, "Unknown resolution."
    img = np.reshape(frame, [240, 256, 3]).astype(np.float32)
    # Weighted channel sum -> greyscale (weights assume R, G, B channel order
    # -- TODO confirm against the emulator's frame layout).
    img = img[:, :, 0]*0.299+img[:, :, 1]*0.587+img[:, :, 2]*0.114
    # cv2.resize takes (width, height), so the result has shape (110, 84).
    resized_screen = cv2.resize(img, (84, 110), interpolation=cv2.INTER_AREA)
    # Keep rows 18..101 to obtain a square 84x84 crop.
    x_t = resized_screen[18:102, :]
    x_t = np.reshape(x_t, [84, 84, 1])
    return x_t.astype(np.uint8)

class ImageToPyTorch(gym.ObservationWrapper):
  """Move the last (channel) axis of each observation to the front: HWC -> CHW."""

  def __init__(self, env):
    super(ImageToPyTorch, self).__init__(env)
    old_shape = self.observation_space.shape
    # Declared shape is the wrapped shape with its last axis moved to the front.
    self.observation_space = gym.spaces.Box(low=0.0, high=1.0, shape=(old_shape[-1], old_shape[0], old_shape[1]),
                                            dtype=np.float32)

  def observation(self, observation):
    """Return `observation` with axis 2 moved to position 0."""
    return np.moveaxis(observation, 2, 0)

class ScaledFloatFrame(gym.ObservationWrapper):
  """Observation wrapper that converts frames to float32 and divides them by 255."""

  def observation(self, obs):
    """Return `obs` as a float32 array with every value divided by 255.0."""
    raw_pixels = np.array(obs)
    as_float = raw_pixels.astype(np.float32)
    return as_float/255.0
class BufferWrapper(gym.ObservationWrapper):
  """Stack the last `n_steps` observations along axis 0 in a rolling buffer."""

  def __init__(self, env, n_steps, dtype=np.float32):
    """
    Args:
      env: environment to wrap; its observation space must expose `low`/`high` arrays.
      n_steps: number of consecutive observations kept in the buffer.
      dtype: dtype of the buffer and of the declared observation space.
    """
    super(BufferWrapper, self).__init__(env)
    self.dtype = dtype
    old_space = env.observation_space
    # Bounds of the stacked space are the wrapped bounds repeated n_steps
    # times along axis 0.
    self.observation_space = gym.spaces.Box(old_space.low.repeat(n_steps, axis=0),
                                            old_space.high.repeat(n_steps, axis=0),
                                            dtype=dtype)

  def reset(self):
    """Zero the buffer, reset the wrapped env and return the buffer holding the first frame."""
    self.buffer = np.zeros_like(self.observation_space.low, dtype=self.dtype)
    return self.observation(self.env.reset())

  def observation(self, observation):
    """Shift the buffer by one slot, store `observation` last, and return the buffer.

    NOTE: the internal buffer itself is returned (not a copy), so it is
    overwritten by the next call.
    """
    # Drop the oldest entry by moving every slot one position towards index 0.
    self.buffer[:-1] = self.buffer[1:]
    self.buffer[-1] = observation
    return self.buffer

def make_env(env):
  """Wrap `env` with the preprocessing stack and restrict it to RIGHT_ONLY actions.

  Wrappers are applied innermost-first in this order: MaxandSkipEnv,
  ProcessFrame84, ImageToPyTorch, BufferWrapper (4 steps), ScaledFloatFrame,
  and finally JoypadSpace with the RIGHT_ONLY action list.
  """
  frames = ImageToPyTorch(ProcessFrame84(MaxandSkipEnv(env)))
  stacked = BufferWrapper(frames, 4)
  scaled = ScaledFloatFrame(stacked)
  return JoypadSpace(scaled, RIGHT_ONLY)