# 1. Setup

## 1.1. Imports

In [6]:
# import the game
import gym_super_mario_bros

# import the joypad wrapper
from nes_py.wrappers import JoypadSpace

# import the simplified controls
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

## 1.2. Setting up our game

In [8]:
# Setup game environment
env = gym_super_mario_bros.make('SuperMarioBros-v0')
env = JoypadSpace(env, SIMPLE_MOVEMENT) # wrap the environment with simplified controls

## 1.3. Testing the game

In [10]:
# Create a flag - tells the game that we need to start the game on
done = True
# loop through each frame in the game
for step in range(100000):
    # start the game if the flag is true
    if done:
        # start the game and get the initial state
        reset_out = env.reset()
        state = reset_out[0] if isinstance(reset_out, tuple) else reset_out # gymnasium vs gym compatibility
        
    # take a random action
    step_out = env.step(env.action_space.sample()) # env.step allows to take an action in the game, in this case randomnly through sample method - check cell below
    
    # unpack the outputs
    if len(step_out) == 5:  # gymnasium vs gym compatibility
        state, reward, done, truncated, info = step_out
        done = done or truncated
    else:
        state, reward, done, info = step_out

    # render the game
    env.render()
env.close()

  logger.deprecation(
  if not isinstance(done, (bool, np.bool8)):
If you want to render in human mode, initialize the environment in this way: gym.make('EnvName', render_mode='human') and don't call the render method.
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.warn(
  return (self.ram[0x86] - self.ram[0x071c]) % 256


KeyboardInterrupt: 

## 1.4. Annex code for interpretation

In [None]:
# --- IGNORE ---
# Just to verify everything is set up correctly
SIMPLE_MOVEMENT

[['NOOP'],
 ['right'],
 ['right', 'A'],
 ['right', 'B'],
 ['right', 'A', 'B'],
 ['A'],
 ['left']]

In [None]:
# --- IGNORE ---
# check a random action from the 7 available actions in
SIMPLE_MOVEMENT[env.action_space.sample()]

['right']

In [None]:
# --- IGNORE ---
# Just to verify everything is set up correctly
print(f'The number of actions is: {env.action_space}') # check the number of actions after wrapping
print(f'The observation space shape is: {env.observation_space.shape}') # check the observation space shape after wrapping

In [None]:
# --- IGNORE ---
# Check the shape of the observation space
state = env.reset()
state.shape # here we can see the shape of the observation space - in this case a (240, 256, 3) array representing the RGB image of the game screen

(240, 256, 3)

In [None]:
# --- IGNORE ---
# Just to verify what's inside the outputs of env.step method 
len(env.step(1)) # check the number of outputs from env.step method
# 4 outputs in gym: state, reward, done, info
# 5 outputs in gymnasium: state, reward, done, truncated, info

# first output is the state
print(env.step(1)[0]) # state

# second output is the reward
print(env.step(1)[1]) # reward - in this case 0.0 because Mario got stuck in a pipe

# third output is the done flag
print(env.step(1)[2]) # done - in this case False because the game is not over (not dead)

# fourth output is the info dictionary
print(env.step(1)[3]) # info

[[[104 136 252]
  [104 136 252]
  [104 136 252]
  ...
  [104 136 252]
  [104 136 252]
  [104 136 252]]

 [[104 136 252]
  [104 136 252]
  [104 136 252]
  ...
  [104 136 252]
  [104 136 252]
  [104 136 252]]

 [[104 136 252]
  [104 136 252]
  [104 136 252]
  ...
  [104 136 252]
  [104 136 252]
  [104 136 252]]

 ...

 [[240 208 176]
  [228  92  16]
  [228  92  16]
  ...
  [228  92  16]
  [228  92  16]
  [  0   0   0]]

 [[240 208 176]
  [228  92  16]
  [228  92  16]
  ...
  [228  92  16]
  [  0   0   0]
  [  0   0   0]]

 [[228  92  16]
  [  0   0   0]
  [  0   0   0]
  ...
  [  0   0   0]
  [  0   0   0]
  [228  92  16]]]
0.0
False
{'coins': 0, 'flag_get': False, 'life': 2, 'score': 0, 'stage': 1, 'status': 'small', 'time': 400, 'world': 1, 'x_pos': 41, 'x_pos_screen': 41, 'y_pos': 79}


# 2. Preprocess Environment

We need to preprocess our Mario game data before we run any algorithm on it. In this case we'll be applying two preprocessing steps:
- Grayscaling: Colored images have tripple the data to process (RGB layers)
- Framestacking: Helps our algorithm to have context

In [None]:
# Import Frame Stack wrapper and GrayScaling wrapper
from gym.wrappers import GrayScaleObservation, FrameStack
# Import Vectorization wrapper
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
# Import matplotlib for plotting
import matplotlib.pyplot as plt