# CityU_DataScience_Club_RL_Model

For those interested in building a reinforcement learning model that plays a game, here is some sample code.

The library versions used are compatible with Python 3.8.


## Installing OpenAI Gym and Stable Baselines


In [1]:
# Install the Super Mario game environment, the NES emulator, and the OpenAI Gym interface.
# Install Stable Baselines3 for the RL algorithms.

# %pip install gym_super_mario_bros
# %pip install nes_py
# %pip install 'stable-baselines3[extra]'  # current version

# Import os for file path management
import os

# Import the Joypad wrapper
# Requires cpp version 14 or later
from nes_py.wrappers import JoypadSpace
import gym_super_mario_bros

# Import simplified controls for AI to learn faster
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

# Import the grayscale observation wrapper (the frame stacker is imported with the vectorization wrappers below)
from gym.wrappers import GrayScaleObservation

# Import the vectorization and frame-stacking wrappers
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv
from stable_baselines3 import PPO   # algorithm used for training the model

# Import Matplotlib to show impact of frame stacking
from matplotlib import pyplot as plt


## Setup Mario

### Import game

In [6]:
# Inspect the simplified action set.
# NOTE: a bare expression is only echoed by a notebook when it is the last
# statement of a cell; it has no effect when this code runs as a script.
SIMPLE_MOVEMENT

# Set up the game and restrict the controller to the simplified action set
env = gym_super_mario_bros.make("SuperMarioBros-v0")
env = JoypadSpace(env, SIMPLE_MOVEMENT)  # simplifies actions
env.action_space

env.observation_space
print(env.observation_space.shape)


# Restart flag: True means the next iteration must (re)start an episode
done = True
try:
    # Play random actions for a fixed number of frames
    for frame in range(100000):
        if done:
            # Start the first episode, or restart after the previous one ended
            env.reset()
        # Pick a random action (left, right, jump, ...) from SIMPLE_MOVEMENT
        step_result = env.step(env.action_space.sample())
        # Accept both Gym step signatures: the newer 5-tuple
        # (obs, reward, terminated, truncated, info) and the older 4-tuple
        # (obs, reward, done, info).
        if len(step_result) == 5:
            observation, reward, terminated, truncated, info = step_result
            done = terminated or truncated
        else:
            observation, reward, done, info = step_result
        # Show the game on the screen
        env.render()
finally:
    # Always release the emulator window, even if the loop is interrupted
    env.close()


(240, 256, 3)


## Pre-process Environment


GrayScaleObservation - converts colored frames into grayscale to cut down on computation and make training faster

VecFrameStack - captures several consecutive frames while playing the game and stacks them together into a single observation. The AI can then see how the game changed across those frames (for example, which way Mario is moving) and learn from the actions taken.

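DummyVecEnv - wraps the environment in the vectorized interface that Stable Baselines expects. It can hold several environments so an agent can be trained on more than one at a time (they are stepped one after another in a single process); here it holds just one. In the code below it is applied before the frames are stacked.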


In [None]:
# Build the environment wrapper by wrapper, re-reading the observation after
# every step so each printed shape and plotted frame reflects the wrapper that
# was just applied.

# Setup the base environment
env = gym_super_mario_bros.make("SuperMarioBros-v0")  # standard version
# Simplify the controls or actions
env = JoypadSpace(env, SIMPLE_MOVEMENT)

# Show original game frame
# NOTE(review): assumes reset() returns only the observation array (older Gym
# API), as the original cell did — confirm against the installed Gym version.
state = env.reset()
print(state.shape)
plt.imshow(state)
plt.show()

# Convert to gray scale to minimize data for preprocessing
env = GrayScaleObservation(
    env, keep_dim=True
)  # keep the channel dimension to be able to apply stacking

# Show game frame after gray scale conversion
state = env.reset()
print(state.shape)
# keep_dim=True is expected to leave a trailing channel axis of size 1; drop it for plotting
plt.imshow(state[:, :, 0], cmap="gray")
plt.show()

# Wrap inside the Dummy environment.
# The current env is bound as a default argument so the factory does not
# depend on the name `env`, which is rebound by this very statement.
env = DummyVecEnv([lambda base_env=env: base_env])

# Show game frame after dummy environment vectorization
state = env.reset()
print(state.shape)
# Vectorized observations carry a leading environment axis; index 0 is the only env
plt.imshow(state[0][:, :, 0], cmap="gray")
plt.show()

# Stack 4 different frames or images together and apply channel order to last.
env = VecFrameStack(env, 4, channels_order="last")
state = env.reset()
print(state.shape)

# Show game frame after stacking frames (last channel of the stack)
plt.imshow(state[0][:, :, -1], cmap="gray")
plt.show()

state, reward, done, info = env.step([env.action_space.sample()])

# Show every frame in the stack side by side
n_frames = state.shape[3]
plt.figure(figsize=(20, 16))
for i in range(n_frames):
    plt.subplot(1, n_frames, i + 1)
    plt.imshow(state[0][:, :, i])
plt.show()
state = env.reset()

# Available actions; the index passed to env.step() selects one of these
print(SIMPLE_MOVEMENT)

# run several times to show mario jumping
# NOTE(review): index 5 is presumably the jump entry of SIMPLE_MOVEMENT — confirm
state, reward, done, info = env.step([5])

# Show every frame in the stack side by side after the action
n_frames = state.shape[3]
plt.figure(figsize=(20, 16))
for i in range(n_frames):
    plt.subplot(1, n_frames, i + 1)
    plt.imshow(state[0][:, :, i])
plt.show()

## Train the model


In [None]:
# Folder that receives the TensorBoard logs written during training
log_path = os.path.join("Training", "Logs")

# Build the PPO agent with a CNN policy on top of the preprocessed environment
model = PPO(
    policy="CnnPolicy",
    env=env,
    learning_rate=1e-6,
    n_steps=512,
    tensorboard_log=log_path,
    verbose=1,
)

# Train the agent. Raise total_timesteps for better results — training may
# take hours without a GPU.
model.learn(total_timesteps=10_000)

### Save the model


In [None]:
# Persist the trained agent to disk under the name "mario_model_10000"
model.save("mario_model_10000")

### Load the model


In [None]:
# Restore the agent saved as "mario_model_10000", replacing the in-memory `model`
model = PPO.load("mario_model_10000")

### Test the model


In [None]:
# Let the trained agent play until the user interrupts the cell/script.
state = env.reset()
try:
    while True:
        # Ask the model for the next action given the current observation
        actions, _ = model.predict(state)
        # NOTE(review): no manual reset on `done` here — the vectorized env is
        # expected to restart finished episodes itself; confirm in the SB3 docs.
        state, reward, done, info = env.step(actions)
        env.render()
finally:
    # The loop has no exit condition, so this runs when it is interrupted
    # (e.g. KeyboardInterrupt) and releases the emulator window.
    env.close()
