1. SetUp SuperMario

In [1]:
'''SetUp of Super Mario'''

'''

        Libraries installed
==================================
1. pip install gym-super-mario-bros  => ("Allows us to play Mario in Python") {https://pypi.org/project/gym-super-mario-bros/}
2. pip install nes-py {https://pypi.org/project/nes-py/}
'''



In [2]:
'''Importing Dependencies'''
# 1. Import Super Mario
# 2. Import the Joypad Wrapper ("Wraps the Movement of our model ==> Joypadspace wrapper makes it easier for our AI to learn how to play")
# 3. Import the Simplified Controls

import gym_super_mario_bros
from nes_py.wrappers import JoypadSpace
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT



In [3]:
# Display the button combinations the AI is limited to.
SIMPLE_MOVEMENT

[['NOOP'],
 ['right'],
 ['right', 'A'],
 ['right', 'B'],
 ['right', 'A', 'B'],
 ['A'],
 ['left']]

In [4]:
# Set up the game: create the base Super Mario environment and wrap it in
# JoypadSpace so that only the SIMPLE_MOVEMENT button combinations are available.
env = JoypadSpace(gym_super_mario_bros.make('SuperMarioBros-v0'), SIMPLE_MOVEMENT)

In [5]:
# Only the last expression of a cell is displayed, so the action space has to be
# printed explicitly or its value is silently discarded.
# (Try with or without the JoypadSpace wrapper to compare the number of actions.)
print(env.action_space)

# Check the Observation Space (this is the value the cell displays)
env.observation_space.shape

(240, 256, 3)

In [None]:
# Number of frames to play with random actions before stopping.
N_RANDOM_STEPS = 100000

# Flag set to True so that the very first iteration resets (i.e. starts) the game
done = True

try:
    # Loop through each frame in the game
    for step in range(N_RANDOM_STEPS):
        if done:
            # Start the game, or restart it after the previous episode finished
            env.reset()

        # Pass a random action to the game; step returns (state, reward, done, info)
        state, reward, done, info = env.step(env.action_space.sample())

        # Show game on the screen
        env.render()
finally:
    # Close the game even when the loop is interrupted or raises part-way through,
    # so the environment is not left open.
    env.close()

In [8]:
'''Explaining the State, Reward, Done && Info'''
# env.reset()
# state = env.reset()
# state.shape # A state is basically a Frame from the game

# On taking a Step, 4 values are returned
''' 
- State => env.step(1)[0] (Particular frame)
- Reward => env.step(1)[1] (Reward obtained)
- Done => env.step(1)[2] (Whether we are dead or not)
- Info => env.step(1)[3] (Basic Info)

'''
# env.step(1)[0]
# env.step(1)[1]
# env.step(1)[2]
# env.step(1)[3]

' \n- State => env.step(1)[0] (Particular frame)\n- Reward => env.step(1)[1] (Reward obtained)\n- Done => env.step(1)[2] (Whether we are dead or not)\n- Info => env.step(1)[3] (Basic Info)\n\n'

2. Pre-Processing the Game Environment

In [9]:
'''     
    Pre-processing steps
============================
-  Gray Scaling (Cuts down data the AI has to learn from)
-  Frame Stacking (Gives our model memory, Helps determine Trajectory and Velocity)
'''



In [10]:
# Dependencies / Libraries
'''
1. Import Frame Stacker Wrapper and Grayscaling Wrapper
2. Import Vectorization Wrappers
3. Import MatplotLib  => Will Show us the impact of Frame Stacking

=>  Stable-Baselines3 is a reinforcement learning library (https://stable-baselines3.readthedocs.io/en/master/)
    - Gives algorithms that are used to train your AI model
'''

from gym.wrappers import GrayScaleObservation
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv
from matplotlib import pyplot as plt

In [12]:
'''
1 . Create the Base Environment
2 . Simplify the Controls
3 . GrayScale Environment
4 . Wrap inside the Dummy environment (=> Dummy Stable Baseline Vectorization environment ) {==> Changes the State of our Data}
5 . Stack the Frames

'''
# Number of consecutive frames stacked into one observation.
N_STACK = 4


def make_mario_env():
    """Build a single grayscale Super Mario environment with simplified controls.

    Used as the environment factory handed to DummyVecEnv. A named factory is
    used instead of ``lambda: env`` because such a lambda looks up the global
    ``env`` only when it is called, while the very statement that creates the
    vectorized environment rebinds that same name.
    """
    #1
    base_env = gym_super_mario_bros.make('SuperMarioBros-v0')

    #2
    base_env = JoypadSpace(base_env, SIMPLE_MOVEMENT)

    #3 keep_dim=True is passed through unchanged from the original pipeline
    return GrayScaleObservation(base_env, keep_dim=True)


#4
env = DummyVecEnv([make_mario_env])

#5
env = VecFrameStack(env, N_STACK, channels_order='last')


In [13]:
# state = env.reset()
# state.shape

# # # '''
# # # Without GrayScaling, you get (240,256,3) => 240 * 256 pixels, and 3 channels ==> Colored Picture
# # # '''

# # #Use MatplotLib to show the game Frame

# # plt.imshow(state)

# # '''
# #  After passing data through a Dummy Environment
# # '''
# # plt.imshow(state[0])
# plt.imshow(state[0])
# env = VecFrameStack(env,4) 


In [14]:
# state = env.reset()

In [15]:
# state,reward, done, info = env.step([5])

In [16]:
# plt.figure(figsize=(20,16))
# for i in range(state.shape[3]):
#     plt.subplot(1,4,i+1)
#     plt.imshow(state[0][:,:,i])
# plt.show()

3. Train the RL Model

In [17]:
'''Dependencies'''
'''
1. Import OS => For file Path management (Determine Where to save our models)
2. Import PPO (Proximal Policy Optimisation Algorithm) from                         =====> https://stable-baselines3.readthedocs.io/en/master/guide/install.html
    baseline to facilitate - Reinforcement Learning(1. Agent   3. Environment
                                                    2. Action  4. Reward)       
3. Import Callbacks to save models after a certain number of steps                                              
'''

'\n1. Import OS => For file Path management (Determine Where to save our models)\n2. Import PPO (Proximal Policy Optimisation Algorithm) from                         =====> https://stable-baselines3.readthedocs.io/en/master/guide/install.html\n    baseline to facilitate - Reinforcement Learning(1. Agent   3. Environment\n                                                    2. Action  4. Reward)       \n3. Import Callbacks to save models after a certain number of steps                                              \n'

In [18]:
import os
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import BaseCallback

In [19]:
# Callback
class TrainAndLoggingCallback(BaseCallback):
    """Callback that periodically saves the model being trained.

    Every ``check_freq`` calls, the current model is saved inside ``save_path``
    under the name ``best_model_<n_calls>``. If ``save_path`` is ``None`` no
    directory is created and nothing is saved.

    ``self.n_calls`` and ``self.model`` are not set in this class; presumably
    they are provided by ``BaseCallback`` (confirm against Stable-Baselines3).
    """

    def __init__(self, check_freq, save_path, verbose=1):
        """Store the checkpoint settings.

        Args:
            check_freq: save the model every this many callback calls.
            save_path: directory to write checkpoints into, or None to disable.
            verbose: verbosity level forwarded to ``BaseCallback``.
        """
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        """Create the checkpoint directory if a save path was given."""
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        """Save a checkpoint on every ``check_freq``-th call; always return True."""
        # Mirror the None guard in _init_callback: without it, os.path.join
        # would raise TypeError when save_path is None.
        if self.save_path is not None and self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        # NOTE(review): presumably True tells the trainer to keep going - confirm.
        return True

In [20]:
# Output locations used by the training cells below:
#   LOG_DIR        -> passed to the model as its TensorBoard log directory
#   CHECKPOINT_DIR -> passed to the callback as the folder for saved models
LOG_DIR = './logs/'
CHECKPOINT_DIR = './train/'




In [21]:
# Set up the callback: save the model into CHECKPOINT_DIR once every
# 10000 callback calls.
callback = TrainAndLoggingCallback(
    save_path=CHECKPOINT_DIR,
    check_freq=10000,
    verbose=1,
)

In [22]:
# Set up the PPO model.
#   'CnnPolicy'     => Convolutional Neural Network policy (a deep-learning network
#                      suited to image input; other network families include
#                      RNNs, LSTMs, GRUs and MLPs)
#   env             => the preprocessed, frame-stacked environment built above
#   verbose=1       => print the training progress
#   tensorboard_log => directory the training logs are written to
#   learning_rate   => learning rate
#   n_steps         => number of steps
model = PPO(
    'CnnPolicy',
    env,
    n_steps=512,
    learning_rate=0.000001,
    tensorboard_log=LOG_DIR,
    verbose=1,
)

Using cpu device
Wrapping the env in a VecTransposeImage.


In [23]:
# Train the AI model. This is where the model starts to learn.
# total_timesteps is, roughly, how many frames the AI is going to see during
# training - 1000000 in this case. The callback is invoked along the way so
# that the model is saved periodically.
model.learn(callback=callback, total_timesteps=1000000)

Logging to ./logs/PPO_1
----------------------------
| time/              |     |
|    fps             | 7   |
|    iterations      | 1   |
|    time_elapsed    | 66  |
|    total_timesteps | 512 |
----------------------------
