# 1. Setup Mario

In [1]:
!pip install gym_super_mario_bros==7.3.0 nes_py



In [2]:
# Import the game
import gym_super_mario_bros
# Import the Joypad wrapper
from nes_py.wrappers import JoypadSpace
# Import the SIMPLIFIED controls
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

In [3]:
# Build the Super Mario Bros environment and wrap it in JoypadSpace
# so that it is driven with the SIMPLE_MOVEMENT controls
env = JoypadSpace(
    gym_super_mario_bros.make('SuperMarioBros-v0'),
    SIMPLE_MOVEMENT,
)

In [4]:
# Flag telling the loop whether the game has to be (re)started
done = True
try:
    # Loop through each frame in the game
    for step in range(100000):
        # Start the game on the first frame and whenever the previous step reported done
        if done:
            env.reset()
        # Take a random action sampled from the action space
        state, reward, done, info = env.step(env.action_space.sample())
        # Show the game on the screen
        env.render()
finally:
    # Close the game even if the loop is interrupted (e.g. by a KeyboardInterrupt)
    env.close()

  return (self.ram[0x86] - self.ram[0x071c]) % 256


KeyboardInterrupt: 

# 2. Preprocess Environment

In [5]:
# Install pytorch
!pip install torch==1.10.1+cu113 torchvision==0.11.2+cu113 torchaudio===0.10.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html

Looking in links: https://download.pytorch.org/whl/cu113/torch_stable.html


In [6]:
# Install stable baselines for RL stuff
!pip install stable-baselines3[extra]



In [7]:
# Import Frame Stacker Wrapper and GrayScaling Wrapper
from gym.wrappers import GrayScaleObservation
# Import Vectorization Wrappers
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv
# Import Matplotlib to show the impact of frame stacking
from matplotlib import pyplot as plt

In [8]:
def _make_mario_env():
    """Build a single Mario environment with simplified controls and grayscale frames."""
    # 1. Create the base environment
    base_env = gym_super_mario_bros.make('SuperMarioBros-v0')
    # 2. Simplify the controls
    simplified_env = JoypadSpace(base_env, SIMPLE_MOVEMENT)
    # 3. Grayscale, keeping the channel dimension
    return GrayScaleObservation(simplified_env, keep_dim=True)

# 4. Wrap inside the Dummy Environment (it receives a factory that builds the env)
env = DummyVecEnv([_make_mario_env])
# 5. Stack 4 frames along the last (channel) axis
env = VecFrameStack(env, 4, channels_order='last')

In [9]:
# Reset the wrapped environment and keep the initial observation
state = env.reset()

In [10]:
# Advance the environment by one step using action index 5.
# The action is given as a list because env is a vectorized environment (see DummyVecEnv above).
# NOTE(review): index 5 presumably refers to an entry of SIMPLE_MOVEMENT — confirm which button it maps to.
state, reward, done, info = env.step([5])

# 3. Train the RL Model

In [11]:
# Import os for file path management
import os 
# Import PPO for algos
from stable_baselines3 import PPO
from stable_baselines3 import DQN
# Import Base Callback for saving models
from stable_baselines3.common.callbacks import BaseCallback

In [12]:
class TrainAndLoggingCallback(BaseCallback):
    """Callback that periodically saves the model being trained.

    Args:
        check_freq: A checkpoint is written whenever the callback's call
            counter (``self.n_calls``) is a multiple of this value.
        save_path: Directory that receives the checkpoints. When ``None``,
            no directory is created and no checkpoint is written.
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, check_freq, save_path, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        """Create the checkpoint directory if a save path was given."""
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        """Save the model as ``best_model_<n_calls>`` on every ``check_freq``-th call.

        Returns:
            Always ``True`` (the callback never asks for training to stop).
        """
        # Same None guard as _init_callback: without it os.path.join(None, ...)
        # would raise a TypeError at the first checkpoint.
        if self.save_path is not None and self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [13]:
# Directory handed to the training callback as its save_path (model checkpoints)
CHECKPOINT_DIR = './train/'
# Directory handed to the model as its tensorboard_log location
LOG_DIR = './logs/'

In [14]:
# Checkpoint callback: saves the model into CHECKPOINT_DIR every 10000 callback calls
callback = TrainAndLoggingCallback(
    check_freq=10000,
    save_path=CHECKPOINT_DIR,
)

In [15]:
# Create the DQN agent with a CNN policy on the preprocessed environment
model = DQN(
    'CnnPolicy',
    env,
    verbose=1,
    tensorboard_log=LOG_DIR,
    learning_rate=0.000001,
    seed=64,
    # NOTE(review): a replay buffer of size 100 is very small — confirm this is intentional
    buffer_size=100,
)

Using cuda device
Wrapping the env in a VecTransposeImage.


In [16]:
# Train the agent for 1,000,000 timesteps; the callback is passed in so that
# checkpoints are written while training runs
model.learn(total_timesteps=1000000, callback=callback)

Logging to ./logs/DQN_2


  return (self.ram[0x86] - self.ram[0x071c]) % 256


----------------------------------
| rollout/            |          |
|    exploration_rate | 0.452    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 371      |
|    time_elapsed     | 155      |
|    total_timesteps  | 57662    |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.00432  |
|    n_updates        | 1915     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.256    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 233      |
|    time_elapsed     | 334      |
|    total_timesteps  | 78274    |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.2      |
|    n_updates        | 7068     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 80       |
|    fps              | 141      |
|    time_elapsed     | 1212     |
|    total_timesteps  | 170945   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.166    |
|    n_updates        | 30236    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 84       |
|    fps              | 139      |
|    time_elapsed     | 1270     |
|    total_timesteps  | 177167   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.112    |
|    n_updates        | 31791    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 156      |
|    fps              | 128      |
|    time_elapsed     | 1957     |
|    total_timesteps  | 251113   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.523    |
|    n_updates        | 50278    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 160      |
|    fps              | 127      |
|    time_elapsed     | 1987     |
|    total_timesteps  | 254336   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.875    |
|    n_updates        | 51083    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 232      |
|    fps              | 122      |
|    time_elapsed     | 2664     |
|    total_timesteps  | 325694   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.705    |
|    n_updates        | 68923    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 236      |
|    fps              | 121      |
|    time_elapsed     | 2712     |
|    total_timesteps  | 330585   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.76     |
|    n_updates        | 70146    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 308      |
|    fps              | 118      |
|    time_elapsed     | 3475     |
|    total_timesteps  | 411241   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.934    |
|    n_updates        | 90310    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 312      |
|    fps              | 118      |
|    time_elapsed     | 3508     |
|    total_timesteps  | 414702   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.696    |
|    n_updates        | 91175    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 384      |
|    fps              | 115      |
|    time_elapsed     | 4263     |
|    total_timesteps  | 494522   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.291    |
|    n_updates        | 111130   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 388      |
|    fps              | 115      |
|    time_elapsed     | 4321     |
|    total_timesteps  | 500594   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 2.04     |
|    n_updates        | 112648   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 460      |
|    fps              | 114      |
|    time_elapsed     | 5106     |
|    total_timesteps  | 582912   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.817    |
|    n_updates        | 133227   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 464      |
|    fps              | 114      |
|    time_elapsed     | 5148     |
|    total_timesteps  | 587407   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.376    |
|    n_updates        | 134351   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 536      |
|    fps              | 112      |
|    time_elapsed     | 6018     |
|    total_timesteps  | 679158   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.59     |
|    n_updates        | 157289   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 540      |
|    fps              | 112      |
|    time_elapsed     | 6081     |
|    total_timesteps  | 685696   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.83     |
|    n_updates        | 158923   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 612      |
|    fps              | 111      |
|    time_elapsed     | 6923     |
|    total_timesteps  | 774575   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.24     |
|    n_updates        | 181143   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 616      |
|    fps              | 111      |
|    time_elapsed     | 6956     |
|    total_timesteps  | 778143   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.567    |
|    n_updates        | 182035   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 688      |
|    fps              | 111      |
|    time_elapsed     | 7843     |
|    total_timesteps  | 871501   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.78     |
|    n_updates        | 205375   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 692      |
|    fps              | 111      |
|    time_elapsed     | 7879     |
|    total_timesteps  | 875355   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.27     |
|    n_updates        | 206338   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 764      |
|    fps              | 110      |
|    time_elapsed     | 8719     |
|    total_timesteps  | 963534   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.29     |
|    n_updates        | 228383   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 768      |
|    fps              | 110      |
|    time_elapsed     | 8751     |
|    total_timesteps  | 966910   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 2.24     |
|    n_updates        | 229227   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

<stable_baselines3.dqn.dqn.DQN at 0x28bbe1d64c0>

In [None]:
# Save the current model under the name 'thisisatestmodel' (relative path)
model.save('thisisatestmodel')

# 4. Test it Out

In [None]:
# Load a saved model from the checkpoint directory.
# The file name follows the 'best_model_<n_calls>' pattern used by TrainAndLoggingCallback.
model = DQN.load('./train/best_model_1000000')

In [None]:
# Reset the environment and keep the starting observation
state = env.reset()

In [None]:
# Start the game
state = env.reset()
try:
    # Loop through the game until the cell is interrupted
    while True:
        # Ask the model for an action based on the current observation
        action, _ = model.predict(state)
        state, reward, done, info = env.step(action)
        env.render()
finally:
    # The loop can only end through an exception (e.g. a KeyboardInterrupt),
    # so close the game here to make sure it is always released
    env.close()

AttributeError: 'DQN' object has no attribute 'parameters'