<a href="https://colab.research.google.com/github/logmosier/ai_project/blob/master/mario_collab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install dependencies

Rendering dependencies



In [None]:
# Install the rendering packages: gym and pyvirtualdisplay via pip; xvfb,
# python-opengl and ffmpeg via apt-get.
# Both stdout and stderr are discarded, so install failures are hidden too.
#remove " > /dev/null 2>&1" to see what is going on under the hood
!pip install gym pyvirtualdisplay > /dev/null 2>&1
!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1

Main dependencies

In [None]:
# Replace whatever TensorFlow is preinstalled with tensorflow-gpu 1.15.
# -y answers pip's confirmation prompt so "Run all" does not stall waiting for input.
!pip uninstall -y tensorflow tensorflow-gpu && pip install tensorflow-gpu==1.15
!pip install gym[atari]
!pip install git+https://github.com/sajjadzamani/gym-super-mario-bros.git
!pip install stable_baselines

# Set Up Video Rendering


In [None]:
import gym
from gym import logger as gymlogger
from gym.wrappers import Monitor
gymlogger.set_level(40) #error only
import glob
import io
import base64
from IPython.display import HTML

from IPython import display as ipythondisplay
from pyvirtualdisplay import Display

# Create and start a pyvirtualdisplay virtual display of 1400x900; visible=0
# keeps it hidden.
# NOTE(review): presumably needed so gym can render frames on a machine with no
# physical screen (e.g. Colab) — confirm.
display = Display(visible=0, size=(1400, 900))
display.start()

"""
Utility functions to enable video recording of a gym environment and to display the recording.
To enable video, just do "env = wrap_env(env)".
"""

def show_video():
  """Display the newest recording found in ``logs/videos/`` inline.

  Looks for ``*.mp4`` files in ``logs/videos/``, embeds the most recently
  modified one as base64 data in an HTML ``<video>`` element and shows it in
  the notebook output. Prints "Could not find video" when the folder contains
  no mp4 files.
  """
  import os  # local import: only needed to compare modification times

  mp4list = glob.glob('logs/videos/*.mp4')
  if not mp4list:
    print("Could not find video")
    return

  # glob returns matches in arbitrary order, so choose the newest recording
  # explicitly instead of whichever file happens to be listed first.
  mp4 = max(mp4list, key=os.path.getmtime)
  # Read-only mode is sufficient, and the context manager guarantees the file
  # handle is closed even if reading fails.
  with io.open(mp4, 'rb') as video_file:
    video = video_file.read()
  encoded = base64.b64encode(video)
  ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))
    

def wrap_env(env, directory='logs/videos'):
  """Wrap ``env`` in a gym ``Monitor`` that writes its recordings to ``directory``.

  Args:
    env: The gym environment to wrap.
    directory: Output folder for the monitor files. Defaults to
      ``logs/videos``, the folder ``show_video`` searches for mp4 files, so a
      wrapped environment's recordings can be displayed afterwards.

  Returns:
    The ``Monitor``-wrapped environment.
  """
  # force=True is passed through unchanged from the original call.
  return Monitor(env, directory, force=True)

# Build Model

In [None]:
from nes_py.wrappers import JoypadSpace
import gym_super_mario_bros
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common import make_vec_env
from stable_baselines import PPO2
from stable_baselines.common.env_checker import check_env
from stable_baselines.common.callbacks import CallbackList, CheckpointCallback, EvalCallback

import gym
from stable_baselines.common.vec_env import VecVideoRecorder, DummyVecEnv

def train_model(name, total_timesteps=1000000, save_freq=10000):
  """Train a PPO2 agent on Super Mario Bros level 1-1 and save it as ``name``.

  Args:
    name: Path handed to ``model.save`` for the final model.
    total_timesteps: Number of environment steps to train for.
    save_freq: A checkpoint is written to ``./logs_joypad_vec/`` every
      ``save_freq`` steps.
  """
  # Single environment restricted to the SIMPLE_MOVEMENT action set.
  env = gym_super_mario_bros.make('SuperMarioBros-1-1-v2')
  env = JoypadSpace(env, SIMPLE_MOVEMENT)

  try:
    # Periodic checkpoints, written with the 'rl_model' file-name prefix.
    checkpoint_callback = CheckpointCallback(save_freq=save_freq,
                                             save_path='./logs_joypad_vec/',
                                             name_prefix='rl_model')

    model = PPO2(MlpPolicy, env, verbose=1, tensorboard_log="./mario_tensorboard/")
    model.learn(total_timesteps=total_timesteps, callback=checkpoint_callback)
    model.save(name)
  finally:
    # Always close the environment, even when training is interrupted or fails.
    env.close()

def run_model(name):
  """Load the saved model ``name``, record a rollout video and display it.

  Builds a single-environment ``DummyVecEnv`` for Super Mario Bros level 1-1,
  wraps it in a ``VecVideoRecorder`` that starts recording at step 0, steps the
  environment with the model's predicted actions for ``video_length + 1``
  steps, closes the environment and then calls ``show_video``.

  Args:
    name: Path handed to ``PPO2.load``.
  """
  env_id = 'SuperMarioBros-1-1-v2'
  video_folder = 'logs/videos/'
  video_length = 1000

  # Load first so a bad model path fails before any environment is created.
  model = PPO2.load(name)

  # Use the same JoypadSpace/SIMPLE_MOVEMENT wrapper as train_model; without it
  # the model's action indices would be applied to the unwrapped action space.
  env = DummyVecEnv([lambda: JoypadSpace(gym.make(env_id), SIMPLE_MOVEMENT)])

  # Record the video starting at the first step
  env = VecVideoRecorder(env, video_folder,
                         record_video_trigger=lambda x: x == 0, video_length=video_length,
                         name_prefix="random-agent-{}".format(env_id))

  try:
    # Take the first observation from the reset of the recording wrapper, so
    # the first prediction is based on the state being recorded.
    obs = env.reset()
    for _ in range(video_length + 1):
      action, _states = model.predict(obs)
      obs, _, _, _ = env.step(action)
  finally:
    # Closing the environment saves the video; do it even if a step fails.
    env.close()
  show_video()

# Train the agent and save it under the name "mario". train_model runs
# 1,000,000 timesteps, so expect this call to take a long time.
train_model("mario")

# Run model

In [None]:
# Load the saved "mario" model, record a rollout video and display it inline.
run_model("mario")