<a target="_blank" href="https://colab.research.google.com/github/rcpaffenroth/dac_raghu/blob/main/LunarLander.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# Setup and libraries

In [31]:
import sys

# True when the kernel is hosted by Google Colab: the `google.colab` package
# is already present in sys.modules there.
IN_COLAB = "google.colab" in sys.modules

In [32]:
# Install the notebook's dependencies automatically on Colab; on a local
# machine nothing is installed here and the commands below are run by hand.
if IN_COLAB:
  ! apt-get install swig
  ! pip install stable-baselines3[extra] gymnasium[box2d] huggingface_sb3
else:
  # Otherwise, install locally and you need the following
  # NOTE: Need "gym" and "gymnasium" installed, since we use "gymnasium" for the LunarLander environment
  #       and "gym" is for huggingface_sb3.
  # NOTE: Need "ffmpeg" for the video recording, which may also need to be installed.
  # sudo apt install swig
  # pip install stable-baselines3[extra] gymnasium[box2d] huggingface_sb3 imageio[ffmpeg] gym ipywidgets ipykernel pandas pyarrow
  pass

In [33]:
from base64 import b64encode

import gymnasium as gym
import imageio
import matplotlib.pylab as py
import numpy as np
import pandas as pd
from huggingface_sb3 import load_from_hub
from IPython.display import display
from IPython.display import HTML
from ipywidgets import interact, widgets
from stable_baselines3 import A2C, PPO
%matplotlib inline

In [34]:
# Make the environment; 'rgb_array' rendering returns frames as arrays so
# they can be collected into a video later.
env = gym.make("LunarLander-v2", render_mode='rgb_array')
# Gymnasium's reset() returns an (observation, info) pair. Unpack it so that
# `observation` holds the state vector itself rather than the whole tuple.
observation, _info = env.reset()


### Action Space
There are four discrete actions available:

0: do nothing

1: fire left orientation engine

2: fire main engine

3: fire right orientation engine

### Observation Space

The state is an 8-dimensional vector: the coordinates of the lander in x & y, its linear velocities in x & y, its angle, its angular velocity, and two booleans that represent whether each leg is in contact with the ground or not.

In [35]:
# Column labels for the 8 components of the observation vector, in order.
obs_names = [
    'x', 'y',          # position
    'vx', 'vy',        # linear velocity
    'theta', 'vtheta', # angle and angular velocity
    'leg1', 'leg2',    # leg-contact flags
]

# Train a model

In [36]:
# Registry of agents keyed by a short name. Each entry is filled in by the
# cells below with a 'model' object and a 'runs' list of recorded episodes.
models = dict()

In [37]:
class RandomModel(object):
    """Baseline agent that ignores the observation and acts at random.

    It offers the same `predict(obs)` call shape the other registered models
    are used with, returning an `(action, state)` pair, so it can be passed
    to the same evaluation code.
    """

    def __init__(self, env):
        """Remember the environment whose action space will be sampled."""
        self.env = env

    def predict(self, obs):
        """Return a random action and a `None` placeholder state.

        Parameters
        ----------
        obs : ignored; accepted only for call compatibility.

        Returns
        -------
        tuple
            `(action, None)` where `action` is drawn from
            `self.env.action_space.sample()`.
        """
        # Sample from the environment given to the constructor, not from a
        # global variable that merely happens to be called `env`.
        return self.env.action_space.sample(), None

# Register the random baseline together with an empty list for its episodes.
random_model = RandomModel(env)
models['random'] = {'model': random_model, 'runs': []}

In [38]:
# A PPO agent with a good architecture and loss function that is only trained
# briefly. Training takes about 30 seconds on an RTX 4090.
trained_model = PPO("MlpPolicy", env)
trained_model.learn(total_timesteps=20000)

# Register it together with an empty list for its recorded episodes.
models['trained'] = {'model': trained_model, 'runs': []}

In [39]:
# Pretrained A2C agent from huggingface.co at https://huggingface.co/sb3/a2c-LunarLander-v2
# Mean reward: 181.08 +/- 95.35
checkpoint = load_from_hub(
    repo_id="sb3/a2c-LunarLander-v2",
    filename="a2c-LunarLander-v2.zip",
)

# This checkpoint was produced by A2C, so it is restored with the matching
# algorithm class instead of PPO.
good_model = A2C.load(checkpoint)

# Register it together with an empty list for its recorded episodes.
models['good'] = {'model': good_model, 'runs': []}

Exception: an integer is required (got type bytes)
Exception: an integer is required (got type bytes)


In [40]:
# Pretrained PPO agent published at https://huggingface.co/araffin/ppo-LunarLander-v2
# Mean reward:  283.49 +/- 13.74
checkpoint = load_from_hub(repo_id="araffin/ppo-LunarLander-v2", filename="ppo-LunarLander-v2.zip")
better_model = PPO.load(checkpoint)

# Register it together with an empty list for its recorded episodes.
models['better'] = {'model': better_model, 'runs': []}

Exception: an integer is required (got type bytes)


# Evaluate models

In [41]:
def evaluate_model(model_name, models=models, env=env):
    """Play one episode with a registered model and record what happened.

    Parameters
    ----------
    model_name : key of the entry in `models` to evaluate.
    models : registry dict; the selected entry must provide a 'model' object
        with a `predict(obs)` method returning `(action, state)` and a
        'runs' list.
    env : environment to play in. Its `render()` result is stored as one
        video frame per step.

    Returns
    -------
    None. A dict `{'data': DataFrame, 'images': frames}` is appended to
    `models[model_name]['runs']`. The DataFrame has one row per step with the
    observation columns from `obs_names` plus 'action' and 'reward'.

    Notes
    -----
    The environment is not closed here. It is shared between calls (it is the
    default argument), so closing it remains the caller's responsibility.
    """
    # Look the model up first so an unknown name fails before the env is touched.
    model = models[model_name]['model']

    # reset() returns (observation, info); only the observation is needed.
    obs, _info = env.reset()

    images = []
    all_obs = []
    all_actions = []
    all_rewards = []
    done = False
    while not done:
        # This rendering mode puts an image into a numpy array
        images.append(env.render())
        action, _state = model.predict(obs)
        all_obs.append(obs)
        all_actions.append(action)
        obs, reward, terminated, truncated, _info = env.step(action)
        all_rewards.append(reward)
        # The episode is over when the task ends (terminated) OR when the
        # environment cuts it short, e.g. on a time limit (truncated).
        done = terminated or truncated

    df = pd.DataFrame(all_obs, columns=obs_names)
    df['action'] = all_actions
    df['reward'] = all_rewards
    models[model_name]['runs'].append({'data': df, 'images': images})



In [42]:
# Record three episodes per model, visiting the models in the same order
# within each round.
for _round in range(3):
    for model_name in ('random', 'trained', 'good', 'better'):
        evaluate_model(model_name)
    

In [44]:
@interact(
    model_name=models.keys(),
    run_idx=widgets.IntSlider(
        min=0,
        # Bound the slider by the number of runs actually recorded (largest
        # count over all models) instead of a hard-coded value.
        max=max(0, max((len(entry['runs']) for entry in models.values()), default=0) - 1),
        step=1,
        value=0,
    ),
)
def show_video(model_name, run_idx):
    """Render one recorded episode of a model as an inline MP4 video.

    Parameters
    ----------
    model_name : key of the entry in `models` whose run is shown.
    run_idx : index into `models[model_name]['runs']`.

    The frames are written to 'tmp.mp4' in the working directory, read back,
    and embedded in the output as a base64 data URL. If the selected model has
    no run at `run_idx`, a message is printed and nothing is rendered.
    """
    runs = models[model_name]['runs']
    # Models may have different numbers of runs; guard against a slider
    # position that is valid for one model but not for this one.
    if not 0 <= run_idx < len(runs):
        print(f"No run {run_idx} recorded for model '{model_name}' ({len(runs)} available).")
        return

    images = runs[run_idx]['images']
    # imageio is a nice library for taking a sequence of images and making a movie
    name = 'tmp.mp4'
    imageio.mimsave(name, images, fps=15)
    # Read the file through a context manager so the handle is closed promptly.
    with open(name, 'rb') as video_file:
        mp4 = video_file.read()
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
    display(HTML("""
      <video width=400 controls>
            <source src="%s" type="video/mp4">
      </video>
      """ % data_url))

interactive(children=(Dropdown(description='model_name', options=('random', 'trained', 'good', 'better'), valu…

# Rewards

# Write files