<a target="_blank" href="https://colab.research.google.com/github/rcpaffenroth/dac_raghu/blob/main/LunarLander.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# Setup and libraries

In [1]:
import sys
IN_COLAB = 'google.colab' in sys.modules

In [2]:
if IN_COLAB:
  ! apt-get install swig
  ! pip install stable-baselines3[extra] gymnasium[box2d] huggingface_sb3
# Otherwise, install locally and you need the following
# NOTE: Need "gym" and "gymnasium" installed, since we use "gymnasium" for the LunarLander environment
#       and "gym" is for huggingface_sb3.
# NOTE: Need "ffmpeg" for the video recording, which may also need to be installed.
# pip install stable-baselines3[extra] gymnasium[box2d] huggingface_sb3 imageio[ffmpeg] gym


In [3]:
import gymnasium as gym
import matplotlib.pylab as py
import numpy as np

import imageio
from stable_baselines3 import PPO
from huggingface_sb3 import load_from_hub

import pandas as pa

from IPython import display
from IPython.display import HTML
from base64 import b64encode
%matplotlib inline

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Make the environment
env = gym.make("LunarLander-v2", render_mode='rgb_array')

observation = env.reset()
# Note, there are warnings here that I should fix at some point.

### Action Space
There are four discrete actions available:

0: do nothing

1: fire left orientation engine

2: fire main engine

3: fire right orientation engine

### Observation Space

The state is an 8-dimensional vector: the coordinates of the lander in x & y, its linear velocities in x & y, its angle, its angular velocity, and two booleans that represent whether each leg is in contact with the ground or not.

# Train a model

In [5]:
class RandomModel(object):
  def __init__(self, env):
    self.env = env

  def predict(self, obs):
    return env.action_space.sample()

random_model =  RandomModel(env)

In [6]:
# This is an trained model that has a good architecture and loss function, but is not trained very much
trained_model = PPO("MlpPolicy", env)
trained_model.learn(total_timesteps=10000)

<stable_baselines3.ppo.ppo.PPO at 0x7f77984fa0e0>

In [7]:
# checkpoint = load_from_hub(
#     repo_id="sb3/demo-hf-CartPole-v1",
#     filename="ppo-CartPole-v1.zip",
# )
checkpoint = load_from_hub(
    # repo_id="MalarzDawid/ppo-LunarLandar-v2",
    repo_id="sb3/a2c-LunarLander-v2",
    # filename="ppo-LunarLander-v2.zip",
    filename="a2c-LunarLander-v2.zip",
)

sota_model = PPO.load(checkpoint)

Exception: 'bytes' object cannot be interpreted as an integer
Exception: 'bytes' object cannot be interpreted as an integer


# Untrained model

In [15]:
# Make a movie of a trained agent
obs = env.reset()[0]
images = []
all_obs = []
all_actions = []
done = False
while not done:
   # This rendering mode puts an image into a numpy array
   images += [env.render()]
   action = random_model.predict(obs)
   all_obs.append(obs)
   all_actions.append(action)
   obs, reward, done, trunc, info = env.step(action)
env.close()

In [16]:
names = ['x', 'y', 'vx', 'vy', 'theta', 'omega', 'leg1', 'leg2']
all_obs[0]

array([ 0.00738888,  1.4002633 ,  0.74840033, -0.4736604 , -0.00855508,
       -0.16952363,  0.        ,  0.        ], dtype=float32)

In [17]:
all_actions[0]

1

In [10]:
# imageio is a nice library for taking a sequence of images and makeing a movie
name = 'tmp.mp4'
imageio.mimsave(name, images, fps=15)
mp4 = open(name,'rb').read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=400 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)



# Trained model

In [11]:
# Make a movie of a trained agent
obs = env.reset()[0]
images = []
done = False
while not done:
   # This rendering mode puts an image into a numpy array
   images += [env.render()]
   action, _state = trained_model.predict(obs)
   obs, reward, done, trunc, info = env.step(action)
env.close()

In [12]:
# imageio is a nice library for taking a sequence of images and makeing a movie
name = 'tmp.mp4'
imageio.mimsave(name, images, fps=15)
mp4 = open(name,'rb').read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=400 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)



# SotA model

In [13]:
# Make a movie of a trained agent
obs = env.reset()[0]
images = []
done = False
while not done:
   # This rendering mode puts an image into a numpy array
   images += [env.render()]
   action, _state = sota_model.predict(obs)
   obs, reward, done, trunc, info = env.step(action)
env.close()

In [14]:
# imageio is a nice library for taking a sequence of images and makeing a movie
name = 'tmp.mp4'
imageio.mimsave(name, images, fps=15)
mp4 = open(name,'rb').read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=400 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)

