<a href="https://colab.research.google.com/github/kennethgoodman/hugging_face_dl_course/blob/main/Hugging_Face_DL_Course_Unit_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Install dependencies and create a virtual screen 🔽


In [None]:
!apt install swig cmake
!pip install -r https://huggingface.co/spaces/ThomasSimonini/temp-space-requirements/raw/main/requirements/requirements-unit1.txt
!sudo apt-get update
!apt install python-opengl
!apt install ffmpeg
!apt install xvfb
!pip3 install pyvirtualdisplay

To make sure the newly installed libraries are used, **it is sometimes necessary to restart the notebook runtime**. The next cell will force the **runtime to crash, so you'll need to reconnect and run the code starting from here**. Thanks to this trick, **we will be able to run our virtual screen.**

In [None]:
import os

# Send signal 9 to this kernel's own process so the runtime crashes and
# restarts; reconnect afterwards and continue with the cells below.
kernel_pid = os.getpid()
os.kill(kernel_pid, 9)

In [None]:
# Virtual display: create a 1400x900 screen with visible=0 and start it,
# keeping the handle in `virtual_display`.
from pyvirtualdisplay import Display

virtual_display = Display(size=(1400, 900), visible=0)
virtual_display.start()

## Import the packages 📦


In [None]:
import gym

from huggingface_sb3 import load_from_hub, package_to_hub, push_to_hub
from huggingface_hub import notebook_login # To log to our Hugging Face account to be able to upload models to the Hub.
from huggingface_sb3 import package_to_hub

from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import DummyVecEnv

In [None]:
# Log in to the Hugging Face account; this is what allows the notebook to
# upload models to the Hub later on.
notebook_login()
# Configure git globally to use the "store" credential helper so the
# credentials can be reused by later git operations.
!git config --global credential.helper store

# Helper Functions

In [None]:
def get_vec_env(env_id, number_of_envs):
  """Build a vectorized environment for ``env_id``.

  Args:
    env_id: Environment id forwarded to ``make_vec_env``.
    number_of_envs: Passed as ``n_envs`` to ``make_vec_env``.

  Returns:
    Whatever ``make_vec_env`` returns for these arguments.
  """
  vec_env = make_vec_env(env_id, n_envs=number_of_envs)
  return vec_env

# Environment ids of interest for this notebook. The helper functions
# visible in this notebook take an env id as an argument and do not read
# this list directly.
envs = [
    "LunarLander-v2",
    "CartPole-v1",
    "FrozenLake-v1-4x4-no_slippery",
    "FrozenLake-v1-8x8-no_slippery",
    "FrozenLake-v1-4x4",
    "FrozenLake-v1-8x8",
    "Taxi-v3",
    "CarRacing-v0",
    "MountainCar-v0",
    "SpaceInvadersNoFrameskip-v4",
    "BipedalWalker-v3",
    "Walker2DBulletEnv-v0",
    "AntBulletEnv-v0",
    "HalfCheetahBulletEnv-v0",
]

def get_model_architecture(env_id):
  """Return the architecture label used for ``env_id``.

  The per-environment override table is currently empty, so every
  environment id maps to the default label ``'PPO'``.
  """
  architecture_overrides = {}
  return architecture_overrides.get(env_id, 'PPO')

def get_model(env_id, number_of_envs=16):
  """Build an untrained PPO agent for ``env_id``.

  Hyperparameters are looked up per environment id, falling back to a
  shared default set. Exactly one model and one vectorized environment are
  constructed per call. (Selecting between fully constructed ``PPO``
  objects would build both the per-environment and the default model, and
  therefore two sets of environments, on every call.)

  Args:
    env_id: Environment id; also passed to ``get_vec_env``.
    number_of_envs: Number of parallel environments to create.

  Returns:
    A ``PPO`` instance using ``'MlpPolicy'`` on the vectorized environment.
  """
  default_hyperparams = dict(
      n_steps=1024,
      batch_size=64,
      n_epochs=4,
      gamma=0.999,
      gae_lambda=0.98,
      ent_coef=0.01,
  )
  # Only environments that need different settings are listed here.
  per_env_hyperparams = {
      'LunarLander-v2': dict(
          n_steps=2048,
          batch_size=256,
          n_epochs=16,
          gamma=0.95,
          gae_lambda=0.98,
          ent_coef=0.01,
      ),
  }
  hyperparams = per_env_hyperparams.get(env_id, default_hyperparams)
  return PPO(
      policy='MlpPolicy',
      env=get_vec_env(env_id, number_of_envs),
      verbose=1,
      **hyperparams
  )

def get_version_id(env_id):
  """Return the version tag used when naming the model for ``env_id``.

  The per-environment override table is currently empty, so every
  environment id maps to the default tag ``"0_0_1"``.
  """
  version_overrides = {}
  return version_overrides.get(env_id, "0_0_1")

def get_model_name(env_id):
  """Return the model name for ``env_id``: ``<env_id>-version_<version id>``.

  The version id comes from ``get_version_id(env_id)``.
  """
  model_name = f"{env_id}-version_{get_version_id(env_id)}"
  return model_name

def train_and_save(env_id, model, timesteps=1000000):
  """Train ``model`` and save it under the name derived from ``env_id``.

  Args:
    env_id: Environment id; used to derive the save name via ``get_model_name``.
    model: Object exposing ``learn(total_timesteps=...)`` and ``save(name)``.
    timesteps: Value passed to ``learn`` as ``total_timesteps``.
  """
  model.learn(total_timesteps=timesteps)
  # The name is resolved only after training, matching the call order
  # of learn() followed by save().
  save_name = get_model_name(env_id)
  model.save(save_name)

def eval_agent(env_id, model):
  """Evaluate ``model`` on a fresh ``env_id`` environment and report the score.

  Runs 10 evaluation episodes with ``deterministic=True`` through
  ``evaluate_policy``, prints the mean reward and its standard deviation,
  and returns both so callers can use the numbers programmatically.

  Args:
    env_id: Environment id passed to ``gym.make``.
    model: Trained agent passed to ``evaluate_policy``.

  Returns:
    Tuple ``(mean_reward, std_reward)`` as returned by ``evaluate_policy``.
  """
  # Create a new environment for evaluation
  eval_env = gym.make(env_id)
  try:
    # Evaluate the model with 10 evaluation episodes and deterministic=True
    mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
  finally:
    # Release the environment even if evaluation raises.
    eval_env.close()

  # Print the results
  print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")
  return mean_reward, std_reward

def publish_model(env_id, model):
  """Package ``model`` and push it to the Hub via ``package_to_hub``.

  The repository id is ``kennethgoodman/<architecture lowercased>-<env_id>``,
  with the architecture label taken from ``get_model_architecture``.

  Args:
    env_id: Environment id; used for the repo id, commit message, model
      name and the evaluation environment.
    model: Trained model to upload.
  """
  model_architecture = get_model_architecture(env_id)
  repo_id = f"kennethgoodman/{model_architecture.lower()}-{env_id}"
  commit_message = f"Upload {model_architecture} {env_id} trained agent"

  def _make_eval_env():
    # Factory handed to DummyVecEnv, which expects callables.
    return gym.make(env_id)

  eval_env = DummyVecEnv([_make_eval_env])

  hub_kwargs = dict(
      model=model,                            # the trained model
      model_name=get_model_name(env_id),      # name of the trained model
      model_architecture=model_architecture,  # architecture label
      env_id=env_id,                          # name of the environment
      eval_env=eval_env,                      # evaluation environment
      repo_id=repo_id,                        # {organization}/{repo_name} on the Hub
      commit_message=commit_message,
  )
  package_to_hub(**hub_kwargs)

def train_save_eval_and_publish_model(env_id):
  """Run the whole pipeline for ``env_id`` and return the model.

  Builds the model with ``get_model`` (default arguments), then runs
  ``train_and_save``, ``eval_agent`` and ``publish_model`` in that order,
  each called with ``(env_id, model)``.
  """
  agent = get_model(env_id)
  for pipeline_step in (train_and_save, eval_agent, publish_model):
    pipeline_step(env_id, agent)
  return agent

# Run Code

In [None]:
# Train, evaluate and publish a LunarLander agent with the helpers defined
# above. `env_id` and `model` are bound at notebook level.
env_id = 'LunarLander-v2'
# 16 * 10 = 160 parallel environments (get_model's default is 16).
model = get_model(env_id, 16 * 10)
# 10M timesteps, overriding train_and_save's default of 1,000,000.
train_and_save(env_id, model, 10_000_000)
eval_agent(env_id, model)
publish_model(env_id, model)

# Loading A Model

In [None]:
from huggingface_sb3 import load_from_hub
repo_id = "Classroom-workshop/assignment2-omar" # The repo_id
filename = "ppo-LunarLander-v2.zip" # The model filename.zip

# When the model was trained on Python 3.8 the pickle protocol is 5,
# but Python 3.6 and 3.7 use protocol 4.
# In order to get compatibility we need to:
# 1. Install pickle5
#    NOTE(review): the install cell of this notebook shows no explicit
#    pickle5 install — confirm the requirements file provides it.
# 2. Pass replacement values for these entries to PPO.load() through
#    `custom_objects`.
custom_objects = {
            "learning_rate": 0.0,
            "lr_schedule": lambda _: 0.0,
            "clip_range": lambda _: 0.0,
}

# Fetch the checkpoint file from the Hub repo, then load it as a PPO model.
# This rebinds the notebook-level `model`.
# NOTE(review): the checkpoint comes from a third-party repository and
# loading presumably involves unpickling — only load from trusted sources.
checkpoint = load_from_hub(repo_id, filename)
model = PPO.load(checkpoint, custom_objects=custom_objects, print_system_info=True)

Let's evaluate this agent:

In [None]:
#@title
eval_env = gym.make("LunarLander-v2")
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")