<a href="https://colab.research.google.com/github/blackdarkside/reinforcement_learning/blob/master/parking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install Dependency

Install the Python packages and system libraries the notebook depends on.

In [4]:
%tensorflow_version 1.14.0 > /dev/null 2>&1
!pip install stable-baselines[mpi]==2.10.0 > /dev/null 2>&1
!pip install git+https://github.com/eleurent/highway-env > /dev/null 2>&1
!pip install gym pyvirtualdisplay > /dev/null 2>&1
!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1
!apt-get install x11-utils > /dev/null 2>&1

`%tensorflow_version` only switches the major version: 1.x or 2.x.
You set: `1.14.0 > /dev/null 2>&1`. This will be interpreted as: `1.x`.


TensorFlow 1.x selected.


Import the dependencies.

In [0]:
import gym
from gym import logger as gymlogger
from gym.wrappers import Monitor
import highway_env
import numpy as np
from stable_baselines import HER, SAC, DDPG
from stable_baselines.ddpg import NormalActionNoise
gymlogger.set_level(40) #error only
import math
import glob
import io
import base64
from IPython.display import HTML
from google.colab import files

Hyperparameters for each algorithm (SAC and DDPG).

In [0]:
# Training configuration, one entry per algorithm. Each entry carries the
# environment id, the HER / off-policy settings handed to the model
# constructor, the TensorBoard run name, the file name the trained model is
# saved under, and the number of training steps.
HYPERPARAMS = dict(
    parking_sac=dict(
        env_name="parking-v0",
        tb_log_name="SAC_first_run",
        filename="her_sac_highway",
        learning_rate=1e-3,
        buffer_size=int(1e6),
        gamma=0.95,
        verbose=1,
        batch_size=256,
        goal_selection_strategy='future',
        tensorboard_log="./tensorboard/",
        n_sampled_goal=4,
        steps=int(1e5),
    ),
    parking_ddpg=dict(
        env_name="parking-v0",
        noise_std=0.2,
        tb_log_name="DDPG_first_run",
        filename="her_ddpg_highway",
        gamma=0.95,
        batch_size=256,
        verbose=1,
        actor_lr=1e-3,
        critic_lr=1e-3,
        buffer_size=int(1e6),
        goal_selection_strategy='future',
        tensorboard_log="./tensorboard/",
        n_sampled_goal=4,
        steps=int(2e5),
    ),
)

# Training

SAC

In [0]:
# Train a SAC agent wrapped in HER on the environment named in the
# "parking_sac" configuration entry, then save it under the configured name.
params = HYPERPARAMS["parking_sac"]
env = gym.make(params["env_name"])

# The constructor keywords listed below are read straight from the config
# entry; only the network layout is fixed here.
model = HER(
    'MlpPolicy',
    env,
    SAC,
    **{
        key: params[key]
        for key in (
            "n_sampled_goal",
            "goal_selection_strategy",
            "verbose",
            "tensorboard_log",
            "buffer_size",
            "learning_rate",
            "gamma",
            "batch_size",
        )
    },
    policy_kwargs={"layers": [256, 256, 256]},
)

print("start training")
model.learn(params["steps"], tb_log_name=params["tb_log_name"])
model.save(params["filename"])

DDPG

In [0]:
# Train a DDPG agent wrapped in HER on the environment named in the
# "parking_ddpg" configuration entry, then save it under the configured name.
params = HYPERPARAMS["parking_ddpg"]
env = gym.make(params["env_name"])

# Zero-mean Gaussian exploration noise with one component per action
# dimension, all sharing the configured standard deviation.
n_actions = env.action_space.shape[0]
noise_std = params["noise_std"]
action_noise = NormalActionNoise(
    mean=np.zeros(n_actions),
    sigma=noise_std * np.ones(n_actions),
)

# The constructor keywords listed below are read straight from the config
# entry; the exploration noise and network layout are supplied here.
model = HER(
    'MlpPolicy',
    env,
    DDPG,
    **{key: params[key] for key in (
        "n_sampled_goal",
        "goal_selection_strategy",
        "verbose",
        "tensorboard_log",
        "buffer_size",
        "actor_lr",
        "critic_lr",
    )},
    action_noise=action_noise,
    gamma=params["gamma"],
    batch_size=params["batch_size"],
    policy_kwargs={"layers": [256, 256, 256]},
)

print("start training")
model.learn(params["steps"], tb_log_name=params["tb_log_name"])
model.save(params["filename"])

# Testing

Helper functions for recording the agent on video.

In [0]:
# Start a virtual display before any environment is wrapped for recording.
# Presumably this is what lets the environment render frames on a machine
# with no physical screen (xvfb is installed in the first cell) — TODO confirm.
from pyvirtualdisplay import Display
# visible=0 with a 1400x900 screen.
# NOTE(review): binding the name `display` at notebook level likely hides
# IPython's own display() helper in later cells — confirm nothing relies on it.
display = Display(visible=0, size=(1400, 900))
display.start()

def show_video():
  """Embed the first MP4 found under 'video/' in the cell output.

  Looks for files matching 'video/*.mp4', base64-encodes the first match and
  displays it inline as an HTML <video> element. Prints "Could not find video"
  when no file matches.
  """
  # Imported locally under an alias: the notebook-level name `display` is
  # bound to the pyvirtualdisplay instance, not to IPython's display helper.
  from IPython import display as ipythondisplay

  mp4list = glob.glob('video/*.mp4')
  if len(mp4list) > 0:
    mp4 = mp4list[0]
    # Read-only binary mode inside a context manager so the handle is closed.
    with io.open(mp4, 'rb') as video_file:
      video = video_file.read()
    encoded = base64.b64encode(video)
    ipythondisplay.display(HTML(data='''<video alt="test" autoplay loop controls style="height: 400px;"><source src="data:video/mp4;base64,{0}" type="video/mp4" /></video>'''.format(encoded.decode('ascii'))))
  else:
    print("Could not find video")


def wrap_env(env):
  """Return `env` wrapped in a gym `Monitor` writing to './video' (force=True)."""
  return Monitor(env, './video', force=True)

Load the trained DDPG agent and run it in the parking environment while recording video.

In [0]:
# Load the saved HER + DDPG agent and roll it out for 500 steps in the
# parking environment while the Monitor wrapper records to './video'.
env = gym.make("parking-v0")
model = HER.load('her_ddpg_highway', env=env)

# The model is loaded against the bare environment; only the rollout below
# goes through the recording wrapper.
env = wrap_env(env)

obs = env.reset()

# Evaluate the agent
episode_reward = 0.0
try:
    for _step in range(500):
        # predict() returns a pair; only the action is needed here.
        action, _state = model.predict(obs)
        obs, reward, done, info = env.step(action)
        episode_reward += reward
        success = info.get('is_success', False)
        if done or success:
            print("Reward:", episode_reward, "Success?", success)
            episode_reward = 0.0
            obs = env.reset()
finally:
    # Close the Monitor even if the rollout fails, so the recording is
    # flushed to disk and the rendering resources are released.
    env.close()