In [1]:
import gym

from stable_baselines3 import SAC
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.monitor import Monitor

In [2]:
# Settings shared by the training and evaluation environments.
ENV_ID = "LunarLanderContinuous-v2"
EVAL_LOG_DIR = "./logs/continuous/sac"

# Vectorised environment (16 copies) that is later handed to the SAC model.
train_env = make_vec_env(ENV_ID, n_envs=16)

# Separate single environment, wrapped in Monitor, used only by the callback.
eval_env = Monitor(gym.make(ENV_ID))

# Evaluation callback: 10 deterministic episodes per evaluation, no rendering;
# both the evaluation log and the best-model checkpoint go to EVAL_LOG_DIR.
# NOTE(review): eval_freq is presumably counted in callback calls rather than
# in environment timesteps; with 16 parallel envs that would mean one
# evaluation per ~16 000 timesteps. Confirm this spacing is intended.
eval_callback = EvalCallback(
    eval_env,
    n_eval_episodes=10,
    eval_freq=1000,
    deterministic=True,
    render=False,
    log_path=EVAL_LOG_DIR,
    best_model_save_path=EVAL_LOG_DIR,
)


In [3]:
# Restore a previously saved SAC checkpoint and attach the vectorised
# training environment so that training can be resumed on it.
model = SAC.load(
    path="logs/continuous/sac/best_model_286",
    env=train_env,
)

In [4]:
# Train the loaded model for one million timesteps, running eval_callback
# alongside training.
# NOTE(review): the recorded output shows a new "SAC_3" log and total_timesteps
# starting near zero, i.e. the timestep counter restarts for this continued
# run. Confirm that is intended (see learn()'s reset_num_timesteps option).
model.learn(callback=eval_callback, total_timesteps=1_000_000)

Logging to ./runs/sac_lunar_tensorboard/SAC_3
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 84       |
|    ep_rew_mean     | -260     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 5047     |
|    time_elapsed    | 0        |
|    total_timesteps | 1392     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 89       |
|    ep_rew_mean     | -234     |
| time/              |          |
|    episodes        | 8        |
|    fps             | 5000     |
|    time_elapsed    | 0        |
|    total_timesteps | 1600     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 95.2     |
|    ep_rew_mean     | -275     |
| time/              |          |
|    episodes        | 12       |
|    fps             | 5024     |
|    time_elapsed    | 0        |
| 

<stable_baselines3.sac.sac.SAC at 0x25531a67e80>

In [5]:
# Write the in-memory model to the policies folder, then drop the notebook's
# reference to it.
# NOTE(review): what gets saved here is the model as left by model.learn();
# the best-model checkpoint written by the EvalCallback lives under
# ./logs/continuous/sac and may differ. Confirm "train_best" is the right label.
model.save(path="data/policies/LunarLanderContinuous-v2#sac#train_best")
del model