# Stable Baselines 3 Examples with CityLearn
The `WARNING:root:The StableBaselines3Wrapper wrapper is compatible only when env.central_agent=True. Note that env.central_agent has been set to True for compatibility.` is normal

In [1]:
import sys
sys.path.insert(0, '..')
from stable_baselines3.common.env_checker import check_env
from citylearn.citylearn import CityLearnEnv, StableBaselines3Wrapper

# Initialize environment
dataset_name = 'citylearn_challenge_2022_phase_1'
env = CityLearnEnv(dataset_name)

# Wrap for SB3 compatibility
env = StableBaselines3Wrapper(env)

# Perform compatibility check
try:
    check_env(env)
    print('Passed test!! CityLearn is compatible with SB3 when using the StableBaselines3Wrapper.')
finally:
    pass



Passed test!! CityLearn is compatible with SB3 when using the StableBaselines3Wrapper.


In [16]:
from stable_baselines3 import A2C
# Run simulation with A2C policy
model = A2C("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=17520)
# Evaluation
print(env.evaluate())
# Reset
env.reset()

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
------------------------------------
| time/                 |          |
|    fps                | 301      |
|    iterations         | 100      |
|    time_elapsed       | 1        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -7.12    |
|    explained_variance | 0.0609   |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -43.2    |
|    std                | 1.01     |
|    value_loss         | 43.2     |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 311      |
|    iterations         | 200      |
|    time_elapsed       | 3        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -7.13    |
|    explained_variance | 0.00147  |
|    learning_rat

# SAC SB3

In [10]:
from stable_baselines3 import SAC
# Run simulation with SAC policy
# train_freq is episode
"""
:param train_freq: Update the model every ``train_freq`` steps. Alternatively pass a tuple of frequency and unit
        like ``(5, "step")`` or ``(2, "episode")``.
"""
model = SAC('MlpPolicy', env, verbose=1, seed=0, train_freq=8760)
model.learn(total_timesteps=35040, log_interval=4)
# Evaluation
print(env.evaluate())
# Reset
env.reset()

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 8.76e+03  |
|    ep_rew_mean     | -3.01e+04 |
| time/              |           |
|    episodes        | 4         |
|    fps             | 695       |
|    time_elapsed    | 50        |
|    total_timesteps | 35036     |
| train/             |           |
|    actor_loss      | 80.5      |
|    critic_loss     | 943       |
|    ent_coef        | 1         |
|    ent_coef_loss   | -0.0181   |
|    learning_rate   | 0.0003    |
|    n_updates       | 3         |
----------------------------------
              cost_function         value        name     level
0           1 - load_factor  1.297357e+00    District  district
1        average_daily_peak  3.062132e+00    District  district
2          carbon_emissions  2.734781e+06    District  district
3   electricity_consumption  2.744014e+06    District 

array([7.0000000e+00, 7.0000000e+00, 2.4000000e+01, 2.0000000e+01,
       1.8299999e+01, 2.2799999e+01, 2.0000000e+01, 8.4000000e+01,
       8.1000000e+01, 6.8000000e+01, 8.1000000e+01, 0.0000000e+00,
       2.5000000e+01, 9.6400000e+02, 0.0000000e+00, 0.0000000e+00,
       1.0000000e+02, 8.1500000e+02, 0.0000000e+00, 1.7072441e-01,
       2.2758000e+00, 0.0000000e+00, 0.0000000e+00, 2.2758000e+00,
       2.2000000e-01, 2.2000000e-01, 2.2000000e-01, 2.2000000e-01,
       2.1887500e+00, 0.0000000e+00, 0.0000000e+00, 2.1887500e+00,
       2.2000000e-01, 2.2000000e-01, 2.2000000e-01, 2.2000000e-01,
       1.0096232e-07, 0.0000000e+00, 0.0000000e+00, 1.0096232e-07,
       2.2000000e-01, 2.2000000e-01, 2.2000000e-01, 2.2000000e-01,
       2.8191500e+00, 0.0000000e+00, 0.0000000e+00, 2.8191500e+00,
       2.2000000e-01, 2.2000000e-01, 2.2000000e-01, 2.2000000e-01,
       7.7143335e-01, 0.0000000e+00, 0.0000000e+00, 7.7143335e-01,
       2.2000000e-01, 2.2000000e-01, 2.2000000e-01, 2.2000000e

In [11]:
from stable_baselines3 import TD3
# Run simulation with TD3 Policy
model = TD3("MlpPolicy", env, verbose=1, train_freq=8760)
model.learn(total_timesteps=17520, log_interval=10)
# Evaluation
print(env.evaluate())
# Reset
env.reset()

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
              cost_function     value        name     level
0           1 - load_factor  1.095234    District  district
1        average_daily_peak  1.820688    District  district
2          carbon_emissions  1.718907    District  district
3   electricity_consumption  1.730628    District  district
4               peak_demand  1.820688    District  district
5                   pricing  1.730628    District  district
6                   ramping  3.104033    District  district
7           zero_net_energy  1.730628    District  district
8   electricity_consumption  1.000000  Building_1  building
9           zero_net_energy  1.000000  Building_1  building
10         carbon_emissions  1.000000  Building_1  building
11                  pricing  1.000000  Building_1  building
12  electricity_consumption  2.207949  Building_2  building
13          zero_net_energy  2.207949  Building_2  building
14   

array([7.0000000e+00, 7.0000000e+00, 2.4000000e+01, 2.0000000e+01,
       1.8299999e+01, 2.2799999e+01, 2.0000000e+01, 8.4000000e+01,
       8.1000000e+01, 6.8000000e+01, 8.1000000e+01, 0.0000000e+00,
       2.5000000e+01, 9.6400000e+02, 0.0000000e+00, 0.0000000e+00,
       1.0000000e+02, 8.1500000e+02, 0.0000000e+00, 1.7072441e-01,
       2.2758000e+00, 0.0000000e+00, 0.0000000e+00, 2.2758000e+00,
       2.2000000e-01, 2.2000000e-01, 2.2000000e-01, 2.2000000e-01,
       2.1887500e+00, 0.0000000e+00, 0.0000000e+00, 2.1887500e+00,
       2.2000000e-01, 2.2000000e-01, 2.2000000e-01, 2.2000000e-01,
       1.0096232e-07, 0.0000000e+00, 0.0000000e+00, 1.0096232e-07,
       2.2000000e-01, 2.2000000e-01, 2.2000000e-01, 2.2000000e-01,
       2.8191500e+00, 0.0000000e+00, 0.0000000e+00, 2.8191500e+00,
       2.2000000e-01, 2.2000000e-01, 2.2000000e-01, 2.2000000e-01,
       7.7143335e-01, 0.0000000e+00, 0.0000000e+00, 7.7143335e-01,
       2.2000000e-01, 2.2000000e-01, 2.2000000e-01, 2.2000000e

In [15]:
from stable_baselines3 import PPO
# Run simulation with PPO Policy
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=17520)
# Evaluation
print(env.evaluate())
# Reset
env.reset()

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
-----------------------------
| time/              |      |
|    fps             | 518  |
|    iterations      | 1    |
|    time_elapsed    | 3    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 442          |
|    iterations           | 2            |
|    time_elapsed         | 9            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0080549605 |
|    clip_fraction        | 0.0706       |
|    clip_range           | 0.2          |
|    entropy_loss         | -7.08        |
|    explained_variance   | -0.00999     |
|    learning_rate        | 0.0003       |
|    loss                 | 2.09e+03     |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.0156      |
|    st

array([7.0000000e+00, 7.0000000e+00, 2.4000000e+01, 2.0000000e+01,
       1.8299999e+01, 2.2799999e+01, 2.0000000e+01, 8.4000000e+01,
       8.1000000e+01, 6.8000000e+01, 8.1000000e+01, 0.0000000e+00,
       2.5000000e+01, 9.6400000e+02, 0.0000000e+00, 0.0000000e+00,
       1.0000000e+02, 8.1500000e+02, 0.0000000e+00, 1.7072441e-01,
       2.2758000e+00, 0.0000000e+00, 0.0000000e+00, 2.2758000e+00,
       2.2000000e-01, 2.2000000e-01, 2.2000000e-01, 2.2000000e-01,
       2.1887500e+00, 0.0000000e+00, 0.0000000e+00, 2.1887500e+00,
       2.2000000e-01, 2.2000000e-01, 2.2000000e-01, 2.2000000e-01,
       1.0096232e-07, 0.0000000e+00, 0.0000000e+00, 1.0096232e-07,
       2.2000000e-01, 2.2000000e-01, 2.2000000e-01, 2.2000000e-01,
       2.8191500e+00, 0.0000000e+00, 0.0000000e+00, 2.8191500e+00,
       2.2000000e-01, 2.2000000e-01, 2.2000000e-01, 2.2000000e-01,
       7.7143335e-01, 0.0000000e+00, 0.0000000e+00, 7.7143335e-01,
       2.2000000e-01, 2.2000000e-01, 2.2000000e-01, 2.2000000e

In [14]:
from stable_baselines3 import DDPG
# Run simulation with DDPG Policy
model = DDPG("MlpPolicy", env, verbose=1, train_freq=8760)
model.learn(total_timesteps=17520, log_interval=10)
# Evaluation
print(env.evaluate())
# Reset
env.reset()

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
              cost_function     value        name     level
0           1 - load_factor  0.932296    District  district
1        average_daily_peak  1.820688    District  district
2          carbon_emissions  2.001118    District  district
3   electricity_consumption  2.041263    District  district
4               peak_demand  1.820688    District  district
5                   pricing  2.041263    District  district
6                   ramping  2.816585    District  district
7           zero_net_energy  2.041263    District  district
8   electricity_consumption  1.000000  Building_1  building
9           zero_net_energy  1.000000  Building_1  building
10         carbon_emissions  1.000000  Building_1  building
11                  pricing  1.000000  Building_1  building
12  electricity_consumption  1.000000  Building_2  building
13          zero_net_energy  1.000000  Building_2  building
14   

array([7.0000000e+00, 7.0000000e+00, 2.4000000e+01, 2.0000000e+01,
       1.8299999e+01, 2.2799999e+01, 2.0000000e+01, 8.4000000e+01,
       8.1000000e+01, 6.8000000e+01, 8.1000000e+01, 0.0000000e+00,
       2.5000000e+01, 9.6400000e+02, 0.0000000e+00, 0.0000000e+00,
       1.0000000e+02, 8.1500000e+02, 0.0000000e+00, 1.7072441e-01,
       2.2758000e+00, 0.0000000e+00, 0.0000000e+00, 2.2758000e+00,
       2.2000000e-01, 2.2000000e-01, 2.2000000e-01, 2.2000000e-01,
       2.1887500e+00, 0.0000000e+00, 0.0000000e+00, 2.1887500e+00,
       2.2000000e-01, 2.2000000e-01, 2.2000000e-01, 2.2000000e-01,
       1.0096232e-07, 0.0000000e+00, 0.0000000e+00, 1.0096232e-07,
       2.2000000e-01, 2.2000000e-01, 2.2000000e-01, 2.2000000e-01,
       2.8191500e+00, 0.0000000e+00, 0.0000000e+00, 2.8191500e+00,
       2.2000000e-01, 2.2000000e-01, 2.2000000e-01, 2.2000000e-01,
       7.7143335e-01, 0.0000000e+00, 0.0000000e+00, 7.7143335e-01,
       2.2000000e-01, 2.2000000e-01, 2.2000000e-01, 2.2000000e