In [1]:
# # Stable Baselines only supports tensorflow 1.x for now
# %tensorflow_version 1.x
# !apt install swig cmake libopenmpi-dev zlib1g-dev
# !pip install stable-baselines[mpi]==2.10.0 box2d box2d-kengz
!pip install stable_baselines3

Collecting stable_baselines3
[?25l  Downloading https://files.pythonhosted.org/packages/76/7c/ec89fd9a51c2ff640f150479069be817136c02f02349b5dd27a6e3bb8b3d/stable_baselines3-0.10.0-py3-none-any.whl (145kB)
[K     |██▎                             | 10kB 22.3MB/s eta 0:00:01[K     |████▌                           | 20kB 6.7MB/s eta 0:00:01[K     |██████▊                         | 30kB 7.8MB/s eta 0:00:01[K     |█████████                       | 40kB 8.4MB/s eta 0:00:01[K     |███████████▎                    | 51kB 7.2MB/s eta 0:00:01[K     |█████████████▌                  | 61kB 7.7MB/s eta 0:00:01[K     |███████████████▊                | 71kB 8.7MB/s eta 0:00:01[K     |██████████████████              | 81kB 9.1MB/s eta 0:00:01[K     |████████████████████▏           | 92kB 8.4MB/s eta 0:00:01[K     |██████████████████████▌         | 102kB 9.1MB/s eta 0:00:01[K     |████████████████████████▊       | 112kB 9.1MB/s eta 0:00:01[K     |███████████████████████████     |

In [2]:
# Mount Google Drive at /gdrive so the notebook can reach files stored there,
# then list the mount point as a quick check that the mount worked.
from google.colab import drive
drive.mount('/gdrive')
!ls /gdrive

Mounted at /gdrive
'My Drive'


In [3]:
### Switch the working directory to the CityLearn folder on Drive.
### The later cells use relative paths (e.g. "data/Climate_Zone_1") and import
### `citylearn` and `agent` (presumably modules located in this folder), so edit
### the path below to match where your copy lives before running the notebook.
%cd /gdrive/My\ Drive/Colab\ Notebooks/RL/citylearn/CityLearn

/gdrive/My Drive/Colab Notebooks/RL/citylearn/CityLearn


In [4]:
# Run this again after editing submodules so Colab uses the updated versions
from citylearn import  CityLearn
import matplotlib.pyplot as plt
from pathlib import Path
from agent import RL_Agents_Coord
import numpy as np                                                                                                                                                                                      
import csv
import time
import re
import pandas as pd
import torch
from joblib import dump, load

In [5]:
# Load environment
# All input files are looked up relative to the folder of the chosen climate zone.
climate_zone = 1
data_path = Path("data/Climate_Zone_" + str(climate_zone))
building_attributes = data_path.joinpath('building_attributes.json')
weather_file = data_path.joinpath('weather_data.csv')
solar_profile = data_path.joinpath('solar_generation_1kW.csv')
building_state_actions = 'buildings_state_action_space.json'

# Buildings included in the simulation.
building_id = [
    "Building_1",
    "Building_2",
    "Building_3",
    "Building_4",
    "Building_5",
    "Building_6",
    "Building_7",
    "Building_8",
    "Building_9",
]

# Cost terms the environment is asked to evaluate.
objective_function = [
    'ramping',
    '1-load_factor',
    'average_daily_peak',
    'peak_demand',
    'net_electricity_consumption',
    'quadratic',
]

# simulation_period is given as (first step, last step); 8760 = 365 * 24, so this
# presumably covers one year of hourly steps -- confirm against the CityLearn version in use.
env = CityLearn(
    data_path,
    building_attributes,
    weather_file,
    solar_profile,
    building_id,
    buildings_states_actions=building_state_actions,
    cost_function=objective_function,
    verbose=0,
    simulation_period=(0, 8760 - 1),
)

# The spaces contain the lower and upper bounds of the states and actions, to be
# provided to the agent so it can normalize the variables between 0 and 1.
# The bounds can be read with observations_spaces[i].low or .high
observations_spaces, actions_spaces = env.get_state_action_spaces()

# Provides information on Building type, Climate Zone, Annual DHW demand, Annual Cooling Demand,
# Annual Electricity Demand, Solar Capacity, and correlations among buildings
building_info = env.get_building_information()

# Per-episode cost results; filled in by the training loop in the next cell.
cost_list = list()

In [6]:
# Hyperparameters
bs = 256
tau = 0.005
gamma = 0.99
lr = 0.0003
hid = [256,256]

n_episodes = 12

# 8760 matches the simulation_period=(0, 8760-1) the environment was created with,
# i.e. presumably the number of steps in one episode -- confirm if that period changes.
steps_per_episode = 8760

# Index (0-based) of the first episode that uses deterministic action selection.
# Episodes 0-7 call select_action with deterministic=False, episodes 8 onwards with True.
first_eval_episode = 8

# Instantiating the control agent(s)
agents = RL_Agents_Coord(
    building_id,
    building_state_actions,
    building_info,
    observations_spaces,
    actions_spaces,
    discount = gamma,
    batch_size = bs,
    replay_buffer_capacity = 1e5,
    regression_buffer_capacity = 12*steps_per_episode,
    tau = tau,
    lr = lr,
    hidden_dim = hid,
    start_training = steps_per_episode*3,
    exploration_period = steps_per_episode*3+1,
    start_regression = steps_per_episode,
    information_sharing = True,
    pca_compression = .95,
    action_scaling_coef = 0.5,
    reward_scaling = 5.,
    update_per_step = 1,
    iterations_as = 2,
)

# The agents above are rebuilt every time this cell runs, so start the cost history
# from scratch as well; otherwise re-running the cell would append the new run's
# costs after the previous run's and the exported results would mix both.
cost_list = []

# The number of episodes can be replaced by a stopping criterion (i.e. convergence of the average reward)
start = time.time()
for e in range(n_episodes):
    is_evaluating = (e >= first_eval_episode)
    state = env.reset()
    done = False

    # The action for the current state is selected before stepping so that each
    # buffer entry can carry the coordination variables of both the current and
    # the next state.
    action, coordination_vars = agents.select_action(state, deterministic=is_evaluating)
    while not done:
        next_state, reward, done, _ = env.step(action)
        action_next, coordination_vars_next = agents.select_action(next_state, deterministic=is_evaluating)
        agents.add_to_buffer(state, action, reward, next_state, done, coordination_vars, coordination_vars_next)

        state = next_state
        coordination_vars = coordination_vars_next
        action = action_next

    # Evaluate the episode cost once and reuse it for both the history and the log line
    # (assumes env.cost() returns the same value on repeated calls -- TODO confirm).
    episode_cost = env.cost()
    cost_list.append(episode_cost)
    print('Loss -',episode_cost, 'Simulation time (min) -',(time.time()-start)/60.0)

Loss - {'ramping': 1.1906729, '1-load_factor': 1.0371022319655803, 'average_daily_peak': 1.0648501, 'peak_demand': 1.226891, 'net_electricity_consumption': 1.0560241, 'quadratic': 1.1760567, 'total': 1.125266176062511} Simulation time (min) - 1.0585864424705504
Loss - {'ramping': 1.190852, '1-load_factor': 1.0391787705647026, 'average_daily_peak': 1.0716896, 'peak_demand': 1.1962703, 'net_electricity_consumption': 1.0558783, 'quadratic': 1.1750767, 'total': 1.1214909622706817} Simulation time (min) - 5.580171577135721
Loss - {'ramping': 1.1738604, '1-load_factor': 1.033453850504361, 'average_daily_peak': 1.0610878, 'peak_demand': 1.1872166, 'net_electricity_consumption': 1.0556396, 'quadratic': 1.1741022, 'total': 1.1142267632081446} Simulation time (min) - 10.916501184304556
Loss - {'ramping': 0.8488253, '1-load_factor': 0.9694345140226686, 'average_daily_peak': 0.9365428, 'peak_demand': 1.1558892, 'net_electricity_consumption': 1.0050905, 'quadratic': 1.0442569, 'total': 0.9933398588

In [7]:
# Write the collected per-episode costs to a CSV file in the working directory,
# then hand that file to the browser for download through Colab's file helper.
results_csv = 'multi_region1.csv'

# cost_list holds one entry per finished episode (the recorded output shows each
# entry is a dict of cost terms), so the frame gets one row per episode.
df = pd.DataFrame(cost_list)
df.to_csv(results_csv)

from google.colab import files
files.download(results_csv)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>