In [1]:
# # Stable Baselines only supports tensorflow 1.x for now
# %tensorflow_version 1.x
# !apt install swig cmake libopenmpi-dev zlib1g-dev
# !pip install stable-baselines[mpi]==2.10.0 box2d box2d-kengz
!pip install stable_baselines3

Collecting stable_baselines3
[?25l  Downloading https://files.pythonhosted.org/packages/76/7c/ec89fd9a51c2ff640f150479069be817136c02f02349b5dd27a6e3bb8b3d/stable_baselines3-0.10.0-py3-none-any.whl (145kB)
[K     |██▎                             | 10kB 26.1MB/s eta 0:00:01[K     |████▌                           | 20kB 5.8MB/s eta 0:00:01[K     |██████▊                         | 30kB 6.9MB/s eta 0:00:01[K     |█████████                       | 40kB 7.4MB/s eta 0:00:01[K     |███████████▎                    | 51kB 6.8MB/s eta 0:00:01[K     |█████████████▌                  | 61kB 7.7MB/s eta 0:00:01[K     |███████████████▊                | 71kB 7.6MB/s eta 0:00:01[K     |██████████████████              | 81kB 8.6MB/s eta 0:00:01[K     |████████████████████▏           | 92kB 7.9MB/s eta 0:00:01[K     |██████████████████████▌         | 102kB 7.9MB/s eta 0:00:01[K     |████████████████████████▊       | 112kB 7.9MB/s eta 0:00:01[K     |███████████████████████████     |

In [2]:
# Mount Google Drive at /gdrive so files stored there are reachable from this runtime.
from google.colab import drive
drive.mount('/gdrive')
# List the mount point as a quick check that the mount succeeded.
!ls /gdrive

Mounted at /gdrive
'My Drive'


In [3]:
### Switch the working directory to the CityLearn checkout on the mounted Drive.
### Later cells use relative paths (e.g. "data/Climate_Zone_<n>") and import
### `citylearn` / `agent`, presumably from this directory, so edit the path below
### to match your own Drive layout before running anything else.
%cd /gdrive/My\ Drive/Colab\ Notebooks/RL/citylearn/CityLearn

/gdrive/My Drive/Colab Notebooks/RL/citylearn/CityLearn


In [4]:
# Run this again after editing submodules so Colab uses the updated versions
from citylearn import  CityLearn
import matplotlib.pyplot as plt
from pathlib import Path
from agent import RL_Agents_Coord
import numpy as np                                                                                                                                                                                      
import csv
import time
import re
import pandas as pd
import torch
from joblib import dump, load

In [5]:
# Load environment
# Input files are resolved relative to the current working directory.
climate_zone = 2
data_path = Path("data") / f"Climate_Zone_{climate_zone}"
building_attributes = data_path / 'building_attributes.json'
weather_file = data_path / 'weather_data.csv'
solar_profile = data_path / 'solar_generation_1kW.csv'
building_state_actions = 'buildings_state_action_space.json'

# Buildings taking part in the simulation: "Building_1" ... "Building_9".
building_id = [f"Building_{number}" for number in range(1, 10)]

# Names of the cost terms requested from the environment.
objective_function = [
    'ramping',
    '1-load_factor',
    'average_daily_peak',
    'peak_demand',
    'net_electricity_consumption',
    'quadratic',
]

# Simulate hourly steps 0..8759 (8760 steps in total).
# NOTE(review): the meaning of reward=3 is defined inside this CityLearn
# version and is not visible from the notebook -- confirm and document it.
env = CityLearn(
    data_path,
    building_attributes,
    weather_file,
    solar_profile,
    building_id,
    buildings_states_actions=building_state_actions,
    cost_function=objective_function,
    verbose=0,
    simulation_period=(0, 8760 - 1),
    reward=3,
)

# These spaces contain the lower and upper bounds of the states and actions,
# which are provided to the agent to normalize the variables between 0 and 1.
# The bounds can be read with observations_spaces[i].low or .high
observations_spaces, actions_spaces = env.get_state_action_spaces()

# Provides information on Building type, Climate Zone, Annual DHW demand,
# Annual Cooling Demand, Annual Electricity Demand, Solar Capacity, and
# correlations among buildings
building_info = env.get_building_information()

# Collects one cost result per episode; filled in by the training loop.
cost_list = []

In [6]:
# Hyperparameters
bs = 256
tau = 0.005
gamma = 0.99
lr = 0.0003
hid = [256,256]

n_episodes = 12
# Episodes with index >= first_eval_episode act deterministically; the first
# 8 episodes (indices 0-7) sample actions stochastically.
first_eval_episode = 8

# Start from an empty result list every time this cell runs. The agents are
# rebuilt below, so costs left over from an earlier run of this cell would
# otherwise be mixed into the exported results.
cost_list = []

# Instantiating the control agent(s)
# 8760 = 365 * 24 hourly steps; the environment setup cell configures
# simulation_period=(0, 8760-1), so this is presumably one episode's length.
agents = RL_Agents_Coord(
    building_id,
    building_state_actions,
    building_info,
    observations_spaces,
    actions_spaces,
    discount = gamma,
    batch_size = bs,
    replay_buffer_capacity = 1e5,
    regression_buffer_capacity = 12*8760,
    tau = tau,
    lr = lr,
    hidden_dim = hid,
    start_training = 8760*3,
    exploration_period = 8760*3+1,
    start_regression = 8760,
    information_sharing = True,
    pca_compression = .95,
    action_scaling_coef = 0.5,
    reward_scaling = 5.,
    update_per_step = 1,
    iterations_as = 2,
)

# The number of episodes can be replaced by a stopping criterion (i.e. convergence of the average reward)
start = time.time()
for e in range(n_episodes):
    is_evaluating = e >= first_eval_episode
    state = env.reset()
    done = False

    # The action for the first step is chosen before entering the loop; inside
    # the loop the next action is selected before the transition is stored,
    # because add_to_buffer also takes the coordination variables that belong
    # to the next state.
    action, coordination_vars = agents.select_action(state, deterministic=is_evaluating)
    while not done:
        next_state, reward, done, _ = env.step(action)
        action_next, coordination_vars_next = agents.select_action(next_state, deterministic=is_evaluating)
        agents.add_to_buffer(state, action, reward, next_state, done, coordination_vars, coordination_vars_next)

        state = next_state
        coordination_vars = coordination_vars_next
        action = action_next

    # Compute the episode cost once and reuse it for both the log and the record.
    episode_cost = env.cost()
    cost_list.append(episode_cost)
    print('Loss -', episode_cost, 'Simulation time (min) -', (time.time()-start)/60.0)

Loss - {'ramping': 1.1380447, '1-load_factor': 1.0878189330472314, 'average_daily_peak': 1.0829412, 'peak_demand': 1.2416575, 'net_electricity_consumption': 1.0423802, 'quadratic': 1.153829, 'total': 1.1244452517094505} Simulation time (min) - 0.9592936833699545
Loss - {'ramping': 1.1515429, '1-load_factor': 1.0827016432265488, 'average_daily_peak': 1.0819393, 'peak_demand': 1.2349476, 'net_electricity_consumption': 1.0429145, 'quadratic': 1.157394, 'total': 1.1252400013999815} Simulation time (min) - 5.418544622262319
Loss - {'ramping': 1.1449198, '1-load_factor': 1.074637323760349, 'average_daily_peak': 1.0880705, 'peak_demand': 1.2252344, 'net_electricity_consumption': 1.0433389, 'quadratic': 1.1602765, 'total': 1.1227462341625423} Simulation time (min) - 11.06928597688675
Loss - {'ramping': 0.87151194, '1-load_factor': 1.050921808190265, 'average_daily_peak': 0.9851691, 'peak_demand': 1.2293214, 'net_electricity_consumption': 0.99772865, 'quadratic': 1.0427047, 'total': 1.029559594

In [7]:
# Write the per-episode costs to a CSV in the working directory, then ask
# Colab to download that file through the browser.
results_csv = 'reward3_single_region2.csv'

df = pd.DataFrame(cost_list)
df.to_csv(results_csv)

# Imported only at this point so the CSV is already on disk even when the
# Colab download helper is unavailable.
from google.colab import files
files.download(results_csv)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>