In [2]:
# get CityLearn from github
# The previous checkout (if any) is deleted first so that re-running this cell
# always starts from a fresh clone instead of failing on an existing directory.
!rm -rf ./CityLearn/
!git clone https://github.com/intelligent-environments-lab/CityLearn.git

Cloning into 'CityLearn'...
remote: Enumerating objects: 216, done.[K
remote: Counting objects: 100% (216/216), done.[K
remote: Compressing objects: 100% (172/172), done.[K
remote: Total 953 (delta 68), reused 174 (delta 37), pack-reused 737[K
Receiving objects: 100% (953/953), 36.71 MiB | 19.46 MiB/s, done.
Resolving deltas: 100% (497/497), done.


In [3]:
!pip install stable_baselines3

Collecting stable_baselines3
[?25l  Downloading https://files.pythonhosted.org/packages/f9/97/f6da6fcaa96934832c02acf95a32309cfa8646b010221f6c7a14bfcf40d0/stable_baselines3-0.11.1-py3-none-any.whl (152kB)
[K     |██▏                             | 10kB 15.0MB/s eta 0:00:01[K     |████▎                           | 20kB 16.5MB/s eta 0:00:01[K     |██████▌                         | 30kB 9.9MB/s eta 0:00:01[K     |████████▋                       | 40kB 7.8MB/s eta 0:00:01[K     |██████████▊                     | 51kB 4.5MB/s eta 0:00:01[K     |█████████████                   | 61kB 5.0MB/s eta 0:00:01[K     |███████████████                 | 71kB 5.1MB/s eta 0:00:01[K     |█████████████████▏              | 81kB 5.4MB/s eta 0:00:01[K     |███████████████████▍            | 92kB 5.2MB/s eta 0:00:01[K     |█████████████████████▌          | 102kB 4.3MB/s eta 0:00:01[K     |███████████████████████▊        | 112kB 4.3MB/s eta 0:00:01[K     |█████████████████████████▉      

In [4]:
# Google Colab only: mount the user's Google Drive at /gdrive and list the
# mount point as a quick check that the mount succeeded.
from google.colab import drive
drive.mount('/gdrive')
!ls /gdrive

Mounted at /gdrive
MyDrive


In [5]:
# Loading libraries
import sys
sys.path.append("./CityLearn")

from citylearn import CityLearn
from reward_function import reward_function_ma
import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np
from agent import RL_Agents_Coord

import os
import gym
import numpy as np
from stable_baselines3 import SAC
from stable_baselines3.sac.policies import MlpPolicy as MlpPolicy_SAC
from stable_baselines3.common.callbacks import BaseCallback
import matplotlib.pyplot as plt
from pathlib import Path
import time

import pandas as pd
import seaborn as sns

import pickle
import copy

import warnings
warnings.filterwarnings("ignore")

In [6]:
def get_env(climate_zone):
  '''Build a decentralised CityLearn environment for one climate zone.

  Parameters
  ----------
  climate_zone: int (or anything accepted by ``str()``)
    Suffix of the ``./CityLearn/data/Climate_Zone_<climate_zone>`` folder
    from which the building, weather and solar files are read.

  Returns
  -------
  tuple
    ``(env, building_ids, building_state_actions, building_info,
    observations_spaces, actions_spaces)`` in that order.
  '''
  # Everything specific to the climate zone lives under this folder
  zone_dir = Path("./CityLearn/data/Climate_Zone_"+str(climate_zone))
  building_state_actions = './CityLearn/buildings_state_action_space.json'

  building_ids = [
      "Building_1", "Building_2", "Building_3",
      "Building_4", "Building_5", "Building_6",
      "Building_7", "Building_8", "Building_9",
  ]
  cost_terms = [
      'ramping',
      '1-load_factor',
      'average_daily_peak',
      'peak_demand',
      'net_electricity_consumption',
  ]

  # One full year of hourly steps, one agent per building (central_agent=False)
  env = CityLearn(
      zone_dir,
      zone_dir / 'building_attributes.json',
      zone_dir / 'weather_data.csv',
      zone_dir / 'solar_generation_1kW.csv',
      building_ids,
      buildings_states_actions=building_state_actions,
      cost_function=cost_terms,
      verbose=0,
      simulation_period=(0, 8760-1),
      central_agent=False,
  )

  # Building type, climate zone, annual DHW / cooling / electricity demand,
  # solar capacity and correlations among buildings
  building_info = env.get_building_information()

  # The spaces carry the lower and upper bounds of the states and actions
  # (observations_spaces[i].low / .high), which the agent uses to normalize
  # the variables between 0 and 1.
  observations_spaces, actions_spaces = env.get_state_action_spaces()

  return (
      env,
      building_ids,
      building_state_actions,
      building_info,
      observations_spaces,
      actions_spaces,
  )

In [7]:
def train_agent(n_episodes = 4, warm_up = 2, cz = 4):

  '''Returns an agent trained at a certain climate zone for a specified number of episodes

  The cost reported by the environment at the end of every episode is printed
  together with the elapsed wall-clock time; it is not returned.

  Parameters
  ----------
  n_episodes: int
    Number of episodes, including learning and evaluation
  warm_up: int
    Index of the last episode in which actions are selected non-deterministically.
    Episodes whose zero-based index is strictly greater than ``warm_up`` call
    ``select_action`` with ``deterministic=True``, i.e. the first ``warm_up + 1``
    episodes are not evaluated deterministically.
  cz: int
    Climate zone where the agent is to be trained.
    Note that this climate zone bears no relationship with the same parameter in the evaluation function.
    Thus, it is possible to train an agent in one climate zone and evaluate it on another climate zone.

  Returns
  -------
  An agent trained on the specified climate zone

  '''

  # Hyperparameters
  bs = 256
  tau = 0.005
  gamma = 0.99
  lr = 0.0003
  hid = [256,256]

  # Create environment for specified climate zone
  env, building_ids, building_state_actions, building_info, observations_spaces, actions_spaces = get_env(cz)

  # Instantiating the control agent(s)
  agents = RL_Agents_Coord(building_ids,
                          building_state_actions,
                          building_info,
                          observations_spaces,
                          actions_spaces,
                          discount = gamma,
                          batch_size = bs,
                          replay_buffer_capacity = 1e5,
                          regression_buffer_capacity = 12*8760,
                          tau=tau,
                          lr=lr,
                          hidden_dim=hid,
                          start_training=8760*3,
                          exploration_period = 8760*3+1,
                          start_regression=8760,
                          information_sharing = True,
                          pca_compression = .95,
                          action_scaling_coef=0.5,
                          reward_scaling = 5.,
                          update_per_step = 1,
                          iterations_as = 2)

  # Learn
  # The number of episodes can be replaced by a stopping criterion (i.e. convergence of the average reward)
  print(f'Number of episodes: {n_episodes}')
  start = time.time()

  for e in range(n_episodes):
    # Deterministic policy only for episodes with index > warm_up
    is_evaluating = (e > warm_up)
    state = env.reset()
    done = False

    # The action (and coordination variables) for the *next* state are needed
    # when storing a transition, so action selection runs one step ahead.
    action, coordination_vars = agents.select_action(state, deterministic=is_evaluating)
    while not done:
      next_state, reward, done, _ = env.step(action)
      action_next, coordination_vars_next = agents.select_action(next_state, deterministic=is_evaluating)
      agents.add_to_buffer(state, action, reward, next_state, done, coordination_vars, coordination_vars_next)

      state = next_state
      coordination_vars = coordination_vars_next
      action = action_next

    cost = env.cost()
    print(e,': Loss -', cost, 'Simulation time (min) -',(time.time()-start)/60.0)

  return agents
  

In [8]:
# Train with the default settings (4 episodes, climate zone 4). The recorded
# output below shows this took roughly an hour. The resulting global `agent`
# is what the evaluation cells further down rely on.
agent = train_agent()

Number of episodes: 4
0 : Loss - {'ramping': 1.1921318, '1-load_factor': 1.0913021875795699, 'average_daily_peak': 1.0890987, 'peak_demand': 1.2048324, 'net_electricity_consumption': 1.036682, 'total': 1.1228094163214561} Simulation time (min) - 1.164693574110667
1 : Loss - {'ramping': 1.1914583, '1-load_factor': 1.1020179201472837, 'average_daily_peak': 1.0933446, 'peak_demand': 1.1925836, 'net_electricity_consumption': 1.0373905, 'total': 1.123358973052284} Simulation time (min) - 6.18104662100474
2 : Loss - {'ramping': 1.1853551, '1-load_factor': 1.1134984223883568, 'average_daily_peak': 1.0924349, 'peak_demand': 1.1995869, 'net_electricity_consumption': 1.0372198, 'total': 1.1256190007694804} Simulation time (min) - 12.392118767897289
3 : Loss - {'ramping': 0.81367445, '1-load_factor': 1.0524643322181817, 'average_daily_peak': 0.99137455, 'peak_demand': 1.1775756, 'net_electricity_consumption': 0.9979965, 'total': 1.0066170863380455} Simulation time (min) - 63.28154309193293


In [9]:
def evaluate(cz, agent = None, n_episodes = 4):

  '''
  Evaluate policy on given climate zone.
  This function gets a pre-trained agent such as the one outputted by function **train_agent**
  and evaluates its loss function on any given climate zone, which may or may not be the same
  in which the agent was trained in the first place.

  Parameters:
  ----------
  cz: int
    The climate zone where the agent's performance is to be evaluated
  agent: agent, optional
    An agent whose performance is to be evaluated on a specified climate zone.
    This can be the output of the **train_agent** function.
    When omitted, a fresh deep copy of the notebook-level variable ``agent`` is
    taken at call time, so the global agent itself is never touched and every
    call starts from the same trained state. An agent passed explicitly is used
    as is (no copy is made).
  n_episodes: int
    The number of episodes over which the evaluation is to be made

  Returns:
  -------
  list
  A list containing the total cost reported by the environment at the end of each episode.
  In other words, the y-values of the learning curve

  Raises:
  ------
  NameError
    If no agent is passed and no notebook-level ``agent`` exists yet.

  '''

  if agent is None:
    # Resolved here rather than in the signature: a default of
    # copy.deepcopy(agent) would be computed only once, when the def cell runs,
    # and would go stale as soon as the global agent is retrained.
    if 'agent' not in globals():
      raise NameError("name 'agent' is not defined; train an agent first or pass one explicitly")
    agent = copy.deepcopy(globals()['agent'])

  cost_by_epoch = []

  # Create environment for specified climate zone
  env, building_ids, building_state_actions, building_info, observations_spaces, actions_spaces = get_env(cz)

  # The number of episodes can be replaced by a stopping criterion (i.e. convergence of the average reward)
  print(f'Number of episodes: {n_episodes}')
  start = time.time()

  for e in range(n_episodes):
    state = env.reset()
    done = False

    # Evaluation always uses the deterministic policy and stores no transitions
    action, _ = agent.select_action(state, deterministic=True)
    while not done:
      next_state, _, done, _ = env.step(action)
      action, _ = agent.select_action(next_state, deterministic=True)

    cost = env.cost()['total']
    cost_by_epoch.append(cost)
    print(e,': Loss -', cost, 'Simulation time (min) -',(time.time()-start)/60.0)

  return cost_by_epoch
  

  

In [10]:
#learning_curve_cz1 = evaluate(1)

Number of episodes: 4
0 : Loss - 0.9274882806684678 Simulation time (min) - 4.806350135803223
1 : Loss - 0.9258588066901243 Simulation time (min) - 9.327413423856099
2 : Loss - 0.926935663809829 Simulation time (min) - 13.816188526153564
3 : Loss - 0.9265175797757672 Simulation time (min) - 18.28842971722285


In [None]:
# Number of climate zones to evaluate the trained agent on (zones 1..n_cz)
n_cz = 4

# One learning curve (list of per-episode total costs) per climate zone,
# keyed by the zone number as a string. Pre-filled with empty lists so that
# every zone has an entry even if the loop below is interrupted.
learning_curves = {str(cz): [] for cz in range(1,n_cz + 1)}

for cz in range(1, n_cz + 1):
  print(f'*** climate zone {cz} ***')
  # Store under the same (string) key type used to initialise the dict;
  # an int key would leave the pre-filled entries empty.
  learning_curves[str(cz)] = evaluate(cz)

*** climate zone 1 ***
Number of episodes: 4
0 : Loss - 0.9289516131965658 Simulation time (min) - 4.8157745440800985
1 : Loss - 0.9247851448575662 Simulation time (min) - 9.246354289849599
2 : Loss - 0.9269425851711357 Simulation time (min) - 13.676181856791178
3 : Loss - 0.926651797520919 Simulation time (min) - 18.02609800895055
*** climate zone 2 ***
Number of episodes: 4


In [None]:
# Plot one line per climate zone from the `learning_curves` dict filled in above.
fig, ax = plt.subplots()
for zone, curve in learning_curves.items():
  # Zones that were never evaluated (e.g. an interrupted run) hold an empty list
  if len(curve) == 0:
    continue
  ax.plot(curve, label=f'climate zone {zone}')
ax.set_xlabel('Episode')
ax.set_ylabel('Total cost')
ax.set_title('Evaluation cost per episode by climate zone')
ax.legend()
plt.show()