# Meta-learning baseline evaluation

To improve RL model performance using meta-learning, we first evaluate the baseline performance: how well a model trained without meta-learning on one region's environment controls the environments of other regions. For example, suppose we have four simulators, each corresponding to one prepared region. Once we have trained our RL model in region 1, we evaluate how well the trained model performs in the other regions: 2, 3, and 4.

## Libraries

In [None]:
# Get CityLearn from GitHub.
# Any existing checkout is deleted first, so re-running this cell always
# starts from a fresh clone of the repository.
!rm -rf ./CityLearn/
!git clone https://github.com/intelligent-environments-lab/CityLearn.git

Cloning into 'CityLearn'...
remote: Enumerating objects: 216, done.[K
remote: Counting objects: 100% (216/216), done.[K
remote: Compressing objects: 100% (172/172), done.[K
remote: Total 953 (delta 68), reused 174 (delta 37), pack-reused 737[K
Receiving objects: 100% (953/953), 36.71 MiB | 22.44 MiB/s, done.
Resolving deltas: 100% (497/497), done.


In [None]:
# Install Stable-Baselines3; the library-loading cell imports SAC and its
# policy/callback helpers from it.
!pip install stable_baselines3



In [None]:
# Mount Google Drive at /gdrive; the pickled agents and the CSV results of
# this notebook are written to and read from that location.
from google.colab import drive
drive.mount('/gdrive')
# List the mount point to confirm that the drive is available.
!ls /gdrive

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).
MyDrive


In [None]:
# Loading libraries
import sys
sys.path.append("./CityLearn")

from citylearn import CityLearn
from reward_function import reward_function_ma
import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np
from agent import RL_Agents_Coord

import os
import gym
import numpy as np
from stable_baselines3 import SAC
from stable_baselines3.sac.policies import MlpPolicy as MlpPolicy_SAC
from stable_baselines3.common.callbacks import BaseCallback
import matplotlib.pyplot as plt
from pathlib import Path
import time

import pandas as pd
import seaborn as sns

import pickle
import copy

In [None]:
import warnings
# Ignore every warning raised through the warnings module from here on, which
# keeps the cell outputs short.
# NOTE(review): this also hides warnings that may point at real problems.
warnings.filterwarnings("ignore")

## Simulation environment

Now we focus on the climate region, so I create a simple function that takes one parameter, the climate region, and returns the corresponding environment simulator. Also, I use MARLISA, which is included in CityLearn, for the future return scoring.

In [None]:
def get_env(climate_zone):
    """Build the CityLearn simulator for one climate zone.

    Args:
        climate_zone: Identifier of the climate zone. Its string form is
            appended to ``./CityLearn/data/Climate_Zone_`` to locate the
            data folder of that zone.

    Returns:
        The 6-tuple ``(env, building_ids, building_state_actions,
        building_info, observations_spaces, actions_spaces)``.
    """
    # Every input file of a zone lives in the zone's own data folder.
    zone_dir = Path(f"./CityLearn/data/Climate_Zone_{climate_zone!s}")

    # Building_1 ... Building_9
    building_ids = [f"Building_{number}" for number in range(1, 10)]
    building_state_actions = './CityLearn/buildings_state_action_space.json'

    env = CityLearn(
        zone_dir,
        zone_dir / 'building_attributes.json',
        zone_dir / 'weather_data.csv',
        zone_dir / 'solar_generation_1kW.csv',
        building_ids,
        buildings_states_actions=building_state_actions,
        cost_function=[
            'ramping',
            '1-load_factor',
            'average_daily_peak',
            'peak_demand',
            'net_electricity_consumption',
        ],
        verbose=0,
        simulation_period=(0, 8760 - 1),
        central_agent=False,
    )

    # Information on building type, climate zone, annual DHW demand, annual
    # cooling demand, annual electricity demand, solar capacity, and
    # correlations among buildings.
    building_info = env.get_building_information()

    # The spaces hold the lower and upper bounds of the states and actions
    # (observations_spaces[i].low / .high); the agent uses them to normalize
    # the variables between 0 and 1.
    observations_spaces, actions_spaces = env.get_state_action_spaces()

    return (
        env,
        building_ids,
        building_state_actions,
        building_info,
        observations_spaces,
        actions_spaces,
    )

In [None]:
# Build the simulator for climate zone 4; this environment is the one the
# agents are trained on in this notebook.
env, building_ids, building_state_actions, building_info, observations_spaces, actions_spaces = get_env(4)

In [None]:
# Display the per-building observation spaces returned by get_env.
observations_spaces

[Box(-19.100000381469727, 1044.0400390625, (26,), float32),
 Box(-19.100000381469727, 1044.0400390625, (26,), float32),
 Box(-19.100000381469727, 1044.0400390625, (25,), float32),
 Box(-19.100000381469727, 1044.0400390625, (26,), float32),
 Box(-19.100000381469727, 1044.0400390625, (26,), float32),
 Box(-19.100000381469727, 1044.0400390625, (26,), float32),
 Box(-19.100000381469727, 1044.0400390625, (26,), float32),
 Box(-19.100000381469727, 1044.0400390625, (26,), float32),
 Box(-19.100000381469727, 1044.0400390625, (26,), float32)]

In [None]:
# Display the per-building action spaces returned by get_env.
actions_spaces

[Box(-0.25, 0.25, (2,), float32),
 Box(-0.3333333432674408, 0.3333333432674408, (2,), float32),
 Box(-0.3333333432674408, 0.3333333432674408, (1,), float32),
 Box(-0.5, 0.5, (1,), float32),
 Box(-0.5, 0.5, (2,), float32),
 Box(-0.3333333432674408, 0.3333333432674408, (2,), float32),
 Box(-0.3333333432674408, 0.3333333432674408, (2,), float32),
 Box(-0.5, 0.5, (2,), float32),
 Box(-0.5, 0.5, (2,), float32)]

## Train an agent on a certain region

In [None]:
# Hyperparameters handed to RL_Agents_Coord when the agents are instantiated;
# the trailing comment names the constructor argument each one is passed as.
bs = 256         # batch_size
tau = 5e-3       # tau
gamma = 0.99     # discount
lr = 3e-4        # lr
hid = [256] * 2  # hidden_dim

# Episode schedule of the training loop.
n_episodes = 4  # --- Just for coding. Change to 12 to run for real
warm_up = 2  # --- Change to 7 to run for real

In [None]:
# Instantiating the control agent(s)
# The positional arguments are the values returned by get_env; the keyword
# arguments combine the hyperparameters defined above (gamma, bs, tau, lr, hid)
# with literal settings.
# NOTE(review): 8760 equals the number of steps in get_env's
# simulation_period=(0, 8760-1), so the 8760-based values below presumably
# count whole simulated episodes -- confirm against RL_Agents_Coord.
agents = RL_Agents_Coord(building_ids, 
                         building_state_actions, 
                         building_info, 
                         observations_spaces, 
                         actions_spaces, 
                         discount = gamma, 
                         batch_size = bs, 
                         replay_buffer_capacity = 1e5, 
                         regression_buffer_capacity = 12*8760, 
                         tau=tau, 
                         lr=lr, 
                         hidden_dim=hid, 
                         start_training=8760*3,  # three times the episode length
                         exploration_period = 8760*3+1,  # one step more than start_training
                         start_regression=8760,  # one episode length
                         information_sharing = True, 
                         pca_compression = .95, 
                         action_scaling_coef=0.5, 
                         reward_scaling = 5., 
                         update_per_step = 1, 
                         iterations_as = 2)

In [None]:
# Cost dictionary returned by env.cost() after each training episode.
cost_by_epoch = []

# The number of episodes can be replaced by a stopping criterion
# (i.e. convergence of the average reward).
# The timer is started once, so the printed simulation time is cumulative
# over all episodes.
start = time.time()

for e in range(n_episodes):
    # Evaluate the deterministic policy after the warm_up period (in epochs).
    # e is 0-based, so episodes 0..warm_up run with deterministic=False.
    is_evaluating = (e > warm_up)
    state = env.reset()
    done = False

    action, coordination_vars = agents.select_action(state, deterministic=is_evaluating)
    while not done:
        next_state, reward, done, _ = env.step(action)
        # The next action is selected before the transition is stored because
        # add_to_buffer also takes the coordination variables of the next step.
        action_next, coordination_vars_next = agents.select_action(next_state, deterministic=is_evaluating)
        agents.add_to_buffer(state, action, reward, next_state, done, coordination_vars, coordination_vars_next)

        state = next_state
        coordination_vars = coordination_vars_next
        action = action_next

    cost = env.cost()
    cost_by_epoch.append(cost)
    print('Loss -', cost, 'Simulation time (min) -', (time.time()-start)/60.0)

Loss - {'ramping': 1.1905167, '1-load_factor': 1.0959079087630101, 'average_daily_peak': 1.0867164, 'peak_demand': 1.1883829, 'net_electricity_consumption': 1.0372682, 'total': 1.1197584116628803} Simulation time (min) - 0.8012892961502075
Loss - {'ramping': 1.1881983, '1-load_factor': 1.1039932046016636, 'average_daily_peak': 1.0988388, 'peak_demand': 1.3005668, 'net_electricity_consumption': 1.0376801, 'total': 1.1458554560677325} Simulation time (min) - 4.883676187197367
Loss - {'ramping': 1.191418, '1-load_factor': 1.0788237975470165, 'average_daily_peak': 1.0924903, 'peak_demand': 1.1500947, 'net_electricity_consumption': 1.0372899, 'total': 1.1100233540223428} Simulation time (min) - 9.862490010261535
Loss - {'ramping': 0.82156605, '1-load_factor': 1.0554486180450022, 'average_daily_peak': 0.9951084, 'peak_demand': 1.1737045, 'net_electricity_consumption': 0.99808437, 'total': 1.008782392046958} Simulation time (min) - 36.53748629490534


In [None]:
import pickle

# Persist the per-episode costs and the trained agents to Google Drive, one
# pickle file per object, written in this order.
for pkl_path, payload in (
    ("/gdrive/My Drive/cost_by_epoch_cz4.pkl", cost_by_epoch),
    ("/gdrive/My Drive/agents_cz4.pkl", agents),
):
    with open(pkl_path, "wb") as f:
        pickle.dump(payload, f)

## Evaluate the costs in other regions using the trained model

In [41]:
# Number of climate zones to evaluate on: the evaluation cells iterate over
# zones 1..n_cz.
n_cz = 3

In [None]:
# Load the trained agents from the pickle file on Google Drive.
# NOTE: pickle.load can execute arbitrary code stored in the file, so only
# load pickle files that you created yourself.
with open("/gdrive/My Drive/agents_cz4.pkl", "rb") as f:
    agents_org = pickle.load(f)  # the evaluation loop works on deep copies of this object
# Display the loaded object to confirm that loading succeeded.
agents_org

<agent.RL_Agents_Coord at 0x7fccd9135cd0>

In [47]:
# One initially empty list of costs per climate zone, keyed by the zone number
# as a string ('1' .. str(n_cz)). Each key gets its own list object.
learning_curves_by_cz = {
    str(zone_id): []
    for zone_id in range(1, n_cz + 1)
}
learning_curves_by_cz

{'1': [], '2': [], '3': []}

In [48]:
# Total cost of the trained agents in each evaluated climate zone, in zone order.
cost_by_cz = []

for cz in range(1, n_cz + 1):
    start = time.time()
    # Build the simulator that corresponds to climate zone cz.
    env, building_ids, building_state_actions, building_info, observations_spaces, actions_spaces = get_env(cz)
    print(f"Climate Zone: {cz}")

    # Evaluate a deep copy so that the loaded agents stay untouched and every
    # zone starts from the same trained state.
    agents = copy.deepcopy(agents_org)

    state = env.reset()
    done = False

    # Roll out one full episode with the deterministic policy. Transitions are
    # not stored (add_to_buffer is never called), so only the selected actions
    # and the latest state are needed.
    action, _ = agents.select_action(state, deterministic=True)
    while not done:
        state, _, done, _ = env.step(action)
        action, _ = agents.select_action(state, deterministic=True)

    # Only the aggregated 'total' entry of the cost dictionary is kept.
    cost = env.cost()['total']
    learning_curves_by_cz[str(cz)].append(cost)

    print('Loss -', cost, 'Simulation time (min) -', (time.time()-start)/60.0)
    cost_by_cz.append(cost)

Climate Zone: 1
Loss - 1.118682422941564 Simulation time (min) - 0.34189003308614097
Climate Zone: 2
Loss - 1.091341571045355 Simulation time (min) - 0.34488989114761354
Climate Zone: 3
Loss - 1.1350296038454384 Simulation time (min) - 0.34100622733434044


In [49]:
learning_curves_by_cz

{'1': [1.118682422941564], '2': [1.091341571045355], '3': [1.1350296038454384]}

In [None]:
# One row per evaluated climate zone. The row labels are derived from the
# number of collected costs so that the index always matches the data:
# assigning a fixed list of four labels fails with a length-mismatch
# ValueError when only three zones were evaluated.
df = pd.DataFrame(cost_by_cz)
df.index = [f"cz{zone}" for zone in range(1, len(cost_by_cz) + 1)]

In [None]:
# Display the cost table.
df

In [None]:
# Save the cost table to Google Drive. The file name carries the zone the
# agent was trained on; for an agent trained on climate zone 3, write to
# "/gdrive/My Drive/eval_baseline_cz3.csv" instead.
df.to_csv("/gdrive/My Drive/eval_baseline_cz4.csv")

In [None]:
# Plot the saved costs from eval_baseline_cz3.csv as a bar chart, one bar per
# row of the CSV (the first CSV column is used as the index).
df = pd.read_csv("/gdrive/My Drive/eval_baseline_cz3.csv", index_col=0)
ax = df.plot.bar(rot=0)
# Place the legend outside the axes, to the right of the plot area.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
# NOTE(review): the y-range is fixed, so any cost above 1.1 is cut off at the
# top of the chart -- confirm that the saved values stay within this range.
ax.set_ylim((0.8, 1.1))
plt.title("Costs by climate zone(cz) using angent trained on cz 3")
# Presumably here so that the cell ends on a statement and the notebook does
# not echo the object returned by plt.title.
pass

In [None]:
# Plot the saved costs from eval_baseline_cz4.csv as a bar chart, one bar per
# row of the CSV (the first CSV column is used as the index).
df = pd.read_csv("/gdrive/My Drive/eval_baseline_cz4.csv", index_col=0)
ax = df.plot.bar(rot=0)
# Place the legend outside the axes, to the right of the plot area.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
# Keep the 0.8-1.10 window whenever the data fits, but raise the upper limit
# (with 2 % headroom) when a cost exceeds it: the costs recorded above reach
# about 1.135 and would otherwise be cut off at the top of the chart.
y_top = max(1.10, df.max(numeric_only=True).max() * 1.02)
ax.set_ylim((0.8, y_top))
plt.title("Costs by climate zone(cz) using angent trained on cz 4")
# Presumably here so that the cell ends on a statement and the notebook does
# not echo the object returned by plt.title.
pass