In [1]:
from citylearn import  CityLearn, building_loader, auto_size
from energy_models import HeatPump, EnergyStorage, Building
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
np.random.seed(3)

import ray 
import ray.rllib.agents.ppo as ppo
from ray.tune.logger import pretty_print


import math
import random

import gym
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.distributions import Normal


import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
#Use only one building for SINGLE AGENT environment, unmark multiple building IDs to simulate MULTI-AGENT environment. In the multi-agent environment
#the reward of each agent depend partially on the actions of the other agents or buildings (see reward_function.py)
building_ids = [8]#, 5, 9, 16, 21, 26, 33, 36, 49, 59]

In [3]:
'''
Building the RL environment with heating and cooling loads and weather files
CityLearn
    Weather file
    Buildings
        File with heating and cooling demands
        CoolingDevices (HeatPump)
        CoolingStorages (EnergyStorage)
'''

data_folder = Path("data/")

demand_file = data_folder / "AustinResidential_TH.csv"
weather_file = data_folder / 'Austin_Airp_TX-hour.csv'

heat_pump, heat_tank, cooling_tank = {}, {}, {}

#Ref: Assessment of energy efficiency in electric storage water heaters (2008 Energy and Buildings)
loss_factor = 0.19/24
buildings = []
for uid in building_ids:
    heat_pump[uid] = HeatPump(nominal_power = 9e12, eta_tech = 0.22, t_target_heating = 45, t_target_cooling = 10)
    heat_tank[uid] = EnergyStorage(capacity = 9e12, loss_coeff = loss_factor)
    cooling_tank[uid] = EnergyStorage(capacity = 9e12, loss_coeff = loss_factor)
    buildings.append(Building(uid, heating_storage = heat_tank[uid], cooling_storage = cooling_tank[uid], heating_device = heat_pump[uid], cooling_device = heat_pump[uid]))
    buildings[-1].state_space(np.array([1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 40.0, 1.001]), np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 17.0, -0.001]))
    buildings[-1].action_space(np.array([0.5]), np.array([-0.3]))
    
building_loader(demand_file, weather_file, buildings)  
auto_size(buildings, t_target_heating = 45, t_target_cooling = 10)

env = CityLearn(demand_file, weather_file, buildings = buildings, time_resolution = 1, simulation_period = (3500,6000))

In [4]:
from reward_function import reward_function
observations_space, actions_space = [],[]
for building in buildings:
    observations_space.append(building.observation_spaces)
    actions_space.append(building.action_spaces)

In [5]:
from reward_function import reward_function

In [6]:
alpha=0.9
w=np.ones((5,24))


def generate_state_action_matrix(state,action_shape,state_shape):
    ns=len(state)
    x=np.zeros((state_shape,action_shape))
    p=np.argmax(state)
    x[int(p),:]=1
    return(x)
    
def get_q_values(x,w,num_actions):
    q_s=np.matmul(w.reshape(num_actions,24),x)
    return(q_s)

In [7]:
#on policy sarsa
#on policy sarsa with epsilon greedy much worse than greedy

action_space=[-0.2,-0.1,0,0.1,0.2]
cost, cum_reward = {}, {}
gamma=0.9
alpha=0.1
na=5
from collections import defaultdict
state_action=defaultdict()
w=np.ones((5,24))
for ep in range(10):
    q=[]
    states=[]
    state = env.reset()
    state=state[0][:24]
    states.append(state)
    done = False

    
    q_s_a_s=get_q_values(state,w,5)
    ac=np.argmax(q_s_a_s)
    action=action_space[ac]
    q.append(np.max(q_s_a_s))
    while not done:
        next_state, reward, done, _ = env.step([[action]])
        reward = reward_function(reward)[0] 
        
        p=np.argmax(state)
        state_action[int(p)]=action
        next_state = next_state[0][:24]
        
        q_s_dash_a_s=get_q_values(next_state,w,5)
        
        epsilon=np.random.rand(1)
        temp=np.zeros((5,24))
        temp[ac,:]=state
        
        if epsilon<=0.9:
            ac=np.argmax(q_s_dash_a_s)
            action_dash=action_space[ac]
            q_s_dash=np.max(q_s_dash_a_s)
            
            
        else:
            ac=np.random.choice(na)
            action_dash=action_space[ac]
            q_s_dash=q_s_dash_a_s[ac]
       
    
        q_s=q[-1]
        w=w+alpha*(reward+gamma*q_s_dash-q_s)*(temp)
        
        q.append(q_s_dash)
        state=next_state
        action=action_dash
 
    cost[ep] = env.cost()
    print(cost[ep])
        
        
    
    

196.21583747489592
195.78251600096382
201.95487124088402
203.23997682765048
205.2194236510215
204.78875581525443
207.56207460807508
205.04493393345956
202.22061465387577
205.0604728141771


In [8]:
#on policy sarsa greedy
#on policy sarsa with epsilon greedy much worse than greedy
cost, cum_reward = {}, {}
gamma=0.9
alpha=0.1
na=5
from collections import defaultdict
state_action=defaultdict()
w=np.random.rand(24)
for ep in range(100):
    q=[]
    states=[]
    state = env.reset()
    state=state[0][:24]
    p=np.argmax(state)
    states.append(state)
    done = False
    
    
    x_s_a=generate_state_action_matrix(state,5,24)
    #print(x_s_a)
    q_s_a_s=get_q_values(state,w,5)
    print(q_s_a_s)
    action=action_space[np.argmax(q_s_a_s[0])]
    #print(action)
    x=x_s_a[:,np.argmax(q_s_a_s[0])]
    #print(x)
    q.append(np.max(q_s_a_s[0]))
    while not done:
        next_state, reward, done, _ = env.step([[action]])
        reward = reward_function(reward)[0] 
        #print('rea',reward)
        p=np.argmax(state)
        print('p',p)
        print(action)
        state_action[int(p)]=action
        next_state = next_state[0][:24]
        
        x_s_dash_a=generate_state_action_matrix(state,5,24)
        q_s_dash_a_s=get_q_values(next_state,w,5)
        temp=np.zeros((5,24))
        temp[ac,:]=state
        epsilon=np.random.rand(1)
        if epsilon<=1:
            action_dash=action_space[np.argmax(q_s_dash_a_s[0])]
            q_s_dash=np.max(q_s_dash_a_s[0])
            x_=x_s_dash_a[:,np.argmax(q_s_dash_a_s[0])]
            
        else:
            ac=np.random.choice(na)
            action_dash=action_space[ac]
            q_s_dash=q_s_dash_a_s[0][ac]
            x_=x_s_dash_a[:,ac]
        #print(np.matmul(w,x))
        #print(np.matmul(w,x_s_dash_a[np.argmax(q_s_dash_a_s)]))
        q_s=q[-1]
        w=w+alpha*(reward+gamma*q_s_dash-q_s)*(temp)
        print(w)
        q.append(q_s_dash)
        
        action=action_dash
        x=x_
    cost[ep] = env.cost()
    print(cost[ep])
        
        
    
    

ValueError: cannot reshape array of size 24 into shape (5,24)

In [16]:
#Qlearning
cost, cum_reward = {}, {}
gamma=0.9
alpha=0.1
na=5
w=np.ones((5,24))
for ep in range(1):
    print(ep)
    q=[]
    states=[]
    state = env.reset()
    state=state[0][:24]
    states.append(state)
    done = False
    
    q_s_a_s=get_q_values(state,w,5)
    ac=np.argmax(q_s_a_s)
    action=action_space[ac]
    q.append(np.max(q_s_a_s))

    while not done:
        
        next_state, reward, done, _ = env.step([[action]])
        reward = reward_function(reward)[0] 
        p=np.argmax(state)
        state_action[int(p)]=action
        next_state = next_state[0][:24]
        
        x_s_dash_a=generate_state_action_matrix(state,5,24)
        q_s_dash_a_s=get_q_values(next_state,w,5)
        
        epsilon=np.random.rand(1)
        action_dash=action_space[np.argmax(q_s_dash_a_s[0])]
        q_s_dash=np.max(q_s_dash_a_s[0])
        x_=x_s_dash_a[:,np.argmax(q_s_dash_a_s[0])]
        q_s=q[-1]
        #print(w)
        temp=np.zeros((5,24))
        temp[ac,:]=state
        w=w+alpha*(reward+gamma*q_s_dash-q_s)*(temp)
        #print(reward+gamma*q_s_dash-q_s)

        q.append(q_s_dash)
        
        action=action_dash
        x=x_
      
    cost[ep] = env.cost()
    print(cost[ep])
        
        
    
    

[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]
-0.10007085694598639
[[1.         1.         1.         1.         1.         1.
  1.         1.         1.         1.         1.         1.
  1.         1.         1.         1.         1.         1.
  1.         1.         0.98999291 1.         1.         1.        ]
 [1.         1.         1.         1.         1.         1.
  1.         1.         1.         1.         1.         1.
  1.         1.         1.         1.         1.         1.
  1.         1.         1.         1.         1.         1.        ]
 [1.         1.         1.         1.         1.         1.
  1.         1.         1.         1.        

   1.          1.          1.          1.          1.          1.        ]]
-0.1003720722864484
[[ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.         -0.60412458  1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.  

   1.          1.          1.          1.          1.          1.        ]]
-0.10000144699073077
[[ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.         -1.99654912  1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1. 

[[ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.         -3.55794279  1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.        

[[ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.         -4.62117736  1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.        

-0.10001668213363579
[[ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.         -5.42218332  1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1

   1.          1.          1.          1.          1.          1.        ]]
-0.10020583596458066
[[ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.         -6.51461861  1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1. 

   1.          1.          1.          1.          1.          1.        ]]
-0.10034975033511939
[[ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.         -7.44129879  1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1. 

-0.10085303010522839
[[ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.         -8.18991457  1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1

   1.          1.          1.          1.          1.          1.        ]]
-0.10011407321569732
[[ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.         -8.94904369  1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1. 

-0.10130350912018216
[[ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.         -9.69852311  1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.        ]
 [ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.          1.          1.
   1.          1.          1

    1.           1.           1.           1.        ]]
-0.1011264516802719
[[  1.           1.           1.           1.           1.
    1.           1.           1.           1.           1.
    1.           1.           1.           1.           1.
    1.           1.           1.           1.           1.
  -10.64568566   1.           1.           1.        ]
 [  1.           1.           1.           1.           1.
    1.           1.           1.           1.           1.
    1.           1.           1.           1.           1.
    1.           1.           1.           1.           1.
    1.           1.           1.           1.        ]
 [  1.           1.           1.           1.           1.
    1.           1.           1.           1.           1.
    1.           1.           1.           1.           1.
    1.           1.           1.           1.           1.
    1.           1.           1.           1.        ]
 [  1.           1.           1.           1.      

    1.           1.           1.           1.        ]]
-0.10004990166829297
[[  1.           1.           1.           1.           1.
    1.           1.           1.           1.           1.
    1.           1.           1.           1.           1.
    1.           1.           1.           1.           1.
  -11.31685251   1.           1.           1.        ]
 [  1.           1.           1.           1.           1.
    1.           1.           1.           1.           1.
    1.           1.           1.           1.           1.
    1.           1.           1.           1.           1.
    1.           1.           1.           1.        ]
 [  1.           1.           1.           1.           1.
    1.           1.           1.           1.           1.
    1.           1.           1.           1.           1.
    1.           1.           1.           1.           1.
    1.           1.           1.           1.        ]
 [  1.           1.           1.           1.     

KeyboardInterrupt: 

In [10]:
#Qlearning
cost, cum_reward = {}, {}
gamma=0.9
alpha=0.1
na=5
w=np.ones((5,24))
for ep in range(10):
    q=[]
    states=[]
    state = env.reset()
    state=state[0][:24]
    states.append(state)
    done = False
    
    q_s_a_s=get_q_values(state,w,5)
    ac=np.argmax(q_s_a_s)
    action=action_space[ac]
    q.append(np.max(q_s_a_s))
  
    while not done:
       
        
        next_state, reward, done, _ = env.step([[action]])
        reward = reward_function(reward)[0] 
        p=np.argmax(state)
        #print(p)
        state_action[int(p)]=action
        next_state = next_state[0][:24]
        
        q_s_dash_a_s=get_q_values(next_state,w,5)
        
        epsilon=np.random.rand(1)
        temp=np.zeros((5,24))
        temp[ac,:]=state
        
        if epsilon<=1:
            ac=np.argmax(q_s_dash_a_s)
            action_dash=action_space[ac]
            
            
            
        else:
            ac=np.random.choice(na)
            action_dash=action_space[ac]
            
        q_s_dash=np.max(q_s_dash_a_s)
       
    
        q_s=q[-1]
        w=w+alpha*(reward+gamma*q_s_dash-q_s)*(temp)
        
        q.append(q_s_dash)
        state=next_state
        action=action_dash
        
        
    cost[ep] = env.cost()
    print(cost[ep])
        
        
    
    

186.97245715463265
187.8509799123384
187.8105008969532
188.22908483199066
188.41980155063862
186.8335283481115
186.73450218795173
196.09083106123617
188.8628401889626
198.9785698286455
