In [None]:
import numpy as np
import pickle

from stable_baselines3 import PPO
import environment 

In [2]:
# Electricity rates
def func_price(i):
    """Return the time-of-use electricity price in $/kWh for time step ``i``.

    The step index is mapped to an hour of day with six steps per hour
    (``(i // 6) % 24``), so the schedule repeats every 144 steps.

    Tariff by hour of day:
        11 <= hour < 17                      -> 0.17
        7 <= hour < 11 and 17 <= hour < 19   -> 0.113
        every other hour                     -> 0.082
    """
    hour_of_day = (i // 6) % 24
    if 11 <= hour_of_day < 17:
        return 0.17
    if 7 <= hour_of_day < 19:
        # The 11-17 window was handled above, so only the two shoulder
        # periods (7-11 and 17-19) reach this branch.
        return 0.113
    return 0.082

# HVAC COP
def func_COP(To):
    """Return the HVAC coefficient of performance for outdoor temperature ``To``.

    Linear model: ``COP = (-2/25) * To + 6``. Only arithmetic operators are
    applied to ``To``, so a scalar or a NumPy array both work.
    ``To`` is presumably in degC, matching the outdoor-temperature data used
    elsewhere in this notebook.
    """
    slope = -2 / 25
    intercept = 6
    return slope * To + intercept

In [3]:
# import data
# Simulation horizon: two days at 6 steps per hour (the same steps-per-hour
# convention func_price uses to turn a step index into an hour of day).
STEPS_PER_HOUR = 6
N_STEPS = STEPS_PER_HOUR * 24 * 2

# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open('HW2_Data.pickle', 'rb') as f:
    Data = pickle.load(f)
To = Data['To'][:N_STEPS] # outdoor air temperature in degC
q_solar = Data['q_solar'][:N_STEPS] # absorbed solar radiation in W
# Disturbance inputs, one row per step: column 0 = q_solar, column 1 = To.
u_dist = np.hstack((q_solar[:,None], To[:,None]))

# get price and COP
# Each profile is computed per step and then concatenated with itself, so
# Price and COP are twice as long as the disturbance series.
# NOTE(review): presumably this lets the environment read values beyond the
# end of an episode -- confirm against environment.SimpleEnv.
Price = np.array([func_price(step) for step in range(q_solar.shape[0])])
Price = np.hstack((Price,Price))
COP = np.array([func_COP(temp) for temp in To])
COP = np.hstack((COP,COP))

# import building model
# The pickle holds the building model matrices under the keys
# 'Ad', 'Bd', 'Cd' and 'Dd'.
with open('HW2_SS-Model.pickle', 'rb') as f:
    ss_model = pickle.load(f)
Ad, Bd, Cd, Dd = (ss_model[key] for key in ('Ad', 'Bd', 'Cd', 'Dd'))

# Separate the first input (HVAC) from the remaining inputs (disturbances).
# Bd is split along its second axis; Dd is split along its first axis.
# NOTE(review): if Dd is 2-D (outputs x inputs) its split should probably be
# column-wise like Bd -- confirm Dd's shape.
Bd_HVAC, Bd_dist = Bd[:, :1], Bd[:, 1:]
Dd_HVAC, Dd_dist = Dd[:1], Dd[1:]

In [4]:
# create an environment using the building model
# Positional argument order: the model matrices (input matrices already split
# into HVAC and disturbance parts), then the COP and price profiles, then the
# disturbance time series.
Environment = environment.SimpleEnv(
    Ad,
    Bd_HVAC, Bd_dist,
    Cd,
    Dd_HVAC, Dd_dist,
    COP, Price, u_dist,
)

In [None]:
# Training hyperparameters, named so they are easy to find and tune.
SEED = 0                      # fixed seed so repeated training runs are repeatable
LEARNING_RATE = 1e-5
GAMMA = 0.999                 # discount factor
TOTAL_TIMESTEPS = 30_000_000  # long run -- expect this cell to take a while

# PPO training is stochastic (policy initialisation and action sampling), so
# the seed is passed explicitly; without it every run gives a different model.
model = PPO("MultiInputPolicy", Environment, verbose=1,
            learning_rate=LEARNING_RATE, gamma=GAMMA, seed=SEED)
model.learn(total_timesteps=TOTAL_TIMESTEPS)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 288      |
|    ep_rew_mean     | -181     |
| time/              |          |
|    fps             | 1916     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 2048     |
---------------------------------


<stable_baselines3.ppo.ppo.PPO at 0x23288b822a0>

In [7]:
# Persist the trained PPO model to disk under the base name "trained_RL" so
# the long training run does not have to be repeated (reload via PPO.load --
# see the Stable-Baselines3 docs).
model.save("trained_RL")

In [None]:
# Disabled evaluation rollout (kept for reference; uncomment to run).
# Steps the trained policy deterministically through 288 environment steps
# (the episode length reported in the training log) and records every
# observation and action.
# NOTE(review): model.get_env() returns a vectorized env, which presumably
# auto-resets when an episode ends; the last entry of obs_history may then be
# the first observation of a new episode -- confirm.
# vec_env = model.get_env()
# obs_history=[]
# act_history=[]
# obs = vec_env.reset()
# obs_history.append(obs)
# for i in range(288):
#     action, _state = model.predict(obs, deterministic=True)
#     obs, reward, done, info = vec_env.step(action)
#     obs_history.append(obs)
#     act_history.append(action)

In [None]:
# Disabled: pickles the observation/action histories gathered by the
# commented-out rollout loop to 'tmp4.pickle'. Needs obs_history and
# act_history, so uncomment it together with that loop.
# result = {'obs_history':obs_history,
#           'act_history':act_history}
# with open('tmp4.pickle', 'wb') as f:
#     pickle.dump(result, f)