# 1. Discretize the state space

In [None]:
import numpy as np
from LSTM import LSTM
from System import Plant
from Policy import Estimator
import warnings
from tqdm import tqdm
warnings.filterwarnings('ignore')
np.random.seed(0)

# Grid step of the state variables (unit: kW) and of the control input.
resolution = 0.1
control_resolution = 1  # TODO

# np.arange excludes `stop`; the extra .01 keeps the upper bound on the grid.
# c, p and l are the three state components; u is the control input.
c_dis = np.arange(0, 7.01, resolution)  # TODO: kWh should have a larger resolution
p_dis = np.arange(0, 15.01, resolution)
l_dis = np.arange(0, 10.01, resolution)
u_dis = np.arange(-5, 4, control_resolution)

# The state space is the Cartesian product of the three state grids.
n = len(c_dis) * len(p_dis) * len(l_dis)
print("Size of state space: {}".format(n))
print("Size of action space: {}".format(len(u_dis)))

# 2. Generate transition matrix

In [None]:
def close_idx(list, target):
    '''
    Find the index of the element of `list` that is closest to `target`.

    `list` may be a numpy array or any array-like (plain list, tuple, ...);
    it is converted with np.asarray so the subtraction below is always
    element-wise. When several elements are equally close, the first one
    wins (np.argmin semantics). An empty input raises ValueError.
    '''
    # NOTE: the parameter name shadows the builtin `list`; it is kept so that
    # existing keyword callers keep working.
    candidates = np.asarray(list)
    return np.argmin(np.abs(candidates - target))

def get_idx(p_idx, c_idx, l_idx):
    '''
    Flatten the grid indices (p_idx, c_idx, l_idx) into one state index.

    The layout is row-major with p as the slowest and l as the fastest
    varying axis; the axis lengths come from the module-level grids
    c_dis and l_dis.
    '''
    n_c = c_dis.shape[0]
    n_l = l_dis.shape[0]
    # Horner form of p_idx * n_c * n_l + c_idx * n_l + l_idx.
    return (p_idx * n_c + c_idx) * n_l + l_idx
    
def get_pcl(idx):
    '''
    Decode a flat state index into the grid values [p, c, l].

    The index is split back into (p_idx, c_idx, l_idx) using the same
    row-major layout as get_idx, and the corresponding entries of p_dis,
    c_dis and l_dis are returned (values, not indices).
    '''
    n_l = l_dis.shape[0]
    plane = c_dis.shape[0] * n_l
    # Peel off the slowest axis first, then split the remainder.
    p_idx, within_plane = divmod(idx, plane)
    c_idx, l_idx = divmod(within_plane, n_l)
    return [p_dis[p_idx], c_dis[c_idx], l_dis[l_idx]]

In [None]:
dt = 1/30  # simulation time step, unit: hour
n_sample = 100  # number of Monte Carlo samples per (state, action) pair

# Sparse transition model: maps (state_idx, u_idx, next_state_idx) to the
# empirical probability of that transition. Only observed transitions are
# stored.
P_trans = {}
plant = Plant(dt=dt)
estimator = Estimator()
std = 0.1  # standard deviation of the noise around the estimated load
sample_weight = 1 / n_sample  # each sample contributes equally

for l_idx, l in enumerate(l_dis):
    l_hat = estimator.estimate(l)
    # l_hat = l
    # One set of noisy load samples is drawn per load level and reused for
    # every (p, c, u) combination at that level.
    l_hats = np.random.normal(loc=l_hat, scale=std, size=n_sample)
    for p_idx, p in enumerate(tqdm(p_dis)):
        for c_idx, c in enumerate(c_dis):
            # The source state does not depend on the action or the sample,
            # so encode it once here instead of in the innermost loop.
            state_idx = get_idx(p_idx, c_idx, l_idx)
            for u_idx, u in enumerate(u_dis):
                for l_sample in l_hats:
                    # Restart the plant from the grid state, apply the action
                    # with the sampled load, then snap the result to the grid.
                    plant.reset(c, p, l)
                    plant.step(u, l_sample)
                    hit_state_idx = get_idx(close_idx(p_dis, plant.p),
                                            close_idx(c_dis, plant.battery.c),
                                            close_idx(l_dis, plant.l))
                    key = (state_idx, u_idx, hit_state_idx)
                    P_trans[key] = P_trans.get(key, 0.0) + sample_weight

Save the `P_trans` dictionary (it is very large, so this takes a long time):

In [None]:
import pickle
# Serialize the transition dictionary to disk. The context manager closes the
# file even if pickle.dump raises (e.g. disk full or interrupted run).
with open("P_trans.pkl", "wb") as P_trans_file:
    pickle.dump(P_trans, P_trans_file)

Ensure the transition matrix is valid:

In [None]:
# Sanity check on one (state, action) pair: its outgoing transition
# probabilities should add up to (approximately) 1.
state_idx = 0
u_idx = 0

# Named `total_prob` rather than `sum` so the builtin sum() is not shadowed
# for the rest of the session. Transitions that were never observed are
# absent from P_trans and count as probability 0.
total_prob = 0
for j in range(n):
    total_prob += P_trans.get((state_idx, u_idx, j), 0)

print(total_prob)

# 3. Dynamic programming

In [None]:
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
n_actions = u_dis.shape[0]

# 3-D sparse transition tensor indexed as [state, action, next_state].
# coalesce() is required before indices()/values() can be read back.
P_trans_tensor = torch.sparse_coo_tensor(
    list(zip(*P_trans.keys())),
    list(P_trans.values()),
    size=(n, n_actions, n),
    device=device,
).coalesce()

# Fold (state, action) into a single row index so that one sparse
# matrix-vector product gives the expected cost-to-go of every
# state-action pair at once: row = state * n_actions + action.
state_rows, action_rows, next_cols = P_trans_tensor.indices()
P_flat = torch.sparse_coo_tensor(
    torch.stack((state_rows * n_actions + action_rows, next_cols)),
    P_trans_tensor.values().float(),
    size=(n * n_actions, n),
    device=device,
)

T = 360  # horizon length, in time steps
# Terminal cost-to-go: the p component (first entry of get_pcl) of each state.
J = torch.tensor([get_pcl(i)[0] for i in range(n)], device=device).float()
# Actions[t][i] holds the index into u_dis chosen at stage t in state i.
Actions = torch.zeros(size=(T, n), device=device)

# Backward recursion over every stage, including t = 0 (the original
# range(T-1, 0, -1) left Actions[0] at its zero initial value).
for t in tqdm(range(T - 1, -1, -1)):
    # The whole stage is computed from the previous J before J is rebound,
    # so no state sees a partially updated cost-to-go.
    expected_cost = torch.sparse.mm(P_flat, J.unsqueeze(1)).view(n, n_actions)
    J, best_action = torch.min(expected_cost, dim=1)
    Actions[t] = best_action

In [None]:
# Show the shape of the policy table; it was allocated as (T, n), i.e. one
# row per time step and one column per discretized state.
print(Actions.shape)

# 4. Testing with real data

In [None]:
# Move the policy table to the CPU so the simulation below can index it
# without depending on the device used during dynamic programming.
device = 'cpu'
Actions = Actions.to(device)

In [None]:
import pandas as pd

df = pd.read_csv('data.csv')  # read df from csv file

# Build a single timestamp from the separate 'Date' and 'Time' columns.
# NOTE(review): the original format string was '%d-%m-%Y %H;%M:%S', with a
# ';' between hours and minutes, which looks like a typo for ':'. The data
# file is not visible here, so the separator is normalized first; both
# 'HH;MM:SS' and 'HH:MM:SS' then parse with the same format.
time_text = df['Time'].str.replace(';', ':', regex=False)
df['Datetime'] = pd.to_datetime(df['Date'] + ' ' + time_text, format='%d-%m-%Y %H:%M:%S')
df = df.drop(columns=['Date', 'Time'])
df.set_index('Datetime', inplace=True)

print(df[:5])

In [None]:
import matplotlib.pyplot as plt
import datetime

simu_time = 12  # simulated duration, unit: hour
# round() before int() so floating-point error in the division cannot drop
# the last step (e.g. 0.3 / 0.1 evaluates to 2.9999999999999996).
simu_step = int(round(simu_time / dt))
cur_time = df.index[0]  # 2007-10-01 00:00:00
# Advance the clock by the same dt that the plant and the policy were built
# with, instead of a separately hard-coded 1/30 hour.
step_delta = datetime.timedelta(hours=dt)

peaks = []
loads = []
times = []

env = Plant(dt=dt)
p, c, l = env.reset(init_battery=0, init_peak=0, init_load=0)
for i in range(simu_step):
    real_load = df.loc[cur_time, 'Load']
    # Snap the current plant state and the measured load to the grid, then
    # look up the action stored for that state at this time step. The time
    # index wraps around after T steps.
    state_idx = get_idx(close_idx(p_dis, p), close_idx(c_dis, c), close_idx(l_dis, real_load))
    action_idx = int(Actions[i % T][state_idx].item())
    p, c, l = env.step(u_dis[action_idx], real_load)
    print("action: {}, c: {}, load: {}".format(u_dis[action_idx], c, l))
    cur_time += step_delta
    times.append(cur_time)
    peaks.append(p)
    loads.append(l)

# Plot the peak and load values returned by the plant over the simulation.
fig, axs = plt.subplots(1, figsize=(16, 8))
axs.plot(times, peaks, label='Peak')
axs.plot(times, loads, label='Real Load')
axs.legend()
plt.show()