In [12]:
from typing import Tuple
from datetime import datetime

import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset

from config import *

from data.util import split_weekdays_and_weekends, crop_q_between_dates

from prediction_models.dbn import *
from prediction_models.kelm import *

%reload_ext autoreload
%autoreload 2

In [4]:
# Notebook-level constants read from the shared CONFIG mapping.
DBN_LAYER_SIZES = CONFIG['DBN_LAYER_SIZES']
DBN_HIDDEN_LAYER_SIZES = CONFIG['DBN_HIDDEN_LAYER_SIZES']
GIBBS_SAMPLING_STEPS = CONFIG['GIBBS_SAMPLING_STEPS']

# The four date bounds are kept as strings in CONFIG; parse them all with DATE_FORMAT.
(READ_START_DATE, READ_END_DATE,
 TRAIN_START_DATE, TRAIN_END_DATE) = (
    datetime.strptime(CONFIG[date_key], DATE_FORMAT)
    for date_key in ('READ_START_DATE', 'READ_END_DATE',
                     'TRAIN_START_DATE', 'TRAIN_END_DATE')
)

TRAIN_TIME_WINDOW_SIZE = CONFIG['TRAIN_TIME_WINDOW_SIZE']

### Pre-train RBMs inside DBN

Train the RBMs to reconstruct every possible binary vector (all combinations of 0s and 1s) of the input length.

#### Create a dataset

In [5]:
class BinaryVectorDataset(Dataset):
    """Map-style dataset that enumerates every binary vector of a fixed width.

    Item ``i`` is the zero-padded, most-significant-bit-first binary expansion
    of ``i`` as a float tensor of length ``n_bits``, paired with a one-element
    tensor holding ``i`` itself. The dataset therefore has ``2 ** n_bits`` items.
    """

    def __init__(self, n_bits: int):
        """Create the dataset.

        Args:
            n_bits: Width of every generated vector; must be at least 1.

        Raises:
            ValueError: If ``n_bits`` is smaller than 1.
        """
        if n_bits < 1:
            raise ValueError(f'n_bits must be a positive integer, got {n_bits}')
        self.n_bits = n_bits
        # Format spec such as '{0:05b}': binary digits, zero-padded to n_bits.
        self.format_str = f'{{0:0{self.n_bits}b}}'

    def __len__(self) -> int:
        """Return the number of distinct ``n_bits``-wide binary vectors."""
        return 2 ** self.n_bits

    def __getitem__(self, index: int) \
            -> Tuple[torch.Tensor, torch.Tensor]:
        """Return the binary vector for ``index`` together with the index.

        Args:
            index: Position in ``[-len(self), len(self))``; negative values
                count from the end, as with Python sequences.

        Returns:
            A pair ``(bits, idx)`` where ``bits`` is a float tensor of length
            ``n_bits`` and ``idx`` is a one-element tensor containing the
            (normalized, non-negative) index.

        Raises:
            IndexError: If ``index`` is out of range. Raising here is also what
                lets plain iteration over the dataset terminate, and it keeps
                every returned vector exactly ``n_bits`` wide.
        """
        size = len(self)
        if index < 0:
            index += size
        if not 0 <= index < size:
            raise IndexError(
                f'index out of range for BinaryVectorDataset of length {size}')
        bin_str = self.format_str.format(index)
        return torch.tensor([float(c) for c in bin_str]), torch.tensor([index,])

# Loader over every INPUT_SIZE-bit vector: one sample per batch, visited in
# index order (these are DataLoader's defaults, spelled out here).
dbn_pre_train_loader = DataLoader(
    dataset=BinaryVectorDataset(n_bits=INPUT_SIZE),
    batch_size=1,
    shuffle=False,
)

#### Create and pre-train DBN

In [11]:
# Instantiate the DBN and run its pre-training over the binary-vector loader.
# NOTE(review): INPUT_SIZE and HIDDEN_LAYER_SIZES are not assigned in any visible
# cell — presumably they come from `from config import *`. The constants cell
# defines DBN_LAYER_SIZES / DBN_HIDDEN_LAYER_SIZES instead; confirm which is intended.
# NOTE(review): no RNG seed is set in the visible cells, so the logged losses
# will presumably differ between runs — consider seeding torch before this cell.
dbn = DBN(INPUT_SIZE, HIDDEN_LAYER_SIZES, k=GIBBS_SAMPLING_STEPS)
# Last expression of the cell: whatever pre_train_dbm returns is displayed as the cell output.
pre_train_dbm(dbn, dbn_pre_train_loader, print_every=1)

Epoch 0, Machine 0:	Loss: -1.9836238622665405
Epoch 0, Machine 1:	Loss: -0.3505793809890747
Epoch 0, Machine 2:	Loss: -0.1488177478313446
Epoch 1, Machine 0:	Loss: -2.5248847007751465
Epoch 1, Machine 1:	Loss: -0.44476449489593506
Epoch 1, Machine 2:	Loss: -0.6324977874755859
Epoch 2, Machine 0:	Loss: -2.6716198921203613
Epoch 2, Machine 1:	Loss: -0.5382086038589478
Epoch 2, Machine 2:	Loss: -0.7409056425094604
Epoch 3, Machine 0:	Loss: -2.728278875350952
Epoch 3, Machine 1:	Loss: -0.5518969297409058
Epoch 3, Machine 2:	Loss: -0.8258986473083496
Epoch 4, Machine 0:	Loss: -2.7757086753845215
Epoch 4, Machine 1:	Loss: -0.5673878192901611
Epoch 4, Machine 2:	Loss: -0.9229025840759277
Epoch 5, Machine 0:	Loss: -2.8171348571777344
Epoch 5, Machine 1:	Loss: -0.6019374132156372
Epoch 5, Machine 2:	Loss: -0.9747148752212524
Epoch 6, Machine 0:	Loss: -2.8436872959136963
Epoch 6, Machine 1:	Loss: -0.6580054759979248
Epoch 6, Machine 2:	Loss: -0.9391961097717285
Epoch 7, Machine 0:	Loss: -2.85986

DBN(
  (rbms): ModuleList(
    (0-2): 3 x RBM()
  )
)

### Prepare training dataset

Load the matrix $E_t$ constructed from $Q$

In [8]:
# Read the saved matrix from the output directory and crop it in one step,
# passing the read range first and the training range second.
mat_q_resid = crop_q_between_dates(
    torch.load(out_path('mat_q_resid.pt')),
    READ_START_DATE,
    READ_END_DATE,
    TRAIN_START_DATE,
    TRAIN_END_DATE,
)
# Display the cropped shape as the cell output.
mat_q_resid.shape

torch.Size([2880, 472])

Split $Q$ into weekday and weekend data

In [10]:
# Split the cropped training matrix into its weekday and weekend parts.
# This cell previously read `mat_q`, a name no visible cell assigns, so it failed
# with NameError on a fresh kernel (Restart & Run All). It now uses `mat_q_resid`,
# the matrix loaded and cropped earlier in this notebook.
# NOTE(review): confirm `mat_q_resid` is the matrix that was meant to be split.
mat_q_wd, mat_q_we = split_weekdays_and_weekends(mat_q_resid, TRAIN_START_DATE, TRAIN_END_DATE)
# Sanity checks: the second dimension is unchanged by the split, and the
# first-dimension sizes of the two parts add up to the size of the input.
assert mat_q_wd.shape[1] == mat_q_we.shape[1] == mat_q_resid.shape[1]
assert mat_q_wd.shape[0] + mat_q_we.shape[0] == mat_q_resid.shape[0]
# Display both shapes as the cell output.
mat_q_wd.shape, mat_q_we.shape

(torch.Size([2112, 472]), torch.Size([768, 472]))

#### Train prediction of the first section

Define the loss function

In [13]:
# Mean-squared-error criterion; 'mean' is the default reduction, stated explicitly.
loss_fn = nn.MSELoss(reduction='mean')