In [1]:
import torch
import torchmetrics as tm

from config import *
from stages import *
from train import *

from data.util import crop_q_between, split_weekdays_and_weekends

%reload_ext autoreload
%autoreload 2

In [2]:
mse = tm.MeanSquaredError().to(CONFIG.device)
mape = tm.MeanAbsolutePercentageError().to(CONFIG.device)

Load data

In [3]:
mat_q = CONFIG.load('mat_q.pt')
mat_q.shape

torch.Size([35040, 472])

Split data

##### Compare MSE and MAPE losses for different parameter configurations

Method 1: Pre-train RBMs + attach KELM

In [15]:
# Weekday/Weekend > P > N := MSE
results_1 = {}

for P in range(3125, 28125, 3125):
    CONFIG.spectral_threshold = P
    dbn = get_pre_trained_dbn(CONFIG, print_each=0)
    for N in range(4, 36, 4):
        CONFIG.dbn_hidden_layer_sizes = [N, N, N]

        mat_q_trend, mat_q_resid = preprocess_data(P, mat_q)
        mat_c, mat_x = compress_data(
            mat_q_resid.abs(), CONFIG.read_period, CONFIG.train_period, CONFIG.alpha)

        mat_c_wd, mat_c_we = split_weekdays_and_weekends(
            mat_c, CONFIG.train_period[0])

        train_c, val_c, test_c = split_train_val_test(
            mat_c_we, *CONFIG.data_split)
        train_dataset = SlidingWindowDataset(
            train_c.T, CONFIG.time_window_length, 1)
        kelm = fit_kelm_to_dbn(dbn, train_dataset)

        test_dataloader = DataLoader(SlidingWindowDataset(
            train_c.T, CONFIG.time_window_length, 1))
        mse_loss = 0.
        n_samples = 0
        for X, y in test_dataloader:
            n_samples += 1
            pred = dbn(X).squeeze()
            pred = kelm(pred).T

            mse_loss += mse(pred, y).item()
        mse_loss /= n_samples
        print(f'P={P}, N={N}, Loss={mse_loss}')

P=3125, N=4, Loss=21906222.76
P=3125, N=8, Loss=21906222.76
P=3125, N=12, Loss=21906222.76
P=3125, N=16, Loss=21906222.76
P=3125, N=20, Loss=21906222.76
P=3125, N=24, Loss=21906222.76
P=3125, N=28, Loss=21906222.76
P=3125, N=32, Loss=21906222.76
P=28125, N=4, Loss=7449782.47
P=28125, N=8, Loss=7449782.47
P=28125, N=12, Loss=7449782.47
P=28125, N=16, Loss=7449782.47
P=28125, N=20, Loss=7449782.47
P=28125, N=24, Loss=7449782.47
P=28125, N=28, Loss=7449782.47
P=28125, N=32, Loss=7449782.47
P=3125, N=4, Loss=30824.5703125
P=3125, N=8, Loss=30824.5703125
P=3125, N=12, Loss=30824.5703125
P=3125, N=16, Loss=30824.5703125
P=3125, N=20, Loss=30824.5703125
P=3125, N=24, Loss=30824.5703125


KeyboardInterrupt: 

Method 2: Pre-train DBN + Train DBN attaching KELM on each step

In [12]:
class Echo(torch.nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
    
    def forward(x):
        return x

random_dataset = BinaryVectorDataset(1000, 10)
print(len(random_dataset))

X = []
y = []

for X_, y_ in DataLoader(random_dataset):
    X.append(X_.to(CONFIG.device))
    y.append(y_.to(CONFIG.device))

X = torch.concat(X)
y = torch.concat(y)

kelm = KELM()
kelm.fit(X, y)
mse(kelm(X[4:8]).squeeze(), y[4:8].squeeze())

1000


tensor(2.4297e-06, device='cuda:0')