In [15]:
import torch
import numpy as np
import os, sys
import matplotlib.pyplot as plt

# data file paths (one .npz archive per state)
filepaths = [f"/home/sbose/time-series-forecasting-federation/data/NREL{s}dataset.npz" for s in ['CA','IL','NY']]

# In this notebook we analyse the data files and work out how to build a custom
# data loader for them.

def _load_data_array(path):
    """Return the array stored under the 'data' key of the .npz archive at `path`.

    The archive is opened in a `with` block so its file handle is closed as soon
    as the array has been read, instead of lingering until garbage collection.
    """
    with np.load(path) as archive:
        return archive['data']

data_CA = _load_data_array(filepaths[0])
data_IL = _load_data_array(filepaths[1])
data_NY = _load_data_array(filepaths[2])

In [16]:
# here we write some basic code which allows the creation of a dataloader

import torch
from torch.utils.data import Dataset
from typing import Union, List, Tuple
from itertools import combinations

class LFDataset(Dataset):
    """Sliding-window forecasting dataset built from a 2-D array.

    `data` is indexed as `data[time, column]`. The columns are partitioned into
    four disjoint groups (`idx_y`, `idx_x`, `idx_u`, `idx_s`) that together must
    account for every column. For each start position `t` one record is
    precomputed and stored in `self.records`:

    * `y_past`, `x_past`, `u_past`, `s_past`: rows `t .. t+lookback-1` of the
      respective column group, shape `(lookback, len(idx_*))`.
    * `y_target`: the `idx_y` columns at the last row of the forecast horizon
      (row `t+lookback+lookahead-1`), shape `(len(idx_y),)`.
    * `y_all_target`: the `idx_y` columns over the whole forecast horizon
      (rows `t+lookback .. t+lookback+lookahead-1`), shape
      `(lookahead, len(idx_y))`.
    * `u_future`: the `idx_u` columns over the same forecast horizon, shape
      `(lookahead, len(idx_u))`.

    The number of records is `data.shape[0] - lookback - lookahead + 1`.
    """

    def __init__(
        self,
        data: np.ndarray,
        lookback: int,
        lookahead: int,
        idx_y: Union[List,Tuple],
        idx_x: Union[List,Tuple],
        idx_u: Union[List,Tuple],
        idx_s: Union[List,Tuple],
        dtype: torch.dtype = torch.float32
    ):
        """Validate the inputs and precompute every window as torch tensors.

        Args:
            data: 2-D array indexed as `[time, column]`.
            lookback: number of past rows in each input window (> 0).
            lookahead: number of future rows in the forecast horizon (> 0).
            idx_y: column indices to forecast; must be non-empty.
            idx_x: column indices exposed only over the lookback window.
            idx_u: column indices exposed over both the lookback window and
                the forecast horizon.
            idx_s: column indices exposed only over the lookback window.
            dtype: torch dtype used for every generated tensor.

        Raises:
            AssertionError: if any of the sanity checks below fails.
        """

        # sanity checks
        assert len(data.shape) == 2, "Incorrect number of dimensions in data."
        assert len(idx_y) > 0, "Cannot forecast indices of size 0"
        assert lookback > 0, "Cannot have non-positive lookback!"
        assert lookahead > 0, "Cannot have non-positive lookahead!"
        assert len(idx_y)+len(idx_x)+len(idx_u)+len(idx_s) == data.shape[1], "Indices provided do not sum upto the input dimension."
        assert all(not set(a) & set(b) for a, b in combinations([idx_y, idx_x, idx_u, idx_s], 2)), "All indices are not mutually exclusive."
        assert data.shape[0] >= lookback+lookahead, "Data too short to generate even 1 sample!"

        # save inputs
        self.data, self.dtype = data, dtype
        self.lookback, self.lookahead = lookback, lookahead
        self.idx_y, self.idx_x, self.idx_u, self.idx_s = idx_y, idx_x, idx_u, idx_s

        # generate the records, one per admissible window start
        self.records = []
        n_windows = data.shape[0] - lookback - lookahead + 1
        for tidx in range(n_windows):
            past = slice(tidx, tidx + lookback)
            future = slice(tidx + lookback, tidx + lookback + lookahead)

            y_past = torch.tensor(data[past, idx_y], dtype=dtype)
            x_past = torch.tensor(data[past, idx_x], dtype=dtype)
            u_past = torch.tensor(data[past, idx_u], dtype=dtype)
            s_past = torch.tensor(data[past, idx_s], dtype=dtype)
            # single-step target: last row of the forecast horizon
            y_target = torch.tensor(data[future.stop - 1, idx_y], dtype=dtype)
            y_all_target = torch.tensor(data[future, idx_y], dtype=dtype)
            # The original slice was `tidx:tidx+lookback:tidx+lookback+lookahead`,
            # which used the horizon end as a *step* and so returned only the
            # single past row `tidx`. The future values of the `idx_u` columns
            # live in the same window as `y_all_target`.
            u_future = torch.tensor(data[future, idx_u], dtype=dtype)
            self.records.append((y_past,x_past,u_past,s_past,y_target,y_all_target,u_future))

    def __len__(self):
        """Return the number of precomputed windows."""

        return len(self.records)

    def __getitem__(self, idx):
        """Return record `idx` as `(inp, lab)`.

        `inp` is `(y_past, x_past, u_past, s_past, u_future)` and `lab` is
        `(y_target, y_all_target)`.
        """

        record = self.records[idx]
        y_past, x_past, u_past, s_past, y_target, y_all_target, u_future = record
        inp = (y_past,x_past,u_past,s_past,u_future)
        lab = (y_target, y_all_target)

        return inp, lab

In [17]:
# Try the dataset class out on the California data.
from torch.utils.data import DataLoader

# LFDataset only accepts a 2-D array, so take index 0 along the first axis
# (NOTE(review): what that leading axis represents is not visible here).
data_CA_truncated = data_CA[0, :, :]

# Window the array: 8 past steps in, 4 future steps out; the four index lists
# assign each of the 8 columns to exactly one group.
CA_dset = LFDataset(
    data=data_CA_truncated,
    lookback=8,
    lookahead=4,
    idx_y=[0],
    idx_x=[3, 4],
    idx_u=[1, 2],
    idx_s=[5, 6, 7],
    dtype=torch.float32,
)

# Wrap the dataset in a shuffling mini-batch loader.
CA_dataloader = DataLoader(dataset=CA_dset, batch_size=32, shuffle=True)

In [18]:
# for cidx, (a,b) in enumerate(CA_dataloader):
#     print(f"On {cidx+1}th item of dataloader, type of a is {type(a)}, type of b is {type(b)}")
#     for idx,itm in enumerate(a):
#         print(f"Shape of {idx+1}th item in a is {itm.shape}.")
#     for idx,itm in enumerate(b):
#         print(f"Shape of {idx+1}th item in b is {itm.shape}.")

In [19]:
# test out LSTM vanilla version

import sys
import torch
import torch.nn as nn
# Make the project package importable: the repository folder is looked up
# directly under the current working directory.
sys.path.insert(1, os.getcwd() + '/time-series-forecasting-federation')
from models.LSTM.LSTMFCDecoder import LSTMFCDecoder

# Instantiate the LSTM + fully-connected decoder. lookback/lookahead match the
# values used to build CA_dset.
# NOTE(review): input_size=8 presumably equals the total column count and
# fcnn_sizes[0]=160 presumably equals hidden_size * lookback; confirm against
# the LSTMFCDecoder implementation.
model = LSTMFCDecoder(
    lookback=8,
    lookahead=4,
    input_size=8,
    y_size=1,
    hidden_size=20,
    num_layers=2,
    fcnn_sizes=(160, 10, 10, 1),
    activation=nn.ReLU,
    dtype=torch.float32,
)

# Push a single mini-batch through the model and report the output shape.
for a, b in CA_dataloader:
    w = model(a)
    print(f"Shape of LSTM output is {tuple(w.shape)}.")
    break

Shape of LSTM output is (32, 1).
