In [1]:
# Show which GPU is visible to this kernel, together with driver/CUDA versions and current memory use.
!nvidia-smi

Tue Jul  9 05:38:15 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.154.05             Driver Version: 535.154.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-80GB          Off | 00000000:07:00.0 Off |                    0 |
| N/A   22C    P0              61W / 400W |      0MiB / 81920MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
import torch
import numpy as np
import os, sys
import matplotlib.pyplot as plt

# This notebook inspects the raw data archives and prototypes a custom dataset / data loader for them.

# Per-state NREL archives (CA, IL, NY).
# NOTE(review): `filepaths` is not used by the cells shown below - confirm it is still needed.
filepaths = [
    f"/home/sbose/time-series-forecasting-federation/data/NREL{s}dataset.npz"
    for s in ('CA', 'IL', 'NY')
]

# Archives for group G4601010. `data_y_s` is later read through its 'load' and 'static' entries
# and `data_x_u` through its 'wdata' entry.
data_y_s = np.load(file='/lcrc/project/NEXTGENOPT/NREL_COMSTOCK_DATA/grouped/G4601010_data.npz')
data_x_u = np.load(file='/lcrc/project/NEXTGENOPT/NREL_COMSTOCK_DATA/grouped/G4601010_weather.npz')

In [3]:
# here we write some basic code which allows the creation of a dataloader

import torch
from torch.utils.data import Dataset
from typing import Union, List, Tuple
from itertools import combinations

class LFDataset(Dataset):
    """Sliding-window load-forecasting dataset for a single client.

    Each item is a window of ``lookback`` past time steps followed by
    ``lookahead`` future time steps, cut from one client's load series and
    from the shared ``wdata`` feature rows.

    Parameters
    ----------
    data_y_s
        Dict-like object (a plain ``dict`` or the object returned by
        ``np.load`` on an ``.npz`` file) with entries ``'load'``, indexed as
        ``[client, time]``, and ``'static'``, indexed as ``[client, feature]``.
    data_x_u
        Dict-like object with entry ``'wdata'``, indexed as ``[feature, time]``.
    lookback
        Number of past time steps in every window (must be > 0).
    lookahead
        Number of future time steps in every window (must be > 0).
    client_idx
        Row of ``'load'`` / ``'static'`` to use.
    idx_x
        Rows of ``'wdata'`` returned only for the past window (``x``).
    idx_u
        Rows of ``'wdata'`` returned for both the past and the future
        window (``u``).
    dtype
        Torch dtype of every returned tensor.

    Raises
    ------
    AssertionError
        If any of the sanity checks on the arguments fails.
    """

    def __init__(
        self,
        data_y_s: np.array,
        data_x_u: np.array,
        lookback: int,
        lookahead: int,
        client_idx: int,
        idx_x: Union[List,Tuple],
        idx_u: Union[List,Tuple],
        dtype: torch.dtype = torch.float32
    ):

        # Fetch each array once: when the containers come from np.load on an
        # .npz archive, every key access reads the array from the file again.
        load_all = data_y_s['load']
        static_all = data_y_s['static']
        wdata = data_x_u['wdata']

        # sanity checks
        assert lookback > 0, "Cannot have non-positive lookback!"
        assert lookahead > 0, "Cannot have non-positive lookahead!"
        assert client_idx < load_all.shape[0], "Client index exceeds number of clients present."
        assert len(idx_x)+len(idx_u) == wdata.shape[0], "Indices provided do not sum upto the input dimension."
        assert set(idx_x).isdisjoint(idx_u), "All indices are not mutually exclusive."

        # save inputs
        self.load = load_all[client_idx,:]
        self.static = static_all[client_idx,:]
        self.x, self.u = wdata[idx_x,:], wdata[idx_u,:]
        # Both index lists are stored on the instance (the original only stored idx_x).
        self.idx_x, self.idx_u = idx_x, idx_u
        self.lookback, self.lookahead = lookback, lookahead
        self.dtype = dtype

        # Number of complete windows; clamped so that a series shorter than
        # one window gives an empty dataset instead of a negative length.
        self.maxlen = max(0, self.load.shape[0] - lookback - lookahead + 1)

    def __len__(self):
        """Return the number of complete (lookback + lookahead) windows."""
        return self.maxlen

    def __getitem__(self, idx):
        """Return the window starting at time step ``idx``.

        Negative indices count from the end, as for Python sequences.

        Returns
        -------
        inp : tuple
            ``(y_past, x_past, u_past, s_past, u_future)`` with shapes
            ``(lookback, 1)``, ``(lookback, len(idx_x))``,
            ``(lookback, len(idx_u))``, ``(lookback, n_static)`` and
            ``(lookahead, len(idx_u))``.
        lab : tuple
            ``(y_target, y_all_target)``: the load at the last future step,
            shape ``(1,)``, and the load over the whole future window, shape
            ``(lookahead, 1)``.

        Raises
        ------
        IndexError
            If ``idx`` does not address a complete window.
        """
        if idx < 0:
            # Not `+=`: that would mutate an array/tensor index in place.
            idx = idx + self.maxlen
        if not 0 <= idx < self.maxlen:
            raise IndexError(f"Window index out of range for dataset of length {self.maxlen}.")

        past_end = idx + self.lookback
        future_end = past_end + self.lookahead

        y_past = torch.tensor(self.load[idx:past_end][:,None], dtype=self.dtype)
        x_past = torch.tensor(self.x[:,idx:past_end].T, dtype=self.dtype)
        u_past = torch.tensor(self.u[:,idx:past_end].T, dtype=self.dtype)
        u_future = torch.tensor(self.u[:,past_end:future_end].T, dtype=self.dtype)
        # Static features do not vary in time: repeat them for every past step.
        s_past = torch.tensor(self.static[None,:].repeat(self.lookback,axis=0), dtype=self.dtype)
        y_target = torch.tensor(self.load[future_end-1].reshape((1,)), dtype=self.dtype)
        y_all_target = torch.tensor(self.load[past_end:future_end][:,None], dtype=self.dtype)

        inp = (y_past,x_past,u_past,s_past,u_future)
        lab = (y_target, y_all_target)

        return inp, lab

In [4]:
# We now test out our dataset with California data

# Wrap the loaded arrays in a windowed dataset for client 0: 8 past steps, 4 future steps,
# 'wdata' rows 0-1 as x features and rows 2-3 as u features.
# NOTE(review): the arrays come from the G4601010 archives loaded earlier - confirm the `CA_` prefix matches that data.
CA_dset = LFDataset(
    data_y_s, data_x_u,
    lookback=8, lookahead=4,
    client_idx=0,
    idx_x=[0, 1], idx_u=[2, 3],
    dtype=torch.float32,
)

# Serve the windows in shuffled mini-batches of 32.
from torch.utils.data import DataLoader
CA_dataloader = DataLoader(dataset=CA_dset, batch_size=32, shuffle=True)

In [5]:
# Test out the shape of the dataloader outputs
#
# Each batch is a pair (a, b): `a` holds the collated LFDataset inputs
# (y_past, x_past, u_past, s_past, u_future) and `b` the collated labels
# (y_target, y_all_target).

for cidx, (a,b) in enumerate(CA_dataloader):
    print(f"On {cidx+1}th item of dataloader, type of a is {type(a)}, type of b is {type(b)}")
    # one line per input tensor
    for idx,itm in enumerate(a):
        print(f"Shape of {idx+1}th item in a is {itm.shape}.")
    # one line per label tensor
    for idx,itm in enumerate(b):
        print(f"Shape of {idx+1}th item in b is {itm.shape}.")
    # only the first batch is inspected
    break

On 1th item of dataloader, type of a is <class 'list'>, type of b is <class 'list'>
Shape of 1th item in a is torch.Size([32, 8, 1]).
Shape of 2th item in a is torch.Size([32, 8, 2]).
Shape of 3th item in a is torch.Size([32, 8, 2]).
Shape of 4th item in a is torch.Size([32, 8, 3]).
Shape of 5th item in a is torch.Size([32, 4, 2]).
Shape of 1th item in b is torch.Size([32, 1]).
Shape of 2th item in b is torch.Size([32, 4, 1]).


In [6]:
# Put the repository checkout at the front of sys.path so that imports of its packages
# (presumably the `models` package used in the cells below) resolve to it first.

import sys

REPO_ROOT = '/home/sbose/time-series-forecasting-federation'

# Insert only when it is not already the first entry, so that re-running this cell
# does not stack duplicate entries on sys.path.
if sys.path[:1] != [REPO_ROOT]:
    sys.path.insert(0, REPO_ROOT)

In [7]:
# test out LSTM vanilla version

import torch
import torch.nn as nn
from models.LSTM.LSTMFCDecoder import LSTMFCDecoder

# Build the LSTM + fully-connected decoder with the same window sizes as the dataset
# (lookback = 8, lookahead = 4).
# NOTE(review): input_size = 8 presumably equals the summed feature widths of the four
# past tensors (1 + 2 + 2 + 3), and fcnn_sizes[0] = 160 presumably equals
# hidden_size * lookback (20 * 8) - confirm against the LSTMFCDecoder implementation.
model = LSTMFCDecoder(
    input_size = 8,
    hidden_size = 20,
    num_layers = 2,
    y_size = 1,
    fcnn_sizes = (160,10,10,1),
    activation = nn.ReLU,
    lookback = 8,
    lookahead = 4,
    dtype = torch.float32
)

# evaluate the model
# Smoke test: push one batch of inputs through the model and print the output shape,
# then stop. The labels `b` are not used here.
for a,b in CA_dataloader:
    w = model(a)
    print(f"Shape of LSTM output is {tuple(w.shape)}.")
    break

Shape of LSTM output is (32, 1).


In [8]:
# test out LSTM autoregressive version

import torch
import torch.nn as nn
from models.LSTM.LSTMAR import LSTMAR

# Build the autoregressive LSTM with the same lookahead as the dataset (4).
# This rebinds `model`, replacing the LSTMFCDecoder instance created in the previous cell.
# NOTE(review): input_size = 8 presumably equals the summed feature widths of the four
# past tensors (1 + 2 + 2 + 3), u_size = 2 presumably the width of u_future, and
# fcnn_sizes[0] = 20 presumably hidden_size - confirm against the LSTMAR implementation.
model = LSTMAR(
    input_size = 8,
    u_size = 2,
    hidden_size = 20,
    num_layers = 2,
    y_size = 1,
    fcnn_sizes = (20,10,10,1),
    activation = nn.ReLU,
    lookahead = 4,
    dtype = torch.float32
)

# evaluate the model
# Smoke test: push one batch of inputs through the model and print the output shape,
# then stop. The labels `b` are not used here.
for a,b in CA_dataloader:
    w = model(a)
    print(f"Shape of LSTM output is {tuple(w.shape)}.")
    break

Shape of LSTM output is (32, 4, 1).
