In [9]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, Dataset
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
import matplotlib.dates as mdates
import pytorch_lightning as pl
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.loggers.csv_logs import CSVLogger

In [10]:
class TimeSeriesDataset(Dataset):
    """Sliding-window view over a 1-D series for next-step prediction.

    Each item is a ``(window, target)`` pair where ``window`` is
    ``data[i:i + train_window]`` and ``target`` is the single-element slice
    that immediately follows it, ``data[i + train_window:i + train_window + 1]``.
    All pairs are materialised once at construction time.
    """

    def __init__(self, data, train_window):
        """Store ``data`` and precompute every (window, target) pair.

        Args:
            data: Sliceable sequence supporting ``len()`` (list, array, tensor, ...).
            train_window: Number of consecutive elements in each input window.
        """
        self.data = data
        self.sequences = self.create_inout_sequences(data, train_window)

    def create_inout_sequences(self, input_data, tw):
        """Return the list of ``(window, target)`` slices of ``input_data``.

        There are ``len(input_data) - tw`` pairs; the list is empty when the
        series is not longer than the window.
        """
        n_windows = len(input_data) - tw
        return [
            (input_data[start:start + tw], input_data[start + tw:start + tw + 1])
            for start in range(n_windows)
        ]

    def __len__(self):
        """Number of precomputed (window, target) pairs."""
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return the ``idx``-th precomputed (window, target) pair."""
        return self.sequences[idx]

In [11]:
class ColoradoDataModule(pl.LightningDataModule):
  """Lightning data module for the Boulder energy-consumption series.

  Reads a CSV containing the columns ``Start_DateTime`` and
  ``Energy_Consumption``, orders the rows chronologically, splits them
  60 % / 20 % / remainder into train / validation / test, scales the values
  to ``[-1, 1]`` and serves sliding windows through ``TimeSeriesDataset``.

  The scaler is fitted on the training split only and then applied unchanged
  to the validation and test splits, so all three share one scale and no
  validation/test statistics leak into preprocessing. Validation/test values
  outside the training range therefore map outside ``[-1, 1]``. The fitted
  scaler is kept in ``self.preprocessing`` so predictions can be mapped back
  with ``inverse_transform``.

  Args:
    seq_len: Length of each input window handed to ``TimeSeriesDataset``.
    batch_size: Batch size used by every dataloader.
    num_workers: Worker processes used by every dataloader.
    data_path: Location of the CSV file to read.
  """

  def __init__(self, seq_len = 1, batch_size = 128, num_workers=0,
               data_path='ColoradoData_Boulder.csv'):
    super().__init__()
    self.seq_len = seq_len
    self.batch_size = batch_size
    self.num_workers = num_workers
    self.data_path = data_path
    self.train = None
    self.val = None
    self.test = None
    self.columns = None
    self.preprocessing = None  # fitted MinMaxScaler once setup() has run

  def prepare_data(self):
    """No download step; the CSV is read directly in ``setup``."""
    pass

  def setup(self, stage=None):
    """Load, split and scale the series for the requested ``stage``.

    ``'fit'`` and ``'validate'`` populate ``self.train`` / ``self.val``,
    ``'test'`` populates ``self.test`` and ``None`` populates all three.
    Splits that are already populated are not recomputed; any other stage
    is a no-op.
    """
    needs_fit = stage in (None, 'fit', 'validate')
    needs_test = stage in (None, 'test')
    fit_ready = self.train is not None
    test_ready = self.test is not None
    if (not needs_fit or fit_ready) and (not needs_test or test_ready):
      return

    # Parse the timestamps before sorting so the order is chronological
    # rather than lexicographic on the raw strings.
    df = pd.read_csv(self.data_path)
    df = df[['Start_DateTime', 'Energy_Consumption']].dropna()
    df['Start_DateTime'] = pd.to_datetime(df['Start_DateTime'], format='%Y-%m-%d %H:%M:%S')
    df = df.set_index('Start_DateTime').sort_index()

    # splitting data into train, val, test (test takes whatever remains)
    all_data = df['Energy_Consumption'].values.astype(float)
    train_size = int(0.6 * len(all_data))
    validation_size = int(0.2 * len(all_data))

    train_set = all_data[:train_size]
    validation_set = all_data[train_size:train_size + validation_size]
    test_set = all_data[train_size + validation_size:]

    # Fit on the training split only, whatever the stage, so the test split
    # is scaled exactly like the data the model was trained on.
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaler.fit(train_set.reshape(-1, 1))
    self.preprocessing = scaler

    if needs_fit:
      self.train = self._scale(scaler, train_set)
      self.val = self._scale(scaler, validation_set)

    if needs_test:
      self.test = self._scale(scaler, test_set)

  @staticmethod
  def _scale(scaler, values):
    """Apply the already-fitted ``scaler`` and return a flat float tensor."""
    scaled = scaler.transform(values.reshape(-1, 1))
    return torch.FloatTensor(scaled).view(-1)

  def _make_loader(self, series):
    """Wrap ``series`` in a windowed dataset and an order-preserving loader."""
    dataset = TimeSeriesDataset(series, self.seq_len)
    return DataLoader(dataset, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers)

  def train_dataloader(self):
    """Dataloader over the scaled training split."""
    return self._make_loader(self.train)

  def val_dataloader(self):
    """Dataloader over the scaled validation split."""
    return self._make_loader(self.val)

  def test_dataloader(self):
    """Dataloader over the scaled test split."""
    return self._make_loader(self.test)

In [None]:
class LSTM(nn.Module):
  """LSTM regressor that predicts the next value(s) of a series.

  The network is ``nn.LSTM(batch_first=True)`` followed by a linear layer
  applied to the output of the last time step.

  NOTE(review): ``configure_optimizers`` / ``training_step`` are Lightning
  hooks, which suggests ``pl.LightningModule`` was the intended base class;
  the base is left as ``nn.Module`` here -- confirm before using a Trainer.

  Args:
    input_size: Number of features per time step fed to the LSTM.
    hidden_layer_size: Size of the LSTM hidden state.
    output_size: Number of values predicted per sequence.
    n_features: Stored only; not used by the network itself.
    seq_len: Stored only.
    batch_size: Stored only; ``forward`` accepts any batch size.
    num_layers: Number of stacked LSTM layers.
    dropout: Dropout between LSTM layers (``nn.LSTM`` semantics).
    learning_rate: Learning rate for the Adam optimizer.
    criterion: Loss callable ``criterion(pred, target)``; ``nn.MSELoss()``
      is used when ``None``.
  """

  # Parameters after ``output_size`` now carry defaults: the original mix of
  # defaulted and non-defaulted parameters was a SyntaxError. Order is kept.
  def __init__(self, input_size=1, hidden_layer_size=100, output_size=1, n_features=None,
               seq_len=1, batch_size=128, num_layers=1, dropout=0.0, learning_rate=1e-3, criterion=None):
    super().__init__()
    self.input_size = input_size
    self.n_features = n_features
    self.seq_len = seq_len
    self.batch_size = batch_size
    self.num_layers = num_layers
    self.dropout = dropout
    self.criterion = criterion if criterion is not None else nn.MSELoss()
    self.learning_rate = learning_rate
    self.hidden_layer_size = hidden_layer_size
    self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_layer_size, num_layers=num_layers, dropout=dropout, batch_first=True)
    self.linear = nn.Linear(hidden_layer_size, output_size)
    # Last final (h, c) state, kept for inspection only. It is never fed back
    # in: its batch dimension would not match the next batch and it would keep
    # the previous autograd graph alive.
    self.hidden_cell = (torch.zeros(num_layers, 1, self.hidden_layer_size),
                        torch.zeros(num_layers, 1, self.hidden_layer_size))
    self.name = "LSTM"

  def forward(self, input_seq):
    """Predict from the last time step of each sequence.

    Args:
      input_seq: Float tensor of shape ``(batch, seq_len)``,
        ``(batch, seq_len, input_size)``, or a single 1-D sequence.

    Returns:
      ``(batch, output_size)`` predictions, or ``(output_size,)`` when a
      single 1-D sequence was given.
    """
    single_sequence = input_seq.dim() == 1
    if single_sequence:
      input_seq = input_seq.unsqueeze(0)
    # batch_first=True expects (batch, seq, feature).
    batch = input_seq.reshape(input_seq.size(0), -1, self.input_size)
    # No initial state is passed, so every call starts from zeros.
    lstm_out, (h_n, c_n) = self.lstm(batch)
    self.hidden_cell = (h_n.detach(), c_n.detach())
    predictions = self.linear(lstm_out[:, -1, :])
    return predictions[0] if single_sequence else predictions

  def configure_optimizers(self):
    """Return an Adam optimizer over all parameters."""
    return torch.optim.Adam(self.parameters(), lr=self.learning_rate)

  def training_step(self, batch, batch_idx):
    """Compute the training loss for one ``(inputs, targets)`` batch."""
    #https://www.kaggle.com/code/tartakovsky/pytorch-lightning-lstm-timeseries-clean-code
    inputs, targets = batch
    return self.criterion(self(inputs), targets)