# Objective

* Dataset from: https://www.kaggle.com/uciml/electric-power-consumption-data-set
* Use PyTorchLightning to train and predict a model for 'Global_active_power'
    * Create a Datset for the time series
    * Create a Data Module
    * Create a model
* Use the information from the notebook 'DataAnalysis'
* The notebook has been inspired by: https://www.kaggle.com/tartakovsky/pytorch-lightning-lstm-timeseries-clean-code

# Setup

In [3]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import pytorch_lightning as pl
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.loggers import WandbLogger

import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="1"

# Create a Dataset

In [4]:
class PowConDataSet(Dataset):
    def __init__(self, X, y, seq_len=1):
        
        self.X = torch.tensor(X).float()
        self.y = torch.tensor(y).float()
        self.seq_len = seq_len 
        
    def __getitem__(self, idx):
        return (self.X[idx:idx+self.seq_len-1], self.y[idx+self.seq_len-1])
        
    def __len__(self):
        return self.X.__len__() - (self.seq_len-1)       

# Create a Data Module

In [5]:
class PowConDataModule(pl.LightningDataModule):
    
    def __init__(self, seq_len = 1, batch_size = 128, num_workers=8):
        super().__init__()
        self.seq_len = seq_len
        self.batch_size = batch_size
        self.num_workers = num_workers
    
    def setup(self, stage):
        '''
        * read Data
        * 'Date' and 'Time' columns are merged into 'date' index
        * convert all to float and delete nans
        * resampled to hourly intervals
        * define X (features) and y (lables)
        '''
        # read data
        filepath = '../data/household_power_consumption.txt'
        df_powcon = pd.read_csv(filepath, sep=';',
                        parse_dates={'date':['Date','Time']},
                        infer_datetime_format=True,
                        index_col='date')
        
        # change types to float (and all no number values to nan)
        for i in range(len(df_powcon.columns)):
            df_powcon.iloc[:,i] = pd.to_numeric(df_powcon.iloc[:,i], errors='coerce')
        
        # resamble to hourly means
        df_powcon = df_powcon.resample('h').mean()
        
        df_powcon.dropna(inplace=True)
        df_powcon = df_powcon.astype(float)
        
        # define features (X) and labels (y)
        y = df_powcon['Global_active_power'].values

        columns = ['Global_reactive_power', 'Voltage', 'Global_intensity', 'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3']
        X = np.zeros((len(columns), len(y)))
        X = df_powcon[columns].values
        
        # train - valid - test splits
        X_tmp, self.X_test, y_tmp, self.y_test = train_test_split(X, y, shuffle=False, test_size=.2)
        self.X_train, self.X_val, self.y_train, self.y_val = train_test_split(X_tmp, y_tmp, shuffle=False, test_size=.25)
        print('train:', self.X_train.shape, self.y_train.shape)
        print('valid:', self.X_val.shape, self.y_val.shape)
        print('test:', self.X_test.shape, self.y_test.shape)


        # normalize each column
        scaler = StandardScaler()
        scaler.fit(self.X_train)
            
        self.X_train = scaler.transform(self.X_train)
        self.X_val = scaler.transform(self.X_val)
        self.X_test = scaler.transform(self.X_test)
        self.y_train = self.y_train.reshape(-1,1)
        self.y_val = self.y_val.reshape(-1,1)
        self.y_test = self.y_test.reshape(-1,1)
        
    def train_dataloader(self):
        '''
        * no further transformation necessary
        * wrap dataset in dataloader
        '''
        # create dataset
        train_dataset = PowConDataSet(self.X_train, self.y_train, seq_len=self.seq_len)
        
        # wrap dataset in dataloader
        train_dataloader = DataLoader(train_dataset, batch_size = self.batch_size, shuffle = False, 
                                      num_workers = self.num_workers)
        
        return train_dataloader
    
    def val_dataloader(self):
        # create dataset
        val_dataset = PowConDataSet(self.X_val, self.y_val, seq_len=self.seq_len)
        
        # wrap dataset in dataloader
        val_dataloader = DataLoader(val_dataset, batch_size = self.batch_size, shuffle = False,
                                   num_workers = self.num_workers)
        
        return val_dataloader
    
    def test_dataloader(self):
        # create dataset
        test_dataset = PowConDataSet(self.X_test, self.y_test, seq_len=self.seq_len)
        
        # wrap dataset in dataloader
        test_dataloader = DataLoader(test_dataset, batch_size = self.batch_size, shuffle = False,
                                   num_workers = self.num_workers)
        
        return test_dataloader

# Create a Model

In [12]:
class PowConModel(pl.LightningModule):
    
    def __init__(self, n_features, hidden_size, seq_len,
                num_layers, dropout,
                 learning_rate, criterion):
        super().__init__()
        self.n_features = n_features
        self.hidden_size = hidden_size
        self.seq_len = seq_len
        self.num_layers = num_layers
        self.dropout = dropout
        self.learning_rate = learning_rate
        self.criterion = criterion
        
        self.lstm = nn.LSTM(input_size=n_features,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            dropout=dropout,
                            batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)
        
    def forward(self, x):
        lstm_out, _ = self.lstm(x) # lstm_out = (batch_size, seq_len, hidden_size)
        x = self.fc(lstm_out[:,-1])
        return x
    
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        return optimizer
    
    def training_step(self, train_batch, batch_idx):
        x, y = train_batch
        y_hat = self.forward(x)
        loss = self.criterion(y_hat, y)
        #result = pl.TrainResult(minimize=loss)
        self.log('train_loss', loss, prog_bar=True)
        return loss

    def validation_step(self, val_batch, batch_idx):
        x, y = val_batch
        y_hat = self.forward(x)
        loss = self.criterion(y_hat, y)
        #result = pl.EvalResult(checkpoint_on=loss)
        self.log('val_loss', loss)
        return loss
    
    def test_step(self, test_batch, batch_idx):
        x, y = test_batch
        y_hat = self.forward(x)
        loss = self.criterion(y_hat,y)
        #result = pl.EvalResult()
        self.log('test_loss', loss)
        return loss  

# Set Parameters

In [7]:
p = dict(
    seq_len = 24,
    batch_size = 128, 
    criterion = nn.MSELoss(),
    max_epochs = 5,
    n_features = 6,
    hidden_size = 10,
    num_layers = 1,
    dropout = 0.2,
    learning_rate = 0.01,
)

# Train 

In [13]:
from fix_path import fix_python_path_if_working_locally
fix_python_path_if_working_locally()
from models.lstm import LSTMRegressor

seed_everything(42)
wandb_logger = WandbLogger(project="spatio-temporal prediction")


data_module = PowConDataModule(seq_len = p['seq_len'],
                           batch_size = p['batch_size'])

model = PowConModel(n_features = p['n_features'],
                    hidden_size = p['hidden_size'],
                    seq_len = p['seq_len'],
                    criterion = p['criterion'],
                    num_layers = p['num_layers'],
                    dropout = p['dropout'],
                    learning_rate = p['learning_rate'])

trainer = Trainer(max_epochs=p['max_epochs'], gpus=0)
trainer.fit(model, data_module)

  "num_layers={}".format(dropout, num_layers))
GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  "GPU available but not used. Set the gpus flag in your trainer `Trainer(gpus=1)` or script `--gpus=1`."
  fn(*args, **kwargs)

  | Name      | Type    | Params
--------------------------------------
0 | criterion | MSELoss | 0     
1 | lstm      | LSTM    | 720   
2 | fc        | Linear  | 11    
--------------------------------------
731       Trainable params
0         Non-trainable params
731       Total params
0.003     Total estimated model params size (MB)


train: (20500, 6) (20500,)
valid: (6834, 6) (6834,)
test: (6834, 6) (6834,)


Validation sanity check: 0it [00:00, ?it/s]

tensor([1.5841]) tensor([0.3659])
tensor([0.7418]) tensor([0.3887])


Training: 0it [00:00, ?it/s]

tensor([3.3260]) tensor([0.3822], grad_fn=<SelectBackward0>)
tensor([1.6982]) tensor([0.4764], grad_fn=<SelectBackward0>)
tensor([2.5357]) tensor([0.4981], grad_fn=<SelectBackward0>)
tensor([1.4591]) tensor([0.4960], grad_fn=<SelectBackward0>)
tensor([1.3605]) tensor([0.6612], grad_fn=<SelectBackward0>)
tensor([2.6555]) tensor([0.6280], grad_fn=<SelectBackward0>)
tensor([1.6331]) tensor([0.6202], grad_fn=<SelectBackward0>)
tensor([1.8474]) tensor([0.7419], grad_fn=<SelectBackward0>)
tensor([1.5726]) tensor([0.6845], grad_fn=<SelectBackward0>)
tensor([2.5464]) tensor([1.2417], grad_fn=<SelectBackward0>)
tensor([0.3206]) tensor([0.9861], grad_fn=<SelectBackward0>)
tensor([2.5486]) tensor([1.0717], grad_fn=<SelectBackward0>)
tensor([0.3284]) tensor([1.0648], grad_fn=<SelectBackward0>)
tensor([0.2797]) tensor([0.8224], grad_fn=<SelectBackward0>)
tensor([0.2694]) tensor([0.7906], grad_fn=<SelectBackward0>)
tensor([1.4053]) tensor([1.4960], grad_fn=<SelectBackward0>)
tensor([0.3082]) tensor(

Validating: 0it [00:00, ?it/s]

tensor([1.5841]) tensor([1.7833])
tensor([0.7418]) tensor([1.4179])
tensor([0.4031]) tensor([0.6920])
tensor([1.5727]) tensor([1.7069])
tensor([0.4598]) tensor([0.7868])
tensor([0.3609]) tensor([1.0276])
tensor([0.8862]) tensor([1.6712])
tensor([0.6603]) tensor([1.4622])
tensor([0.3223]) tensor([0.7597])
tensor([1.4837]) tensor([1.7010])
tensor([0.3734]) tensor([1.0690])
tensor([1.5351]) tensor([0.9243])
tensor([0.5969]) tensor([0.7970])
tensor([0.3928]) tensor([0.8763])
tensor([3.0504]) tensor([1.4427])
tensor([0.2807]) tensor([0.6867])
tensor([1.8418]) tensor([1.5975])
tensor([0.3923]) tensor([0.8049])
tensor([0.7017]) tensor([0.7609])
tensor([0.2618]) tensor([0.7189])
tensor([0.2907]) tensor([0.7162])
tensor([0.3284]) tensor([1.1210])
tensor([0.3462]) tensor([0.8687])
tensor([1.0652]) tensor([1.5424])
tensor([0.5058]) tensor([0.7882])
tensor([1.7689]) tensor([2.1702])
tensor([1.4982]) tensor([2.1850])
tensor([0.8277]) tensor([1.2586])
tensor([0.3212]) tensor([1.1375])
tensor([0.4049

Validating: 0it [00:00, ?it/s]

tensor([1.5841]) tensor([1.5473])
tensor([0.7418]) tensor([1.5083])
tensor([0.4031]) tensor([0.4896])
tensor([1.5727]) tensor([1.5501])
tensor([0.4598]) tensor([0.6722])
tensor([0.3609]) tensor([0.9674])
tensor([0.8862]) tensor([1.6039])
tensor([0.6603]) tensor([1.4444])
tensor([0.3223]) tensor([0.5773])
tensor([1.4837]) tensor([1.4765])
tensor([0.3734]) tensor([0.5854])
tensor([1.5351]) tensor([0.8131])
tensor([0.5969]) tensor([0.5457])
tensor([0.3928]) tensor([0.6879])
tensor([3.0504]) tensor([1.5497])
tensor([0.2807]) tensor([0.5743])
tensor([1.8418]) tensor([2.0490])
tensor([0.3923]) tensor([0.7196])
tensor([0.7017]) tensor([0.6377])
tensor([0.2618]) tensor([0.6387])
tensor([0.2907]) tensor([0.5480])
tensor([0.3284]) tensor([1.0083])
tensor([0.3462]) tensor([0.8965])
tensor([1.0652]) tensor([2.1559])
tensor([0.5058]) tensor([0.6576])
tensor([1.7689]) tensor([1.9189])
tensor([1.4982]) tensor([2.5549])
tensor([0.8277]) tensor([1.3087])
tensor([0.3212]) tensor([0.8483])
tensor([0.4049

Validating: 0it [00:00, ?it/s]

tensor([1.5841]) tensor([1.5455])
tensor([0.7418]) tensor([1.3917])
tensor([0.4031]) tensor([0.3799])
tensor([1.5727]) tensor([1.5093])
tensor([0.4598]) tensor([0.6308])
tensor([0.3609]) tensor([0.8139])
tensor([0.8862]) tensor([1.4562])
tensor([0.6603]) tensor([1.3090])
tensor([0.3223]) tensor([0.4162])
tensor([1.4837]) tensor([1.3962])
tensor([0.3734]) tensor([0.5509])
tensor([1.5351]) tensor([0.7521])
tensor([0.5969]) tensor([0.5734])
tensor([0.3928]) tensor([0.6142])
tensor([3.0504]) tensor([1.4560])
tensor([0.2807]) tensor([0.4665])
tensor([1.8418]) tensor([2.0282])
tensor([0.3923]) tensor([0.5584])
tensor([0.7017]) tensor([0.5260])
tensor([0.2618]) tensor([0.5466])
tensor([0.2907]) tensor([0.4902])
tensor([0.3284]) tensor([0.8641])
tensor([0.3462]) tensor([0.7529])
tensor([1.0652]) tensor([2.1279])
tensor([0.5058]) tensor([0.6081])
tensor([1.7689]) tensor([1.8252])
tensor([1.4982]) tensor([2.5267])
tensor([0.8277]) tensor([1.1192])
tensor([0.3212]) tensor([0.7810])
tensor([0.4049

Validating: 0it [00:00, ?it/s]

tensor([1.5841]) tensor([1.5146])
tensor([0.7418]) tensor([1.3350])
tensor([0.4031]) tensor([0.3174])
tensor([1.5727]) tensor([1.5186])
tensor([0.4598]) tensor([0.6582])
tensor([0.3609]) tensor([0.6681])
tensor([0.8862]) tensor([1.3880])
tensor([0.6603]) tensor([1.2664])
tensor([0.3223]) tensor([0.3479])
tensor([1.4837]) tensor([1.3816])
tensor([0.3734]) tensor([0.5941])
tensor([1.5351]) tensor([0.7939])
tensor([0.5969]) tensor([0.7357])
tensor([0.3928]) tensor([0.6125])
tensor([3.0504]) tensor([1.4606])
tensor([0.2807]) tensor([0.5076])
tensor([1.8418]) tensor([2.0297])
tensor([0.3923]) tensor([0.5331])
tensor([0.7017]) tensor([0.5142])
tensor([0.2618]) tensor([0.5567])
tensor([0.2907]) tensor([0.5250])
tensor([0.3284]) tensor([0.8317])
tensor([0.3462]) tensor([0.7456])
tensor([1.0652]) tensor([2.2192])
tensor([0.5058]) tensor([0.6690])
tensor([1.7689]) tensor([1.8089])
tensor([1.4982]) tensor([2.4958])
tensor([0.8277]) tensor([1.0764])
tensor([0.3212]) tensor([0.7637])
tensor([0.4049

Validating: 0it [00:00, ?it/s]

tensor([1.5841]) tensor([1.5437])
tensor([0.7418]) tensor([1.3096])
tensor([0.4031]) tensor([0.2668])
tensor([1.5727]) tensor([1.5141])
tensor([0.4598]) tensor([0.7210])
tensor([0.3609]) tensor([0.5642])
tensor([0.8862]) tensor([1.3438])
tensor([0.6603]) tensor([1.2582])
tensor([0.3223]) tensor([0.2337])
tensor([1.4837]) tensor([1.4086])
tensor([0.3734]) tensor([0.6157])
tensor([1.5351]) tensor([0.8595])
tensor([0.5969]) tensor([0.8270])
tensor([0.3928]) tensor([0.6548])
tensor([3.0504]) tensor([1.5029])
tensor([0.2807]) tensor([0.4392])
tensor([1.8418]) tensor([2.0165])
tensor([0.3923]) tensor([0.2858])
tensor([0.7017]) tensor([0.3737])
tensor([0.2618]) tensor([0.4901])
tensor([0.2907]) tensor([0.4199])
tensor([0.3284]) tensor([0.7411])
tensor([0.3462]) tensor([0.5721])
tensor([1.0652]) tensor([2.1758])
tensor([0.5058]) tensor([0.6062])
tensor([1.7689]) tensor([1.8158])
tensor([1.4982]) tensor([2.5188])
tensor([0.8277]) tensor([0.9262])
tensor([0.3212]) tensor([0.7021])
tensor([0.4049