In [40]:
from typing import Tuple

import itertools

import pickle

#from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

from sklearn.preprocessing import MinMaxScaler

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
torch.manual_seed(99)

from matplotlib import pyplot as plot

from sklearn.metrics import explained_variance_score, mean_squared_error, max_error, mean_absolute_error
from scipy.stats import pearsonr

In [41]:
class LSTM(nn.Module):

    output_size: int

    hidden_layer_size: int
    lstm: nn.LSTM
    linear: nn.Linear
    hidden_cell: tuple[torch.Tensor, torch.Tensor]
    scaler: MinMaxScaler
    device: str

    def __init__(self, input_size=1, hidden_layer_size=100, output_size=1, num_layers=1):
        super().__init__()

        self.output_size = output_size

        self.hidden_layer_size = hidden_layer_size

        self.lstm = nn.LSTM(input_size, hidden_layer_size)


        self.linear = nn.Linear(hidden_layer_size, output_size)

        self.hidden_cell = (torch.zeros(1,1,self.hidden_layer_size),
                            torch.zeros(1,1,self.hidden_layer_size))

        self.scaler = MinMaxScaler(feature_range=(-1, 1))

        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(self.device)

    def forward(self, input_seq):
        lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq) ,1, -1), self.hidden_cell)
        predictions = self.linear(lstm_out.view(len(input_seq), -1))
        return predictions[-1]
    
    def predict(self, input):
        X = self.scaler.fit_transform(input.reshape(-1, 1))
        X = seq = torch.FloatTensor(X).to(self.device)
        with torch.no_grad():
            self.hidden = (torch.zeros(1, 1, self.hidden_layer_size),
                            torch.zeros(1, 1, self.hidden_layer_size))
            preds = self(X)
            Y = np.array(preds.cpu())
        actual_predictions = self.scaler.inverse_transform(Y.reshape(-1, 1))
        return actual_predictions

    def set_device(self):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(self.device)
        
        self.to(self.device)

    def train(self, train_data, train_window = 50, epochs = 100):

        # Initiate loss function and optimizer

        loss_function = nn.MSELoss().to(self.device)
        optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
        
        # Scale
        
        train_data_normalized = self.scaler.fit_transform(train_data.reshape(-1, 1))
        train_data_normalized = torch.FloatTensor(train_data_normalized).view(-1).to(self.device)

        # Create sequences
        
        train_inout_seq = []
        L = len(train_data_normalized)
        for i in range(L-train_window):
            train_seq = train_data_normalized[i:i+train_window]
            # TODO - offset on the start for direct decoding with several (5) of these models giving (5) concurrent predictions?
            train_label = train_data_normalized[i+train_window:i+train_window+self.output_size]
            train_inout_seq.append((train_seq ,train_label))
        
        # Train the model

        for i in range(epochs):
            _ix = -1
            for seq, labels in train_inout_seq:
                seq = seq.to(self.device)
                labels = labels.to(self.device)

                if len(labels) != self.output_size:
                    continue

                _ix +=1
                
                #if torch.isnan(seq).any().item():
                #    print(f"nan values in seq at {_ix}")
                #    continue

                #if pd.isna(labels.item()):
                #    print(f"nan labels at {_ix}")
                #    continue

                optimizer.zero_grad()
                self.hidden_cell = (torch.zeros(1, 1, self.hidden_layer_size).to(self.device),
                                torch.zeros(1, 1, self.hidden_layer_size).to(self.device))

                y_pred = self(seq)
                #if pd.isna(y_pred.item()):
                #    print(f"nan preds at {_ix}")
                #    continue

                single_loss = loss_function(y_pred, labels)
                #if pd.isna(single_loss.item()):
                #    print(f"nan loss at {_ix}")
                #    raise
                single_loss.backward()
                optimizer.step()

            if i%25 == 1:
                print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')

        print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')

        # Finalize
        #self.eval()

# Train the Model

In [42]:
df = pd.read_csv("./data/datasets/historical.data")
df = df.sort_values(by="date",ascending=True)
df_inside = df.loc[df["label"] == "inside"].drop(["label"], axis=1)
df_outside = df.loc[df["label"] == "outside"].drop(["label"], axis=1)
inside_arr=np.array(df_inside)
outside_arr=np.array(df_outside)

In [43]:
def split_data(arr: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    total_len,_=arr.shape

    train_p = 0.80
    test_p = 0.10
    ivs_p = 0.10

    train_len = round(total_len*train_p)
    test_len = round(total_len*test_p)
    ivs_len = round(total_len*ivs_p)

    model_len = train_len+test_len

    total_used_len = model_len+ivs_len

    train = arr[:train_len]
    test = arr[train_len:model_len]
    ivs = arr[model_len:total_used_len]

    return train, test, ivs


inside_train, inside_test, inside_ivs = split_data(inside_arr)

In [44]:
per_min = (2*60) + 30
per_30_mins = per_min / 30

print("Number of Wanted Predictions: " + str(per_30_mins))

Number of Wanted Predictions: 5.0


In [45]:
train_data = inside_train[:,1]

In [46]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LSTM().to(device)

cuda


In [47]:
model.train(train_data, epochs=1)

epoch:   0 loss: 0.0002483580


In [48]:
test_data = inside_test[:,1]

In [49]:
print(model.predict(test_data[0:49]))
print(test_data[50])

[[23.494268]]
23.375


In [50]:
print(model.predict(test_data[1:50]))
print(test_data[51])

[[23.527887]]
23.437


In [51]:
print(model.predict(test_data[2:51]))
print(test_data[52])

[[23.507456]]
23.312


# Test the Model

In [52]:

test_window = 50

test_inout_seq = []
L = len(test_data)
for i in range(L-test_window):
    train_seq = test_data[i:i+test_window]
    train_label = test_data[i+test_window:i+test_window+1]
    test_inout_seq.append((train_seq ,train_label))

In [53]:
def printRegStatistics(truth, preds):
    print("The RVE is: ", explained_variance_score(truth, preds))
    print("The rmse is: ", mean_squared_error(truth, preds, squared=False))
    corr, pval = pearsonr(truth, preds)
    print("The Correlation Score is is: %6.4f (p-value=%e)\n"%(corr,pval))
    print("The Maximum Error is is: ", max_error(truth, preds))
    print("The Mean Absolute Error is: ", mean_absolute_error(truth, preds))

test_X = [ x for x, _ in test_inout_seq ]
true_Y = [ y[0] for _, y in test_inout_seq ]

In [54]:
pred_Y = []
for X in test_X:
    Y = model.predict(X)[0]
    pred_Y.append(Y)

In [55]:
printRegStatistics(true_Y, pred_Y)

The RVE is:  0.8978764419591279
The rmse is:  0.1934079703350684
The Correlation Score is is: 0.9481 (p-value=0.000000e+00)

The Maximum Error is is:  1.2700949361588663
The Mean Absolute Error is:  0.15038657969655386


In [56]:
print(model.predict(np.array([test_data[1]])))
print(test_data[2])

[[23.774408]]
23.562


In [57]:
test_data

array([23.625, 23.437, 23.562, ..., 24.0, 24.312, 24.625], dtype=object)

In [58]:
pred_Y = model.predict(np.array(test_data[0:49]))[0]
pred_Y = list(pred_Y)

test_X_stream = test_data[50:]
_ix = 0
for X in test_X_stream:
    if _ix == len(test_X_stream) - 1:
        break
    Y = model.predict(np.array([X]))[0,0]
    pred_Y.append(Y)
    _ix+=1

pred_Y = np.array(pred_Y)
true_Y = test_data[50:]

In [59]:
printRegStatistics(true_Y, pred_Y)

The RVE is:  0.9215021944542544
The rmse is:  0.3666193784647493
The Correlation Score is is: 0.9608 (p-value=0.000000e+00)

The Maximum Error is is:  1.650580470826835
The Mean Absolute Error is:  0.3415378658962491


# Find the best Hyperparams

In [60]:
results = []

truth = test_data[50:]

hidden_layer_sizes = [50,100,150,200]
nums_layers = [1,2,4]

hyper_params = itertools.product(hidden_layer_sizes, nums_layers)

for params in hyper_params:

    hidden_layer_size, num_layers = params

    model = LSTM(output_size=1, hidden_layer_size=hidden_layer_size, num_layers=num_layers).to(device)
    model.train(train_data, epochs=1)
    #model.eval()

    preds = model.predict(np.array(test_data[0:49]))[0]
    preds = list(preds)

    _ix = 0
    for X in test_data[50:]:
        if _ix == len(test_data[50:]) - 1:
            break
        Y = model.predict(np.array([X]))[0,0]
        preds.append(Y)
        _ix+=1
        

    RVE = explained_variance_score(truth, preds)

    result = {"RVE": RVE, "hidden_layer_size": hidden_layer_size, "num_layers": num_layers}
    print(f"RVE {RVE} -> hidden_layer_size '{hidden_layer_size}' ; num_layers '{num_layers}'")
    results.append(result)

cuda
epoch:   0 loss: 0.0001781297
RVE 0.9215223812215173 -> hidden_layer_size '50' ; num_layers '1'
cuda
epoch:   0 loss: 0.0009761230
RVE 0.9215358010995972 -> hidden_layer_size '50' ; num_layers '2'
cuda
epoch:   0 loss: 0.0005194269
RVE 0.9214689878168127 -> hidden_layer_size '50' ; num_layers '4'
cuda
epoch:   0 loss: 0.0001543104
RVE 0.9214912420035739 -> hidden_layer_size '100' ; num_layers '1'
cuda
epoch:   0 loss: 0.0002654023
RVE 0.9215121020220935 -> hidden_layer_size '100' ; num_layers '2'
cuda
epoch:   0 loss: 0.0001428257
RVE 0.9215137929952318 -> hidden_layer_size '100' ; num_layers '4'
cuda
epoch:   0 loss: 0.0002610044
RVE 0.9215320601239063 -> hidden_layer_size '150' ; num_layers '1'
cuda
epoch:   0 loss: 0.0002356943
RVE 0.9215061980910825 -> hidden_layer_size '150' ; num_layers '2'
cuda
epoch:   0 loss: 0.0003015394
RVE 0.9215073628998662 -> hidden_layer_size '150' ; num_layers '4'
cuda
epoch:   0 loss: 0.0002456461
RVE 0.921430979552585 -> hidden_layer_size '200' ;

# With multiple inputs + outputs

In [61]:
results_multilabel = []

truth = test_data[50:]

hidden_layer_sizes = [50,100,150,200]
nums_layers = [1,2,4]

hyper_params = itertools.product(hidden_layer_sizes, nums_layers)

for params in hyper_params:

    hidden_layer_size, num_layers = params

    model = LSTM(output_size=5, hidden_layer_size=hidden_layer_size, num_layers=num_layers).to(device)
    model.train(train_data, epochs=1)
    #model.eval()

    preds = model.predict(np.array(test_data[0:49]))[0]
    preds = list(preds)

    _ix = 0
    for X in test_data[50:]:
        if _ix == len(test_data[50:]) - 1:
            break
        Y = model.predict(np.array([X]))[0,0]
        preds.append(Y)
        _ix+=1
        

    RVE = explained_variance_score(truth, preds)

    result = {"RVE": RVE, "hidden_layer_size": hidden_layer_size, "num_layers": num_layers}
    print(f"RVE {RVE} -> hidden_layer_size '{hidden_layer_size}' ; num_layers '{num_layers}'")
    results_multilabel.append(result)

cuda
epoch:   0 loss: 0.0002685341
RVE 0.9214938741789619 -> hidden_layer_size '50' ; num_layers '1'
cuda
epoch:   0 loss: 0.0002727906
RVE 0.9214787378947527 -> hidden_layer_size '50' ; num_layers '2'
cuda
epoch:   0 loss: 0.0002610883
RVE 0.9215093735563208 -> hidden_layer_size '50' ; num_layers '4'
cuda
epoch:   0 loss: 0.0002999370
RVE 0.9214766680656206 -> hidden_layer_size '100' ; num_layers '1'
cuda
epoch:   0 loss: 0.0002586408
RVE 0.9213946255915522 -> hidden_layer_size '100' ; num_layers '2'
cuda
epoch:   0 loss: 0.0002734783
RVE 0.9215010420884162 -> hidden_layer_size '100' ; num_layers '4'
cuda
epoch:   0 loss: 0.0002559267
RVE 0.9215099047368696 -> hidden_layer_size '150' ; num_layers '1'
cuda
epoch:   0 loss: 0.0002418453
RVE 0.9215074634137506 -> hidden_layer_size '150' ; num_layers '2'
cuda
epoch:   0 loss: 0.0002745653
RVE 0.9214963739094709 -> hidden_layer_size '150' ; num_layers '4'
cuda
epoch:   0 loss: 0.0002724830
RVE 0.9214516656042985 -> hidden_layer_size '200' 

In [71]:
model_data = np.concatenate((train_data, test_data), axis=0)

model = LSTM(output_size=5, hidden_layer_size=150, num_layers=1).to(device)
#model.train(model_data, epochs=1)
model.train(model_data, epochs=26)
#model.train(model_data, epochs=200)

ivs_data = inside_ivs[:,1]

truth = ivs_data[50:]

preds = model.predict(np.array(ivs_data[0:49]))[0]
preds = list(preds)

_ix = 0
for X in ivs_data[50:]:
    if _ix == len(ivs_data[50:]) - 1:
        break
    Y = model.predict(np.array([X]))[0,0]
    preds.append(Y)
    _ix+=1

printRegStatistics(truth, preds)

cuda
epoch:   1 loss: 0.00307437
epoch:  26 loss: 0.00216066
epoch:  51 loss: 0.00314611


KeyboardInterrupt: 

In [None]:
with open("./model","wb") as o:
    pickle.dump(model, o)

In [None]:
with open("./model","rb") as o:
    model = pickle.load(o, encoding='bytes')
print(model)

LSTM(
  (lstm): LSTM(1, 100)
  (linear): Linear(in_features=100, out_features=5, bias=True)
)
