In [209]:
import pickle
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error as mse

from matplotlib import cm
import matplotlib.pyplot as plt
import numpy as np

from tqdm.notebook import tqdm

import torch
import torch.nn as nn
import torch.optim as optim

In [8]:
with open('train_data.pkl', 'rb') as f:
    test_data = pickle.load(f)

with open('val_data.pkl', 'rb') as f:
    val_data = pickle.load(f)

with open('test_data.pkl', 'rb') as f:
    train_data = pickle.load(f)

In [26]:
N_COMPONENTS = 3
INPUT_STEP = 10
OUTPUT_STEP = 1

## PCA

### Try with multiple frame

In [3]:
pca = PCA(n_components=N_COMPONENTS)

In [10]:
pca.fit(train_data)

PCA(n_components=3)

In [11]:
pca.components_

array([[ 0.08543325,  0.08543325,  0.08543323,  0.08543324,  0.08543325,
         0.08543325,  0.08543324,  0.08543325,  0.08543325,  0.08543324,
         0.08543324,  0.08543325,  0.08543325,  0.08543324,  0.08543325,
         0.08543325,  0.08543326,  0.08543323,  0.08543326,  0.08543324,
         0.08543325,  0.08543322,  0.08543325,  0.08543326,  0.08543324,
         0.08543324,  0.08543324,  0.08543326,  0.08543324,  0.08543324,
         0.08543325,  0.08543325,  0.08543324,  0.08543323,  0.08543325,
         0.08543325,  0.08543323,  0.08543324,  0.08543325,  0.08543324,
         0.08543324,  0.08543325,  0.08543326,  0.08543325,  0.08543324,
         0.08543325,  0.08543326,  0.08543324,  0.08543323,  0.08543324,
         0.08543326,  0.08543322,  0.08543325,  0.08543324,  0.08543326,
         0.08543322,  0.08543325,  0.08543324,  0.08543325,  0.08543323,
         0.08543325,  0.08543325,  0.08543325,  0.08543323,  0.08543325,
         0.08543325,  0.08543323,  0.08543324,  0.0

In [12]:
pca.explained_variance_

array([8.27107338e+05, 1.36517215e-05, 4.00563914e-06])

## Reconstructing the original data
Since we have the eigenvalues and eigenvectors of the covariance matrix of the pressure data (PCA), we can reconstruct the original data.

In [13]:
transformed_data = pca.transform(train_data)

In [14]:
reconstructed_sklearn = pca.inverse_transform(transformed_data)

In [16]:
mse(reconstructed_sklearn, train_data, squared=False)

0.00028923178488159633

## Learning based on transformed data

In [105]:
transformed_data = torch.Tensor(transformed_data.T)

In [135]:
# Training
# 1 2 3 -> 4
# 2 3 4 -> 5
# Testing
# 3 4 5 -> (6) todo mse of this step
# 4 5 (6) -> (7) todo mse of this step
# 5 (6) (7) -> (8) todo mse of this step
# todo 5 is seen, so performance may be benefited from this

In [110]:
srcs = transformed_data.unfold(1, INPUT_STEP, 1)

In [156]:
tgts = transformed_data[:, INPUT_STEP:]

In [148]:
srcs[0].shape

torch.Size([984912, 10])

In [157]:
tgts.shape

torch.Size([3, 984911])

In [150]:
srcs[0]

tensor([[-1003.5660,   -33.3699,  1431.9847,  ...,   901.2780,    27.1452,
         -1398.3889],
        [  -33.3699,  1431.9847, -1027.7018,  ...,    27.1452, -1398.3889,
           281.4958],
        [ 1431.9847, -1027.7018,   792.4210,  ..., -1398.3889,   281.4958,
          1157.5013],
        ...,
        [  -96.8112, -1341.9706, -1127.2883,  ...,  -115.4223, -1476.8699,
          -367.7948],
        [-1341.9706, -1127.2883,   878.6872,  ..., -1476.8699,  -367.7948,
          -231.1852],
        [-1127.2883,   878.6872,   806.2330,  ...,  -367.7948,  -231.1852,
         -1049.1104]])

In [129]:
srcs[:, 0, :], tgts[:, 0]

(tensor([[-1.0036e+03, -3.3370e+01,  1.4320e+03, -1.0277e+03,  7.9242e+02,
          -3.4004e+02, -7.5217e+02,  9.0128e+02,  2.7145e+01, -1.3984e+03],
         [-5.5365e-04, -6.0106e-04, -6.7077e-04, -5.5244e-04, -6.4014e-04,
          -5.8555e-04, -5.6565e-04, -6.4527e-04, -6.0434e-04, -5.3453e-04],
         [-2.8339e-05,  2.0457e-06,  3.0842e-05, -2.9358e-05,  1.6483e-05,
          -1.2228e-05, -2.2981e-05,  1.8214e-05,  6.9045e-06, -3.9138e-05]]),
 tensor([ 2.8150e+02, -6.1572e-04,  5.4138e-06]))

In [130]:
srcs[:, 1, :], tgts[:, 1]

(tensor([[-3.3370e+01,  1.4320e+03, -1.0277e+03,  7.9242e+02, -3.4004e+02,
          -7.5217e+02,  9.0128e+02,  2.7145e+01, -1.3984e+03,  2.8150e+02],
         [-6.0106e-04, -6.7077e-04, -5.5244e-04, -6.4014e-04, -5.8555e-04,
          -5.6565e-04, -6.4527e-04, -6.0434e-04, -5.3453e-04, -6.1572e-04],
         [ 2.0457e-06,  3.0842e-05, -2.9358e-05,  1.6483e-05, -1.2228e-05,
          -2.2981e-05,  1.8214e-05,  6.9045e-06, -3.9138e-05,  5.4138e-06]]),
 tensor([ 1.1575e+03, -6.5779e-04,  2.6151e-05]))

In [186]:
def train(model):
    model.train()
    loss = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-10)
    for epoch in range(3):  # loop over the dataset multiple times
        for i in tqdm(range(srcs.shape[1]//100)):
            optimizer.zero_grad()
            running_loss = 0.0

            src = srcs[:,i,:]
            tgt = torch.unsqueeze(tgts[:,i], dim=0)

            outputs, _ = model(src)
            l = loss(outputs, tgt)
            l.backward()
            optimizer.step()

            # print statistics
            running_loss += l
        print(f'[{epoch + 1}] loss: {running_loss / 2000:}')
    return model  

### LSTM

In [187]:
lstm = nn.LSTM(INPUT_STEP, OUTPUT_STEP, 2)
lstm = train(lstm)

  0%|          | 0/9849 [00:00<?, ?it/s]

[1] loss: 334.6110534667969


  0%|          | 0/9849 [00:00<?, ?it/s]

[2] loss: 334.6110534667969


  0%|          | 0/9849 [00:00<?, ?it/s]

[3] loss: 334.6110534667969


In [28]:
lstm.eval()
lstm_eval = transformed_data.copy()
for i in range(4, p_data.shape[1]-1):
    print(lstm_eval.shape)
    predict, _ = lstm(torch.unsqueeze(torch.FloatTensor(lstm_eval[-2-INPUT_STEP:-2]), 0))                               
    print(predict.shape)
    lstm_eval = np.append(lstm_eval, predict.squeeze(2).detach().numpy(), axis=0)  # append prediction 

(5, 3)
torch.Size([1, 3, 1])
(6, 3)
torch.Size([1, 3, 1])
(7, 3)
torch.Size([1, 3, 1])


In [29]:
reconstruct(lstm_eval)

0.004563250385318567


### Transformer

We will learn based on this transformed data

In [32]:
reconstructed_sklearn = pca.inverse_transform(np.random.random((10,3)))

In [33]:
reconstructed_sklearn.shape, p_learn.shape

((10, 272551), (5, 272551))

In [190]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [210]:
loss = nn.MSELoss()
transformer_model = nn.Transformer(nhead=N_COMPONENTS, num_encoder_layers=32,   # d_model divisible by nhead
                                   d_model=N_COMPONENTS, batch_first=True).to(device)
optimizer = optim.Adam(transformer_model.parameters(), lr=0.0001)

In [211]:
def train_tf(model):
    model.train()
    loss = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-8)
    for epoch in range(3):  # loop over the dataset multiple times
        for i in tqdm(range(srcs.shape[1]//100)):
            optimizer.zero_grad()
            running_loss = 0.0

            src = srcs[:,i,:].T
            tgt = torch.unsqueeze(tgts[:,i], dim=0)
            outputs = model(src, tgt)
            l = loss(outputs, tgt)
            l.backward()
            optimizer.step()

            # print statistics
            running_loss += l
        print(f'[{epoch + 1}] loss: {running_loss / 2000:}')
    return model  

In [None]:
transformer_model = train_tf(transformer_model)

  0%|          | 0/9849 [00:00<?, ?it/s]

[1] loss: 334.14703369140625


  0%|          | 0/9849 [00:00<?, ?it/s]

## Reconstruct 

In [37]:
type(p_data)

pandas.core.frame.DataFrame

In [38]:
transformed_data.shape

(5, 3)

In [39]:
transformer_model.eval()
transformer_eval = transformed_data.copy()
for i in range(4, p_data.shape[1]-1):
    print(transformer_eval.shape)
    predict = transformer_model(torch.unsqueeze(torch.FloatTensor(transformer_eval[-2-INPUT_STEP:-2]), 0),  # INPUT_STEP steps before
                               torch.unsqueeze(torch.FloatTensor(transformer_eval[-1:]), 0))
    print(predict.shape)
    transformer_eval = np.append(transformer_eval, predict[0].detach().numpy(), axis=0)  # append prediction and 

(5, 3)
torch.Size([1, 1, 3])
(6, 3)
torch.Size([1, 1, 3])
(7, 3)
torch.Size([1, 1, 3])


In [40]:
transformer_eval.shape

(8, 3)

In [41]:
reconstruct(transformer_eval)

0.006454510468586365


todo

10s
10.1s
10.2s

a common algo to reduce dim, pca. 
- look at robustness for prediction further in time.
    - how to get more data simscale. Horizon 8 hours, train 6h, predict 2h. First step 10 mins
        - Increase data slowly, see the limit of ML methods
        - Tradition ML (lstm, transformers) will have problem. How long can we predict using this?
            - Change ML approach maybe to operator inference