<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Lorenz-trajectory" data-toc-modified-id="Lorenz-trajectory-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Lorenz trajectory</a></span><ul class="toc-item"><li><span><a href="#CV-predictions" data-toc-modified-id="CV-predictions-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>CV predictions</a></span><ul class="toc-item"><li><span><a href="#Heal-test" data-toc-modified-id="Heal-test-1.1.1"><span class="toc-item-num">1.1.1&nbsp;&nbsp;</span>Heal test</a></span></li></ul></li></ul></li></ul></div>

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

sns.set()

from tqdm.auto import tqdm

In [2]:
from src.Lorentz import Lorentz
from src.TSProcessor_GPU import TSProcessor

%load_ext autoreload
%autoreload 2

In [3]:
from src.utils import normalize, denormalize, gen_sin_wave, plot_trajectories, plot_runs
from copy import deepcopy

In [4]:
import torch
import gc
import pickle

In [156]:
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import torch.optim as optim

## Lorenz trajectory

### CV predictions

In [234]:
# basic NN
# takes x as input
# (takes is_predicted as input)
# outputs the estimated next value
# (outputs the estimated forecasting error)

# batch norm
# training plots
# adam

In [317]:
class LorenzDataset(torch.utils.data.Dataset):
    def __init__(self, x, seq_len):
        super().__init__()
        self.x = x
        self.seq_len = seq_len

    def __getitem__(self, idx: int):
        return self.x[idx:idx+self.seq_len]
    
    def __len__(self):
        return len(self.x) - self.seq_len + 1

In [318]:
# class LorenzDataset(torch.utils.data.Dataset):
#     def __init__(self, x, seq_len, h_max):
#         super().__init__()
#         self.x = x
#         self.seq_len = seq_len
#         self.h_max = h_max

#     def __getitem__(self, idx: int):
#         X_start = self.x[idx:idx+self.seq_len]
#         X_test = self.x[idx+self.seq_len:idx+self.seq_len+self.h_max]
#         return X_start, X_test
    
#     def __len__(self):
#         return len(self.x) - self.seq_len + 1

In [319]:
train_size = 10000
val_size = 2000
h_max = 100

batch_size = 64
seq_len = 128
learning_rate = 0.01
EPOCHS = 10
device = 'cuda:0'

In [320]:
x, _, _ = Lorentz().generate(0.1, 3000+train_size+val_size-1) # -1 because of an implementation bug
x, x_min, x_max = normalize(x[3000:]) # "For the Lorenz series, the first 3000 observations are discarded..."

In [321]:
x = torch.from_numpy(x.reshape(-1, 1)).type(torch.float32)

In [322]:
x_train = x[:train_size]
x_val = x[train_size:]

train = LorenzDataset(x_train, seq_len)
val = LorenzDataset(x_val, seq_len)

train_loader = DataLoader(train, batch_size=batch_size, shuffle=False, drop_last=True)
val_loader = DataLoader(val, batch_size=batch_size, shuffle=False, drop_last=True)

In [323]:
# x_train = x[:train_size]
# x_val = x[train_size:]

# train = LorenzDataset(x_train, seq_len, h_max)
# val = LorenzDataset(x_val, seq_len, h_max)

# train_loader = DataLoader(train, batch_size=batch_size, shuffle=False, drop_last=True)
# val_loader = DataLoader(val, batch_size=batch_size, shuffle=False, drop_last=True)

In [325]:
# x_start, x_true = next(iter(train_loader))

In [326]:
class RNNModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim, dropout_prob):
        super(RNNModel, self).__init__()

        # Defining the number of layers and the nodes in each layer
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # RNN layers
        self.rnn = nn.RNN(
            input_dim, hidden_dim, num_layers, batch_first=True, dropout=dropout_prob
        )
        
        # Fully connected layer
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, _ = self.rnn(x)
        out = self.fc(out)
        return out

In [327]:
model = RNNModel(1, 10, 1, 1, 0).type(torch.float32)

In [328]:
model.to(device);

In [329]:
loss_func = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [330]:
for epoch in tqdm(range(EPOCHS)):
    model.train()
    losses = []
    for x_true in train_loader:
        x_true = x_true.to(device)
        x_pred = model(x_true)
        
        loss = loss_func(x_pred[:, 1:, :], x_true[:, :-1, :])
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        losses.append(loss.item())
    
    print(epoch, torch.Tensor(losses).mean())

    model.eval()
    with torch.no_grad():
        losses = []
        for x_true in val_loader:
            x_true = x_true.to(device)
            x_pred = model(x_true)

            loss = loss_func(x_pred[:, 1:, :], x_true[:, :-1, :])
            
            losses.append(loss.item())

            
        print(epoch, torch.Tensor(losses).mean())

  0%|          | 0/10 [00:00<?, ?it/s]

0 tensor(0.0223)
0 tensor(0.0008)
1 tensor(0.0005)
1 tensor(0.0003)
2 tensor(0.0002)
2 tensor(0.0001)
3 tensor(8.2325e-05)
3 tensor(5.1047e-05)
4 tensor(3.4501e-05)
4 tensor(2.3374e-05)
5 tensor(1.7598e-05)
5 tensor(1.3306e-05)
6 tensor(1.0883e-05)
6 tensor(8.6847e-06)
7 tensor(7.6189e-06)
7 tensor(6.9178e-06)
8 tensor(5.6033e-06)
8 tensor(4.7056e-06)
9 tensor(4.2713e-06)
9 tensor(3.5838e-06)


#### Heal test

In [None]:
train_size = 50
h_max = 20 # max prediction horizon (t+h)
n_folds = 500


points_in_template = 3
max_template_spread = 2 # max distance between y_t1 and y_t2, y_1 and y_11


# trajectories prediction parameters
eps = 0.01
n_trajectories = 24
noise_amp = 0.01

In [None]:
x = np.concatenate([
    np.array(range(20)),
    np.array(range(18, 0, -1)),
    np.array(range(20)),
    np.array(range(18, 0, -1)),
    np.array(range(20)),
    np.array(range(18, 0, -1)),
    np.array(range(20)),
    np.array(range(18, 0, -1)),
    np.array(range(20)),
    np.array(range(18, 0, -1)),
    np.array(range(20)),
    np.array(range(18, 0, -1)),
    np.array(range(20)),
    np.array(range(18, 0, -1)),
    np.array(range(20)),
    np.array(range(18, 0, -1)),
])

In [None]:
t=0
X_train = x[t:t+train_size]
X_train = torch.from_numpy(X_train).type(torch.float32).to('cuda')

start_points = max_template_spread * (points_in_template-1)

X_start = x[t+train_size-start_points:t+train_size]
X_start = torch.from_numpy(X_start).type(torch.float32).to('cuda')

X_test_cpu = x[t+train_size:t+train_size+h_max]
X_test = torch.from_numpy(X_test_cpu).type(torch.float32).to('cuda')

tsp = TSProcessor(
    points_in_template=points_in_template,
    max_template_spread=max_template_spread,
    X_train=X_train,
)

In [None]:
X_pred = X_test.clone()
X_pred[0] = np.nan
X_pred[6] = np.nan
X_pred[10] = np.nan

In [None]:
X_test

In [None]:
X_traj_pred = tsp.heal(
    X_start, h_max,
    eps=eps,
    n_trajectories=n_trajectories,
    noise_amp=noise_amp,
    X_pred=X_pred,
    random_seed=1
)

In [None]:
X_traj_pred = X_traj_pred.cpu().numpy()

In [None]:
unified_result = tsp.predict_unified(
    X_traj_pred,
    method='cluster',
    dbs_min_trajectories=dbs_min_trajectories,
    dbs_eps=dbs_eps,
    dbs_min_samples=dbs_min_samples,
)
X_pred = unified_result['X_pred']

non_pred = np.isnan(X_pred).astype(int)
rmse = (X_pred - X_test_cpu)**2

gc.collect()
torch.cuda.empty_cache()

In [None]:
plot_trajectories(
    'random', X_train.cpu().numpy(), X_test.cpu().numpy(), # plotting since X_start
    noise_amp, n_trajectories,
    X_traj_pred, X_pred#, filename=f'Lorenz_cluster_{fold}_{noise_amp:.2f}.png'
)

In [None]:
X_traj_pred = tsp.heal(
    X_start, h_max,
    eps=eps,
    n_trajectories=n_trajectories,
    noise_amp=noise_amp,
    X_pred=X_pred,
    random_seed=1
)