In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint
from sklearn.linear_model import Ridge
from matplotlib.colors import Normalize
import networkx as nx
from scipy.signal import welch
import matplotlib.cm as cm
import seaborn as sns
import torch
import torch.nn as nn
from torch.optim import Adam
from collections import defaultdict
from sklearn.neighbors import NearestNeighbors
# import neurokit2 as nk
import matplotlib.pyplot as plt
import numpy as np
import os
import json
import itertools
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm

## LSTM

In [2]:
class LSTMBaseline3D:
    """
    Lightweight single-layer LSTM baseline for low-dimensional forecasting.

    * Architecture: ``nn.LSTM(input_dim, hidden_size)`` + linear read-out.
    * Parameter count: the default ``hidden_size=37`` gives 6,330 weights;
      ``hidden_size=32`` gives 4,835 (the ~4.8k budget).
    * fit() trains in teacher-forcing mode on one full sequence.
    * predict() produces an autoregressive roll-out.
    * predict_open_loop() produces teacher-forced one-step outputs.
    """

    def __init__(self,
                 input_dim:  int = 3,
                 hidden_size: int = 37,
                 output_dim: int = 3,
                 lr: float = 1e-3,
                 epochs: int = 30,
                 device: str = 'cpu',
                 seed: int = 0):
        # Seed before the layers are built so weight initialisation is reproducible.
        torch.manual_seed(seed); np.random.seed(seed)

        self.device  = torch.device(device)
        self.epochs  = epochs
        self.model   = nn.LSTM(input_dim, hidden_size,
                               batch_first=True).to(self.device)
        self.head    = nn.Linear(hidden_size, output_dim).to(self.device)
        self.crit    = nn.MSELoss()
        self.optim   = Adam(list(self.model.parameters())+
                            list(self.head.parameters()), lr=lr)

    def total_parameters(self):
        """Return the number of scalar weights in the LSTM and the read-out head."""
        return sum(param.numel()
                   for module in (self.model, self.head)
                   for param in module.parameters())

    # ---------------------------------------------------------
    @torch.no_grad()
    def _init_hidden(self, batch_sz=1):
        """Return zero-initialised ``(h0, c0)``, each of shape [1, batch_sz, hidden_size]."""
        h0 = torch.zeros(1, batch_sz,
                         self.model.hidden_size,
                         device=self.device)
        c0 = torch.zeros_like(h0)
        return (h0, c0)

    # ---------------------------------------------------------
    def fit(self, x_np: np.ndarray, y_np: np.ndarray):
        """
        Full-batch teacher-forced training for ``self.epochs`` Adam steps.

        x_np shape [T, input_dim]   (input  at t)
        y_np shape [T, output_dim]  (target at t)

        Raises ValueError if the two sequences differ in length.
        """
        x = torch.tensor(x_np, dtype=torch.float32,
                         device=self.device).unsqueeze(0)  # [1,T,input_dim]
        y = torch.tensor(y_np, dtype=torch.float32,
                         device=self.device).unsqueeze(0)
        if x.shape[1] != y.shape[1]:
            raise ValueError(
                f"x_np and y_np must have the same length, got {x.shape[1]} and {y.shape[1]}")

        # predict()/predict_open_loop() switch to eval mode; switch back here
        # so that fitting again after a prediction behaves like the first fit.
        self.model.train(); self.head.train()

        for _ in range(self.epochs):
            self.optim.zero_grad()
            out, _ = self.model(x, self._init_hidden())
            pred   = self.head(out)
            loss   = self.crit(pred, y)
            loss.backward()
            self.optim.step()

    # ---------------------------------------------------------
    @torch.no_grad()
    def predict(self, init_u: np.ndarray, n_steps: int):
        """
        Autoregressive roll-out starting from a zero hidden state.

        init_u : initial input vector of length input_dim (last known sample)
        Returns array of shape [n_steps, output_dim], dtype float32.

        Each prediction is fed back as the next input, so roll-outs longer
        than one step need output_dim == input_dim (ValueError otherwise).
        """
        self.model.eval(); self.head.eval()

        out_dim = self.head.out_features
        if n_steps > 1 and out_dim != self.model.input_size:
            raise ValueError(
                "autoregressive roll-out needs output_dim == input_dim, "
                f"got {out_dim} and {self.model.input_size}")

        inp     = torch.tensor(np.asarray(init_u)[None, None, :],
                               dtype=torch.float32, device=self.device)
        h, c    = self._init_hidden()
        # Size the buffer from the head rather than a hard-coded 3.
        preds   = np.empty((n_steps, out_dim), dtype=np.float32)

        for t in range(n_steps):
            out, (h, c) = self.model(inp, (h, c))
            y           = self.head(out)            # [1, 1, out_dim]
            preds[t]    = y[0, 0].cpu().numpy()
            inp         = y                         # feed prediction back

        return preds

    @torch.no_grad()
    def predict_open_loop(self, x_np: np.ndarray):
        """
        Open-loop prediction using teacher-forced inputs (like during training).
        x_np shape: [T, input_dim] – input sequence
        Returns:
            preds: [T, output_dim] – predicted output sequence
        """
        self.model.eval(); self.head.eval()

        x = torch.tensor(x_np, dtype=torch.float32,
                         device=self.device).unsqueeze(0)  # [1, T, input_dim]
        out, _ = self.model(x, self._init_hidden())
        preds = self.head(out).squeeze(0).cpu().numpy()    # [T, output_dim]

        return preds

if __name__ == "__main__":
    # Smoke check: build the LSTM baseline with its default hyper-parameters
    # and report how many weights it has (the recorded cell output shows 6330).
    model = LSTMBaseline3D()
    print(f"Total trainable parameters: {model.total_parameters()}")

# lstm_baseline = LSTMBaseline3D(
#                     hidden_size=38,         # parameter budget ~ 4800
#                     lr=1e-3,
#                     epochs=100,
#                     device='cuda' if torch.cuda.is_available() else 'cpu',
#                     seed=45)
# lstm_baseline.fit(train_input, train_target)

# # one-step roll-out to build an initial vector for auto-regressive mode
# init_vec = train_target[-1]                # last teacher-forced target
# lstm_preds = lstm_baseline.predict(init_vec,
#                                    n_steps=len(test_input))
# lstm_preds_open_loop = lstm_baseline.predict_open_loop(test_input)


Total trainable parameters: 6330


In [3]:
class TCNBaseline3D(nn.Module):
    """
    Two-layer causal temporal convolutional network (TCN).
    ----------------------
    • conv1: kernel 3, dilation 1;  conv2: kernel 3, dilation 2;  1×1 conv head
    • causality: each conv is padded on both sides and the trailing
      ``padding`` outputs are trimmed, so output[t] only sees inputs ≤ t
    • receptive field = 1 + 2·1 + 2·2 = 7 time-steps
    • input_dim  = 3
    • hidden_dim = 32  → 3,523 parameters with the 3-dim defaults
    • output_dim = 3    (one-step prediction per time-step)
    """
    def __init__(self,
                 input_dim:  int = 3,
                 hidden_dim: int = 32,
                 output_dim: int = 3,
                 lr: float = 1e-3,
                 epochs: int = 40,
                 device: str = "cpu",
                 seed: int = 0):
        super().__init__()
        # Seed before the layers are built so weight initialisation is reproducible.
        torch.manual_seed(seed); np.random.seed(seed)

        k = 3  # kernel
        # layer 1: dilation 1  → pad (k-1)*1 = 2; the 2 trailing outputs are cut in forward()
        self.conv1 = nn.Conv1d(input_dim, hidden_dim,
                               kernel_size=k,
                               dilation=1,
                               padding=2,
                               bias=True)
        # layer 2: dilation 2  → pad (k-1)*2 = 4; the 4 trailing outputs are cut in forward()
        self.conv2 = nn.Conv1d(hidden_dim, hidden_dim,
                               kernel_size=k,
                               dilation=2,
                               padding=4,
                               bias=True)
        self.relu  = nn.ReLU()
        self.head  = nn.Conv1d(hidden_dim, output_dim,
                               kernel_size=1, bias=True)

        self.lr, self.epochs = lr, epochs
        self.to(device)
        self.optim = Adam(self.parameters(), lr=lr)
        self.crit  = nn.MSELoss()

    # ---------------------------------------------------------
    def forward(self, x):
        """
        x shape  [B, T, input_dim]  (batch, time, channels)
        return  [B, T, output_dim]; output[:, t] depends only on x[:, :t+1].
        """
        # reshape to Conv1d convention: (B, C, T)
        x = x.permute(0, 2, 1)
        # Symmetric padding yields T + padding outputs; dropping the last
        # `padding` of them leaves exactly the causal ones.
        y = self.relu(self.conv1(x)[:, :, :-self.conv1.padding[0]])
        y = self.relu(self.conv2(y)[:, :, :-self.conv2.padding[0]])
        return self.head(y).permute(0, 2, 1)               # back to (B,T,C)

    # ---------------------------------------------------------
    def fit(self, x_np: np.ndarray, y_np: np.ndarray):
        """
        Teacher-forcing on the entire sequence (batch size = 1).

        x_np shape [T, input_dim], y_np shape [T, output_dim];
        y_np[t] is the desired output for x_np[t] (for forecasting, pass the
        series already shifted by one step), the same convention as the other
        baselines in this notebook.  No additional shift is applied here:
        shifting an already-shifted target would train a two-step-ahead
        predictor that predict() then uses as a one-step model.

        Raises ValueError if the two sequences differ in length.
        """
        device = next(self.parameters()).device
        x = torch.tensor(np.asarray(x_np)[None], dtype=torch.float32, device=device)
        y = torch.tensor(np.asarray(y_np)[None], dtype=torch.float32, device=device)
        if x.shape[1] != y.shape[1]:
            raise ValueError(
                f"x_np and y_np must have the same length, got {x.shape[1]} and {y.shape[1]}")

        self.train()
        for _ in range(self.epochs):
            self.optim.zero_grad()
            pred = self.forward(x)
            loss = self.crit(pred, y)   # pred[t] is matched with y[t]
            loss.backward()
            self.optim.step()

    # ---------------------------------------------------------
    @torch.no_grad()
    def predict(self, init_window: np.ndarray, n_steps: int):
        """
        Autoregressive roll-out.
        init_window : shape [L, input_dim] (latest samples, earliest first).
                      L ≥ 7 covers the full receptive field; shorter windows
                      still run but the earliest taps see zero padding.
        Returns      : [n_steps, output_dim], dtype float32

        Predictions are appended to the window, so output_dim must equal
        input_dim for roll-outs longer than one step.
        """
        self.eval()
        device = next(self.parameters()).device
        window = np.array(init_window, dtype=np.float32)   # private copy
        # Size the buffer from the head rather than a hard-coded 3.
        preds  = np.empty((n_steps, self.head.out_channels), dtype=np.float32)

        for t in range(n_steps):
            inp = torch.tensor(window[None], dtype=torch.float32, device=device)
            y   = self.forward(inp)[0, -1].cpu().numpy()
            preds[t] = y
            window   = np.vstack([window[1:], y])  # slide window

        return preds


# tcn = TCNBaseline3D(hidden_dim=32, epochs=50, lr=1e-3, device="cpu", seed=46)
# tcn.fit(train_input, train_target)

# # initial window must be ≥5 samples:
# init_win = test_input[:5].copy()
# tcn_preds = tcn.predict(init_win, n_steps=len(test_target))

# ============================================================
#  Causal Transformer baseline for 3-D Lorenz forecasting
#  (PyTorch ≥ 1.10 — the `norm_first` option used below is not available in earlier releases)
# ============================================================
import numpy as np
import torch
import torch.nn as nn
from torch.optim import Adam

class SmallCausalTransformer3D(nn.Module):
    """
    Single-layer Transformer encoder used as a one-step forecaster:
      • d_model = 24,   nhead = 1,   d_ff = 4·d_model
      • receptive field  = sequence length L (set in fit / predict)
      • only the LAST token's output is read, so the prediction depends on
        the current and past samples of the window only; no attention mask
        is applied inside the window
      • total parameters = 7,395 with the defaults
      • the encoder layer keeps PyTorch's default dropout, so the module must
        be in eval mode for deterministic predictions (predict() does this)
    """
    def __init__(self,
                 d_model: int = 24,
                 nhead: int = 1,
                 d_ff: int = 96,        # 4 × d_model
                 lr: float = 2e-3,
                 epochs: int = 60,
                 device: str = "cpu",
                 seed: int = 0):
        super().__init__()
        # Seed before the layers are built so weight initialisation is reproducible.
        torch.manual_seed(seed); np.random.seed(seed)
        self.device, self.epochs = device, epochs

        self.in_proj   = nn.Linear(3, d_model)     # 3-dim input → tokens
        encoder_layer  = nn.TransformerEncoderLayer(
                             d_model=d_model,
                             nhead=nhead,
                             dim_feedforward=d_ff,
                             batch_first=True,
                             activation="gelu",
                             norm_first=True)
        self.encoder   = nn.TransformerEncoder(encoder_layer, num_layers=1)
        self.pos_embed = None                      # built on first call
        self.head      = nn.Linear(d_model, 3)     # back to 3-dim output

        self.to(device)
        self.opt  = Adam(self.parameters(), lr=lr)
        self.crit = nn.MSELoss()

    # ----------------------------------------
    def _get_posembed(self, L: int, d: int):
        """
        Fixed sinusoidal positional embedding (same as Vaswani et al.).
        Returns a tensor of shape (1, L, d); works for odd d as well.
        """
        pos = torch.arange(L, dtype=torch.float32, device=self.device)
        # ceil(d/2) frequencies: the sine columns (0::2) need one more than
        # the cosine columns (1::2) when d is odd.
        i   = torch.arange((d + 1)//2, dtype=torch.float32, device=self.device)
        angles = pos[:, None] / (10000 ** (2*i/d))
        pe = torch.zeros(L, d, device=self.device)
        pe[:, 0::2] = torch.sin(angles)
        pe[:, 1::2] = torch.cos(angles[:, :d//2])
        return pe[None]                                # shape (1,L,d)

    # ----------------------------------------
    def fit(self, x_np: np.ndarray, y_np: np.ndarray, L: int = 20):
        """
        Teacher-forcing with sliding windows of length L.
        x_np, y_np  shape [T, 3];  y_np[t] is the desired prediction for x_np[t].

        Window i is x_np[i : i+L] and its target is y_np[i+L-1].
        Raises ValueError if the sequences differ in length or T < L.
        """
        x = torch.tensor(x_np, dtype=torch.float32, device=self.device)
        y = torch.tensor(y_np, dtype=torch.float32, device=self.device)
        if x.shape[0] != y.shape[0]:
            raise ValueError(
                f"x_np and y_np must have the same length, got {x.shape[0]} and {y.shape[0]}")
        if x.shape[0] < L:
            raise ValueError(f"need at least L={L} samples, got {x.shape[0]}")

        if self.pos_embed is None or self.pos_embed.size(1) != L:
            self.pos_embed = self._get_posembed(L, self.in_proj.out_features)

        # build training batches as overlapping windows (stride 1).
        # Tensor.unfold appends the window axis LAST, giving [T-L+1, 3, L];
        # swap the last two axes to obtain the (time, channel) layout
        # [T-L+1, L, 3] that in_proj expects.
        windows   = x.unfold(0, L, 1).transpose(1, 2).contiguous()
        targets   = y[L-1:]                  # predict the last step
        dataset   = torch.utils.data.TensorDataset(windows, targets)
        loader    = torch.utils.data.DataLoader(dataset,
                                                batch_size=64,
                                                shuffle=True)

        self.train()    # predict() leaves the module in eval mode
        for _ in range(self.epochs):
            for batch_x, batch_y in loader:
                self.opt.zero_grad()
                z   = self.in_proj(batch_x) + self.pos_embed
                out = self.encoder(z)
                pred = self.head(out[:, -1])          # last token
                loss = self.crit(pred, batch_y)
                loss.backward(); self.opt.step()

    # ----------------------------------------
    @torch.no_grad()
    def predict(self, init_window: np.ndarray, n_steps: int):
        """
        Autoregressive roll-out.
        init_window : numpy (L,3)  – most recent L samples (old → new)
        Returns      : numpy (n_steps,3), dtype float32
        """
        # Dropout is active in training mode even under no_grad; switch it
        # off so that roll-outs are deterministic.
        self.eval()

        L = init_window.shape[0]
        if self.pos_embed is None or self.pos_embed.size(1) != L:
            self.pos_embed = self._get_posembed(L, self.in_proj.out_features)

        window = torch.tensor(init_window, dtype=torch.float32,
                              device=self.device)
        preds  = np.empty((n_steps, 3), dtype=np.float32)

        for t in range(n_steps):
            z   = self.in_proj(window[None]) + self.pos_embed
            y   = self.head(self.encoder(z)[:, -1])[0]
            preds[t] = y.cpu().numpy()

            # slide window: drop the oldest sample, append the prediction
            window = torch.vstack([window[1:], y])

        return preds

# # ---------------------------------------------------
# #  hyper-parameters chosen to match ≈ 5 k weights
# # ---------------------------------------------------
# tformer = SmallCausalTransformer3D(d_model=24,
#                                    d_ff=96,
#                                    epochs=60,
#                                    lr=2e-3,
#                                    device="cpu",
#                                    seed=47)

# seq_len = 20                         # receptive field (same as NVAR window)
# tformer.fit(train_input, train_target, L=seq_len)

# init_win = test_input[:seq_len].copy()
# tf_preds = tformer.predict(init_win, n_steps=len(test_target))


# ============================================================
#  Non-linear Vector Auto-Regression (NVAR) baseline for 3-D Lorenz
# ============================================================
import numpy as np
from itertools import combinations_with_replacement
from sklearn.linear_model import Ridge

class NVARBaseline3D:
    """
    Non-linear vector auto-regression for 3-dimensional series.

    • delay window length k   (default 5 samples)
    • quadratic polynomial lift (all monomials up to degree 2)
    • ridge regression read-out (sklearn ``Ridge`` without intercept; the
      constant feature plays the role of the bias)
    """
    def __init__(self,
                 k: int = 5,
                 ridge_alpha: float = 1e-4):
        self.k          = k
        self.alpha      = ridge_alpha
        self.scaler_mu  = None      # per-channel input mean, set by fit()
        self.scaler_sig = None      # per-channel input std,  set by fit()
        self.reg        = Ridge(alpha=self.alpha, fit_intercept=False)

        # indices for quadratic terms
        L  = 3 * k                 # length of flattened delay vector
        self.idxs_quad = list(combinations_with_replacement(range(L), 2))

    # ---------------------------------------------------------
    def _build_feature(self, window: np.ndarray) -> np.ndarray:
        """
        window: shape (k, 3)  -> returns (F,) float32 where
          F = 1 + 3k + (3k)(3k+1)/2
        laid out as [constant 1, linear terms, quadratic terms].
        """
        lin = np.asarray(window).flatten()     # linear terms
        # Vectorised product over all index pairs (replaces a Python loop).
        pairs = np.asarray(self.idxs_quad, dtype=np.intp).reshape(-1, 2)
        quad = lin[pairs[:, 0]] * lin[pairs[:, 1]]
        return np.concatenate(([1.0], lin, quad), dtype=np.float32)

    def total_parameters(self):
        """
        Number of read-out weights.

        After fit() this is the size of the ridge coefficient matrix;
        before fit() it is computed as 3 outputs × F features.
        """
        coef = getattr(self.reg, "coef_", None)
        if coef is not None:
            return int(np.size(coef))
        n_lin = 3 * self.k
        n_features = 1 + n_lin + n_lin * (n_lin + 1) // 2
        return 3 * n_features

    # ---------------------------------------------------------
    def fit(self, x_np: np.ndarray, y_np: np.ndarray):
        """
        x_np shape [T, 3] (driver)
        y_np shape [T, 3] (target 1-step ahead)
        Assumes x_np[t] predicts y_np[t].

        The window for target y_np[t] is x_np[t-k+1 .. t] (inclusive), i.e.
        it ENDS at the sample that predicts y_np[t]; this matches predict(),
        which maps the most recent k samples to the next one.
        Raises ValueError if fewer than k samples are given.
        """
        k = self.k
        x_np = np.asarray(x_np)
        y_np = np.asarray(y_np)
        assert len(x_np) == len(y_np)
        if len(x_np) < k:
            raise ValueError(f"need at least k={k} samples, got {len(x_np)}")
        # normalise inputs
        self.scaler_mu  = x_np.mean(0, keepdims=True)
        self.scaler_sig = x_np.std (0, keepdims=True) + 1e-9
        x_norm = (x_np - self.scaler_mu)/self.scaler_sig

        feats, targets = [], []
        for t in range(k - 1, len(x_norm)):
            window = x_norm[t-k+1:t+1]          # shape (k,3), last row is x[t]
            feats.append(self._build_feature(window))
            targets.append(y_np[t])

        X = np.vstack(feats)
        Y = np.vstack(targets)
        self.reg.fit(X, Y)

    # ---------------------------------------------------------
    def predict(self, init_window: np.ndarray, n_steps: int):
        """
        Autoregressive roll-out.
        init_window : array (k,3)  – most recent k samples (old → new).
        Returns array (n_steps,3), dtype float32.
        Raises RuntimeError if called before fit().
        """
        if self.scaler_mu is None or self.scaler_sig is None:
            raise RuntimeError("NVARBaseline3D.predict() called before fit()")

        window = np.array(init_window, dtype=float)   # private copy
        preds  = np.empty((n_steps, 3), dtype=np.float32)

        for t in range(n_steps):
            w_norm  = (window - self.scaler_mu)/self.scaler_sig
            phi     = self._build_feature(w_norm)
            y_hat   = self.reg.predict(phi[None, :])[0]
            preds[t] = y_hat
            # slide window: drop oldest, append new prediction
            window = np.vstack([window[1:], y_hat])

        return preds

# # ---------------------------------------------------
# #  create training windows  (same split as before)
# # ---------------------------------------------------
# k = 5
# nvar = NVARBaseline3D(k=k, ridge_alpha=1e-4)
# nvar.fit(train_input, train_target)

# # prepare the last k samples as initial window
# init_window = test_input[:k].copy()
# nvar_preds  = nvar.predict(init_window, n_steps=len(test_target))


In [4]:
def evaluate_nrmse(all_preds, test_target, horizons):
    """
    Evaluate model performance over multiple prediction horizons
    for teacher-forced single-step forecasting or autoregressive rollout.

    For each horizon h the first h samples are compared:

        NRMSE = sqrt( sum((pred - target)^2) / (n * sum_d var(target_d)) )

    where n is the number of samples actually available (n = h unless the
    series is shorter than h) and the variance is taken per channel over
    those samples.  Normalising by n rather than by the requested h avoids
    under-reporting the error when a horizon exceeds the series length.

    Parameters
    ----------
    all_preds, test_target : array-like, shape (T, dim) or (T,)
    horizons : iterable of int

    Returns
    -------
    dict mapping each requested horizon to its NRMSE.

    Raises
    ------
    ValueError
        If predictions and targets differ in length within a horizon.
    """
    all_preds = np.asarray(all_preds)
    test_target = np.asarray(test_target)

    horizon_nrmse = {}
    for horizon in horizons:
        preds = all_preds[:horizon]
        targets = test_target[:horizon]
        if len(preds) != len(targets):
            raise ValueError(
                f"horizon {horizon}: {len(preds)} predictions but {len(targets)} targets")
        n_samples = len(targets)

        squared_errors = (preds - targets) ** 2
        variance = np.var(targets, axis=0)
        # avoid divide-by-zero; np.where also handles the 0-d variance of 1-D input
        variance = np.where(variance == 0, 1e-8, variance)
        nrmse = np.sqrt(np.sum(squared_errors) / (n_samples * np.sum(variance)))
        horizon_nrmse[horizon] = nrmse
    return horizon_nrmse

In [5]:
def compute_valid_prediction_time(y_true, y_pred, t_vals, threshold=0.4, lambda_max=0.9, dt=None):
    """
    Compute the Valid Prediction Time (VPT) and compare it to Lyapunov time T_lambda = 1 / lambda_max.

    The normalised error is

        delta(t) = ||y_true(t) - y_pred(t)||^2 / < ||y_true - mean(y_true)||^2 >_t

    and the VPT is the time, measured from t_vals[0], of the first sample
    whose error is not within the threshold.

    Parameters
    ----------
    y_true : ndarray of shape (N, dim)
        True trajectory over time.
    y_pred : ndarray of shape (N, dim)
        Model's predicted trajectory over time (closed-loop).
    t_vals : ndarray of shape (N,)
        Time values corresponding to the trajectory steps.
    threshold : float, optional
        The error threshold. Default 0.4.
    lambda_max : float, optional
        Largest Lyapunov exponent. Default 0.9.
    dt : float, optional
        Unused; kept so existing positional calls keep working.

    Returns
    -------
    T_VPT : float
        Valid prediction time. The earliest time at which normalized error surpasses threshold
        (or the last time if never surpassed), relative to t_vals[0].
    T_lambda : float
        Lyapunov time = 1 / lambda_max
    ratio : float
        How many Lyapunov times the model prediction remains valid, i.e. T_VPT / T_lambda.

    Notes
    -----
    A NaN error (e.g. from a diverged roll-out) counts as exceeding the
    threshold; a plain ``delta > threshold`` test would treat NaN as valid
    and report the maximum possible VPT for a diverged model.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    t_vals = np.asarray(t_vals)

    # 1) Normaliser: time-averaged squared norm of the centred true trajectory
    y_centered = y_true - np.mean(y_true, axis=0)
    denom = np.mean(np.sum(y_centered**2, axis=1))  # scalar

    # 2) Normalised squared error per time step
    delta_gamma = np.sum((y_true - y_pred)**2, axis=1) / denom  # shape (N,)

    # 3) First index whose error is NOT within the threshold (NaN included)
    idx_exceed = np.flatnonzero(~(delta_gamma <= threshold))
    t_end = t_vals[idx_exceed[0]] if idx_exceed.size else t_vals[-1]

    # 4) Express the VPT relative to the start time and in Lyapunov times
    T_VPT = t_end - t_vals[0]
    T_lambda = 1.0 / lambda_max
    ratio = T_VPT / T_lambda

    return T_VPT, T_lambda, ratio

In [6]:
def compute_attractor_deviation(predictions, targets, cube_size=(0.1, 0.1, 0.1)):
    """
    Compute the Attractor Deviation (ADev) metric.

    Space is partitioned into axis-aligned cubes anchored at the joint
    minimum of both trajectories.  ADev is the number of cubes visited by
    exactly one of the two trajectories.

    The visited cubes are kept as sets of integer indices instead of two
    dense occupancy grids over the whole bounding box: the result is the
    same, but memory scales with the number of samples rather than with the
    bounding-box volume (which explodes when a prediction diverges).

    Parameters:
        predictions (numpy.ndarray): Predicted trajectories of shape (n, 3).
        targets (numpy.ndarray): True trajectories of shape (m, 3).
        cube_size (tuple): Dimensions of the cube (dx, dy, dz).

    Returns:
        int: The ADev metric.

    Raises:
        ValueError: If either trajectory contains NaN or infinite values.
    """
    predictions = np.asarray(predictions)
    targets = np.asarray(targets)
    if not (np.isfinite(predictions).all() and np.isfinite(targets).all()):
        raise ValueError("predictions and targets must be finite to compute ADev")

    cube = np.asarray(cube_size, dtype=float)

    # Common grid origin: joint minimum over both trajectories
    min_coords = np.min(np.vstack((predictions, targets)), axis=0)

    # Map every sample to the integer index of the cube containing it
    pred_indices = ((predictions - min_coords) / cube).astype(int)
    target_indices = ((targets - min_coords) / cube).astype(int)

    pred_cubes = set(map(tuple, pred_indices.tolist()))
    target_cubes = set(map(tuple, target_indices.tolist()))

    # Cubes visited by one trajectory but not the other
    return len(pred_cubes ^ target_cubes)

### MIT-BIH

In [7]:
# def create_delay_embedding(signal, embed_dim):
#     L = len(signal) - embed_dim + 1
#     emb = np.zeros((L, embed_dim))
#     for i in range(L):
#         emb[i, :] = signal[i:i+embed_dim]
#     return emb
# import wfdb

# # Download and load record and annotations for patient #100
# record = wfdb.rdrecord('100', sampfrom=0, sampto=25002, pn_dir='mitdb')  # first 20,000 samples
# annotation = wfdb.rdann('100', 'atr', sampfrom=0, sampto=25002, pn_dir='mitdb')
# # Get input signal u(t) from the first channel
# u = record.p_signal[:, 0] 
# u
# # Normalize input
# u_min = np.min(u)
# u_max = np.max(u)
# u_norm = (u - u_min) / (u_max - u_min)
# fs = record.fs  # sampling frequency (should be 360 Hz)
# t_vals = np.arange(len(u_norm)) / fs
# emb_dim = 3
# # inputs = u_norm
# inputs = create_delay_embedding(u_norm, emb_dim)

# # Create target array (heartbeat locations)
# targets = np.zeros(len(u_norm))
# targets[annotation.sample] = 1  # mark annotations as 1 (heartbeat)
# targets = create_delay_embedding(targets, emb_dim)
# data_size = len(inputs)
# train_size = 15000
# train_input = inputs[:train_size]
# train_target = targets[:train_size]
# test_input = inputs[train_size+1:]
# test_target = targets[train_size+1:]
# test_size = len(test_input)
# print(f"Total samples: {data_size}, train size: {train_size}, test size: {test_size}") 

In [8]:
# device='cuda' if torch.cuda.is_available() else 'cpu'
# device

In [9]:
# all_horizons = list(range(10, 1001, 10))

# nrmse_dict = defaultdict(list)
# seeds = range(995, 996)

# for seed in seeds:
#     lstm_baseline = LSTMBaseline3D(
#                         hidden_size=500,         # parameter budget ~ 4800
#                         lr=1e-3,
#                         epochs=80,
#                         device='cuda' if torch.cuda.is_available() else 'cpu',
#                         seed=seed)
#     print(lstm_baseline.total_parameters())
#     lstm_baseline.fit(train_input, train_target)

#     # one-step roll-out to build an initial vector for auto-regressive mode
#     init_vec = train_target[-1]                # last teacher-forced target
#     lstm_preds = lstm_baseline.predict_open_loop(test_input)

#     nrmse = evaluate_nrmse(lstm_preds, test_target, all_horizons)
#     nrmse_dict['LSTM'].append(nrmse)
#     # for horizon, value in nrmse.items():
#     #     nrmse_dict[horizon].append(value)

In [10]:
# nrmse_dict

In [11]:
# with open('lstmmitbih.json', 'w') as f:
#     json.dump(nrmse_dict, f)

In [12]:
# horizons = [300, 600, 1000]
# print("\nNRMSE for Different Prediction Horizons:")
# print("-" * 140)
# print(f"{'LSTM':<17}")
# print("-" * 140)

# for horizon in horizons:
#     hfr_vals = [np.mean(hfr_nrmse[horizon]) for hfr_nrmse in nrmse_dict['LSTM']]

#     print(f"{horizon:<10}", end=" ")
#     for vals in [hfr_vals]:
#         mean = np.mean(vals)
#         std = np.std(vals)
#         print(f"{mean} ± {std}".ljust(18), end="")
#     print()

### Sunspot (Monthly)

In [13]:
# import pandas as pd
# file_path = '../RealWorld/datasets/SN_m_tot_V2.0.csv'

# df = pd.read_csv(file_path, sep=';', header = None)
# df
# data = df.iloc[:, 3].values
# dt = 1
# dataset_size = len(data)
# data = create_delay_embedding(data, 3)
# print(f"Dataset size: {dataset_size}")

# # Train/Test Split
# train_end = 2000
# train_input  = data[:train_end]
# train_target = data[1:train_end+1]
# test_input   = data[train_end:-1]
# test_target  = data[train_end+1:]
# y_test = test_target
# n_test_steps = len(test_target)
# time_test = np.arange(n_test_steps) * dt

# print(f"Train size: {len(train_input)}\nTest size: {len(test_input)}")

In [14]:
# lstm_baseline = LSTMBaseline3D(
#                     hidden_size=32,         # parameter budget ~ 4800
#                     lr=1e-3,
#                     epochs=400,
#                     device='cuda' if torch.cuda.is_available() else 'cpu',
#                     seed=42)
# lstm_baseline.fit(train_input, train_target)

# # one-step roll-out to build an initial vector for auto-regressive mode
# init_vec = train_target[-1]                # last teacher-forced target
# lstm_preds = lstm_baseline.predict_open_loop(test_input)

# nrmse = evaluate_nrmse(lstm_preds, test_target, horizons=[200, 400, 600, 800, 1000])

In [15]:
# nrmse

In [16]:
# horizons = [300, 600, 1000]

# all_horizons = list(range(10, 1001, 10))

# nrmse_dict = defaultdict(list)
# seeds = range(995, 996)

# for seed in seeds:
#     lstm_baseline = LSTMBaseline3D(
#                         hidden_size=1000,         # parameter budget ~ 4800
#                         lr=1e-3,
#                         epochs=200,
#                         device='cuda' if torch.cuda.is_available() else 'cpu',
#                         seed=seed)
#     print(lstm_baseline.total_parameters())
#     lstm_baseline.fit(train_input, train_target)

#     # one-step roll-out to build an initial vector for auto-regressive mode
#     init_vec = train_target[-1]                # last teacher-forced target
#     lstm_preds = lstm_baseline.predict_open_loop(test_input)

#     nrmse = evaluate_nrmse(lstm_preds, test_target, all_horizons)
#     nrmse_dict['LSTM'].append(nrmse)
#     # for horizon, value in nrmse.items():
#     #     nrmse_dict[horizon].append(value)

In [17]:
# print("\nNRMSE for Different Prediction Horizons:")
# print("-" * 140)
# print(f"{'LSTM':<17}")
# print("-" * 140)

# for horizon in horizons:
#     hfr_vals = [np.mean(hfr_nrmse[horizon]) for hfr_nrmse in nrmse_dict['LSTM']]

#     print(f"{horizon:<10}", end=" ")
#     for vals in [hfr_vals]:
#         mean = np.mean(vals)
#         std = np.std(vals)
#         print(f"{mean} ± {std}".ljust(18), end="")
#     print()

### Santa Fe

In [18]:
# file_path = 'RealWorld/datasets/santa-fe-time-series-competition-data-set-b-1.0.0/b1.txt'

# df = pd.read_csv(file_path, header=None, sep=' ')
# df
# # Normalize the first column (column 0) of the DataFrame
# df[0] = (df[0] - df[0].min()) / (df[0].max() - df[0].min())
# data = df.iloc[:, 0].values
# chosen_system = "SantaFe"
# dt = 1
# T_data = len(data)
# data = create_delay_embedding(data, 3)
# print(f"Data length: {T_data}.")

# # Train/Test Split
# train_end = 7000
# train_input  = data[:train_end]
# train_target = data[1:train_end+1]
# test_input   = data[train_end:-1]
# test_target  = data[train_end+1:]
# y_test = test_target
# n_test_steps = len(test_target)
# time_test = np.arange(n_test_steps) * dt

# print(f"Train size: {len(train_input)}  \nTest size: {len(test_input)}")


In [19]:
# horizons = [300, 600, 1000]

# nrmse_dict = defaultdict(list)
# seeds = range(995, 1025)

# for seed in seeds:
#     lstm_baseline = LSTMBaseline3D(
#                         hidden_size=32,         # parameter budget ~ 4800
#                         lr=1e-3,
#                         epochs=400,
#                         device='cuda' if torch.cuda.is_available() else 'cpu',
#                         seed=seed)
#     lstm_baseline.fit(train_input, train_target)

#     # one-step roll-out to build an initial vector for auto-regressive mode
#     init_vec = train_target[-1]                # last teacher-forced target
#     lstm_preds = lstm_baseline.predict_open_loop(test_input)

#     nrmse = evaluate_nrmse(lstm_preds, test_target, horizons)
#     nrmse_dict['LSTM'].append(nrmse)
#     # for horizon, value in nrmse.items():
#     #     nrmse_dict[horizon].append(value)

In [20]:
# print("\nNRMSE for Different Prediction Horizons:")
# print("-" * 140)
# print(f"{'LSTM':<17}")
# print("-" * 140)

# for horizon in horizons:
#     hfr_vals = [np.mean(hfr_nrmse[horizon]) for hfr_nrmse in nrmse_dict['LSTM']]

#     print(f"{horizon:<10}", end=" ")
#     for vals in [hfr_vals]:
#         mean = np.mean(vals)
#         std = np.std(vals)
#         print(f"{mean} ± {std}".ljust(18), end="")
#     print()

### BIDMC

In [21]:
# import wfdb
# import numpy as np
# import os

# # ─── Load BIDMC Record ─────────────────────────────────────────────────────
# record_id = 'bidmc01'
# record = wfdb.rdrecord(record_id, pn_dir='bidmc', sampto=8 * 60 * 125)  # 8 mins at 125Hz
# signals = record.p_signal  # shape: (60000, 5)
# names = [n.strip().strip(',') for n in record.sig_name]

# # ─── Get Indices of ECG Lead II and RESP ──────────────────────────────────
# idx_ecg = names.index('II')     # ECG Lead II
# idx_resp = names.index('RESP')  # Respiration signal

# # ─── Parameters ────────────────────────────────────────────────────────────
# N_train = 10000
# N_test = 5000
# emb_dim = 3

# # ─── Select Signals ────────────────────────────────────────────────────────
# u = signals[:, idx_ecg]   # input: ECG Lead II
# v = signals[:, idx_resp]  # target: RESP

# # ─── Normalize to [-1, 1] ──────────────────────────────────────────────────
# u_norm = 2 * (u - np.min(u)) / (np.max(u) - np.min(u)) - 1
# v_norm = 2 * (v - np.min(v)) / (np.max(v) - np.min(v)) - 1

# # ─── Delay Embedding ───────────────────────────────────────────────────────
# inputs = create_delay_embedding(u_norm, emb_dim)
# targets = create_delay_embedding(v_norm, emb_dim)

# # ─── Train/Test Split ──────────────────────────────────────────────────────
# train_input = inputs[:N_train]
# train_target = targets[:N_train]
# test_input = inputs[N_train:N_train+N_test]
# test_target = targets[N_train:N_train+N_test]

# # ─── Summary ───────────────────────────────────────────────────────────────
# print(f"Train input shape:  {train_input.shape}")
# print(f"Train target shape: {train_target.shape}")
# print(f"Test input shape:   {test_input.shape}")
# print(f"Test target shape:  {test_target.shape}")

In [22]:
# horizons = [300, 600, 1000]

# nrmse_dict = defaultdict(list)
# seeds = range(995, 1025)

# for seed in seeds:
#     lstm_baseline = LSTMBaseline3D(
#                         hidden_size=32,         # parameter budget ~ 4800
#                         lr=1e-3,
#                         epochs=400,
#                         device='cuda' if torch.cuda.is_available() else 'cpu',
#                         seed=seed)
#     lstm_baseline.fit(train_input, train_target)

#     # one-step roll-out to build an initial vector for auto-regressive mode
#     init_vec = train_target[-1]                # last teacher-forced target
#     lstm_preds = lstm_baseline.predict_open_loop(test_input)

#     nrmse = evaluate_nrmse(lstm_preds, test_target, horizons)
#     nrmse_dict['LSTM'].append(nrmse)
#     # for horizon, value in nrmse.items():
#     #     nrmse_dict[horizon].append(value)

In [23]:
# print("\nNRMSE for Different Prediction Horizons:")
# print("-" * 140)
# print(f"{'LSTM':<17}")
# print("-" * 140)

# for horizon in horizons:
#     hfr_vals = [np.mean(hfr_nrmse[horizon]) for hfr_nrmse in nrmse_dict['LSTM']]

#     print(f"{horizon:<10}", end=" ")
#     for vals in [hfr_vals]:
#         mean = np.mean(vals)
#         std = np.std(vals)
#         print(f"{mean} ± {std}".ljust(18), end="")
#     print()

### Canonical Datasets

In [24]:
def lorenz_deriv(state, t, sigma=10.0, rho=28.0, beta=8.0/3.0):
    """
    Right-hand side of the Lorenz system:
      dx/dt = sigma*(y - x)
      dy/dt = x*(rho - z) - y
      dz/dt = x*y - beta*z

    Args:
        state: sequence of three values (x, y, z).
        t: current time; unused by the equations, but part of the
           func(y, t, ...) signature under which odeint calls this function.
        sigma, rho, beta: system parameters.

    Returns:
        list [dx/dt, dy/dt, dz/dt].
    """
    x, y, z = state
    return [
        sigma * (y - x),
        x*(rho - z) - y,
        x*y - beta*z,
    ]

def generate_lorenz_data(
    initial_state=[1.0, 1.0, 1.0],
    tmax=25.0,
    dt=0.01,
    sigma=10.0,
    rho=28.0,
    beta=8.0/3.0
):
    """
    Numerically integrate the Lorenz equations over [0, tmax] using odeint.

    Args:
        initial_state: starting point [x0, y0, z0].
        tmax: final integration time.
        dt: spacing of the output time grid; must be positive. The grid has
            exactly this spacing when tmax is a multiple of dt.
        sigma, rho, beta: Lorenz parameters forwarded to lorenz_deriv.

    Returns:
        t_vals: array of round(tmax / dt) + 1 time points from 0 to tmax inclusive
        sol   : array shape [len(t_vals), 3] of [x(t), y(t), z(t)]

    Raises:
        ValueError: if dt is not positive.
    """
    if dt <= 0:
        raise ValueError(f"dt must be positive, got {dt}")
    # round() rather than int(): the float quotient can land just below an
    # integer (0.3 / 0.1 == 2.9999999999999996); truncating would drop a
    # sample and stretch the grid spacing away from dt.
    num_steps = int(round(tmax / dt)) + 1  # +1 to include t=0
    t_vals = np.linspace(0, tmax, num_steps)
    sol = odeint(lorenz_deriv, initial_state, t_vals, args=(sigma, rho, beta))
    return t_vals, sol

def rossler_derivatives(state, t, a=0.2, b=0.2, c=5.7):
    """
    Right-hand side of the Rössler system:
      dx/dt = -y - z
      dy/dt = x + a*y
      dz/dt = b + z*(x - c)

    `t` is not used by the equations; it is accepted because odeint calls
    the function as func(y, t, ...). Returns [dx/dt, dy/dt, dz/dt] as a list.
    """
    x, y, z = state
    return [
        -y - z,
        x + a * y,
        b + z * (x - c),
    ]

def generate_rossler_data(
    initial_state=[1.0, 0.0, 0.0],
    tmax=25.0,
    dt=0.01,
    a=0.2,
    b=0.2,
    c=5.7
):
    """
    Numerically integrate Rössler equations x'(t), y'(t), z'(t) using odeint.

    Args:
        initial_state: starting point [x0, y0, z0].
        tmax: final integration time.
        dt: spacing of the output time grid; must be positive. The grid has
            exactly this spacing when tmax is a multiple of dt.
        a, b, c: Rössler parameters forwarded to rossler_derivatives.

    Returns:
       t_vals: array of round(tmax / dt) + 1 time points from 0 to tmax inclusive
       sol   : array shape [len(t_vals), 3] of [x(t), y(t), z(t)]

    Raises:
        ValueError: if dt is not positive.
    """
    if dt <= 0:
        raise ValueError(f"dt must be positive, got {dt}")
    # linspace includes both endpoints, so N samples are spaced tmax / (N - 1)
    # apart. The +1 (for t=0) is what makes the spacing equal dt; round()
    # guards against the float quotient landing just below an integer.
    num_steps = int(round(tmax / dt)) + 1
    t_vals = np.linspace(0, tmax, num_steps)
    sol = odeint(rossler_derivatives, initial_state, t_vals, args=(a, b, c))
    return t_vals, sol

def chen_deriv(state, t, a=35.0, b=3.0, c=28.0):
    """
    Right-hand side of the Chen system:
      dx/dt = a*(y - x)
      dy/dt = (c - a)*x + c*y - x*z
      dz/dt = x*y - b*z

    `t` is not used by the equations; it is accepted because odeint calls
    the function as func(y, t, ...). Returns [dx/dt, dy/dt, dz/dt] as a list.
    """
    x, y, z = state
    return [
        a*(y - x),
        (c - a)*x + c*y - x*z,
        x*y - b*z,
    ]

def generate_chen_data(
    initial_state=[1.0, 1.0, 1.0],
    tmax=50.0,
    dt=0.01,
    a=35.0,
    b=3.0,
    c=28.0
):
    """
    Integrates Chen's system from 'initial_state' up to time 'tmax' with step size 'dt'.

    Args:
        initial_state: starting point [x0, y0, z0].
        tmax: final integration time.
        dt: spacing of the output time grid; must be positive. The grid has
            exactly this spacing when tmax is a multiple of dt.
        a, b, c: Chen parameters forwarded to chen_deriv.

    Returns:
      t_vals: time array of length T = round(tmax / dt) + 1, from 0 to tmax inclusive
      sol   : array shape [T, 3], the trajectory [x(t), y(t), z(t)]

    Raises:
        ValueError: if dt is not positive.
    """
    if dt <= 0:
        raise ValueError(f"dt must be positive, got {dt}")
    # linspace includes both endpoints, so N samples are spaced tmax / (N - 1)
    # apart. The +1 (for t=0) is what makes the spacing equal dt; round()
    # guards against the float quotient landing just below an integer.
    num_steps = int(round(tmax / dt)) + 1
    t_vals = np.linspace(0, tmax, num_steps)
    sol = odeint(chen_deriv, initial_state, t_vals, args=(a, b, c))
    return t_vals, sol

In [25]:
# Hyperparameter grid for the LSTM baseline. run_grid_search expands the
# Cartesian product of these lists and forwards each combination to the model
# constructor as keyword arguments.
grid = {
    "input_dim": [3],
    "hidden_size": [500],
    "output_dim": [3],
    "lr": [1e-3],
    "epochs": [80],
    # Fall back to CPU so the notebook still runs on machines without a GPU.
    "device": ['cuda' if torch.cuda.is_available() else 'cpu'],
}

In [None]:
def run_grid_search(model_class, param_grid, model_name,
                    output_path="grid_search_results.json", f=generate_chen_data, lambda_max=0.9):
    """
    Evaluate every hyperparameter combination in `param_grid` and save NRMSE statistics.

    For each combination, a model is trained and rolled out once per
    (initial state, train fraction, seed) triple: 3 initial states x 3 train
    fractions x 4 seeds. Each trajectory is produced by `f` with tmax=250 and
    dt=0.02, its first 2000 samples are discarded, and the remainder is
    rescaled with MinMaxScaler before being split into one-step-ahead
    input/target pairs.

    Args:
        model_class: called as model_class(**params, seed=seed); the instance
            must provide fit(train_input, train_target) and
            predict(initial_input, n_steps).
        param_grid: dict mapping constructor keyword names to lists of
            candidate values; the Cartesian product of the lists is searched.
        model_name: label used only in the progress message.
        output_path: JSON file that receives the results. It is rewritten
            after every finished combination so an interrupted run keeps the
            combinations completed so far.
        f: trajectory generator called as f(initial_state=..., tmax=..., dt=...)
            and returning (t_vals, trajectory).
        lambda_max: not used by the active code. It was an argument of the
            valid-prediction-time metric, which is currently disabled.

    Returns:
        list with one dict per combination:
          "params"      : the hyperparameters of that combination
          "mean_NRMSEs" : {str(horizon): mean NRMSE over all runs}
          "std_NRMSEs"  : {str(horizon): std of NRMSE over all runs}
    """
    # Fixed experiment protocol shared by every hyperparameter combination.
    horizons = [200, 400, 600, 800, 1000]
    initial_states = [[1.0, 1.0, 1.0], [1.0, 2.0, 3.0], [2.0, 1.5, 4.0]]
    train_fracs = [0.7, 0.75, 0.8]
    seeds = range(1, 5)
    tmax = 250
    dt = 0.02
    washout = 2000  # leading samples dropped from every generated trajectory

    def _save(partial_results):
        # Write the results collected so far to `output_path` as JSON.
        # The handle is deliberately not named `f`, which is the generator argument.
        with open(output_path, "w") as out_file:
            json.dump(partial_results, out_file, indent=2)

    param_keys = list(param_grid.keys())
    combos = list(itertools.product(*param_grid.values()))
    print(f"\n== Initial grid search for {model_name} with {len(combos)} combinations ==")

    results = []
    for comb in tqdm(combos, desc="Grid Search"):
        params = dict(zip(param_keys, comb))
        horizon_nrmse_all = {h: [] for h in horizons}

        for initial_state in initial_states:
            _, traj = f(initial_state=initial_state, tmax=tmax, dt=dt)
            traj = traj[washout:]

            # NOTE(review): the scaler is fit on the whole trajectory, test
            # segment included. Confirm this is intended before comparing
            # against pipelines that normalize on training data only.
            traj = MinMaxScaler().fit_transform(traj)

            T_data = len(traj)
            for train_frac in train_fracs:
                # Inputs and targets are the same series shifted by one step.
                train_end = int(train_frac * (T_data - 1))
                train_input = traj[:train_end]
                train_target = traj[1:train_end + 1]
                test_input = traj[train_end:-1]
                test_target = traj[train_end + 1:]
                n_test_steps = len(test_input)
                initial_in = test_input[0]

                for seed in seeds:
                    model = model_class(**params, seed=seed)
                    model.fit(train_input, train_target)
                    preds = model.predict(initial_in, n_test_steps)

                    # Valid-prediction-time, attractor-deviation and
                    # Lyapunov-deviation metrics used to be computed here and
                    # are currently disabled; only NRMSE is recorded.
                    horizon_nrmse = evaluate_nrmse(preds, test_target, horizons)
                    for h in horizons:
                        horizon_nrmse_all[h].append(horizon_nrmse[h])

        results.append({
            "params": params,
            "mean_NRMSEs": {str(h): float(np.mean(horizon_nrmse_all[h])) for h in horizons},
            "std_NRMSEs": {str(h): float(np.std(horizon_nrmse_all[h])) for h in horizons},
        })
        # Checkpoint so a KeyboardInterrupt or crash in a later combination
        # does not discard the ones already finished.
        _save(results)

    # Final write also covers the case where the grid produced no combinations.
    _save(results)
    print(f"\nAll results saved to `{output_path}`")

    return results

In [28]:
# Grid-search LSTMBaseline3D over `grid` on trajectories from generate_lorenz_data;
# results are written to lstm_lorenz_best_param.json and also returned as the cell output.
run_grid_search(LSTMBaseline3D, grid, "lstm", output_path="lstm_lorenz_best_param.json", f=generate_lorenz_data, lambda_max=0.9)


== Initial grid search for lstm with 1 combinations ==


Grid Search:   0%|          | 0/1 [00:40<?, ?it/s]


KeyboardInterrupt: 