In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from utils.utils import load_config
from datasets.SyntheticData import SyntheticData
import numpy as np
import pandas as pd
from torch_geometric.utils import to_dense_adj
import os

Detected IPython. Loading juliacall extension. See https://juliapy.github.io/PythonCall.jl/stable/compat/#IPython


## two-step-SINDy

In [3]:
from models.baseline.TSS.NumericalDerivatives import NumericalDeriv
from models.baseline.TSS.ElementaryFunctions_Matrix import ElementaryFunctions_Matrix
from models.baseline.TSS.TwoPhaseInference import TwoPhaseInference

## Utils

In [6]:
def load_data_tss(config):
    """Build the synthetic dataset described by ``config`` and unpack it for TSS.

    Args:
        config: Mapping that must provide the keys ``data_folder``, ``name``,
            ``t_span``, ``t_eval_steps``, ``num_samples``, ``seed``, ``n_iter``,
            ``input_range``, ``device``, ``horizon``, ``history`` and
            ``integration_kwargs``. ``stride`` (default 5) and
            ``predict_deriv`` (default False) are optional.

    Returns:
        Tuple ``(raw_data, A, time)`` where ``raw_data`` is
        ``dataset.raw_data_sampled`` converted to a NumPy array, ``A`` is the
        dense adjacency matrix (NumPy) built from the ``edge_index`` of the
        first dataset item, and ``time`` is ``dataset.t_sampled`` returned
        unchanged.
    """
    # Config keys are read in the same order as the constructor arguments.
    dataset_args = {
        'root': config['data_folder'],
        'dynamics': config['name'],
        't_span': config['t_span'],
        't_max': config['t_eval_steps'],
        'num_samples': config['num_samples'],
        'seed': config['seed'],
        'n_ics': config['n_iter'],
        'input_range': config['input_range'],
        'device': config['device'],
        'horizon': config['horizon'],
        'history': config['history'],
        'stride': config.get('stride', 5),
        'predict_deriv': config.get("predict_deriv", False),
    }
    synthetic = SyntheticData(**dataset_args, **config['integration_kwargs'])

    # Presumably shaped (ics, time_steps, n_nodes, 1) -- confirm against SyntheticData.
    trajectories = synthetic.raw_data_sampled.cpu().detach().numpy()
    time_grid = synthetic.t_sampled

    # The graph is read from the first item only; to_dense_adj returns a
    # batched result, of which the first entry is kept.
    first_sample = synthetic[0]
    adjacency = to_dense_adj(first_sample.edge_index)[0].cpu().detach().numpy()

    return trajectories, adjacency, time_grid


def get_matrix_tss(raw_data, time, A, Dim=1, selfPolyOrder = 3):
    """Assemble the candidate-function library and derivative targets.

    For every leading-axis slice of ``raw_data`` (one per initial condition)
    the numerical derivative and the elementary-function matrix are computed,
    then the per-slice results are stacked row-wise.

    Args:
        raw_data: Array indexed as ``raw_data[ic]``; each slice must have a
            trailing singleton axis (it is removed with ``squeeze(-1)``).
        time: 2-D indexable time grid; the step is ``time[0, 1] - time[0, 0]``.
        A: Adjacency matrix; its first dimension gives the node count.
        Dim: Forwarded to ``ElementaryFunctions_Matrix``.
        selfPolyOrder: Forwarded to ``ElementaryFunctions_Matrix``.

    Returns:
        Tuple ``(Matrix, num_deriv, data)``: the stacked library with every
        column containing NaN or +/-inf removed, the stacked numerical
        derivatives, and the stacked trimmed trajectories.
    """
    step = (time[0, 1] - time[0, 0]).item()
    node_count = A.shape[0]

    # Keyword switches handed unchanged to ElementaryFunctions_Matrix.
    library_options = dict(
        coupledPolyOrder=1,
        PolynomialIndex=True,
        TrigonometricIndex=True,
        ExponentialIndex=True,
        FractionalIndex=False,
        ActivationIndex=False,
        RescalingIndex=False,
        CoupledPolynomialIndex=True,
        CoupledTrigonometricIndex=True,
        CoupledExponentialIndex=True,
        CoupledFractionalIndex=False,
        CoupledActivationIndex=True,
        CoupledRescalingIndex=False,
    )

    trimmed_blocks = []
    derivative_frames = []
    library_frames = []

    for trajectory in raw_data:
        series = trajectory.squeeze(-1)  # (time_steps, n_nodes)

        # NOTE(review): dim is fixed at 1 here regardless of ``Dim`` -- confirm intended.
        derivative_frames.append(
            NumericalDeriv(
                TimeSeries=series,
                dim=1,
                Nnodes=series.shape[1],
                deltT=step,
            )
        )

        # Drop two samples at each end; presumably this aligns the rows with
        # the output of NumericalDeriv -- confirm against its implementation.
        interior = series[2:-2, :]
        trimmed_blocks.append(interior)

        library_frames.append(
            ElementaryFunctions_Matrix(
                interior, Dim, node_count, A, selfPolyOrder, **library_options
            )
        )

    data = np.concatenate(trimmed_blocks, axis=0)
    num_deriv = pd.concat(derivative_frames, ignore_index=True)
    stacked_library = pd.concat(library_frames, ignore_index=True)

    # Infinite entries become NaN so that dropna removes the whole column.
    Matrix = stacked_library.replace([np.inf, -np.inf], np.nan).dropna(axis=1)

    return Matrix, num_deriv, data


def two_step_sindy(Matrix, num_deriv, Nnodes, out_path, Dim = 1, plotstart = 0.5, plotend = 0.9, Keep = 10, SampleTimes = 20, Batchsize = 1):
    """Run ``TwoPhaseInference`` for each dimension index and save the results.

    Args:
        Matrix: Candidate-function library forwarded to ``TwoPhaseInference``.
        num_deriv: Numerical derivatives forwarded to ``TwoPhaseInference``.
        Nnodes: Number of nodes, forwarded unchanged.
        out_path: Directory for the CSV files; created if it does not exist.
        Dim: Number of dimension indices to iterate over (``range(Dim)``).
        plotstart, plotend, Keep, SampleTimes, Batchsize: Forwarded unchanged
            to ``TwoPhaseInference``.

    Returns:
        None. One file ``results_dim=<index>.csv`` is written per index.
    """
    os.makedirs(out_path, exist_ok=True)

    # Single-row frame handed to TwoPhaseInference as its ``Lambda`` argument;
    # presumably regularisation settings -- confirm against TwoPhaseInference.
    lambda_frame = pd.DataFrame([[0.01, 0.5, 1]])

    for component in range(Dim):
        # Only the first of the four returned objects is persisted.
        inferred, _, _, _ = TwoPhaseInference(
            Matrix, num_deriv, Nnodes,
            component, Dim,
            Keep, SampleTimes, Batchsize,
            lambda_frame,
            plotstart, plotend,
        )
        inferred.to_csv(f"{out_path}/results_dim={component}.csv")
    
    
        

## Two Phase Inference

In [8]:
# Configuration files for the dynamics on which the two-phase inference
# baseline is run, one YAML file per system.
configs = [
    'configs/config_pred_deriv/config_ic1/config_kuramoto.yml',
    'configs/config_pred_deriv/config_ic1/config_biochemical.yml',
    'configs/config_pred_deriv/config_ic1/config_epidemics.yml',
    'configs/config_pred_deriv/config_ic1/config_population.yml'
]

for conf_path in configs:
    conf = load_config(config_path=conf_path)
    raw_data, A, time = load_data_tss(conf)

    # The stacked trajectories (third return value) are not needed here.
    Matrix, num_deriv, _ = get_matrix_tss(
        raw_data=raw_data,
        time=time,
        A=A,
        Dim=1,
        selfPolyOrder=3
    )

    two_step_sindy(
        Matrix=Matrix,
        num_deriv=num_deriv,
        Nnodes=A.shape[0],
        # Outer double quotes: reusing the enclosing quote character inside an
        # f-string replacement field is a SyntaxError before Python 3.12.
        out_path=f"./saved_models_optuna/tss/{conf['name']}-{conf['n_iter']}"
    )
    
    

0.9999980867044097
Best threshold: 0.001
Elementary functions discovered by Phase 1 without constant.
expx1jMinusx1i    -2.446746e-07
x1                 1.000000e-10
x1x1x1             1.000000e-10
x1x1               1.000000e-10
cosx1              1.000000e-10
tanhx1j            1.000000e-10
tanhx1jMinusx1i    1.000000e-10
tanhx1ix1j         1.000000e-10
x1itanhx1j         1.000000e-10
sinx1jMinusx1i     4.993027e-01
dtype: float64
Elementary functions discovered by Phase 1 with constant.
expx1jMinusx1i    -2.446746e-07
x1                 1.000000e-10
x1x1x1             1.000000e-10
x1x1               1.000000e-10
cosx1              1.000000e-10
tanhx1jMinusx1i    1.000000e-10
tanhx1ix1j         1.000000e-10
x1itanhx1j         1.000000e-10
sinx1jMinusx1i     4.993027e-01
constant           2.000026e+00
dtype: float64
-10448.293151657102 13736.321661095055 -10450.293151657102
1.0 -1.3146952771818496 1.0001914188251584
Index(['expx1jMinusx1i', 'x1', 'x1x1x1', 'x1x1', 'cosx1', 'tanhx1jMi