In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from utils.utils import load_config
from datasets.SyntheticData import SyntheticData
import numpy as np
import pandas as pd
from torch_geometric.utils import to_dense_adj
import os
from datasets.RealEpidemics import RealEpidemics

Detected IPython. Loading juliacall extension. See https://juliapy.github.io/PythonCall.jl/stable/compat/#IPython


## two-step-SINDy

In [None]:
from models.baseline.TSS.NumericalDerivatives import NumericalDeriv
from models.baseline.TSS.ElementaryFunctions_Matrix import ElementaryFunctions_Matrix
from models.baseline.TSS.TwoPhaseInference import TwoPhaseInference

## Utils

In [None]:
def load_data_tss(config, snr_db = -1, real_epid=False):
    if not real_epid:
        dataset = SyntheticData(
            root=config['data_folder'],
            dynamics=config['name'],
            t_span=config['t_span'],
            t_max=config['t_eval_steps'],
            num_samples=config['num_samples'],
            seed=config['seed'],
            n_ics=config['n_iter'],
            input_range=config['input_range'],
            device=config['device'],
            horizon = config['horizon'],
            history = config['history'],
            stride=config.get('stride', 5),
            predict_deriv=config.get("predict_deriv", False),
            snr_db=snr_db,
            **config['integration_kwargs']
        )
    else:
        dataset = RealEpidemics(
            root = './data_real_epid_covid_orig',
            name = 'RealEpid',
            predict_deriv=True,
            scale=False,
        )
    
    raw_data = dataset.raw_data_sampled.cpu().detach().numpy() # shape: (ics, time_steps, n_nodes, 1)
    time = dataset.t_sampled
    
    edge_index = dataset[0].edge_index
    A = to_dense_adj(edge_index)[0].cpu().detach().numpy()
    
    return raw_data, A, time


def get_matrix_tss(raw_data, time, A, Dim=1, selfPolyOrder = 3, act_index=False):
    dt = time[0, 1] - time[0, 0]
    dt = dt.item()
    Nnodes = A.shape[0]
    
    data = []
    num_deriv = []
    Matrix = []
    
    for ic in range(raw_data.shape[0]):
        data_ic = raw_data[ic].squeeze(-1)  # shape: (time_steps, n_nodes)
        num_deriv_ic = NumericalDeriv(
            TimeSeries=data_ic,
            dim=1,
            Nnodes=data_ic.shape[1],
            deltT=dt
        )   # pd DatafRame
        
        data_ic = data_ic[2:-2,:]
        data.append(data_ic)
        num_deriv.append(num_deriv_ic)
        matrix_ic = ElementaryFunctions_Matrix(
            data_ic, 
            Dim, 
            Nnodes, 
            A, 
            selfPolyOrder, 
            coupledPolyOrder = 1, 
            PolynomialIndex = True, 
            TrigonometricIndex = True, 
            ExponentialIndex = True, 
            FractionalIndex = False, 
            ActivationIndex = act_index, 
            RescalingIndex = False, 
            CoupledPolynomialIndex = True,
            CoupledTrigonometricIndex = True, 
            CoupledExponentialIndex = True, 
            CoupledFractionalIndex = False,
            CoupledActivationIndex = act_index, 
            CoupledRescalingIndex = False
        )
        
        Matrix.append(matrix_ic)
        

    data = np.concatenate(data, axis=0)
    num_deriv = pd.concat(num_deriv, ignore_index=True)
    Matrix = pd.concat(Matrix, ignore_index=True)
    Matrix = Matrix.replace([np.inf, -np.inf], np.nan).dropna(axis=1)
    
    return Matrix, num_deriv, data


def two_step_sindy(Matrix, num_deriv, Nnodes, out_path, Dim = 1, plotstart = 0.5, plotend = 0.9, Keep = 10, SampleTimes = 20, Batchsize = 1,
                   snr_db = -1):
    Lambda = pd.DataFrame([[0.01, 0.5, 1]])
    os.makedirs(out_path, exist_ok=True)
    
    for dim in range(Dim):
        InferredResults, _, _, _ = TwoPhaseInference(
            Matrix, 
            num_deriv, 
            Nnodes, 
            dim, 
            Dim, 
            Keep, 
            SampleTimes,
            Batchsize, 
            Lambda, 
            plotstart, 
            plotend
        )
        
        save_file = f"{out_path}/results_dim={dim}.csv" if snr_db < 0 else f"{out_path}/results_dim={dim}_{snr_db}_db.csv"
        InferredResults.to_csv(save_file)
    
    
        

## Two Phase Inference

### Clean data

In [None]:
configs = [
    'configs/config_pred_deriv/config_ic1/config_kuramoto.yml',
    'configs/config_pred_deriv/config_ic1/config_biochemical.yml',
    'configs/config_pred_deriv/config_ic1/config_epidemics.yml',
    'configs/config_pred_deriv/config_ic1/config_population.yml'
]

for conf_path in configs:
    conf= load_config(config_path=conf_path)
    raw_data, A, time = load_data_tss(conf)
    
    Matrix, num_deriv, _ = get_matrix_tss(
        raw_data=raw_data,
        time = time,
        A=A,
        Dim=1,
        selfPolyOrder=3
    )
    
    two_step_sindy(
        Matrix=Matrix,
        num_deriv=num_deriv,
        Nnodes=A.shape[0],
        out_path=f'./saved_models_optuna/tss/{conf['name']}-{conf['n_iter']}_nofrac'
    )

### Noise

In [None]:
configs = [
    'configs/config_pred_deriv/config_ic1/config_kuramoto.yml',
    'configs/config_pred_deriv/config_ic1/config_biochemical.yml',
    'configs/config_pred_deriv/config_ic1/config_epidemics.yml',
    'configs/config_pred_deriv/config_ic1/config_population.yml'
]

snr_db_levels = [70, 50, 20]

for conf_path in configs:
    for snr_db in snr_db_levels:
        
        conf = load_config(config_path=conf_path)
        raw_data, A, time = load_data_tss(conf, snr_db=snr_db)
        
        Matrix, num_deriv, _ = get_matrix_tss(
            raw_data=raw_data,
            time = time,
            A=A,
            Dim=1,
            selfPolyOrder=3
        )
        
        two_step_sindy(
            Matrix=Matrix,
            num_deriv=num_deriv,
            Nnodes=A.shape[0],
            out_path=f'./saved_models_optuna/tss/{conf['name']}-{conf['n_iter']}_nofrac',
            snr_db=snr_db
        )

## Post Processing

In [3]:
import os
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

from post_processing import set_pytorch_seed, get_test_set, get_symb_test_error
from utils.utils import load_config
import torch


set_pytorch_seed(0)

kur_config = load_config("./configs/config_pred_deriv/config_ic1/config_kuramoto.yml")

KUR = get_test_set(
    dynamics=kur_config['name'],
    device='cuda',
    input_range=kur_config['input_range'],
    **kur_config['integration_kwargs']
)

g_symb = lambda x: torch.sin(x[:, 1] - x[:, 0]).unsqueeze(-1)
h_symb = lambda x: 2.0 + 0.5 * x[:, 1].unsqueeze(-1)

test_losses = get_symb_test_error(
    g_symb=g_symb,
    h_symb=h_symb,
    test_set=KUR,
    message_passing=True,
    include_time=False,
    is_symb=False
)

ts_mean = np.mean(test_losses)
ts_var = np.var(test_losses)
ts_std = np.std(test_losses)

print(f"Mean Test loss of symbolic formula: {ts_mean}")
print(f"Var Test loss of symbolic formula: {ts_var}")
print(f"Std Test loss of symbolic formula: {ts_std}")

"""### Epidemics"""

epid_config = load_config("./configs/config_pred_deriv/config_ic1/config_epidemics.yml")

EPID = get_test_set(
    dynamics=epid_config['name'],
    device='cuda',
    input_range=epid_config['input_range'],
    **epid_config['integration_kwargs']
)

g_symb = lambda x: 0.5*x[:, 1].unsqueeze(-1) * (1 - x[:, 0].unsqueeze(-1))
h_symb = lambda x: x[:, 1].unsqueeze(1) - 0.5 * x[:, 0].unsqueeze(-1)

test_losses = get_symb_test_error(
    g_symb=g_symb,
    h_symb=h_symb,
    test_set=EPID,
    message_passing=True,
    include_time=False,
    is_symb=False
)


ts_mean = np.mean(test_losses)
ts_var = np.var(test_losses)
ts_std = np.std(test_losses)

print(f"Mean Test loss of symbolic formula: {ts_mean}")
print(f"Var Test loss of symbolic formula: {ts_var}")
print(f"Std Test loss of symbolic formula: {ts_std}")

"""### Population"""

pop_config = load_config("./configs/config_pred_deriv/config_ic1/config_population.yml")

POP = get_test_set(
    dynamics=pop_config['name'],
    device='cuda',
    input_range=pop_config['input_range'],
    **pop_config['integration_kwargs']
)

g_symb = lambda x: 0.2*torch.pow(x[:, 1].unsqueeze(-1), 3)
h_symb = lambda x: -0.5 * x[:, 0].unsqueeze(-1) + x[:, 1].unsqueeze(1)

test_losses = get_symb_test_error(
    g_symb=g_symb,
    h_symb=h_symb,
    test_set=POP,
    message_passing=True,
    include_time=False,
    is_symb=False
)

ts_mean = np.mean(test_losses)
ts_var = np.var(test_losses)
ts_std = np.std(test_losses)

print(f"Mean Test loss of symbolic formula: {ts_mean}")
print(f"Var Test loss of symbolic formula: {ts_var}")
print(f"Std Test loss of symbolic formula: {ts_std}")

"""### Biochemical"""

bio_config = load_config("./configs/config_pred_deriv/config_ic1/config_biochemical.yml")

BIO = get_test_set(
    dynamics=bio_config['name'],
    device='cuda',
    input_range=bio_config['input_range'],
    **bio_config['integration_kwargs']
)

g_symb = lambda x: (-0.5*x[:, 1] * x[:, 0]).unsqueeze(-1)
h_symb = lambda x: (1.0 - 0.5 * x[:, 0]).unsqueeze(-1)  + x[:, 1].unsqueeze(-1)

test_losses = get_symb_test_error(
    g_symb=g_symb,
    h_symb=h_symb,
    test_set=BIO,
    message_passing=True,
    include_time=False,
    is_symb=False
)

ts_mean = np.mean(test_losses)
ts_var = np.var(test_losses)
ts_std = np.std(test_losses)

print(f"Mean Test loss of symbolic formula: {ts_mean}")
print(f"Var Test loss of symbolic formula: {ts_var}")
print(f"Std Test loss of symbolic formula: {ts_std}")


Mean Test loss of symbolic formula: 1.3504788815528931e-05
Var Test loss of symbolic formula: 1.314533673970284e-13
Std Test loss of symbolic formula: 3.625649836884809e-07
Mean Test loss of symbolic formula: 1.071706852447581e-06
Var Test loss of symbolic formula: 8.008373820613663e-14
Std Test loss of symbolic formula: 2.829907033917133e-07
Mean Test loss of symbolic formula: 3.735399635237021e-06
Var Test loss of symbolic formula: 4.746857081617248e-13
Std Test loss of symbolic formula: 6.889743886108719e-07
Mean Test loss of symbolic formula: 1.2006473374034006e-06
Var Test loss of symbolic formula: 7.184599599254429e-14
Std Test loss of symbolic formula: 2.6804103415810103e-07


In [4]:
import sympy as sp
import json


def get_tss_test_error(
    text_sympy_mapping_g,
    text_sympy_mapping_h,
    row_means,
    test_set,
    result_dict,
    suffix = '',
    method = "dopri5"
):
    g_symb = sp.S(0)
    h_symb = sp.S(0)

    
    for symb_g in text_sympy_mapping_g.keys():
        g_symb += row_means[symb_g] * text_sympy_mapping_g[symb_g]
    for symb_h in text_sympy_mapping_h.keys():
        h_symb += row_means[symb_h] * text_sympy_mapping_h[symb_h]


    try:
        test_losses = get_symb_test_error(
            g_symb=g_symb,
            h_symb=h_symb,
            test_set=test_set,
            message_passing=False,
            include_time=False,
            method=method,
            atol=1e-5,
            rtol=1e-5,
            is_symb=True
        )

        ts_mean = np.mean(test_losses)
        ts_var = np.var(test_losses)
        ts_std = np.std(test_losses)

        print(f"Mean Test loss of symbolic formula: {ts_mean}")
        print(f"Var Test loss of symbolic formula: {ts_var}")
        print(f"Std Test loss of symbolic formula: {ts_std}")
        
        result_dict[f'tss_test_mae_{suffix}'] = ts_mean
        result_dict[f'tss_test_var_{suffix}'] = ts_var
        result_dict[f'tss_test_std_{suffix}'] = ts_std
        
    except AssertionError:
        print("Evaluation failed !")
        result_dict[f'error_{suffix}'] = 'Evaluation failed !'

### KUR

In [5]:
df = pd.read_csv("./saved_models_optuna/tss/Kuramoto-1/results_dim=0.csv", header=None)
df.set_index(0, inplace=True)
row_means = df.mean(axis=1)

row_means

0
NaN               0.000000
sinx1jMinusx1i    0.499495
constant          2.000012
dtype: float64

In [6]:
results_kur = {}

x_i, x_j = sp.symbols('x_i x_j')

text_sympy_mapping_g = {
    "sinx1jMinusx1i": sp.sin(x_j - x_i)
}
text_sympy_mapping_h = {
    "constant": sp.S(1.0)
}

get_tss_test_error(
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h,
    row_means=row_means,
    test_set=KUR,
    result_dict=results_kur
)

Mean Test loss of symbolic formula: 0.00029117241501808167
Var Test loss of symbolic formula: 4.4434753406930564e-10
Std Test loss of symbolic formula: 2.1079552511125697e-05


#### 70 DB

In [7]:
df = pd.read_csv("./saved_models_optuna/tss/Kuramoto-1/results_dim=0_70_db.csv", header=None)
df.set_index(0, inplace=True)
row_means = df.mean(axis=1)
row_means

0
NaN               0.000000
sinx1jMinusx1i    0.496489
constant          2.000509
dtype: float64

In [8]:
x_i, x_j = sp.symbols('x_i x_j')

text_sympy_mapping_g = {
    "sinx1jMinusx1i": sp.sin(x_j - x_i)
}
text_sympy_mapping_h = {
    "constant": sp.S(1.0)
}

get_tss_test_error(
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h,
    row_means=row_means,
    test_set=KUR,
    result_dict=results_kur,
    suffix='70db'
)

Mean Test loss of symbolic formula: 0.0021191818717246256
Var Test loss of symbolic formula: 2.269309859505237e-08
Std Test loss of symbolic formula: 0.00015064228687540682


#### 50 DB

In [9]:
df = pd.read_csv("./saved_models_optuna/tss/Kuramoto-1/results_dim=0_50_db.csv", header=None)
df.set_index(0, inplace=True)
row_means = df.mean(axis=1)
row_means

0
NaN               0.000000
sinx1jMinusx1i    0.531575
fracx1            7.369008
dtype: float64

In [10]:
x_i, x_j = sp.symbols('x_i x_j')

text_sympy_mapping_g = {
    "sinx1jMinusx1i": sp.sin(x_j - x_i)
}
text_sympy_mapping_h = {
    "fracx1": 1/ x_i
}

get_tss_test_error(
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h,
    row_means=row_means,
    test_set=KUR,
    result_dict=results_kur,
    suffix='50db'
)

Mean Test loss of symbolic formula: 0.558358391125997
Var Test loss of symbolic formula: 0.0024888551943536502
Std Test loss of symbolic formula: 0.049888427459217934


#### 20 DB

In [11]:
df = pd.read_csv("./saved_models_optuna/tss/Kuramoto-1/results_dim=0_20_db.csv", header=None)
df.set_index(0, inplace=True)
row_means = df.mean(axis=1)
row_means

0
NaN                0.000000
fracx1jMinusx1i   -0.004194
x1ifracx1j         0.087818
x1iexpx1j          0.000292
fracx1ix1j        -0.000736
dtype: float64

In [12]:
x_i, x_j = sp.symbols('x_i x_j')

text_sympy_mapping_g = {
    "x1iexpx1j": x_i * sp.exp(x_j),
    "fracx1jMinusx1i": 1/(x_j - x_i),
    "fracx1ix1j": 1/(x_i * x_j),
    "x1ifracx1j": x_i / x_j
}

text_sympy_mapping_h = {}

get_tss_test_error(
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h,
    row_means=row_means,
    test_set=KUR,
    result_dict=results_kur,
    suffix='20db',
    method="rk4"
)

Mean Test loss of symbolic formula: nan
Var Test loss of symbolic formula: nan
Std Test loss of symbolic formula: nan


In [13]:
with open("./saved_models_optuna/tss/Kuramoto-1/post_process_res.json", 'w') as file:
        json.dump(results_kur, file, indent=4)

### EPID

In [14]:
x_i, x_j = sp.symbols('x_i x_j')
results_epid = {}
df = pd.read_csv("./saved_models_optuna/tss/Epidemics-1/results_dim=0.csv", header=None)
df.set_index(0, inplace=True)
row_means = df.mean(axis=1)

row_means

0
NaN               0.000000
constant         -0.567966
expx1jMinusx1i    0.208438
dtype: float64

In [15]:
text_sympy_mapping_g = {
    "expx1jMinusx1i": sp.exp(x_j - x_i)
}

text_sympy_mapping_h = {
    "constant": sp.S(1.0)
}

get_tss_test_error(
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h,
    row_means=row_means,
    test_set=EPID,
    result_dict=results_epid
)

Mean Test loss of symbolic formula: 0.1941202183564504
Var Test loss of symbolic formula: 0.0005310114619173017
Std Test loss of symbolic formula: 0.023043685944685623


#### 70 DB

In [16]:
x_i, x_j = sp.symbols('x_i x_j')

df = pd.read_csv("./saved_models_optuna/tss/Epidemics-1/results_dim=0_70_db.csv", header=None)
df.set_index(0, inplace=True)
row_means = df.mean(axis=1)

row_means

0
NaN               0.000000
x1x1x1           -1.256831
constant          0.569542
expx1jMinusx1i    0.013251
dtype: float64

In [17]:
text_sympy_mapping_g = {
    "expx1jMinusx1i": sp.exp(x_j - x_i)
}

text_sympy_mapping_h = {
    "x1x1x1": x_i * x_i * x_i,
    "constant": sp.S(1.0)
}

get_tss_test_error(
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h,
    row_means=row_means,
    test_set=EPID,
    result_dict=results_epid,
    suffix='70db'
)

Mean Test loss of symbolic formula: 0.053738709539175034
Var Test loss of symbolic formula: 5.180841923608607e-05
Std Test loss of symbolic formula: 0.007197806557284384


#### 50 DB

In [18]:
x_i, x_j = sp.symbols('x_i x_j')

df = pd.read_csv("./saved_models_optuna/tss/Epidemics-1/results_dim=0_50_db.csv", header=None)
df.set_index(0, inplace=True)
row_means = df.mean(axis=1)

row_means

0
NaN               0.000000
x1x1x1           -1.091028
expx1jMinusx1i    0.111314
dtype: float64

In [19]:
text_sympy_mapping_g = {
    "expx1jMinusx1i": sp.exp(x_j - x_i)
}

text_sympy_mapping_h = {
    "x1x1x1": x_i * x_i * x_i
}

get_tss_test_error(
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h,
    row_means=row_means,
    test_set=EPID,
    result_dict=results_epid,
    suffix='50db'
)

Mean Test loss of symbolic formula: 0.016693448647856712
Var Test loss of symbolic formula: 8.436841921384436e-06
Std Test loss of symbolic formula: 0.002904624230668132


#### 20 DB

In [20]:
x_i, x_j = sp.symbols('x_i x_j')

df = pd.read_csv("./saved_models_optuna/tss/Epidemics-1/results_dim=0_20_db.csv", header=None)
df.set_index(0, inplace=True)
row_means = df.mean(axis=1)

row_means

0
NaN                0.000000
x1ifracx1j        -0.019999
fracx1ix1j         0.014712
fracx1jMinusx1i    0.000244
dtype: float64

In [21]:
text_sympy_mapping_g = {
    "fracx1ix1j": 1/(x_i * x_j),
    "x1ifracx1j": x_i / x_j,
    "fracx1jMinusx1i": 1/(x_j - x_i)
}

text_sympy_mapping_h = {}

get_tss_test_error(
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h,
    row_means=row_means,
    test_set=EPID,
    result_dict=results_epid,
    suffix='20db',
    method="rk4"
)

Mean Test loss of symbolic formula: 0.08860065788030624
Var Test loss of symbolic formula: 4.077819097984363e-06
Std Test loss of symbolic formula: 0.002019361061817416


In [23]:
with open("./saved_models_optuna/tss/Epidemics-1/post_process_res.json", 'w') as file:
    json.dump(results_epid, file, indent=4)

### BIO

In [24]:
x_i, x_j = sp.symbols('x_i x_j')

df = pd.read_csv("./saved_models_optuna/tss/Biochemical-1/results_dim=0.csv", header=None)
df.set_index(0, inplace=True)
row_means = df.mean(axis=1)

row_means

0
NaN         0.000000
x1ix1j     -0.711350
constant    0.867073
dtype: float64

In [25]:
results_bio = {}

text_sympy_mapping_g = {
    "x1ix1j": x_i * x_j
}

text_sympy_mapping_h = {
    "constant": sp.S(1.0)
}

get_tss_test_error(
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h,
    row_means=row_means,
    test_set=BIO,
    result_dict=results_bio
)

Mean Test loss of symbolic formula: 0.036044025172789894
Var Test loss of symbolic formula: 1.2206604668749429e-06
Std Test loss of symbolic formula: 0.0011048350405716424


#### 70 DB

In [26]:
x_i, x_j = sp.symbols('x_i x_j')

df = pd.read_csv("./saved_models_optuna/tss/Biochemical-1/results_dim=0_70_db.csv", header=None)
df.set_index(0, inplace=True)
row_means = df.mean(axis=1)

row_means

0
NaN         0.000000
x1ix1j     -0.740610
constant    0.905639
dtype: float64

In [27]:
text_sympy_mapping_g = {
    "x1ix1j": x_i * x_j
}

text_sympy_mapping_h = {
    "constant": sp.S(1.0)
}

get_tss_test_error(
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h,
    row_means=row_means,
    test_set=BIO,
    result_dict=results_bio,
    suffix='70db'
)

Mean Test loss of symbolic formula: 0.03708266963561376
Var Test loss of symbolic formula: 2.1507763171681031e-07
Std Test loss of symbolic formula: 0.00046376462965259685


#### 50 DB

In [28]:
x_i, x_j = sp.symbols('x_i x_j')

df = pd.read_csv("./saved_models_optuna/tss/Biochemical-1/results_dim=0_50_db.csv", header=None)
df.set_index(0, inplace=True)
row_means = df.mean(axis=1)

row_means

0
NaN       0.000000
x1ix1j    1.077840
x1x1     -2.325285
dtype: float64

In [29]:
text_sympy_mapping_g = {
    "x1ix1j": x_i * x_j
}

text_sympy_mapping_h = {
    "x1x1": x_i * x_i
}

get_tss_test_error(
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h,
    row_means=row_means,
    test_set=BIO,
    result_dict=results_bio,
    suffix='50db',
    method="euler"
)

Mean Test loss of symbolic formula: nan
Var Test loss of symbolic formula: nan
Std Test loss of symbolic formula: nan


#### 20 DB

In [30]:
x_i, x_j = sp.symbols('x_i x_j')

df = pd.read_csv("./saved_models_optuna/tss/Biochemical-1/results_dim=0_20_db.csv", header=None)
df.set_index(0, inplace=True)
row_means = df.mean(axis=1)

row_means

0
NaN                0.000000
x1ifracx1j         0.011954
fracx1ix1j         0.003264
fracx1jMinusx1i    0.000703
dtype: float64

In [31]:
text_sympy_mapping_g = {
    "fracx1ix1j": 1/(x_i * x_j),
    "x1ifracx1j": x_i / x_j,
    "fracx1jMinusx1i": 1/(x_j - x_i)
}

text_sympy_mapping_h = {}

get_tss_test_error(
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h,
    row_means=row_means,
    test_set=BIO,
    result_dict=results_bio,
    suffix='20db'
)

Evaluation failed !


In [32]:
with open("./saved_models_optuna/tss/Biochemical-1/post_process_res.json", 'w') as file:
    json.dump(results_bio, file, indent=4)

### POP

In [33]:
x_i, x_j = sp.symbols('x_i x_j')

df = pd.read_csv("./saved_models_optuna/tss/Population-1/results_dim=0.csv", header=None)
df.set_index(0, inplace=True)
row_means = df.mean(axis=1)

row_means

0
NaN         0.000000
constant   -0.016272
x1j         0.040017
sinx1j      0.003158
dtype: float64

In [34]:
results_pop = {}

text_sympy_mapping_g = {
    "sinx1j": sp.sin(x_j),
    "x1j": x_j
}

text_sympy_mapping_h = {
    "constant": sp.S(1.0)
}

get_tss_test_error(
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h,
    row_means=row_means,
    test_set=POP,
    result_dict=results_pop
)

Mean Test loss of symbolic formula: 0.0990138625105222
Var Test loss of symbolic formula: 1.0323049151655323e-05
Std Test loss of symbolic formula: 0.0032129502255178687


#### 70 DB

In [35]:
x_i, x_j = sp.symbols('x_i x_j')

df = pd.read_csv("./saved_models_optuna/tss/Population-1/results_dim=0_70_db.csv", header=None)
df.set_index(0, inplace=True)
row_means = df.mean(axis=1)

row_means

0
NaN         0.000000
constant   -0.000443
x1j         0.023472
dtype: float64

In [36]:
text_sympy_mapping_g = {
    "x1j": x_j
}

text_sympy_mapping_h = {
    "constant": sp.S(1.0)
}

get_tss_test_error(
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h,
    row_means=row_means,
    test_set=POP,
    result_dict=results_pop,
    suffix='70db'
)

Mean Test loss of symbolic formula: 0.09954124689102173
Var Test loss of symbolic formula: 5.92840036887458e-06
Std Test loss of symbolic formula: 0.0024348306653388815


#### 50 DB

In [37]:
x_i, x_j = sp.symbols('x_i x_j')

df = pd.read_csv("./saved_models_optuna/tss/Population-1/results_dim=0_50_db.csv", header=None)
df.set_index(0, inplace=True)
row_means = df.mean(axis=1)

row_means

0
NaN       0.000000
sinx1j   -1.508801
x1j       1.489724
dtype: float64

In [38]:
text_sympy_mapping_g = {
    "sinx1j": sp.sin(x_j),
    "x1j": x_j
}

text_sympy_mapping_h = {}

get_tss_test_error(
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h,
    row_means=row_means,
    test_set=POP,
    result_dict=results_pop,
    suffix='50db'
)

Mean Test loss of symbolic formula: 0.11308030039072037
Var Test loss of symbolic formula: 1.7624745905830963e-05
Std Test loss of symbolic formula: 0.00419818364365245


#### 20 DB

In [39]:
x_i, x_j = sp.symbols('x_i x_j')

df = pd.read_csv("./saved_models_optuna/tss/Population-1/results_dim=0_20_db.csv", header=None)
df.set_index(0, inplace=True)
row_means = df.mean(axis=1)

row_means

0
NaN      0.000000
x1x1     0.028332
sinx1   -0.482987
dtype: float64

In [40]:
text_sympy_mapping_g = {}

text_sympy_mapping_h = {
    "sinx1": sp.sin(x_i),
    "x1x1": x_i * x_i
}

get_tss_test_error(
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h,
    row_means=row_means,
    test_set=POP,
    result_dict=results_pop,
    suffix='20db'
)

Mean Test loss of symbolic formula: 0.03507093029717604
Var Test loss of symbolic formula: 1.4470365959317085e-05
Std Test loss of symbolic formula: 0.0038039934226174847


In [41]:
with open("./saved_models_optuna/tss/Population-1/post_process_res.json", 'w') as file:
        json.dump(results_pop, file, indent=4)

## Re-fitting coefficients

In [42]:
import torch
import sympy as sp
from post_processing import make_callable, get_model, set_pytorch_seed
import numpy as np

set_pytorch_seed(0)

def build_symb_model_tss():
    x_i, x_j = sp.symbols('x_i x_j')    

    g_symb = (1 / (1 + sp.exp(- (x_j - x_i))))
    h_symb = x_i
    
    g_symb = make_callable(g_symb)
    h_symb = make_callable(h_symb)

    symb_model = get_model(
        g = g_symb,
        h = h_symb,
        message_passing=False,
        include_time=False,
        integration_method='rk4'
    )
    symb_model.predict_deriv = True
    return symb_model


def get_dxdt_pred(data, symb_model, device='cpu'):
    self_int = []
    pair_int = []
    for snapshot in data:
        snapshot = snapshot.to(device)
        _ = symb_model(snapshot)
        self_int.append(symb_model.conv.model.h_out)    # h_out shape = g_out shape = (N, 1)
        pair_int.append(symb_model.conv.model.g_out)
    
    self_int = torch.stack(self_int, dim=1)
    pair_int = torch.stack(pair_int, dim=1)
    
    return self_int.cpu().detach().numpy().flatten(), pair_int.cpu().detach().numpy().flatten()


def sum_over_dxdt(self_int, pair_int, n_nodes, T):
    lib_new = []

    for i in range(n_nodes):
        for t in range(T):
            start = i * T
            end = start + t + 1  # Python slice is exclusive at end
            val1 = np.sum(self_int[start:end])
            val2 = np.sum(pair_int[start:end])  # index 35 in MATLAB = 34 in Python
            lib_new.append([val1, val2])

    lib_new = np.array(lib_new)
    return lib_new



In [45]:
import os
coeffs_path = "./inferred_coeffs/tpsindy"
os.makedirs(coeffs_path, exist_ok=True)

### COVID

In [46]:
from datasets.RealEpidemics import RealEpidemics


data_real_epid_orig = RealEpidemics(
    root = './data_real_epid_covid_orig',
    name = 'RealEpid',
    predict_deriv=True,
    scale=False,
)

symb_model_tss = build_symb_model_tss()

self_int, pair_int = get_dxdt_pred(
    data=data_real_epid_orig,
    symb_model=symb_model_tss,
    device=data_real_epid_orig.device
)

lib = sum_over_dxdt(
    self_int=self_int,
    pair_int=pair_int,
    n_nodes=data_real_epid_orig[0].x.shape[0],
    T=len(data_real_epid_orig)
)

import pandas as pd
lib_df = pd.DataFrame(lib, columns=['x', 'sigxjminusxi'])
lib_df.to_csv(f'{coeffs_path}/lib_new_covid.csv', index=False)


### H1N1

In [47]:
data_real_epid_orig_h1n1 = RealEpidemics(
    root = './data_real_epid_h1n1_orig',
    name = 'RealEpid',
    predict_deriv=True,
    scale=False,
    infection_data="./data/RealEpidemics/infected_numbers_H1N1.csv",
    inf_threshold=100
)

symb_model_tss = build_symb_model_tss()

self_int, pair_int = get_dxdt_pred(
    data=data_real_epid_orig_h1n1,
    symb_model=symb_model_tss,
    device=data_real_epid_orig_h1n1.device
)

lib = sum_over_dxdt(
    self_int=self_int,
    pair_int=pair_int,
    n_nodes=data_real_epid_orig_h1n1[0].x.shape[0],
    T=len(data_real_epid_orig_h1n1)
)

lib_df = pd.DataFrame(lib, columns=['x', 'sigxjminusxi'])
lib_df.to_csv(f'{coeffs_path}/lib_new_h1n1.csv', index=False)

### SARS

In [48]:
from datasets.RealEpidemics import RealEpidemics
import pandas as pd

data_real_epid_orig_sars = RealEpidemics(
    root = './data_real_epid_sars_orig',
    name = 'RealEpid',
    predict_deriv=True,
    scale=False,
    infection_data="./data/RealEpidemics/infected_numbers_sars.csv",
    inf_threshold=100
)

symb_model_tss = build_symb_model_tss()

self_int, pair_int = get_dxdt_pred(
    data=data_real_epid_orig_sars,
    symb_model=symb_model_tss,
    device=data_real_epid_orig_sars.device
)

lib = sum_over_dxdt(
    self_int=self_int,
    pair_int=pair_int,
    n_nodes=data_real_epid_orig_sars[0].x.shape[0],
    T=len(data_real_epid_orig_sars)
)

lib_df = pd.DataFrame(lib, columns=['x', 'sigxjminusxi'])
lib_df.to_csv(f'{coeffs_path}/lib_new_sars.csv', index=False)

### Fair fitting of coefficients

In [None]:
from sklearn.linear_model import LinearRegression 

righ_sides = [
    f"{coeffs_path}/lib_new_covid.csv",
    f"{coeffs_path}/lib_new_h1n1.csv",
    f"{coeffs_path}/lib_new_sars.csv"
]

lef_sides = [
    "./inferred_coeffs/tpsindy/left_side_components_covid.csv",
    "./inferred_coeffs/tpsindy/left_side_components_H1N1.csv",
    "./inferred_coeffs/tpsindy/left_side_components_Sars.csv"
]

n_nodes = [82, 21, 4]
names = ['covid', 'h1n1', 'sars']


for j, (rs, ls) in enumerate(zip(righ_sides, lef_sides)):
    X_all = pd.read_csv(rs)
    y_all = pd.read_csv(ls)
    N = n_nodes[j]
    X_mat = X_all.values
    y_mat = y_all.values
    num = len(X_mat[0])
    num2 = len(y_mat[0])
    L = int(len(X_mat)/N)
    times = N
    Coef = np.zeros(shape=(2,times))
    for i in range(0,times):
        X = X_all.iloc[i*L:(i+1)*L,:]
        y = y_all.iloc[i*L:(i+1)*L,:]
        
        cutoff = int(0.9 * len(X))
        X = X.iloc[:cutoff, :]
        y = y.iloc[:cutoff, :]
        
        v1 = X['x']
        v2 = X['sigxjminusxi']
        y1 = y['X']
        Xin = pd.concat([v1,v2],axis=1)
        model = LinearRegression(fit_intercept=False)
        model.fit(Xin,y1)
        a = model.coef_
        a = (pd.DataFrame(a)).values
        Coef[0,i] = a[0]
        Coef[1,i] = a[1]
        
    Coef = pd.DataFrame(Coef)
    # print(Coef)
    Coef.to_csv(f"{coeffs_path}/inf_coeffs_test_{names[j]}.csv", index=0)

