## Dynamics

Dynamics | $\partial_{\tau}x_i=$ |
| :--------: | :-------: |
Biochemical | $F -B x_i - R \sum_j A_{ij} x_i x_j$ |
Epidemics | $-B x_i + R \sum_j A_{ij} (1-x_i)x_j$ |
Population | $-B x_i^{b} + R \sum_j A_{ij} x_j^a$ |
Synchronization | $\omega_i + R \sum_j A_{ij} \sin(x_j-x_i)$ |

In [1]:
%load_ext autoreload
%autoreload 2

## Importing

In [2]:
import os
# PyTorch's reproducibility notes ask for a fixed cuBLAS workspace configuration
# when deterministic algorithms are enabled on CUDA (see set_pytorch_seed, which
# calls torch.use_deterministic_algorithms(True)). Set it before any CUDA work.
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

In [3]:
from utils.utils import *
import optuna
from optuna.storages import JournalStorage
from optuna.storages.journal import JournalFileBackend
from experiments.experiments_gkan import ExperimentsGKAN
from experiments.experiments_mpnn import ExperimentsMPNN
from train_and_eval import eval_model
import sympytorch

storage = JournalStorage(JournalFileBackend("optuna_journal_storage.log"))

Detected IPython. Loading juliacall extension. See https://juliapy.github.io/PythonCall.jl/stable/compat/#IPython


In [4]:
import random

def set_pytorch_seed(seed=42):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (including all CUDA devices), then switches cuDNN and PyTorch into their
    deterministic modes.

    Args:
        seed: Seed value shared by all generators. Defaults to 42.
    """
    # Python and NumPy global generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Trade speed for reproducibility: no cuDNN auto-tuning, deterministic
    # kernels only, and an error when an op has no deterministic version.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.use_deterministic_algorithms(True)
set_pytorch_seed(0)

In [5]:
# config = load_config("./configs/config_pred_deriv/config_real_epid_mpnn.yml")
# config['patience'] = 450
# exp = ExperimentsMPNN(
#     config=config,
#     n_trials=3,
#     study_name="test_mult_2"
# )

In [6]:
# exp.run()

In [7]:
# config = load_config("./configs/config_pred_deriv/config_ic1/config_population.yml")
# # config['t_span'] = [0, 1]
# exp = ExperimentsGKAN(
#     config=config,
#     n_trials=1,
#     study_name="test_mult_10",
#     snr_db=20
# )

In [8]:
# data = exp.training_set.raw_data_sampled[0].detach().cpu().numpy()
# plt.plot(data[:, 30, :])

In [9]:
# exp.training_set[0].y

In [10]:
# exp.epochs = 10
# exp.run()

In [11]:
# config_pop = load_config("./configs/config_pred_deriv/config_ic1/config_population_mpnn.yml")
# # config_pop["t_eval_steps"] = 1000
# # config_pop["t_span"] = [0, 10]

# exp = ExperimentsMPNN(
#     config=config_pop,
#     n_trials=1,
#     study_name='test_mult_3'
# )

In [12]:
# data = exp.training_set.raw_data_sampled[0].detach().cpu().numpy()
# plt.plot(data[:, 6, :])

In [13]:
# exp.training_set.raw_data_sampled.min()

In [14]:
# exp.epochs = 10
# exp.run()

## Utils

In [15]:
from models.utils.MPNN import MPNN
from models.baseline.MPNN_ODE import MPNN_ODE
from train_and_eval import eval_model
from datasets.SyntheticData import SyntheticData
from sympy import symbols, sin, summation, simplify
import networkx as nx
from torch_geometric.utils import from_networkx
from utils.utils import integrate
from torch_geometric.data import Data
from models.kan.KAN import KAN
from models.GKAN_ODE import GKAN_ODE

import optuna

import warnings
warnings.filterwarnings("ignore")

In [16]:
from sympy import latex
from torch.utils.data import DataLoader

def get_model(g, h, message_passing=True, include_time=False, atol=1e-5, rtol=1e-5, integration_method = 'scipy_solver'):
    """Wrap a pair of callables (g, h) into an ``MPNN_ODE`` in eval mode.

    Args:
        g: Callable used as ``g_net`` of the ``MPNN`` layer.
        h: Callable used as ``h_net`` of the ``MPNN`` layer.
        message_passing: Forwarded to ``MPNN``.
        include_time: Forwarded to ``MPNN``.
        atol: Absolute tolerance forwarded to ``MPNN_ODE``.
        rtol: Relative tolerance forwarded to ``MPNN_ODE``.
        integration_method: Integrator name forwarded to ``MPNN_ODE``.

    Returns:
        The ``MPNN_ODE`` instance after ``.eval()`` has been called on it.
    """
    message_layer = MPNN(
        g_net=g,
        h_net=h,
        message_passing=message_passing,
        include_time=include_time,
    )

    # The model path and adjoint=True are fixed for these throw-away wrappers.
    ode_model = MPNN_ODE(
        conv=message_layer,
        model_path="./saved_models_optuna/tmp_symb",
        adjoint=True,
        integration_method=integration_method,
        atol=atol,
        rtol=rtol,
    )

    return ode_model.eval()


def get_symb_test_error(g_symb, h_symb, test_set, message_passing=False, include_time=False, atol=1e-5, rtol=1e-5, scaler = None, inverse_scale=False, method='scipy_solver'):
    """Evaluate a (g, h) pair on every group of snapshots in ``test_set``.

    For each entry of ``test_set`` a fresh model is built with ``get_model``
    and scored with ``eval_model`` using an L1 criterion and
    ``pred_deriv=False``.

    Args:
        g_symb: Callable used as the g network.
        h_symb: Callable used as the h network.
        test_set: Iterable of sized collections of snapshots; each collection
            is evaluated as one single batch.
        message_passing: Forwarded to ``get_model``.
        include_time: Forwarded to ``get_model``.
        atol: Absolute integration tolerance forwarded to ``get_model``.
        rtol: Relative integration tolerance forwarded to ``get_model``.
        scaler: Forwarded to ``eval_model``.
        inverse_scale: Forwarded to ``eval_model``.
        method: Integration method forwarded to ``get_model``.

    Returns:
        list: One value returned by ``eval_model`` per entry of ``test_set``,
        in the same order.
    """
    def _keep_as_list(samples_list):
        # Hand the snapshots to the model as a plain list (no stacking).
        return samples_list

    test_losses = []

    for ts in test_set:
        # A fresh wrapper per group, as in the original flow; it is not
        # visible here whether MPNN_ODE keeps state between evaluations.
        symb = get_model(
            g=g_symb,
            h=h_symb,
            message_passing=message_passing,
            include_time=include_time,
            atol=atol,
            rtol=rtol,
            integration_method=method
        )

        # shuffle=False: the whole group is one batch, so a random permutation
        # adds nothing to an evaluation pass.
        test_loader = DataLoader(ts, batch_size=len(ts), shuffle=False, collate_fn=_keep_as_list)

        test_loss = eval_model(
            model=symb,
            valid_loader=test_loader,
            criterion=torch.nn.L1Loss(),
            scaler=scaler,
            inverse_scale=inverse_scale,
            pred_deriv=False
        )

        test_losses.append(test_loss)

    return test_losses



def get_test_set(dynamics, device='cuda', input_range=(0, 1), t_span = (0, 1), **integration_kwargs):
    """Build the fixed three-graph test set for one dynamical system.

    Three random graphs are generated with fixed seeds (Barabasi-Albert with
    70 nodes, Watts-Strogatz with 50 nodes, Erdos-Renyi with 100 nodes) and
    the dynamics are integrated on each with ``integrate_test_set``, reusing
    the graph's seed for the integration.

    Args:
        dynamics: Dynamics identifier forwarded to ``integrate_test_set``.
        device: Device forwarded to ``integrate_test_set``.
        input_range: Forwarded to ``integrate_test_set``.
        t_span: Forwarded to ``integrate_test_set``.
        **integration_kwargs: Extra keyword arguments forwarded unchanged.

    Returns:
        list: One entry per graph, each being the list returned by
        ``integrate_test_set``.
    """
    seeds = (12345, 67890, 111213)
    ba_seed, ws_seed, er_seed = seeds

    graphs = (
        nx.barabasi_albert_graph(70, 3, seed=ba_seed),
        nx.watts_strogatz_graph(50, 6, 0.3, seed=ws_seed),
        nx.erdos_renyi_graph(100, 0.05, seed=er_seed),
    )

    return [
        integrate_test_set(
            graph=graph,
            dynamics=dynamics,
            seed=graph_seed,
            device=device,
            input_range=input_range,
            t_span=t_span,
            **integration_kwargs
        )
        for graph, graph_seed in zip(graphs, seeds)
    ]
    


def integrate_test_set(graph, dynamics, seed=12345, device='cuda', input_range = (0, 1), t_span = (0, 1), **integration_kwargs):
    """Integrate ``dynamics`` on ``graph`` and pack the result as one snapshot.

    Args:
        graph: networkx graph the dynamics run on.
        dynamics: Dynamics identifier forwarded to ``integrate``.
        seed: Seed of the NumPy generator handed to ``integrate``.
        device: Device for the edge index; also forwarded to ``integrate``.
        input_range: Forwarded to ``integrate``.
        t_span: Forwarded to ``integrate``.
        **integration_kwargs: Extra keyword arguments forwarded to ``integrate``.

    Returns:
        list: A single-element list holding a ``Data`` object whose ``x`` is
        the first integrated state (with a leading axis added) and whose ``y``
        holds all later states.
    """
    edge_index = from_networkx(graph).edge_index.to(torch.device(device))

    # The number of evaluation steps is fixed at 300 for every test trajectory.
    trajectory, t_eval = integrate(
        input_range=input_range,
        t_span=t_span,
        t_eval_steps=300,
        dynamics=dynamics,
        device=device,
        graph=graph,
        rng=np.random.default_rng(seed=seed),
        **integration_kwargs
    )

    initial_state = trajectory[0].unsqueeze(0)
    later_states = trajectory[1:]

    # Note: the ``t_span`` attribute of the snapshot stores the time values
    # returned by ``integrate``, not the ``t_span`` argument of this function.
    return [
        Data(
            x=initial_state,
            y=later_states,
            edge_index=edge_index,
            edge_attr=None,
            t_span=t_eval,
        )
    ]


def build_model_from_file(model_path, message_passing, include_time, method='dopri5', adjoint=False, atol=1e-5, rtol=1e-5):
    """Rebuild a trained ``GKAN_ODE`` from the files stored under ``model_path``.

    Hyper-parameters are read from ``{model_path}/best_params.json`` and the
    weights from ``{model_path}/gkan/state_dict.pth``. The device is
    hard-coded to ``'cuda'``.

    Args:
        model_path: Directory containing ``best_params.json`` and ``gkan/``.
        message_passing: Forwarded to ``MPNN``; also sets the h-net input
            width (2 when true, 1 otherwise).
        include_time: Forwarded to ``MPNN``; adds one extra h-net input.
        method: Integration method forwarded to ``GKAN_ODE``.
        adjoint: Forwarded to ``GKAN_ODE``.
        atol: Absolute tolerance forwarded to ``GKAN_ODE``.
        rtol: Relative tolerance forwarded to ``GKAN_ODE``.

    Returns:
        The ``GKAN_ODE`` model on CUDA with the stored state dict loaded. The
        model is not switched to eval mode here.
    """
    best_params_file = f"{model_path}/best_params.json"
    best_state_path = f"{model_path}/gkan/state_dict.pth"

    with open(best_params_file, 'r') as f:
        best_hyperparams = json.load(f)

    # Options shared by both KAN networks.
    shared_kan_options = dict(device='cuda', compute_mult=True, store_act=True)

    # g_net: always two inputs.
    g_net = KAN(
        layers_hidden=[2, best_hyperparams['hidden_dim_g_net'], 1],
        grid_size=best_hyperparams['grid_size_g_net'],
        spline_order=best_hyperparams['spline_order_g_net'],
        grid_range=[-best_hyperparams['range_limit_g_net'], best_hyperparams['range_limit_g_net']],
        mu_1=best_hyperparams['mu_1_g_net'],
        mu_2=best_hyperparams['mu_2_g_net'],
        **shared_kan_options,
    )

    # h_net: one or two inputs depending on message passing, plus one for time.
    in_dim_h = (2 if message_passing else 1) + (1 if include_time else 0)

    h_net = KAN(
        layers_hidden=[in_dim_h, best_hyperparams['hidden_dim_h_net'], 1],
        grid_size=best_hyperparams['grid_size_h_net'],
        spline_order=best_hyperparams['spline_order_h_net'],
        grid_range=[-best_hyperparams['range_limit_h_net'], best_hyperparams['range_limit_h_net']],
        mu_1=best_hyperparams['mu_1_h_net'],
        mu_2=best_hyperparams['mu_2_h_net'],
        **shared_kan_options,
    )

    gkan = MPNN(
        h_net=h_net,
        g_net=g_net,
        message_passing=message_passing,
        include_time=include_time,
    )

    model = GKAN_ODE(
        conv=gkan,
        model_path='./saved_models_optuna/tmp',
        lmbd_g=best_hyperparams['lamb_g_net'],
        lmbd_h=best_hyperparams['lamb_h_net'],
        integration_method=method,
        adjoint=adjoint,
        atol=atol,
        rtol=rtol,
    )

    cuda = torch.device('cuda')
    model = model.to(cuda)

    # NOTE(review): weights_only=False lets torch.load unpickle arbitrary
    # objects; only use this with checkpoints from a trusted source.
    stored_state = torch.load(best_state_path, weights_only=False, map_location=cuda)
    model.load_state_dict(stored_state)

    return model


def post_process_gkan(
    g_symb,
    h_symb,
    model_path,
    test_set,
    message_passing=False,
    include_time=False,
    atol=1e-5,
    rtol=1e-5,
    method='dopri5',
    scaler=None,
    inverse_scale=False,
    adjoint=True
):
    """Print test-loss statistics for a symbolic formula and its source model.

    First the symbolic pair ``(g_symb, h_symb)`` is scored on ``test_set``.
    Then the trained model stored under ``model_path`` is rebuilt, its
    trainable-parameter count is printed, and its own g/h networks are scored
    through the same evaluation routine. Mean, variance and standard deviation
    of the per-group losses are printed in both cases.

    Args:
        g_symb: Callable for the symbolic g function.
        h_symb: Callable for the symbolic h function.
        model_path: Directory passed to ``build_model_from_file``.
        test_set: Test set passed to ``get_symb_test_error``.
        message_passing: Used for both evaluation and model reconstruction.
        include_time: Used for both evaluation and model reconstruction.
        atol: Absolute integration tolerance.
        rtol: Relative integration tolerance.
        method: Integration method name.
        scaler: Forwarded to ``get_symb_test_error``.
        inverse_scale: Forwarded to ``get_symb_test_error``.
        adjoint: Forwarded to ``build_model_from_file``.

    Returns:
        None. All results are printed.
    """
    # Evaluation settings shared by the symbolic formula and the trained model.
    eval_settings = dict(
        test_set=test_set,
        message_passing=message_passing,
        include_time=include_time,
        atol=atol,
        rtol=rtol,
        method=method,
        scaler=scaler,
        inverse_scale=inverse_scale,
    )

    test_losses_symb = get_symb_test_error(g_symb=g_symb, h_symb=h_symb, **eval_settings)
    ts_mean, ts_var, ts_std = np.mean(test_losses_symb), np.var(test_losses_symb), np.std(test_losses_symb)

    print(f"Mean Test loss of symbolic formula: {ts_mean}")
    print(f"Var Test loss of symbolic formula: {ts_var}")
    print(f"Std Test loss of symbolic formula: {ts_std}")

    # Rebuild the trained network the symbolic formula was extracted from.
    best_model = build_model_from_file(
        model_path=model_path,
        message_passing=message_passing,
        include_time=include_time,
        method=method,
        adjoint=adjoint,
        atol=atol,
        rtol=rtol,
    )

    print()

    tot_params = sum(p.numel() for p in best_model.parameters() if p.requires_grad)
    print(f"Number of model's parameters: {tot_params}")

    # Score the trained g/h networks through the same routine as the formula.
    best_model = best_model.eval()
    trained_nets = best_model.conv.model
    test_losses_model = get_symb_test_error(
        g_symb=trained_nets.g_net,
        h_symb=trained_nets.h_net,
        **eval_settings
    )
    ts_mean, ts_var, ts_std = np.mean(test_losses_model), np.var(test_losses_model), np.std(test_losses_model)

    print()

    print(f"Mean Test loss of best model: {ts_mean}")
    print(f"Var Test loss of best model: {ts_var}")
    print(f"Std Test loss of best model: {ts_std}")

    # y_true = test_set[0].y.detach().cpu().numpy()
    # y_pred = best_model(snapshot = test_set[0]).detach().cpu().numpy()
    
    # return y_true, y_pred


def make_callable(expr):
    """Turn a symbolic expression into a function of a 2-D tensor.

    The returned callable takes a tensor ``x`` and reads column 0 as ``x_i``
    and column 1 as ``x_j``.

    Args:
        expr: A sympy expression over the symbols ``x_i`` and/or ``x_j``, or a
            constant. Plain Python numbers (for example a bare ``0``) are
            accepted too and handled as constants.

    Returns:
        A callable ``f(x)``. For constants it returns a tensor of shape
        ``(x.shape[0], 1)`` filled with the constant, with the dtype and
        device of ``x``. Otherwise it returns whatever the
        ``sympytorch.SymPyModule`` built from ``expr`` produces.

    Raises:
        ValueError: If ``expr`` has free symbols but neither ``x_i`` nor
            ``x_j`` is among them.
    """
    # Plain Python numbers have no ``free_symbols`` attribute; without the
    # default this raised AttributeError instead of taking the constant path.
    free_syms = getattr(expr, "free_symbols", None)
    if not free_syms:
        # Expression is constant
        const_value = float(expr)
        return lambda x: torch.full((x.shape[0], 1), const_value, dtype=x.dtype, device=x.device)

    sym_module = sympytorch.SymPyModule(expressions=[expr])
    syms = {str(s) for s in free_syms}
    if {'x_i', 'x_j'} <= syms:
        return lambda x: sym_module(x_i=x[:, 0], x_j=x[:, 1])
    if 'x_i' in syms:
        return lambda x: sym_module(x_i=x[:, 0])
    if 'x_j' in syms:
        return lambda x: sym_module(x_j=x[:, 1])
    raise ValueError(f"Unexpected symbols in expression: {free_syms}")


def get_error_spline_wise(model_path, test_set, theta=0.1, atol=1e-5, rtol=1e-5, sample_size=10000, seed=42,
                          n_h_hidden_layers = 2, n_g_hidden_layers=2, sort_by='score'):
    """Fit a spline-wise symbolic formula and print its test-loss statistics.

    ``fit_model`` is called without message passing and without time input;
    the resulting g and h expressions are converted to callables and scored
    on ``test_set`` with the ``dopri5`` integrator.

    Args:
        model_path: Path forwarded to ``fit_model``.
        test_set: Test set passed to ``get_symb_test_error``.
        theta: Forwarded to ``fit_model``.
        atol: Absolute integration tolerance for the evaluation.
        rtol: Relative integration tolerance for the evaluation.
        sample_size: Forwarded to ``fit_model``.
        seed: Forwarded to ``fit_model``.
        n_h_hidden_layers: Forwarded to ``fit_model``.
        n_g_hidden_layers: Forwarded to ``fit_model``.
        sort_by: Forwarded to ``fit_model``.

    Returns:
        The first element returned by ``fit_model`` (the full formula).
    """
    symb_spline_wise, symb_g, symb_h = fit_model(
        n_h_hidden_layers=n_h_hidden_layers,
        n_g_hidden_layers=n_g_hidden_layers,
        model_path=model_path,
        theta=theta,
        message_passing=False,
        include_time=False,
        sample_size=sample_size,
        seed=seed,
        sort_by=sort_by,
    )
    print(symb_spline_wise)

    # A component that comes back as a plain Python int is promoted to a
    # sympy object so that make_callable can inspect it.
    symb_g, symb_h = (
        sp.sympify(component) if isinstance(component, int) else component
        for component in (symb_g, symb_h)
    )

    test_losses_splinewise = get_symb_test_error(
        g_symb=make_callable(symb_g),
        h_symb=make_callable(symb_h),
        test_set=test_set,
        message_passing=False,
        include_time=False,
        method="dopri5",
        atol=atol,
        rtol=rtol,
    )

    ts_mean, ts_var, ts_std = (
        np.mean(test_losses_splinewise),
        np.var(test_losses_splinewise),
        np.std(test_losses_splinewise),
    )

    print(f"Mean Test loss of symbolic formula: {ts_mean}")
    print(f"Var Test loss of symbolic formula: {ts_var}")
    print(f"Std Test loss of symbolic formula: {ts_std}")

    return symb_spline_wise


def plot_predictions(y_true, y_pred, node_index = 0):
    """Plot true and predicted trajectories of a single node.

    Args:
        y_true: Array indexed as ``[time, node, feature]`` with the targets.
        y_pred: Array indexed the same way with the predictions.
        node_index: Node whose trajectory is drawn. Defaults to 0.
    """
    plt.figure(figsize=(16, 8))

    # Draw the target first, then the prediction, with identical markers.
    for series, series_label in ((y_true, 'y_true'), (y_pred, 'y_pred')):
        plt.plot(series[:, node_index, :], label=series_label, marker='o')

    plt.xlabel('Time step')
    plt.ylabel('Value')
    plt.title(f'y_true vs y_pred for Node {node_index}')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


## LB losses


### Kuramoto

In [17]:
kur_config = load_config("./configs/config_pred_deriv/config_ic1/config_kuramoto.yml")

# Test trajectories for the Kuramoto dynamics on the three fixed graphs.
KUR = get_test_set(
    dynamics=kur_config['name'],
    device='cuda',
    input_range=kur_config['input_range'],
    **kur_config['integration_kwargs'],
)


def g_symb(x):
    """Reference pairwise term sin(x_j - x_i); columns 0 and 1 are x_i and x_j."""
    return torch.sin(x[:, 1] - x[:, 0]).unsqueeze(-1)


def h_symb(x):
    """Reference update 2.0 + 0.5 * (column 1).

    Column 1 is presumably the aggregated messages - confirm against MPNN.
    """
    return 2.0 + 0.5 * x[:, 1].unsqueeze(-1)


# Loss of the reference formula itself: the baseline for the fitted formulas.
test_losses = get_symb_test_error(
    g_symb=g_symb,
    h_symb=h_symb,
    test_set=KUR,
    message_passing=True,
    include_time=False,
)

ts_mean, ts_var, ts_std = np.mean(test_losses), np.var(test_losses), np.std(test_losses)

print(f"Mean Test loss of symbolic formula: {ts_mean}")
print(f"Var Test loss of symbolic formula: {ts_var}")
print(f"Std Test loss of symbolic formula: {ts_std}")



Mean Test loss of symbolic formula: 1.352664670169664e-05
Var Test loss of symbolic formula: 1.3517513389612395e-13
Std Test loss of symbolic formula: 3.676617112185112e-07


### Epidemics

In [18]:
epid_config = load_config("./configs/config_pred_deriv/config_ic1/config_epidemics.yml")

# Test trajectories for the epidemics dynamics on the three fixed graphs.
EPID = get_test_set(
    dynamics=epid_config['name'],
    device='cuda',
    input_range=epid_config['input_range'],
    **epid_config['integration_kwargs'],
)


def g_symb(x):
    """Reference pairwise term 0.5 * x_j * (1 - x_i); columns 0 and 1 are x_i and x_j."""
    return 0.5*x[:, 1].unsqueeze(-1) * (1 - x[:, 0].unsqueeze(-1))


def h_symb(x):
    """Reference update (column 1) - 0.5 * x_i.

    Column 1 is presumably the aggregated messages - confirm against MPNN.
    """
    return x[:, 1].unsqueeze(1) - 0.5 * x[:, 0].unsqueeze(-1)


# Loss of the reference formula itself: the baseline for the fitted formulas.
test_losses = get_symb_test_error(
    g_symb=g_symb,
    h_symb=h_symb,
    test_set=EPID,
    message_passing=True,
    include_time=False,
)

ts_mean, ts_var, ts_std = np.mean(test_losses), np.var(test_losses), np.std(test_losses)

print(f"Mean Test loss of symbolic formula: {ts_mean}")
print(f"Var Test loss of symbolic formula: {ts_var}")
print(f"Std Test loss of symbolic formula: {ts_std}")


Mean Test loss of symbolic formula: 1.0729370387707604e-06
Var Test loss of symbolic formula: 8.017524161241833e-14
Std Test loss of symbolic formula: 2.831523293430911e-07


### Population

In [19]:
pop_config = load_config("./configs/config_pred_deriv/config_ic1/config_population.yml")

# Test trajectories for the population dynamics on the three fixed graphs.
POP = get_test_set(
    dynamics=pop_config['name'],
    device='cuda',
    input_range=pop_config['input_range'],
    **pop_config['integration_kwargs'],
)


def g_symb(x):
    """Reference pairwise term 0.2 * x_j**3; column 1 is x_j."""
    return 0.2*torch.pow(x[:, 1].unsqueeze(-1), 3)


def h_symb(x):
    """Reference update -0.5 * x_i + (column 1).

    Column 1 is presumably the aggregated messages - confirm against MPNN.
    """
    return -0.5 * x[:, 0].unsqueeze(-1) + x[:, 1].unsqueeze(1)


# Loss of the reference formula itself: the baseline for the fitted formulas.
test_losses = get_symb_test_error(
    g_symb=g_symb,
    h_symb=h_symb,
    test_set=POP,
    message_passing=True,
    include_time=False,
)

ts_mean, ts_var, ts_std = np.mean(test_losses), np.var(test_losses), np.std(test_losses)

print(f"Mean Test loss of symbolic formula: {ts_mean}")
print(f"Var Test loss of symbolic formula: {ts_var}")
print(f"Std Test loss of symbolic formula: {ts_std}")


Mean Test loss of symbolic formula: 3.740968168131076e-06
Var Test loss of symbolic formula: 4.763240197448409e-13
Std Test loss of symbolic formula: 6.901623140572375e-07


### Biochemical

In [20]:
bio_config = load_config("./configs/config_pred_deriv/config_ic1/config_biochemical.yml")

# Test trajectories for the biochemical dynamics on the three fixed graphs.
BIO = get_test_set(
    dynamics=bio_config['name'],
    device='cuda',
    input_range=bio_config['input_range'],
    **bio_config['integration_kwargs'],
)


def g_symb(x):
    """Reference pairwise term -0.5 * x_i * x_j; columns 0 and 1 are x_i and x_j."""
    return (-0.5*x[:, 1] * x[:, 0]).unsqueeze(-1)


def h_symb(x):
    """Reference update 1.0 - 0.5 * x_i + (column 1).

    Column 1 is presumably the aggregated messages - confirm against MPNN.
    """
    return (1.0 - 0.5 * x[:, 0]).unsqueeze(-1)  + x[:, 1].unsqueeze(-1)


# Loss of the reference formula itself: the baseline for the fitted formulas.
test_losses = get_symb_test_error(
    g_symb=g_symb,
    h_symb=h_symb,
    test_set=BIO,
    message_passing=True,
    include_time=False,
)

ts_mean, ts_var, ts_std = np.mean(test_losses), np.var(test_losses), np.std(test_losses)

print(f"Mean Test loss of symbolic formula: {ts_mean}")
print(f"Var Test loss of symbolic formula: {ts_var}")
print(f"Std Test loss of symbolic formula: {ts_std}")


Mean Test loss of symbolic formula: 1.2020226639227378e-06
Var Test loss of symbolic formula: 7.199001650861301e-14
Std Test loss of symbolic formula: 2.683095535172257e-07


## Symb Reg

### Biochemical


#### IC=1

In [21]:
# Black-box symbolic regression on the trained biochemical GKAN (run "mult_12").
model_path_gkan = "./saved_models_optuna/model-biochemical-gkan/biochemical_gkan_ic1_s5_pd_mult_12/0/gkan"

# Factory rather than an instance: fit_black_box_from_kan receives the callable
# and can create PySR models itself (200 iterations, "score" model selection).
pysr_model = lambda : get_pysr_model(
    model_selection="score",
    n_iterations=200
)

# NOTE(review): theta=-np.inf presumably disables pruning (no "Pruning node"
# lines appear in this cell's output) - confirm against fit_black_box_from_kan.
gkan_symb, symb_g, symb_h = fit_black_box_from_kan(
    n_g_hidden_layers=2,
    n_h_hidden_layers=2,
    device='cuda',
    model_path=model_path_gkan,
    pysr_model=pysr_model,
    sample_size=10000,
    theta=-np.inf,
    message_passing=False
)

Fitting G_Net...
Execution time: 40.632179 seconds
Fitting H_Net...
Execution time: 26.607414 seconds


In [22]:
gkan_symb

\sum_{j}( -0.49989304*x_i*x_j) - 0.5006705*x_i + 1.0002961

In [23]:
# Turn the fitted sympy expressions into tensor callables, then print test-loss
# statistics on BIO for both the symbolic formula and the trained model.
g_symb = make_callable(symb_g)
h_symb = make_callable(symb_h)

# model_path is the run directory (parent of the "gkan" folder used for fitting).
post_process_gkan(
    g_symb=g_symb,
    h_symb=h_symb,
    model_path='./saved_models_optuna/model-biochemical-gkan/biochemical_gkan_ic1_s5_pd_mult_12/0',
    test_set=BIO,
    message_passing=False,
    include_time=False,
    method='dopri5'
)

Mean Test loss of symbolic formula: 3.353227778764752e-05
Var Test loss of symbolic formula: 3.6287748546308483e-12
Std Test loss of symbolic formula: 1.9049343439160438e-06

Number of model's parameters: 280

Mean Test loss of best model: 3.5303641804299936e-05
Var Test loss of best model: 1.9453725896936992e-11
Std Test loss of best model: 4.41063781067285e-06


In [24]:
# Spline-wise symbolic fit of the same biochemical run ("mult_12"), scored on BIO.
model_path_gkan = "./saved_models_optuna/model-biochemical-gkan/biochemical_gkan_ic1_s5_pd_mult_12/0/gkan"

# The returned formula is kept so the next cell can pass it to quantise().
splie_wise = get_error_spline_wise(
    model_path=model_path_gkan,
    test_set=BIO,
    theta=0.1,
    n_h_hidden_layers=2,
    n_g_hidden_layers=2
)

Fitting G_Net...
Execution time: 27.370929 seconds

Fitting H_Net...
Execution time: 27.196197 seconds
\sum_{j}( -0.489696795*x_i*x_j + 0.00396463899999999*x_i + 0.009527175*x_j + 0.001861365) - 0.499829*x_i + 1.00231669819695
Mean Test loss of symbolic formula: 0.01312450443704923
Var Test loss of symbolic formula: 1.2424257713960202e-06
Std Test loss of symbolic formula: 0.0011146415439036983


In [25]:
quantise(splie_wise)

\sum_{j}(-0.49*x_i*x_j + 0.01*x_j) - 0.5*x_i + 1.0

In [26]:
# Spline-wise symbolic fit of a second biochemical run ("mult_16"), scored on BIO.
model_path_gkan = "./saved_models_optuna/model-biochemical-gkan/biochemical_gkan_ic1_s5_pd_mult_16/0/gkan"

# Uses sample_size=30000 instead of the function's default of 10000.
splie_wise = get_error_spline_wise(
    model_path=model_path_gkan,
    test_set=BIO,
    theta=0.1,
    n_h_hidden_layers=2,
    n_g_hidden_layers=2,
    sample_size=30000,
)

Fitting G_Net...


Execution time: 59.955500 seconds

Fitting H_Net...
Execution time: 55.889033 seconds
\sum_{j}( -0.516187296*x_i*x_j + 0.00621972*x_i + 0.00802812*x_j - 0.0036254) + 0.136112788*x_i**2 - 0.648508088*x_i + 1.034861972
Mean Test loss of symbolic formula: 0.0016010809534539778
Var Test loss of symbolic formula: 4.4943979235184e-10
Std Test loss of symbolic formula: 2.119999510263717e-05


In [27]:
quantise(splie_wise)

\sum_{j}(-0.52*x_i*x_j + 0.01*x_i + 0.01*x_j) + 0.14*x_i**2 - 0.65*x_i + 1.03

#### SNR

In [28]:
# Biochemical runs trained on noisy data; the directory names carry the noise
# level (70 dB, 50 dB, 20 dB).
model_paths_gkan = [
    "./saved_models_optuna/model-biochemical-gkan/biochemical_gkan_ic1_s5_pd_mult_noise_70db_2/0",
    "./saved_models_optuna/model-biochemical-gkan/biochemical_gkan_ic1_s5_pd_mult_noise_50db_2/0",
    "./saved_models_optuna/model-biochemical-gkan/biochemical_gkan_ic1_s5_pd_mult_noise_20db_2/0"
]

# For every run: black-box fit, loss comparison against the trained model,
# then the spline-wise fit, all evaluated on the BIO test set.
for model_path in model_paths_gkan:
    print(model_path)
    # PySR model factory handed to fit_black_box_from_kan.
    pysr_model = lambda : get_pysr_model(
        model_selection="score",
        n_iterations=200
    )

    # The fitting routines read from the "gkan" sub-folder of the run directory.
    gkan_symb, symb_g, symb_h = fit_black_box_from_kan(
        n_g_hidden_layers=2,
        n_h_hidden_layers=2,
        device='cuda',
        model_path=f"{model_path}/gkan",
        pysr_model=pysr_model,
        sample_size=10000,
        theta=-np.inf,
        message_passing=False
    )
    print()
    print(str(gkan_symb))
    
    g_symb = make_callable(symb_g)
    h_symb = make_callable(symb_h)

    # post_process_gkan takes the run directory itself, not the "gkan" sub-folder.
    post_process_gkan(
        g_symb=g_symb,
        h_symb=h_symb,
        model_path=model_path,
        test_set=BIO,
        message_passing=False,
        include_time=False,
        method='dopri5'
    )
    print()
    print("Spline-Wise:")
    
    spline_wise = get_error_spline_wise(
        model_path=f"{model_path}/gkan",
        test_set=BIO,
        theta=0.1,
        n_h_hidden_layers=2,
        n_g_hidden_layers=2
    )
    print(str(quantise(spline_wise)))


./saved_models_optuna/model-biochemical-gkan/biochemical_gkan_ic1_s5_pd_mult_noise_70db_2/0
Fitting G_Net...
Execution time: 30.353003 seconds
Fitting H_Net...
Execution time: 43.999195 seconds

\sum_{j}( -0.49755025*x_i*x_j) + log(2.6254972 - x_i)
Mean Test loss of symbolic formula: 0.0016831848382328947
Var Test loss of symbolic formula: 3.0808843855568743e-09
Std Test loss of symbolic formula: 5.550571489096302e-05

Number of model's parameters: 287

Mean Test loss of best model: 0.0009035245166160166
Var Test loss of best model: 1.1097949536221683e-08
Std Test loss of best model: 0.00010534680600863836

Spline-Wise:
Pruning node (0,0)
Fitting G_Net...
Execution time: 17.684135 seconds

Fitting H_Net...
Execution time: 24.590769 seconds
\sum_{j}( -0.36541666*x_i*x_j - 0.0502024*x_i - 0.05289956*x_j + 0.0205396) - 0.508158*x_i + 1.005116
Mean Test loss of symbolic formula: 0.0039607267050693435
Var Test loss of symbolic formula: 3.1886049199279606e-07
Std Test loss of symbolic formul

### Kuramoto

#### IC=1

In [29]:
# Black-box symbolic regression on the trained Kuramoto GKAN (run "mult_12").
model_path_gkan = "./saved_models_optuna/model-kuramoto-gkan/kuramoto_gkan_ic1_s5_pd_mult_12/0/gkan"

# PySR model factory handed to fit_black_box_from_kan
# (200 iterations, "score" model selection).
pysr_model = lambda : get_pysr_model(
    model_selection="score",
    n_iterations=200
)

# NOTE(review): theta=-np.inf presumably disables pruning - confirm against
# fit_black_box_from_kan.
gkan_symb, symb_g, symb_h = fit_black_box_from_kan(
    n_g_hidden_layers=2,
    n_h_hidden_layers=2,
    device='cuda',
    model_path=model_path_gkan,
    pysr_model=pysr_model,
    sample_size=10000,
    theta=-np.inf,
    message_passing=False
)

Fitting G_Net...
Execution time: 57.220600 seconds
Fitting H_Net...
Execution time: 60.101732 seconds


In [30]:
gkan_symb

\sum_{j}( -0.50057304*sin(x_i - x_j)) - 0.00083998445*tanh(x_i) + 2.0000844

In [31]:
# Turn the fitted sympy expressions into tensor callables, then print test-loss
# statistics on KUR for both the symbolic formula and the trained model.
g_symb = make_callable(symb_g)
h_symb = make_callable(symb_h)

# model_path is the run directory (parent of the "gkan" folder used for fitting).
post_process_gkan(
    g_symb=g_symb,
    h_symb=h_symb,
    model_path='./saved_models_optuna/model-kuramoto-gkan/kuramoto_gkan_ic1_s5_pd_mult_12/0',
    test_set=KUR,
    message_passing=False,
    include_time=False,
    method='dopri5'
)

Mean Test loss of symbolic formula: 0.00048534209296728176
Var Test loss of symbolic formula: 4.0890553067469863e-10
Std Test loss of symbolic formula: 2.0221412677523266e-05

Number of model's parameters: 543

Mean Test loss of best model: 0.0016188209410756826
Var Test loss of best model: 6.282351116937351e-07
Std Test loss of best model: 0.0007926128384613355


In [32]:
# Spline-wise symbolic fit of the Kuramoto run "mult_12", scored on KUR.
model_path_gkan = "./saved_models_optuna/model-kuramoto-gkan/kuramoto_gkan_ic1_s5_pd_mult_12/0/gkan"

# The returned formula is kept so the next cell can pass it to quantise().
splie_wise = get_error_spline_wise(
    model_path=model_path_gkan,
    test_set=KUR,
    theta=0.1,
    n_g_hidden_layers=2,
    n_h_hidden_layers=2
)

Pruning node (0,0)
Pruning node (0,1)
Pruning node (0,3)
Pruning node (0,4)
Fitting G_Net...


Execution time: 7.930643 seconds

Pruning node (0,0)
Pruning node (0,1)
Pruning node (0,3)
Pruning node (0,4)
Pruning node (0,5)
Fitting H_Net...
Execution time: 4.418379 seconds
\sum_{j}( -0.501*sin(-0.999518*x_i + 0.999518*x_j + 3.137487)) + 1.999
Mean Test loss of symbolic formula: 0.0023045686539262533
Var Test loss of symbolic formula: 2.083349899171215e-09
Std Test loss of symbolic formula: 4.5643727928064935e-05


In [33]:
quantise(splie_wise)

\sum_{j}(-0.5*sin(-1.0*x_i + 1.0*x_j + 3.14)) + 2.0

In [34]:
# Spline-wise symbolic fit of a second Kuramoto run ("mult_16"), scored on KUR.
model_path_gkan = "./saved_models_optuna/model-kuramoto-gkan/kuramoto_gkan_ic1_s5_pd_mult_16/0/gkan"

# NOTE(review): n_g_hidden_layers=3 here, while every other spline-wise cell
# uses 2. The recorded mean test loss for this cell (~0.48) is far above the
# mult_12 result (~0.0023) - confirm the value matches the stored g-net.
splie_wise = get_error_spline_wise(
    model_path=model_path_gkan,
    test_set=KUR,
    theta=0.1,
    n_g_hidden_layers=3,
    n_h_hidden_layers=2
)

Fitting G_Net...


Execution time: 18.207711 seconds

Fitting H_Net...
Execution time: 3.080902 seconds
\sum_{j}( 0.00780404168*x_i - 0.00780404168*x_j - 8.88773558082877e-5*(-x_i + x_j + 0.697884360863639)**2 + 0.488*cos(3.625056*cos(0.321563*x_i - 0.321563*x_j + 0.671051) - 4.410536) - 0.03644631864) + 1.21082027122742
Mean Test loss of symbolic formula: 0.47987274328867596
Var Test loss of symbolic formula: 0.0007959131792107696
Std Test loss of symbolic formula: 0.028211933276731844


In [35]:
quantise(splie_wise)

\sum_{j}(0.01*x_i - 0.01*x_j + 0.49*cos(3.63*cos(0.32*x_i - 0.32*x_j + 0.67) - 4.41) - 0.04) + 1.21

#### SNR

In [36]:
# Kuramoto runs trained on noisy data; the directory names carry the noise
# level (70 dB, 50 dB, 20 dB). The 20 dB run has no "_2" suffix.
model_paths_gkan = [
    "./saved_models_optuna/model-kuramoto-gkan/kuramoto_gkan_ic1_s5_pd_mult_noise_70db_2/0",
    "./saved_models_optuna/model-kuramoto-gkan/kuramoto_gkan_ic1_s5_pd_mult_noise_50db_2/0",
    "./saved_models_optuna/model-kuramoto-gkan/kuramoto_gkan_ic1_s5_pd_mult_noise_20db/0",
]

# For every run: black-box fit, loss comparison against the trained model,
# then the spline-wise fit, all evaluated on the KUR test set.
for model_path in model_paths_gkan:
    print(model_path)
    print()
    # PySR model factory handed to fit_black_box_from_kan.
    pysr_model = lambda : get_pysr_model(
        model_selection="score",
        n_iterations=200
    )

    # The fitting routines read from the "gkan" sub-folder of the run directory.
    gkan_symb, symb_g, symb_h = fit_black_box_from_kan(
        n_g_hidden_layers=2,
        n_h_hidden_layers=2,
        device='cuda',
        model_path=f"{model_path}/gkan",
        pysr_model=pysr_model,
        sample_size=10000,
        theta=-np.inf,
        message_passing=False
    )

    print(str(gkan_symb))
    
    g_symb = make_callable(symb_g)
    h_symb = make_callable(symb_h)

    # post_process_gkan takes the run directory itself, not the "gkan" sub-folder.
    post_process_gkan(
        g_symb=g_symb,
        h_symb=h_symb,
        model_path=model_path,
        test_set=KUR,
        message_passing=False,
        include_time=False,
        method='dopri5'
    )
    
    print()
    print("Spline-Wise")
    
    # NOTE(review): theta=0.01 here, whereas the biochemical and epidemics SNR
    # loops use theta=0.1 - confirm this difference is intended.
    spline_wise = get_error_spline_wise(
        model_path=f"{model_path}/gkan",
        test_set=KUR,
        theta=0.01,
        n_h_hidden_layers=2,
        n_g_hidden_layers=2
    )
    print(str(quantise(spline_wise)))

./saved_models_optuna/model-kuramoto-gkan/kuramoto_gkan_ic1_s5_pd_mult_noise_70db_2/0

Fitting G_Net...
Execution time: 54.587956 seconds
Fitting H_Net...
Execution time: 61.616291 seconds
\sum_{j}( -0.48908207*sin(x_i - x_j)) - 3.870909e-5*x_i + 2.0120368
Mean Test loss of symbolic formula: 0.008413009655972322
Var Test loss of symbolic formula: 1.6430587054824652e-07
Std Test loss of symbolic formula: 0.0004053466054480369

Number of model's parameters: 138

Mean Test loss of best model: 0.03694612470765909
Var Test loss of best model: 7.532399191586253e-05
Std Test loss of best model: 0.00867893956171274

Spline-Wise
Pruning node (0,0)
Pruning node (0,2)
Fitting G_Net...
Execution time: 7.290454 seconds

Fitting H_Net...
Execution time: 6.758115 seconds
\sum_{j}( 0.001 - 0.487*sin(0.99957*x_i - 1.014156*x_j + 0.10247)) + 2.012
Mean Test loss of symbolic formula: 0.03401338681578636
Var Test loss of symbolic formula: 4.086510505378153e-05
Std Test loss of symbolic formula: 0.00639258

### Epidemics

#### IC=1

In [37]:
# Black-box symbolic regression on the trained epidemics GKAN (run "mult_12").
model_path_gkan = "./saved_models_optuna/model-epidemics-gkan/epidemics_gkan_ic1_s5_pd_mult_12/0/gkan"

# PySR model factory handed to fit_black_box_from_kan
# (200 iterations, "score" model selection).
pysr_model = lambda : get_pysr_model(
    model_selection="score",
    n_iterations=200
)

# NOTE(review): theta=-np.inf presumably disables pruning - confirm against
# fit_black_box_from_kan.
gkan_symb, symb_g, symb_h = fit_black_box_from_kan(
    n_g_hidden_layers=2,
    n_h_hidden_layers=2,
    device='cuda',
    model_path=model_path_gkan,
    pysr_model=pysr_model,
    sample_size=10000,
    theta=-np.inf,
    message_passing=False
)

Fitting G_Net...
Execution time: 46.134056 seconds
Fitting H_Net...
Execution time: 48.359699 seconds


In [38]:
gkan_symb

\sum_{j}( x_j*(0.50011538756532 - 0.5002436*x_i)) - 0.49971062*x_i

In [39]:
# Turn the fitted sympy expressions into tensor callables, then print test-loss
# statistics on EPID for both the symbolic formula and the trained model.
g_symb = make_callable(symb_g)
h_symb = make_callable(symb_h)

# model_path is the run directory (parent of the "gkan" folder used for fitting).
post_process_gkan(
    g_symb=g_symb,
    h_symb=h_symb,
    model_path='./saved_models_optuna/model-epidemics-gkan/epidemics_gkan_ic1_s5_pd_mult_12/0',
    test_set=EPID,
    message_passing=False,
    include_time=False,
    method='dopri5'
)

Mean Test loss of symbolic formula: 3.225190509207702e-05
Var Test loss of symbolic formula: 2.2093288603405234e-11
Std Test loss of symbolic formula: 4.700349838406204e-06

Number of model's parameters: 264

Mean Test loss of best model: 0.00019448873354122043
Var Test loss of best model: 8.969934973476717e-11
Std Test loss of best model: 9.470974064728884e-06


In [40]:
# Spline-wise symbolic fit of the epidemics run "mult_12", scored on EPID.
model_path_gkan = "./saved_models_optuna/model-epidemics-gkan/epidemics_gkan_ic1_s5_pd_mult_12/0/gkan"

# The returned formula is kept so the next cell can pass it to quantise().
splie_wise = get_error_spline_wise(
    model_path=model_path_gkan,
    test_set=EPID,
    theta=0.1,
    n_g_hidden_layers=2,
    n_h_hidden_layers=2
)

Pruning node (0,0)
Fitting G_Net...
Execution time: 24.911887 seconds

Pruning node (0,1)
Pruning node (0,2)
Fitting H_Net...
Execution time: 6.122642 seconds
\sum_{j}( -0.49653192*x_i*x_j - 0.0020606*x_i + 0.49726872*x_j + 0.00517860000000001) - 0.499118*x_i - 0.000664000000000001
Mean Test loss of symbolic formula: 0.006057584968705972
Var Test loss of symbolic formula: 5.065979323860325e-08
Std Test loss of symbolic formula: 0.0002250773050278576


In [41]:
quantise(splie_wise)

\sum_{j}(-0.5*x_i*x_j + 0.5*x_j + 0.01) - 0.5*x_i

In [42]:
# Spline-wise symbolic fit of a second epidemics run ("mult_16"), scored on EPID.
model_path_gkan = "./saved_models_optuna/model-epidemics-gkan/epidemics_gkan_ic1_s5_pd_mult_16/0/gkan"

# NOTE(review): theta=0.05 here, while the mult_12 cell uses theta=0.1 -
# confirm this difference is intended.
splie_wise = get_error_spline_wise(
    model_path=model_path_gkan,
    test_set=EPID,
    theta=0.05,
    n_g_hidden_layers=2,
    n_h_hidden_layers=2
)

Fitting G_Net...


Execution time: 52.134197 seconds

Pruning node (0,3)
Pruning node (0,5)
Fitting H_Net...
Execution time: 27.194627 seconds
\sum_{j}( -0.523148061*x_i*x_j + 0.014639249*x_i + 0.514403412*x_j - 0.00806726000000002) - 0.497218*x_i + 0.007992
Mean Test loss of symbolic formula: 0.004540392197668552
Var Test loss of symbolic formula: 3.1775670854677454e-09
Std Test loss of symbolic formula: 5.6369912945362485e-05


In [43]:
quantise(splie_wise)

\sum_{j}(-0.52*x_i*x_j + 0.01*x_i + 0.51*x_j - 0.01) - 0.5*x_i + 0.01

#### SNR

In [44]:
# Epidemics runs trained on noisy data; the directory names carry the noise
# level (70 dB, 50 dB, 20 dB).
model_paths_gkan = [
    "./saved_models_optuna/model-epidemics-gkan/epidemics_gkan_ic1_s5_pd_mult_noise_70db_2/0",
    "./saved_models_optuna/model-epidemics-gkan/epidemics_gkan_ic1_s5_pd_mult_noise_50db_2/0",
    "./saved_models_optuna/model-epidemics-gkan/epidemics_gkan_ic1_s5_pd_mult_noise_20db_2/0",
]

# For every run: black-box fit, loss comparison against the trained model,
# then the spline-wise fit, all evaluated on the EPID test set.
for model_path in model_paths_gkan:
    print(model_path)
    print()
    # PySR model factory handed to fit_black_box_from_kan.
    pysr_model = lambda : get_pysr_model(
        model_selection="score",
        n_iterations=200
    )

    # The fitting routines read from the "gkan" sub-folder of the run directory.
    gkan_symb, symb_g, symb_h = fit_black_box_from_kan(
        n_g_hidden_layers=2,
        n_h_hidden_layers=2,
        device='cuda',
        model_path=f"{model_path}/gkan",
        pysr_model=pysr_model,
        sample_size=10000,
        theta=-np.inf,
        message_passing=False
    )

    print(str(gkan_symb))
    
    g_symb = make_callable(symb_g)
    h_symb = make_callable(symb_h)

    # post_process_gkan takes the run directory itself, not the "gkan" sub-folder.
    post_process_gkan(
        g_symb=g_symb,
        h_symb=h_symb,
        model_path=model_path,
        test_set=EPID,
        message_passing=False,
        include_time=False,
        method='dopri5'
    )
    
    print()
    print("Spline-Wise")
    
    spline_wise = get_error_spline_wise(
        model_path=f"{model_path}/gkan",
        test_set=EPID,
        theta=0.1,
        n_h_hidden_layers=2,
        n_g_hidden_layers=2
    )
    print(str(quantise(spline_wise)))


./saved_models_optuna/model-epidemics-gkan/epidemics_gkan_ic1_s5_pd_mult_noise_70db_2/0

Fitting G_Net...


Execution time: 46.664981 seconds
Fitting H_Net...
Execution time: 45.491405 seconds
\sum_{j}( x_j*(0.50804573 - 0.51468444*x_i)) - 0.63656974*log(x_i + 1)
Mean Test loss of symbolic formula: 0.0018808306582892935
Var Test loss of symbolic formula: 2.7373946103514762e-08
Std Test loss of symbolic formula: 0.00016545073618305469

Number of model's parameters: 450

Mean Test loss of best model: 0.0020580210645372667
Var Test loss of best model: 8.081580328749753e-08
Std Test loss of best model: 0.00028428120459766156

Spline-Wise
Pruning node (0,0)
Pruning node (0,1)
Fitting G_Net...
Execution time: 39.962709 seconds

Pruning node (0,1)
Pruning node (0,2)
Pruning node (0,3)
Pruning node (0,4)
Pruning node (0,5)
Fitting H_Net...
Execution time: 7.110334 seconds
\sum_{j}( -0.463819821*x_i*x_j + 0.000815866*x_i + 0.470682753*x_j - 0.004827938) - 0.39339*x_i - 0.059838
Mean Test loss of symbolic formula: 0.01323495184381803
Var Test loss of symbolic formula: 2.6385112039073534e-06
Std Test l

### Population

#### IC=1

In [45]:
# Population GKAN checkpoint (IC=1) whose G and H networks are fitted symbolically.
model_path_gkan = "./saved_models_optuna/model-population-gkan/population_gkan_ic1_s5_pd_mult_12/0/gkan"


def pysr_model():
    """Build a PySR model with model_selection="score" and 200 iterations."""
    return get_pysr_model(model_selection="score", n_iterations=200)


# theta=-np.inf is forwarded as-is; presumably it disables pruning — TODO confirm.
gkan_symb, symb_g, symb_h = fit_black_box_from_kan(
    model_path=model_path_gkan,
    pysr_model=pysr_model,
    device='cuda',
    n_g_hidden_layers=2,
    n_h_hidden_layers=2,
    sample_size=10000,
    theta=-np.inf,
    message_passing=False
)

Fitting G_Net...
Execution time: 31.032831 seconds
Fitting H_Net...
Execution time: 32.204913 seconds


In [46]:
gkan_symb

\sum_{j}( 0.200043456054492*x_j**3) - 0.49992707*x_i

In [47]:
# Convert the symbolic G and H expressions from the black-box fit into callables.
g_symb, h_symb = make_callable(symb_g), make_callable(symb_h)

# Reports the test loss of the symbolic formula next to that of the saved model.
post_process_gkan(
    model_path='./saved_models_optuna/model-population-gkan/population_gkan_ic1_s5_pd_mult_12/0',
    test_set=POP,
    g_symb=g_symb,
    h_symb=h_symb,
    method='dopri5',
    message_passing=False,
    include_time=False
)

Mean Test loss of symbolic formula: 1.7520228842234548e-05
Var Test loss of symbolic formula: 2.1879355615594253e-12
Std Test loss of symbolic formula: 1.4791671851279778e-06

Number of model's parameters: 294

Mean Test loss of best model: 0.00026240094545452547
Var Test loss of best model: 2.1595274187835755e-09
Std Test loss of best model: 4.6470715711979036e-05


In [48]:
model_path_gkan = "./saved_models_optuna/model-population-gkan/population_gkan_ic1_s5_pd_mult_12/0/gkan"

# Spline-wise fit with candidates sorted by score.
spline_wise = get_error_spline_wise(
    sort_by='score',
    theta=0.1,
    n_g_hidden_layers=2,
    n_h_hidden_layers=2,
    model_path=model_path_gkan,
    test_set=POP
)

Fitting G_Net...
Execution time: 8.784961 seconds

Pruning node (0,0)
Pruning node (0,2)
Pruning node (0,4)
Fitting H_Net...
Execution time: 11.016199 seconds
\sum_{j}( 0.07072*x_j - 0.001768) - 0.500338*x_i - 0.000384
Mean Test loss of symbolic formula: 0.018310097356637318
Var Test loss of symbolic formula: 4.561667204535596e-07
Std Test loss of symbolic formula: 0.0006754011552059706


In [49]:
quantise(spline_wise)

\sum_{j}(0.07*x_j) - 0.5*x_i

In [50]:
model_path_gkan = "./saved_models_optuna/model-population-gkan/population_gkan_ic1_s5_pd_mult_12/0/gkan"

# Same checkpoint and settings as the score-sorted run, but candidates are sorted by loss.
spline_wise = get_error_spline_wise(
    sort_by='log_loss',
    theta=0.1,
    n_g_hidden_layers=2,
    n_h_hidden_layers=2,
    model_path=model_path_gkan,
    test_set=POP
)

Fitting G_Net...


Execution time: 8.721778 seconds

Pruning node (0,0)
Pruning node (0,2)
Pruning node (0,4)
Fitting H_Net...
Execution time: 10.917016 seconds
\sum_{j}( 0.147628*x_j**3 + 0.00663*x_j**2 + 0.020332*x_j - 0.000442) - 0.28655*x_i - 0.1746*tanh(1.427*x_i - 0.078) - 0.0128
Mean Test loss of symbolic formula: 0.00506338756531477
Var Test loss of symbolic formula: 6.30518194262835e-07
Std Test loss of symbolic formula: 0.0007940517579244032


In [51]:
quantise(spline_wise)

\sum_{j}(0.15*x_j**3 + 0.01*x_j**2 + 0.02*x_j) - 0.29*x_i - 0.17*tanh(1.43*x_i - 0.08) - 0.01

In [52]:
# A different population checkpoint (mult_16), fitted with three G hidden layers.
model_path_gkan = "./saved_models_optuna/model-population-gkan/population_gkan_ic1_s5_pd_mult_16/0/gkan"

spline_wise = get_error_spline_wise(
    sort_by='score',
    theta=0.1,
    n_g_hidden_layers=3,
    n_h_hidden_layers=2,
    model_path=model_path_gkan,
    test_set=POP
)

Pruning node (0,1)
Pruning node (0,2)
Pruning node (1,1)
Pruning node (1,2)
Fitting G_Net...
Execution time: 48.074125 seconds

Fitting H_Net...
Execution time: 16.373006 seconds
\sum_{j}( 0.19850712*x_j**3 - 0.0028686*x_j**2 + 0.00172116*x_j - 6.82999999999999e-5) - 0.230868*x_i - 0.55676 + 1.59961889102703*exp(-1.0527*exp(0.464*x_i))
Mean Test loss of symbolic formula: 0.0011149529018439353
Var Test loss of symbolic formula: 3.390983480122e-08
Std Test loss of symbolic formula: 0.0001841462321124709


In [53]:
quantise(spline_wise)

\sum_{j}(0.2*x_j**3) - 0.23*x_i - 0.56 + 1.6*exp(-1.05*exp(0.46*x_i))

#### SNR

In [54]:
# Population GKAN checkpoints trained on noisy data (70 / 50 dB, per the directory names).
model_paths_gkan = [
    "./saved_models_optuna/model-population-gkan/population_gkan_ic1_s5_pd_mult_noise_70db_2/0",
    "./saved_models_optuna/model-population-gkan/population_gkan_ic1_s5_pd_mult_noise_50db_2/0",
    # "./saved_models_optuna/model-population-gkan/population_gkan_ic1_s5_pd_mult_noise_20db_2/0",
]


def pysr_model():
    """Build the PySR model used for every checkpoint in the loop below."""
    # Defined once: the factory does not depend on the loop variable.
    return get_pysr_model(
        model_selection="score",
        n_iterations=200
    )


for model_path in model_paths_gkan:
    print(model_path)
    print()

    # Black-box symbolic fit of the G and H networks of this checkpoint.
    gkan_symb, symb_g, symb_h = fit_black_box_from_kan(
        n_g_hidden_layers=2,
        n_h_hidden_layers=2,
        device='cuda',
        model_path=f"{model_path}/gkan",
        pysr_model=pysr_model,
        sample_size=10000,
        theta=-np.inf,
        message_passing=False
    )

    print(str(gkan_symb))

    g_symb = make_callable(symb_g)
    h_symb = make_callable(symb_h)

    # Reports the test loss of the symbolic formula next to that of the saved model.
    post_process_gkan(
        g_symb=g_symb,
        h_symb=h_symb,
        model_path=model_path,
        test_set=POP,
        message_passing=False,
        include_time=False,
        method='dopri5'
    )

    print()
    print("Spline-Wise")

    # NOTE(review): theta=0.05 here, while the noise-free population spline-wise
    # cells earlier in the notebook use theta=0.1 — confirm this is intended.
    spline_wise = get_error_spline_wise(
        model_path=f"{model_path}/gkan",
        test_set=POP,
        theta=0.05,
        n_h_hidden_layers=2,
        n_g_hidden_layers=2
    )
    print(str(quantise(spline_wise)))

./saved_models_optuna/model-population-gkan/population_gkan_ic1_s5_pd_mult_noise_70db_2/0



Fitting G_Net...
Execution time: 31.744214 seconds
Fitting H_Net...
Execution time: 30.025341 seconds
\sum_{j}( 0.200004670689379*x_j**3) - 0.4995046*x_i
Mean Test loss of symbolic formula: 8.797331004946803e-05
Var Test loss of symbolic formula: 1.0629399142105468e-11
Std Test loss of symbolic formula: 3.260275930363175e-06

Number of model's parameters: 368

Mean Test loss of best model: 0.0007541064793864886
Var Test loss of best model: 6.963269243328493e-09
Std Test loss of best model: 8.344620568563015e-05

Spline-Wise
Pruning node (0,0)
Pruning node (0,5)
Fitting G_Net...
Execution time: 26.756082 seconds

Fitting H_Net...
Execution time: 4.718826 seconds
\sum_{j}( 0.01652*x_j + 0.03325*sin(0.654*x_i - 0.864) + 0.02336) - 0.499272*x_i + 0.000371999999999997
Mean Test loss of symbolic formula: 0.032248698795835175
Var Test loss of symbolic formula: 1.3531267305916095e-05
Std Test loss of symbolic formula: 0.003678487094705661
\sum_{j}(0.02*x_j + 0.03*sin(0.65*x_i - 0.86) + 0.02) -

### Real Epid

In [55]:
# GKAN checkpoint trained on the real epidemics data.
model_path_gkan = "./saved_models_optuna/model-real-epid-gkan/real_epid_gkan_4/0/gkan"


def pysr_model():
    """Build a PySR model with model_selection="score" and 200 iterations."""
    return get_pysr_model(model_selection="score", n_iterations=200)


# theta=-np.inf is forwarded as-is; presumably it disables pruning — TODO confirm.
gkan_symb, symb_g, symb_h = fit_black_box_from_kan(
    model_path=model_path_gkan,
    pysr_model=pysr_model,
    device='cuda',
    n_g_hidden_layers=2,
    n_h_hidden_layers=2,
    sample_size=10000,
    theta=-np.inf,
    message_passing=False
)

Fitting G_Net...
Execution time: 32.812989 seconds
Fitting H_Net...
Execution time: 17.956217 seconds


In [56]:
gkan_symb

\sum_{j}( log(12.5043577686824*x_i + 13.5111551375654)) + 2.4884284*sin(x_i) + 2.112076

In [57]:
# Spline-wise fit of the same checkpoint; note that this rebinds symb_g and
# symb_h, replacing the expressions produced by the black-box fit.
symb_spline_wise, symb_g, symb_h = fit_model(
    model_path=model_path_gkan,
    n_g_hidden_layers=2,
    n_h_hidden_layers=2,
    theta=0.1,
    sample_size=10000,
    message_passing=False,
    include_time=False
)

Fitting G_Net...


Execution time: 10.827934 seconds

Fitting H_Net...
Execution time: 4.955698 seconds


## TSS Errors

In [73]:
def get_tss_test_error(
    text_sympy_mapping_g,
    text_sympy_mapping_h,
    row_means,
    test_set
):
    """Build symbolic g and h from weighted terms and print their test-loss statistics.

    Args:
        text_sympy_mapping_g: Mapping from a term label to the sympy expression of
            that term. Each label is looked up in ``row_means`` and the weighted
            terms are summed to form ``g``.
        text_sympy_mapping_h: Same kind of mapping for ``h``. An empty mapping
            leaves ``h`` as the sympy zero.
        row_means: Object indexable by term label that yields the weight of each term.
        test_set: Test data forwarded unchanged to ``get_symb_test_error``.

    Returns:
        None. The mean, variance and standard deviation of the losses returned by
        ``get_symb_test_error`` are printed.
    """
    def _weighted_sum(term_map):
        # Sum of weight * expression over every term in the mapping.
        total = sp.S(0)
        for label, expression in term_map.items():
            total += row_means[label] * expression
        return total

    g_expr = _weighted_sum(text_sympy_mapping_g)
    h_expr = _weighted_sum(text_sympy_mapping_h)

    test_losses = get_symb_test_error(
        g_symb=make_callable(g_expr),
        h_symb=make_callable(h_expr),
        test_set=test_set,
        message_passing=False,
        include_time=False,
        method='dopri5',
        atol=1e-5,
        rtol=1e-5
    )

    ts_mean, ts_var, ts_std = (
        np.mean(test_losses),
        np.var(test_losses),
        np.std(test_losses),
    )

    print(f"Mean Test loss of symbolic formula: {ts_mean}")
    print(f"Var Test loss of symbolic formula: {ts_var}")
    print(f"Std Test loss of symbolic formula: {ts_std}")

### KUR

In [59]:
# Read the result table without a header row and use its first column as the index.
df = pd.read_csv(
    "./saved_models_optuna/tss/Kuramoto-1/results_dim=0.csv",
    header=None
).set_index(0)
# One value per index label: the mean over the remaining columns.
row_means = df.mean(axis=1)

In [60]:
x_i, x_j = sp.symbols('x_i x_j')

# Keys are index labels of row_means; values are the matching sympy terms.
text_sympy_mapping_g = {"sinx1jMinusx1i": sp.sin(x_j - x_i)}
text_sympy_mapping_h = {"constant": sp.S(1.0)}

get_tss_test_error(
    test_set=KUR,
    row_means=row_means,
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h
)


Mean Test loss of symbolic formula: 0.0002912786342979719
Var Test loss of symbolic formula: 4.206261141125669e-10
Std Test loss of symbolic formula: 2.0509171463337248e-05


#### 70 DB

In [61]:
# Read the result table without a header row and use its first column as the index.
df = pd.read_csv(
    "./saved_models_optuna/tss/Kuramoto-1/results_dim=0_70_db.csv",
    header=None
).set_index(0)
# One value per index label: the mean over the remaining columns.
row_means = df.mean(axis=1)
row_means

0
NaN               0.000000
sinx1jMinusx1i    0.500084
constant          1.999299
dtype: float64

In [62]:
x_i, x_j = sp.symbols('x_i x_j')

# Keys are index labels of row_means; values are the matching sympy terms.
text_sympy_mapping_g = {"sinx1jMinusx1i": sp.sin(x_j - x_i)}
text_sympy_mapping_h = {"constant": sp.S(1.0)}

get_tss_test_error(
    test_set=KUR,
    row_means=row_means,
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h
)

Mean Test loss of symbolic formula: 0.00035186927804412943
Var Test loss of symbolic formula: 3.4495474563401334e-14
Std Test loss of symbolic formula: 1.85729573744736e-07


#### 50 DB

In [63]:
# Read the result table without a header row and use its first column as the index.
df = pd.read_csv(
    "./saved_models_optuna/tss/Kuramoto-1/results_dim=0_50_db.csv",
    header=None
).set_index(0)
# One value per index label: the mean over the remaining columns.
row_means = df.mean(axis=1)

x_i, x_j = sp.symbols('x_i x_j')

# Keys are index labels of row_means; values are the matching sympy terms.
text_sympy_mapping_g = {"sinx1jMinusx1i": sp.sin(x_j - x_i)}
text_sympy_mapping_h = {"fracx1": 1/ x_i}

get_tss_test_error(
    test_set=KUR,
    row_means=row_means,
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h
)

Mean Test loss of symbolic formula: 0.6032914121945699
Var Test loss of symbolic formula: 0.0009892931714156185
Std Test loss of symbolic formula: 0.03145303119598521


#### 20 DB

In [70]:
# Read the result table without a header row and use its first column as the index.
df = pd.read_csv(
    "./saved_models_optuna/tss/Kuramoto-1/results_dim=0_20_db.csv",
    header=None
).set_index(0)
# One value per index label: the mean over the remaining columns.
row_means = df.mean(axis=1)
row_means

0
NaN                0.000000
x1iexpx1j          0.000338
fracx1jMinusx1i    0.026852
fracx1ix1j        -0.035464
x1ifracx1j        -0.020929
dtype: float64

In [None]:
# NOTE(review): this cell carries no execution count and no saved output.
x_i, x_j = sp.symbols('x_i x_j')

# Keys are index labels of row_means; values are the matching sympy terms.
# All four terms go into g; h is left empty.
text_sympy_mapping_h = {}
text_sympy_mapping_g = {
    "x1iexpx1j": x_i * sp.exp(x_j),
    "fracx1jMinusx1i": 1/(x_j - x_i),
    "fracx1ix1j": 1/(x_i * x_j),
    "x1ifracx1j": x_i / x_j
}

get_tss_test_error(
    test_set=KUR,
    row_means=row_means,
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h
)

### EPID

In [75]:
x_i, x_j = sp.symbols('x_i x_j')

# Read the result table without a header row and use its first column as the index.
df = pd.read_csv(
    "./saved_models_optuna/tss/Epidemics-1/results_dim=0.csv",
    header=None
).set_index(0)
# One value per index label: the mean over the remaining columns.
row_means = df.mean(axis=1)

row_means

0
NaN               0.000000
constant         -1.479146
expx1jMinusx1i    0.310919
dtype: float64

In [76]:
# Keys are index labels of row_means; values are the matching sympy terms.
text_sympy_mapping_g = {"expx1jMinusx1i": sp.exp(x_j - x_i)}
text_sympy_mapping_h = {"constant": sp.S(1.0)}

get_tss_test_error(
    test_set=EPID,
    row_means=row_means,
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h
)

Mean Test loss of symbolic formula: 0.13932317246993384
Var Test loss of symbolic formula: 0.0011257805640565567
Std Test loss of symbolic formula: 0.03355265360677985


#### 70 DB

In [77]:
x_i, x_j = sp.symbols('x_i x_j')

# Read the result table without a header row and use its first column as the index.
df = pd.read_csv(
    "./saved_models_optuna/tss/Epidemics-1/results_dim=0_70_db.csv",
    header=None
).set_index(0)
# One value per index label: the mean over the remaining columns.
row_means = df.mean(axis=1)

row_means

0
NaN         0.000000
x1x1x1     -1.319998
constant    0.645621
dtype: float64

In [79]:
# Keys are index labels of row_means; values are the matching sympy terms.
# Both terms go into h; g is left empty.
text_sympy_mapping_g = {}
text_sympy_mapping_h = {
    "x1x1x1": x_i * x_i * x_i,
    "constant": sp.S(1.0)
}

get_tss_test_error(
    test_set=EPID,
    row_means=row_means,
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h
)

Mean Test loss of symbolic formula: 0.06238254035512606
Var Test loss of symbolic formula: 6.161101675210123e-05
Std Test loss of symbolic formula: 0.007849268548858628


#### 50 DB

In [80]:
x_i, x_j = sp.symbols('x_i x_j')

# Read the result table without a header row and use its first column as the index.
df = pd.read_csv(
    "./saved_models_optuna/tss/Epidemics-1/results_dim=0_50_db.csv",
    header=None
).set_index(0)
# One value per index label: the mean over the remaining columns.
row_means = df.mean(axis=1)

row_means

0
NaN               0.000000
x1x1x1           -1.049246
expx1jMinusx1i    0.101428
dtype: float64

In [81]:
# Keys are index labels of row_means; values are the matching sympy terms.
text_sympy_mapping_g = {"expx1jMinusx1i": sp.exp(x_j - x_i)}
text_sympy_mapping_h = {"x1x1x1": x_i * x_i * x_i}

get_tss_test_error(
    test_set=EPID,
    row_means=row_means,
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h
)

Mean Test loss of symbolic formula: 0.021900850037733715
Var Test loss of symbolic formula: 4.893136437053426e-06
Std Test loss of symbolic formula: 0.0022120434980021134


#### 20 DB

In [82]:
x_i, x_j = sp.symbols('x_i x_j')

# Read the result table without a header row and use its first column as the index.
df = pd.read_csv(
    "./saved_models_optuna/tss/Epidemics-1/results_dim=0_20_db.csv",
    header=None
).set_index(0)
# One value per index label: the mean over the remaining columns.
row_means = df.mean(axis=1)

row_means

0
NaN                0.000000
fracx1ix1j         0.009380
x1ifracx1j         0.009147
fracx1j            0.000166
fracx1jMinusx1i    0.000093
dtype: float64

In [83]:
# Keys are index labels of row_means; values are the matching sympy terms.
# All four terms go into g; h is left empty.
text_sympy_mapping_h = {}
text_sympy_mapping_g = {
    "fracx1ix1j": 1/(x_i * x_j),
    "x1ifracx1j": x_i / x_j,
    "fracx1j": 1 / x_j,
    "fracx1jMinusx1i": 1/(x_j - x_i)
}

get_tss_test_error(
    test_set=EPID,
    row_means=row_means,
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h
)

Mean Test loss of symbolic formula: 0.11705934256315231
Var Test loss of symbolic formula: 8.681066367199897e-06
Std Test loss of symbolic formula: 0.002946364941279321


### BIO

In [66]:
x_i, x_j = sp.symbols('x_i x_j')

# Read the result table without a header row and use its first column as the index.
df = pd.read_csv(
    "./saved_models_optuna/tss/Biochemical-1/results_dim=0.csv",
    header=None
).set_index(0)
# One value per index label: the mean over the remaining columns.
row_means = df.mean(axis=1)

row_means

0
NaN         0.000000
x1ix1j     -0.629935
constant    0.805864
dtype: float64

In [67]:
# Keys are index labels of row_means; values are the matching sympy terms.
text_sympy_mapping_g = {"x1ix1j": x_i * x_j}
text_sympy_mapping_h = {"constant": sp.S(1.0)}

get_tss_test_error(
    test_set=BIO,
    row_means=row_means,
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h
)

Mean Test loss of symbolic formula: 0.030168694754441578
Var Test loss of symbolic formula: 7.935192549003557e-07
Std Test loss of symbolic formula: 0.0008907969773749547


### POP

In [68]:
x_i, x_j = sp.symbols('x_i x_j')

# Read the result table without a header row and use its first column as the index.
df = pd.read_csv(
    "./saved_models_optuna/tss/Population-1/results_dim=0.csv",
    header=None
).set_index(0)
# One value per index label: the mean over the remaining columns.
row_means = df.mean(axis=1)

row_means

0
NaN         0.000000
constant   -0.026403
x1j         0.181286
sinx1j     -0.003445
dtype: float64

In [69]:
# Keys are index labels of row_means; values are the matching sympy terms.
text_sympy_mapping_g = {
    "sinx1j": sp.sin(x_j),
    "x1j": x_j
}
text_sympy_mapping_h = {"constant": sp.S(1.0)}

get_tss_test_error(
    test_set=POP,
    row_means=row_means,
    text_sympy_mapping_g=text_sympy_mapping_g,
    text_sympy_mapping_h=text_sympy_mapping_h
)

Mean Test loss of symbolic formula: 0.127986046175162
Var Test loss of symbolic formula: 7.68135815102871e-05
Std Test loss of symbolic formula: 0.008764335771197217
