In [1]:
import h5py
import numpy as np
from copy import deepcopy

from library.misc.io import load_fields_from_hdf5

In [2]:
path_data_input = "/home/ingo/Git/SMM/shallow-moments-simulation/openfoam_data/channelflow_coarse/closure_dataset_input.hdf5"
path_data_output = "/home/ingo/Git/SMM/shallow-moments-simulation/openfoam_data/channelflow_coarse/closure_dataset_output.hdf5"

## Test multi sindy
Problem: not working with only two snapshots in time

In [3]:
with h5py.File(path_data_input, "r") as f:
    n_iterations = len(f.keys())
    Q, Qaux, time = load_fields_from_hdf5(path_data_input, 0)
    n_elements = Q.shape[0]
    n_fields = Q.shape[1]

input = np.zeros((n_iterations, n_elements, n_fields))
output = np.zeros((n_iterations, n_elements, n_fields))
dts = np.zeros((n_iterations, n_elements)) 
for i in range(n_iterations):
    output[i, :, :] = deepcopy(Q)
    Q, Qaux, time = load_fields_from_hdf5(path_data_input, i)
    input[i, :, :] = deepcopy(Q)
    Q, Qaux, dt = load_fields_from_hdf5(path_data_output, i)
    dts[i, :] = dt
input = input.reshape(n_iterations*n_elements, n_fields)
output = output.reshape(n_iterations*n_elements, n_fields)
dts = dts.reshape(n_iterations*n_elements)


go to primitive variables

In [4]:
for i in range(1, n_fields):
    input[:,i] = input[:,i] / input[:,0]

Sanity checks

In [5]:

print(f"number of NAN entries in output: {np.sum(np.isnan(output) > 0)}/{output.shape[0]*output.shape[1]}")
print(f"number of inf entries in output: {np.sum(np.isinf(output) > 0)}/{output.shape[0]*output.shape[1]}")
print(f"number of non-zero entries in output: {np.sum(np.abs(output) > 0)}/{output.shape[0]*output.shape[1]}")

number of NAN entries in output: 0/17640
number of inf entries in output: 0/17640
number of non-zero entries in output: 11697/17640


Use [Multiple trajectories](https://pysindy.readthedocs.io/en/latest/examples/1_feature_overview/example.html#Single-trajectory,-pass-in-collection-times)

## CHEATING
because we need at least 3 time steps (maybe a custom fd model helps?), I assume a linear relation in time anf sample the output as list [IC, IC+0.5*dQ, IC_dQ]. This is clearly an assumption.

In [59]:
x_data = []
T_data = []
for i in range(n_iterations*n_elements):
    x_data.append(np.stack((input[i, :], input[i,:] + 0.5 *output[i, :], input[i,:] + 1.0 *output[i, :])))
    T_data.append( np.array([0, 0.5*dts[i], 1.0 * dts[i]]) )

Test train split

In [63]:
indices = np.linspace(0, len(x_data)-1, len(x_data), dtype=int)
from sklearn.model_selection import train_test_split
I_train, I_test = train_test_split(indices, test_size=0.2, random_state=42)
x_test = [x_data[i] for i in I_test]
T_test = [T_data[i] for i in I_test]
x_train = [x_data[i] for i in I_train]
T_train = [T_data[i] for i in I_train]

In [64]:
import pysindy as ps 

In [85]:
## Create data
# model = ps.SINDy()
# library_identity = ps.IdentityLibrary()
# library_poly = ps.PolynomialLibrary(degree=2, include_bias=False)
library_custom = ps.CustomLibrary(
    library_functions=[lambda x, y, z: abs(y), lambda x, y, z: abs(z)])
# combined_library = library_custom
combined_library = library_poly
# combined_library = library_custom + library_custom * library_custom
feature_names = ['h', 'u', 'v']
model = ps.SINDy(feature_library = combined_library, feature_names = feature_names)
# model = ps.SINDy(feature_library = library_poly)
model.fit(x_train, t=T_train, multiple_trajectories=True, unbias=True)
model.print()
 

(h)' = -54.656 h + 120.625 u + 4.898 v + 71.443 h^2 + -181.292 h u + -6.835 h v + 49.196 u^2 + 2.886 u v + -1.292 v^2
(u)' = 83.980 u + -47.337 v + -104.058 h u + 58.571 h v + 20.723 u^2 + -7.915 u v + 5.585 v^2
(v)' = -0.339 u + 83.966 v + 0.263 h u + -95.569 h v + 0.212 u^2 + 2.095 u v + -0.850 v^2


Test

In [86]:
def compute_error(model, x_test, T_test):
    error = 0.
    n_tests = len(x_test)
    for i in range(n_tests):
        x0 = x_test[i][0]
        y = x_test[i][:]
        time = T_test[i][:]
        y_sim = model.simulate(x0, time)
        error += np.linalg.norm((y-y_sim)[:-1, :])
    print(error/n_tests)


In [87]:
compute_error(model, x_test, T_test)

0.0176113168263203


In [88]:
## Create data
n_constraints = 5
n_targets = x_train[0].shape[1]
n_features = 2
n_eqns = 3
constraint_rhs = np.zeros(n_constraints)
constraint_lhs = np.zeros((n_constraints, n_eqns*n_features))
# coefficients of Abs(u) == Abs(v) in u'
constraint_lhs[0, n_features + 0] = 1
constraint_lhs[0, n_features + 1] = -1
# coefficients of Abs(u) == Abs(v) in v'
constraint_lhs[1, 2*n_features + 0] = 1
constraint_lhs[1, 2*n_features + 1] = -1
# coefs for u' and v' are the same
constraint_lhs[2, 1*n_features + 0] = 1
constraint_lhs[2, 2*n_features + 0] = -1
# h' is not affcted from Abs(u)
constraint_lhs[3, 0] = 1
# h' is not affcted from Abs(v)
constraint_lhs[4, 1] = 1

opt = ps.ConstrainedSR3(constraint_lhs=constraint_lhs, constraint_rhs=constraint_rhs, threshold=0.5, thresholder='l1')
# opt = ps.SR3(threshold=0.5, thresholder='l1')
# library_identity = ps.IdentityLibrary()
library_poly = ps.PolynomialLibrary(degree=2, include_bias=False)
library_custom = ps.CustomLibrary(
    library_functions=[lambda x, y, z: abs(y), lambda x, y, z: abs(z)])
# combined_library = library_custom
combined_library = library_custom
# combined_library = library_custom + library_custom * library_custom
feature_names = ['h', 'u', 'v']
model = ps.SINDy(feature_library = combined_library, feature_names = feature_names, optimizer=opt)
# model = ps.SINDy(feature_library = library_poly)


model.fit(x_train, t=T, multiple_trajectories=True)
model.print()
 

(h)' = 0.000
(u)' = 0.777 f0(h,u,v) + 0.777 f1(h,u,v)
(v)' = 0.777 f0(h,u,v) + 0.777 f1(h,u,v)


In [89]:
compute_error(model, x_test, T_test)

0.01239209355980192


## TODOs
- [ ] check why there are so little non-zeros. Something wrong in callback.h?
- [ ] generate mid/high resulition data
- [ ] use only newtonian model, not chezy
- [ ] use level 0 system.
- [ ] Ideally, I do not want to recompute the moment projection of the high fidality data every time. Can I just use the first N-levels of a given one? I think this requires changes in `callback.h` and fit the Q fields to the correct size and PICK THE RIGHT FIELDS.