In [1]:
import h5py
import numpy as np
from copy import deepcopy

from library.misc.io import load_fields_from_hdf5

In [2]:
path_data_input = "/home/ingo/Git/SMM/shallow-moments-simulation/openfoam_data/channelflow_coarse/closure_dataset_input.hdf5"
path_data_output = "/home/ingo/Git/SMM/shallow-moments-simulation/openfoam_data/channelflow_coarse/closure_dataset_output.hdf5"

## Test multi sindy
Problem: not working with only two snapshots in time

In [3]:
with h5py.File(path_data_input, "r") as f:
    n_iterations = len(f.keys())
    Q, Qaux, time = load_fields_from_hdf5(path_data_input, 0)
    n_elements = Q.shape[0]
    n_fields = Q.shape[1]

input = np.zeros((n_iterations, n_elements, n_fields))
output = np.zeros((n_iterations, n_elements, n_fields))
dts = np.zeros((n_iterations, n_elements)) 
for i in range(n_iterations):
    output[i, :, :] = deepcopy(Q)
    Q, Qaux, time = load_fields_from_hdf5(path_data_input, i)
    input[i, :, :] = deepcopy(Q)
    Q, Qaux, dt = load_fields_from_hdf5(path_data_output, i)
    dts[i, :] = dt
input = input.reshape(n_iterations*n_elements, n_fields)
output = output.reshape(n_iterations*n_elements, n_fields)
dts = dts.reshape(n_iterations*n_elements)


go to primitive variables

In [4]:
for i in range(1, n_fields):
    input[:,i] = input[:,i] / input[:,0]

Sanity checks

In [5]:

print(f"number of NAN entries in output: {np.sum(np.isnan(output) > 0)}/{output.shape[0]*output.shape[1]}")
print(f"number of inf entries in output: {np.sum(np.isinf(output) > 0)}/{output.shape[0]*output.shape[1]}")
print(f"number of non-zero entries in output: {np.sum(np.abs(output) > 0)}/{output.shape[0]*output.shape[1]}")

number of NAN entries in output: 0/29400
number of inf entries in output: 0/29400
number of non-zero entries in output: 23217/29400


Use [Multiple trajectories](https://pysindy.readthedocs.io/en/latest/examples/1_feature_overview/example.html#Single-trajectory,-pass-in-collection-times)

## CHEATING
because we need at least 3 time steps (maybe a custom fd model helps?), I assume a linear relation in time anf sample the output as list [IC, IC+0.5*dQ, IC_dQ]. This is clearly an assumption.

In [6]:
x_data = []
T_data = []
for i in range(n_iterations*n_elements):
    x_data.append(np.stack((input[i, :], input[i,:] + 0.5 *output[i, :], input[i,:] + 1.0 *output[i, :])))
    T_data.append( np.array([0, 0.5*dts[i], 1.0 * dts[i]]) )

Test train split

In [7]:
indices = np.linspace(0, len(x_data)-1, len(x_data), dtype=int)
from sklearn.model_selection import train_test_split
I_train, I_test = train_test_split(indices, test_size=0.2, random_state=42)
x_test = [x_data[i] for i in I_test]
T_test = [T_data[i] for i in I_test]
x_train = [x_data[i] for i in I_train]
T_train = [T_data[i] for i in I_train]

In [8]:
import pysindy as ps 

In [24]:
## Create data
# library_poly = ps.PolynomialLibrary(degree=2, include_bias=False)
library_unsym = ps.CustomLibrary(
    library_functions=[lambda h, u, u1, v, v1: abs(u), lambda h, u, u2, v, v1: abs(v)])
    # library_functions=[lambda h, u, v : abs(u), lambda h, u, v: abs(u)])
combined_library = library_unsym
# feature_names = ['h', 'u', 'v']
feature_names = ['h', 'u', 'u1', 'v', 'v1']
model_unsym = ps.SINDy(feature_library = combined_library, feature_names = feature_names)
# model = ps.SINDy(feature_library = library_poly)
model_unsym.fit(x_train, t=T_train, multiple_trajectories=True, unbias=True)
model_unsym.print()
 

(h)' = 0.276 f0(h,u,u1,v,v1) + 0.690 f1(h,u,u1,v,v1)
(u)' = 3.185 f0(h,u,u1,v,v1)
(u1)' = 1.657 f0(h,u,u1,v,v1) + -2.990 f1(h,u,u1,v,v1)
(v)' = 0.305 f1(h,u,u1,v,v1)
(v1)' = -0.293 f1(h,u,u1,v,v1)


Test

In [19]:
def compute_error(model, x_test, T_test):
    # considered_fields = [1, 2]
    considered_fields = [1, 3]
    error = 0.
    mean = 0.
    n_tests = len(x_test)
    for i in range(n_tests):
        x0 = x_test[i][0]
        y = x_test[i][:]
        time = T_test[i][:]
        y_sim = model.simulate(x0, time)
        error += np.linalg.norm((y-y_sim)[-1, considered_fields])
        mean += np.linalg.norm((y[-1,considered_fields]))
    error = error/n_tests
    mean = mean/n_tests
    print(f'absolute L2 error {error}')
    print(f'relative L2 error {error/mean}')
    print(f'mean {mean}')


In [25]:
print(compute_error(model_unsym, x_test , T_test))
print(compute_error(model_unsym, x_train, T_train))

absolute L2 error 0.005065342944989903
relative L2 error 0.04224022334858655
mean 0.11991752276469438
None
absolute L2 error 0.0057794632401142335
relative L2 error 0.045803889639148186
mean 0.12617843780617655
None


Constraint problem for level = 0

In [26]:
## Create data
n_constraints = 5
n_targets = x_train[0].shape[1]
n_features = 2
n_eqns = 3
constraint_rhs = np.zeros(n_constraints)
constraint_lhs = np.zeros((n_constraints, n_eqns*n_features))
# coefficients of Abs(u) == Abs(v) in u'
constraint_lhs[0, n_features + 0] = 1
constraint_lhs[0, n_features + 1] = -1
# coefficients of Abs(u) == Abs(v) in v'
constraint_lhs[1, 2*n_features + 0] = 1
constraint_lhs[1, 2*n_features + 1] = -1
# coefs for u' and v' are the same
constraint_lhs[2, 1*n_features + 0] = 1
constraint_lhs[2, 2*n_features + 0] = -1
# h' is not affcted from Abs(u)
constraint_lhs[3, 0] = 1
# h' is not affcted from Abs(v)
constraint_lhs[4, 1] = 1

opt = ps.ConstrainedSR3(constraint_lhs=constraint_lhs, constraint_rhs=constraint_rhs, threshold=0.5, thresholder='l1')
# opt = ps.SR3(threshold=0.5, thresholder='l1')
library_sym = ps.CustomLibrary(
    library_functions=[lambda x, y, z: abs(y), lambda x, y, z: abs(z)])
    # library_functions=[lambda h, u, u1, v, v1: abs(u), lambda h, u, u2, v, v1: abs(v)])
combined_library = library_sym
feature_names = ['h', 'u', 'v']
# feature_names = ['h', 'u', 'u1', 'v', 'v1']
model_sym = ps.SINDy(feature_library = combined_library, feature_names = feature_names, optimizer=opt)


model_sym.fit(x_train, t=T_train, multiple_trajectories=True)
model_sym.print()
 

ValueError: cannot reshape array of size 6 into shape (0,20)

In [None]:
print(compute_error(model_sym, x_test , T_test))
print(compute_error(model_sym, x_train, T_train))

absolute L2 error 0.004802322289611073
relative L2 error 0.04004687704426924
mean 0.11991752276469438
None
absolute L2 error 0.005634422971122236
relative L2 error 0.04465440426340756
mean 0.12617843780617655
None


Constraint problem for level = 1

In [27]:
## Create data
n_constraints = 16
n_targets = x_train[0].shape[1]
n_features = 4
n_eqns = 5
constraint_rhs = np.zeros(n_constraints)
constraint_lhs = np.zeros((n_constraints, n_eqns*n_features))
# coefficients of Abs(u) == Abs(v) in u'
constraint_lhs[0, n_features + 0] = 1
constraint_lhs[0, n_features + 2] = -1
# coefficients of Abs(u) == Abs(v) in v'
constraint_lhs[1, 3*n_features + 0] = 1
constraint_lhs[1, 3*n_features + 2] = -1
# coefs for u' and v' are the same
constraint_lhs[2, 1*n_features + 0] = 1
constraint_lhs[2, 3*n_features + 0] = -1

# coefficients of Abs(u1) == Abs(v1) in u'
constraint_lhs[3, n_features + 1] = 1
constraint_lhs[3, n_features + 3] = -1
# coefficients of Abs(u1) == Abs(v1) in v'
constraint_lhs[4, 3*n_features + 1] = 1
constraint_lhs[4, 3*n_features + 3] = -1
# coefs for u1' and v1' are the same
constraint_lhs[5, 1*n_features + 1] = 1
constraint_lhs[5, 3*n_features + 1] = -1

# coefficients of Abs(u) == Abs(v) in u1'
constraint_lhs[6, 2*n_features + 0] = 1
constraint_lhs[6, 2*n_features + 2] = -1
# coefficients of Abs(u) == Abs(v) in v1'
constraint_lhs[7, 4*n_features + 0] = 1
constraint_lhs[7, 4*n_features + 2] = -1
# coefs for u' and v' are the same
constraint_lhs[8, 2*n_features + 0] = 1
constraint_lhs[8, 4*n_features + 0] = -1
 
# coefficients of Abs(u1) == Abs(v1) in u1'
constraint_lhs[9, 2*n_features + 1] = 1
constraint_lhs[9, 2*n_features + 3] = -1
# coefficients of Abs(u1) == Abs(v1) in v1'
constraint_lhs[10, 4*n_features + 1] = 1
constraint_lhs[10, 4*n_features + 3] = -1
# coefs for u1' and v1' are the same
constraint_lhs[11, 2*n_features + 1] = 1
constraint_lhs[11, 4*n_features + 1] = -1

# h' is not affcted from Abs(u)
constraint_lhs[12, 0] = 1
# h' is not affcted from Abs(v)
constraint_lhs[13, 1] = 1
# h' is not affcted from Abs(u1)
constraint_lhs[14, 2] = 1
# h' is not affcted from Abs(v1)
constraint_lhs[15, 3] = 1

opt = ps.ConstrainedSR3(constraint_lhs=constraint_lhs, constraint_rhs=constraint_rhs, threshold=0.5, thresholder='l1')
# opt = ps.SR3(threshold=0.5, thresholder='l1')
# library_poly = ps.PolynomialLibrary(degree=2, include_bias=False)
library_sym_1 = ps.CustomLibrary(
    # library_functions=[lambda x, y, z: abs(y), lambda x, y, z: abs(z)])
    library_functions=[lambda h, u, u1, v, v1: abs(u), lambda h, u, u1, v, v1: abs(u1), lambda h, u, u2, v, v1: abs(v),  lambda h, u, u2, v, v1: abs(v1)])
combined_library = library_sym_1
# feature_names = ['h', 'u', 'v']
feature_names = ['h', 'u', 'u1', 'v', 'v1']
model_sym_1 = ps.SINDy(feature_library = combined_library, feature_names = feature_names, optimizer=opt)


model_sym_1.fit(x_train, t=T_train, multiple_trajectories=True)
model_sym_1.print()
 

(h)' = 0.000
(u)' = 1.092 f0(h,u,u1,v,v1) + -0.144 f1(h,u,u1,v,v1) + 1.092 f2(h,u,u1,v,v1) + -0.144 f3(h,u,u1,v,v1)
(u1)' = 0.281 f1(h,u,u1,v,v1) + 0.281 f3(h,u,u1,v,v1)
(v)' = 1.092 f0(h,u,u1,v,v1) + -0.144 f1(h,u,u1,v,v1) + 1.092 f2(h,u,u1,v,v1) + -0.144 f3(h,u,u1,v,v1)
(v1)' = 0.281 f1(h,u,u1,v,v1) + 0.281 f3(h,u,u1,v,v1)


In [28]:
print(compute_error(model_sym_1, x_test , T_test))
print(compute_error(model_sym_1, x_train, T_train))

absolute L2 error 0.004802322289611073
relative L2 error 0.04004687704426924
mean 0.11991752276469438
None
absolute L2 error 0.005634422971122236
relative L2 error 0.04465440426340756
mean 0.12617843780617655
None


## TODOs
- [x] check why there are so little non-zeros. Something wrong in callback.h?
- [ ] generate mid/high resulition data
- [ ] use only newtonian model, not chezy
- [x] use level 0 system.
- [ ] Ideally, I do not want to recompute the moment projection of the high fidality data every time. Can I just use the first N-levels of a given one? I think this requires changes in `callback.h` and fit the Q fields to the correct size and PICK THE RIGHT FIELDS.