In [1]:
import numpy as np

from weight import *
from utils import save, load
from library import *
from process_library_terms import *

In [2]:
#%%prun # profiling

### load data
# vicsek:
#arrays.npy - normal. arrays2.npy - very few particles. arrays3.npy - high density. arrays4.npy - high density, low noise.
#arrays5.npy - small domain, one clump
#datasets = ['arrays.npy', 'arrays2.npy', 'arrays3.npy']#, 'arrays5.npy']
#datasets = ['arrays.npy']
datasets = ['arrays_bu.npy'] # burgers' equation data
#datasets = ['arrays_bu_rv.npy'] # burgers' equation data

Q0s = list()
Q1s = list()
rw0s = list()
rw1s = list()

### define all parameters and "make library"
rho_obs = Observable("rho", 0)
sqrtrho_obs = Observable("sqrt(rho)", 0)
v_obs = Observable("v", 1)
rv_obs = Observable("rv", 1)k
observables = [rho_obs, v_obs]
#terms = generate_terms_to(4, observables=[rho_obs, sqrtrho_obs, v_obs], max_observables=3) # generally go to 4?
#terms = generate_terms_to(4, observables=[sqrtrho_obs, v_obs], max_observables=3)
terms = generate_terms_to(4, observables=[observables], max_observables=3)
#terms = generate_terms_to(4, observables=[rho_obs, rv_obs], max_observables=3)
terms0 = []
terms1 = []
for term in terms:
    if term.rank==0:
        terms0.append(term)
    else:
        terms1.append(term)
m = 6
qmax = 1
nweights = qmax**3
ndomains = 200#50
dom_width = 20
dom_time = 20
pad = 0

# fix random seed
np.random.seed(1)

for i, dset in enumerate(datasets):
    #rho, v, res = load(dset, 3)
    rho, v, res, deltat = load(dset, 4) 
    #rho, rv, res, deltat = load(dset, 4) 
    world_size = np.array(rho.shape)
    #world_size[2] = 50 # restrict to first 50 time steps
    data_dict = {}
    data_dict['rho'] = rho
    data_dict['v'] = v
    data_dict['sqrt(rho)'] = np.sqrt(rho)
    #data_dict['rv'] = rv

    dx = 1/res # inverse of res
    dt = float(deltat) # same as deltat
    dxs = [dx, dx, dt]

    scale_dict = find_scales(data_dict)
    #print(scale_dict)
    
    char_sizes0 = [get_char_size(term, scale_dict, dx, dt) for term in terms0]
    char_sizes1 = [get_char_size(term, scale_dict, dx, dt) for term in terms1]

    # precompute derivatives here?
    # ...
    
    weights = []
    dxs_weight = [(dom_width-1)/2*dx, (dom_width-1)/2*dx, (dom_time-1)/2*dt] # grid spacing used by weight function

    #for domain in domains:
    for q1 in range(qmax):
        for q2 in range(qmax):
            for q3 in range(qmax):
                weights.append(Weight([m]*3, [q1, q2, q3], [0, 0, 0], dxs=dxs_weight))
    #domains = domains*nweights

    # make domains and "count" density
    domains = make_domains([dom_width, dom_width, dom_time], world_size, ndomains, pad)

    Q0 = make_library(terms0, data_dict, weights, domains, 0, dxs)#, by_parts=False)
    Q1 = make_library(terms1, data_dict, weights, domains, 1, dxs)#, by_parts=False)
    Q0s.append(Q0)
    Q1s.append(Q1)
    
    ### compute row normalizations
    # rows of Q0 are constructed in blocks of nweights*ndomains; Q1, nweights*dimensions*ndomains
    rho_col = find_term(terms0, 'rho')
    dom_densities = Q0[0:ndomains, rho_col] # integral of rho with the 0 harmonics weight
    #dom_densities = [1]*ndomains
    row_weights0 = np.tile(dom_densities, nweights)
    # scale weights according to square root of density (to cancel CLT noise scaling)
    row_weights0 = np.sqrt(row_weights0)
    row_weights0 += 1e-6 # don't want it to be exactly zero either
    # normalize
    row_weights0 = row_weights0/np.max(row_weights0)
    row_weights1 = np.tile(row_weights0, 2) # because of doubling for dimension
    rw0s.append(row_weights0)
    rw1s.append(row_weights1)
    
Q0 = np.concatenate(Q0s, axis=0)
Q1 = np.concatenate(Q1s, axis=0)
row_weights0 = np.concatenate(rw0s, axis=0)
row_weights1 = np.concatenate(rw1s, axis=0)

save('Q_bu.npy', Q0, Q1, dxs, char_sizes0, char_sizes1, terms0, terms1, row_weights0, row_weights1)

In [4]:
from identify_models import *

opts = dict()
opts['threshold'] = "'threshold'"
opts['gamma'] = 2
opts['verbose'] = False
opts['delta'] = 1e-8 # apparently this reaches machine precision as the relative error
opts['char_sizes'] = char_sizes0
opts1 = opts.copy()
opts1['char_sizes'] = char_sizes1
#eqs0, lambdas0, derived_eqs0, excluded_terms = identify_equations(Q0, opts, terms0, observables, threshold=1e-5, 
#                       max_complexity=None, max_equations=999, timed=True)
#print(excluded_terms)
#eqs1, lambdas1, derived_eqs1, excluded_terms = identify_equations(Q1, opts1, terms1, observables, threshold=1e-5, 
#                       max_complexity=None, max_equations=999, timed=True, excluded_terms=excluded_terms)
#print(excluded_terms)
eqs, lambdas, derived_eqs, excluded_terms = interleave_identify([Q0, Q1], [opts, opts1], [terms0, terms1],
                                                                observables, threshold=1e-4)

[3.87 s]
Identified model: 1.0 * dt rho * dt v_i + 0.005372754888942757 * dt rho * rho * v_i = 0 (order 4, residual 2.95e-05)
[7.45 s]
Identified model: 1.0 * rho * dt^2 v_i = 0 (order 4, residual 4.01e-05)


In [3]:
from ipynb.fs.full.sparse_reg import *
opts = dict()
opts['threshold'] = "'pareto'"
#opts['verbose'] = True
#opts['brute_force'] = False
#opts['threshold'] = "'threshold'"
#opts['n_terms'] = len(terms0)
#opts['gamma'] = 1.2
opts['char_sizes'] = char_sizes0
Xi, lambd, best_term, lambda1 = sparse_reg(Q0, opts=opts)

In [4]:
for i in range(len(Xi)):
    if Xi[i]!=0:
        print(f"[Term {i}] {Xi[i]} * {terms0[i]}")
print("Model residual:", lambd)
print("One-term model residual:", lambda1)
print(f"Best term (#{best_term}):", terms0[best_term])

[Term 21] 1.0 * dt^3 rho
[Term 33] -0.00310537026920138 * dj^2 rho * rho
[Term 34] 0.3948215574281788 * dt rho * rho
[Term 37] 0.39869942669808356 * rho * rho * dj v_j
[Term 38] 0.3749498972317281 * dj rho * rho * v_j
Model residual: 8.882726050953584e-05
One-term model residual: 0.00015723970981573098
Best term (#21): dt^3 rho


In [5]:
opts['row_norms'] = row_weights0
Xip, lambdp, best_termp, lambda1p = sparse_reg(Q0, *opts)
for i in range(len(Xip)):
    if Xip[i]!=0:
        print(f"[Term {i}] {Xip[i]} * {terms0[i]}")
print("Model residual:", lambdp)
print("One-term model residual:", lambda1p)
print(f"Best term (#{best_termp}):", terms0[best_termp])

[Term 12] 1.0 * dt v_j * dt v_j
[Term 15] -0.0036532437302029016 * dj v_k * v_j * v_k
Model residual: 9.320023473785616e-06
One-term model residual: 2.4963892321884583e-05
Best term (#12): dt v_j * dt v_j


In [6]:
avoid = []
avoid.append(Xip)
opts['avoid'] = avoid
Xip, lambd, best_term, lambda1 = sparse_reg(Q0, *opts)
#avoid.append(Xi)
#Xi, lambd, best_term, lambda1 = sparse_reg(Q0, opts=opts, char_sizes=char_sizes0, avoid=avoid)
for i in range(len(Xi)):
    if Xi[i]!=0:
        print(f"[Term {i}] {Xi[i]} * {terms0[i]}")
print("Model residual: ", lambd)
print("One-term model residual:", lambda1)
print(f"Best term (#{best_term}):", terms0[best_term])

[Term 21] 1.0 * dt^3 rho
[Term 33] -0.00310537026920138 * dj^2 rho * rho
[Term 34] 0.3948215574281788 * dt rho * rho
[Term 37] 0.39869942669808356 * rho * rho * dj v_j
[Term 38] 0.3749498972317281 * dj rho * rho * v_j
Model residual:  4.8215759067274014e-05
One-term model residual: 6.161145626870743e-05
Best term (#13): dt^2 v_j * v_j


In [7]:
#opts['verbose'] = True
opts['char_sizes'] = char_sizes1
opts['avoid'] = []
opts['row_norms'] = None
Xi, lambd, best_term, lambda1 = sparse_reg(Q1, *opts)

In [8]:
for i in range(len(Xi)):
    if Xi[i]!=0:
        print(f"[Term {i}] {Xi[i]} * {terms1[i]}")
print("Model residual:", lambd)
print("One-term model residual:", lambda1)
print("Best term:", terms1[best_term])

[Term 35] 1.0 * dt rho * dt v_i
[Term 39] -0.004415356527089719 * rho * dj v_j * v_i
[Term 41] -0.003500865798095438 * dj rho * v_j * v_i
Model residual: 3.166815428071119e-05
One-term model residual: 4.4941031236945656e-05
Best term: dt^3 v_i


In [5]:
#avoid = []
#avoid.append(Xi)
#opts['avoid'] = avoid
opts['row_norms'] = row_weights1
Xi, lambd, best_term, lambda1 = sparse_reg(Q1, *opts)
for i in range(len(Xi)):
    if Xi[i]!=0:
        print(f"[Term {i}] {Xi[i]} * {terms1[i]}")
print("Model residual:", lambd)
print("One-term model residual:", lambda1)
print("Best term:", terms1[best_term])

NameError: name 'sparse_reg' is not defined

In [6]:
for term, size in zip(terms0, char_sizes0):
    print(term, size)

NameError: name 'terms0' is not defined

In [11]:
Q0.shape, Q1.shape

((200, 41), (400, 49))

In [12]:
continuity_terms = ['dt rho', 'rho * dj v_j', 'dj rho * v_j']
#continuity_terms = ['dt rho * rho', 'rho * rho * dj v_j', 'dj rho * rho * v_j']
#continuity_terms = ['dt rho', 'dj rv_j']
col_numbers = [find_term(terms0, name) for name in continuity_terms]
Xi, lambd = regress(Q0, col_numbers)
for i in range(len(Xi)):
    if Xi[i]!=0:
        print(f"[Term {i}] {Xi[i]} * {terms0[i]}. (Char. size: {char_sizes0[i]})")
print("Model residual:", lambd)

[Term 18] 0.9580798131049473 * dt rho. (Char. size: 0.09999999999999992)
[Term 22] 1.0 * rho * dj v_j. (Char. size: 0.0018307660444226443)
[Term 23] 0.957149176179876 * dj rho * v_j. (Char. size: 0.0014785126399141976)
Model residual: 5.754352712048967e-05


In [13]:
burger_terms = ['rho * dt v_i', 'rho * dj v_i * v_j']
#burger_terms = ['rho * dt rv_i', 'dj rv_i * rv_j']
col_numbers = [find_term(terms1, name) for name in burger_terms]
Xi, lambd = regress(Q1, col_numbers)
for i in range(len(Xi)):
    if Xi[i]!=0:
        print(f"[Term {i}] {Xi[i]} * {terms1[i]}. (Char. size: {char_sizes1[i]})")
print("Model residual:", lambd)

[Term 32] 1.0 * rho * dt v_i. (Char. size: 0.007323064177690577)
[Term 38] 0.9940317982966582 * rho * dj v_i * v_j. (Char. size: 0.00010827242949618395)
Model residual: 1.379498529061086e-05


In [14]:
#veqconst_terms = ['1', 'v_j * v_j']
veqconst_terms = ['rho', 'rho * v_j * v_j']
col_numbers = [find_term(terms0, name) for name in veqconst_terms]
Xi, lambd = regress(Q0, col_numbers)
for i in range(len(Xi)):
    if Xi[i]!=0:
        print(f"[Term {i}] {Xi[i]} * {terms0[i]}. (Char. size: {char_sizes0[i]})")
print("Model residual:", lambd)

[Term 16] -0.004286563115825593 * rho. (Char. size: 0.1464261231728444)
[Term 28] 1.0 * rho * v_j * v_j. (Char. size: 0.0005121399208783936)
Model residual: 0.0005697337514007149


In [15]:
dup_terms = ['di dj v_j * v_k * v_k', 'di dj v_k * v_j * v_k']
col_numbers = [find_term(terms1, name) for name in dup_terms]
Xi, lambd = regress(Q1, col_numbers)
for i in range(len(Xi)):
    if Xi[i]!=0:
        print(f"[Term {i}] {Xi[i]} * {terms1[i]}. (Char. size: {char_sizes1[i]})")
print("Model residual:", lambd)

ValueError: 'di dj v_j * v_k * v_k' is not in list

In [None]:
rho dt^2 v = 10^-4 dt rho * grad rho