In [1]:
# in case there are any problems with importing because path is wrong
import sys
sys.path.append('/Users/daniel/Princeton Dropbox/Daniel Gurevich/Research/discrete_sr/code/SPIDER_discrete')

In [2]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.animation import FuncAnimation
from IPython.display import HTML
from matplotlib import rcParams
import h5py
import matplotlib.pyplot as plt

from library import Observable
from continuous.process_library_terms import SRDataset
from commons.utils import save, load

In [3]:
hdf = True
if hdf:
    import h5py
    
    def load_matlab_v73(mat_file_path):
        """
        Loads MATLAB v7.3 .mat data using h5py and converts it to Python-readable formats.
    
        Parameters:
        - mat_file_path (str): Path to the .mat file.
    
        Returns:
        - dict: A dictionary with MATLAB variable names as keys and corresponding NumPy arrays as values.
        """
        try:
            # Open the HDF5 file
            with h5py.File(mat_file_path, 'r') as f:
                mat_data = {}
    
                def recursively_load(group):
                    """
                    Recursively load MATLAB v7.3 groups into dictionaries.
                    """
                    data = {}
                    for key, item in group.items():
                        if isinstance(item, h5py.Dataset):
                            data[key] = np.array(item)  # Convert HDF5 dataset to NumPy array
                        elif isinstance(item, h5py.Group):
                            data[key] = recursively_load(item)  # Recursively process groups
                    return data
    
                # Load all variables from the root group
                mat_data = recursively_load(f)
    
            return mat_data
        except Exception as e:
            print(f"Error loading .mat file: {e}")
            return None
    
    # Path to your MATLAB v7.3 .mat file
    mat_file_path = "data/filcoh_SGS.mat" # Replace with your .mat file path

In [4]:
if hdf:
    # Load the .mat file
    python_data = load_matlab_v73(mat_file_path)
    
    # Display the loaded data
    if python_data:
        for var_name, data in python_data.items():
            if isinstance(data, np.ndarray):
                print(f"Variable: {var_name}, Shape: {data.shape}, Type: {type(data)}")
            else:
                print(f"Variable: {var_name}, Type: {type(data)} (nested structure)")
    
    s = python_data['s']  # Replace 's' with the actual key name if it's different
    
    # Extract the first layer (V) and the second layer (U)
    V = s[:, 0, :, :]  # First layer
    U = s[:, 1, :, :]  # Second layer
    
    # Transpose to correct shape
    
    U = np.transpose(U, (2,1,0))
    V = np.transpose(V, (2,1,0))

    print(U.shape, V.shape)

    #SUBSAMPLE
    xsample = 2
    ysample = xsample
    tsample = 1
    
    U = U[::xsample, ::ysample, ::tsample]
    V = V[::xsample, ::ysample, ::tsample]
        
    Lx = 2*np.pi; Ly = 2*np.pi; Lt = 5;
    Nx = 2048/xsample; Ny = 2048/ysample; Nt = 100/tsample
    dx = Lx/Nx; dy = Ly/Ny; dt = Lt/Nt;
    
    def pressure_poisson(U, V, dx, dy, density=1.0):
        nx, ny, nt = U.shape
        kx = np.fft.fftfreq(nx, d=dx) * 2 * np.pi
        ky = np.fft.rfftfreq(ny, d=dy) * 2 * np.pi
        kx, ky = np.meshgrid(kx, ky, indexing='ij')
        k_squared = kx**2 + ky**2
        k_squared[0, 0] = np.inf 
        P = np.zeros((nx, ny, nt))
        for t in range(nt):
            u_FT = np.fft.rfftn(U[:, :, t])
            v_FT = np.fft.rfftn(V[:, :, t])
            i = 1j
            dxu = np.fft.irfftn(i * kx * u_FT, s=(nx, ny))
            dyu = np.fft.irfftn(i * ky * u_FT, s=(nx, ny))
            dxv = np.fft.irfftn(i * kx * v_FT, s=(nx, ny))
            dyv = np.fft.irfftn(i * ky * v_FT, s=(nx, ny))
            rhs = dxu**2 + 2 * dyu * dxv + dyv**2
            rhs_FT = np.fft.rfftn(rhs)
            pressure_FT = density * rhs_FT / k_squared
            p = np.fft.irfftn(pressure_FT, s=(nx, ny))
            P[:, :, t] = p
        return P
    
    P = pressure_poisson(U, V, dx, dy)
    
    print(f"V: Shape = {V.shape}, Type = {type(V)}")
    print(f"U: Shape = {U.shape}, Type = {type(U)}")
    print(f"P: Shape = {P.shape}, Type = {type(P)}")

    u = np.concatenate([U[:, :, :, np.newaxis], V[:, :, :, np.newaxis]], axis=3)

Variable: #refs#, Type: <class 'dict'> (nested structure)
Variable: #subsystem#, Type: <class 'dict'> (nested structure)
Variable: domain, Type: <class 'dict'> (nested structure)
Variable: params, Type: <class 'dict'> (nested structure)
Variable: s, Shape: (100, 2, 2048, 2048), Type: <class 'numpy.ndarray'>
(2048, 2048, 100) (2048, 2048, 100)


  dxu = np.fft.irfftn(i * kx * u_FT, s=(nx, ny))
  dyu = np.fft.irfftn(i * ky * u_FT, s=(nx, ny))
  dxv = np.fft.irfftn(i * kx * v_FT, s=(nx, ny))
  dyv = np.fft.irfftn(i * ky * v_FT, s=(nx, ny))
  p = np.fft.irfftn(pressure_FT, s=(nx, ny))


V: Shape = (1024, 1024, 100), Type = <class 'numpy.ndarray'>
U: Shape = (1024, 1024, 100), Type = <class 'numpy.ndarray'>
P: Shape = (1024, 1024, 100), Type = <class 'numpy.ndarray'>


In [5]:
#%%prun # profiling

uobs = Observable(string='u', rank=1)
pobs = Observable(string='p', rank=0)
observables = [uobs, pobs]
data_dict = {'p': P, 'u': u}

# fix random seed
np.random.seed(1)

world_size = np.array(P.shape)
pad = 0

# fix random seed
np.random.seed(1)

dxs = [dx, dy, dt]

# initial setup of dataset
srd = SRDataset(world_size=world_size, data_dict=data_dict, observables=observables, dxs=dxs, 
                #irreps=SRDataset.all_rank2_irreps(), cache_primes=True)
                irreps=SRDataset.only_rank2_irreps(), cache_primes=True)

# initialize libraries, domains, and weights
#srd.make_libraries(max_complexity=3, max_observables=3)
srd.make_libraries(max_complexity=4, max_observables=3)
for irrep in srd.irreps:
    print(irrep, ":", len(srd.libs[irrep].terms))

dom_width = 40
dom_time = 20 #previously 20 (without interpolation)
srd.make_domains(ndomains=30, domain_size=[dom_width, dom_width, dom_time], pad=pad)
#srd.make_domains(ndomains=10, domain_size=[dom_width, dom_width, dom_time], pad=pad)
srd.make_weights(m=12, qmax=0)
srd.set_LT_scale(L=3/8, T=2.5e-3) #T=1 # note that this line must go before make_library_matrices
srd.make_library_matrices(debug=False)

Antisymmetric rank 2 : 23
Symmetric trace-free rank 2 : 36


In [6]:
# from commons.utils import *

# lib1 = srd.libs[srd.irreps[1]]
# for match in regex_find(lib1.terms, r'∂t u_α'):
#     print(match)
# dtu = lib1.Q[:, match[0]]

# for match in regex_find(lib1.terms, r'u_β · ∂β u_α'):
#     print(match)
# adv = lib1.Q[:, match[0]]

# for match in regex_find(lib1.terms, r'∂α p'):
#     print(match)
# dp = lib1.Q[:, match[0]]

# for match in regex_find(lib1.terms, r'∂β² u_α'):
#     print(match)
# viscosity = 0.02*lib1.Q[:, match[0]] #0.1

# print(np.linalg.norm(dtu), np.linalg.norm(adv), np.linalg.norm(dp), np.linalg.norm(dtu+dp+adv-viscosity), 
#       np.linalg.norm(dp-viscosity), np.linalg.norm(dtu+adv))

In [7]:
# lib0 = srd.libs[srd.irreps[0]]
# for match in regex_find(lib0.terms, r'∂α u_α'):
#     print(match)
# div = lib0.Q[:, match[0]]

# print(np.linalg.norm(div))

In [19]:
from commons.identify_models import *
import copy

libs = srd.libs

reg_opts_list = []
for irrep in srd.irreps:
    # for regression we now need to construct a Scaler, Initializer, ModelIterator, and Threshold
    scaler = Scaler(sub_inds=None, char_sizes=libs[irrep].col_weights, row_norms=None, unit_rows=True, train_fraction=1)
    #init = Initializer(method='combinatorial', start_k=2)
    init = Initializer(method='combinatorial', start_k=9999)
    #init = Initializer(method='power', start_k=10)
    #res = Residual(residual_type='fixed_column', anchor_col=0)
    res = Residual(residual_type='matrix_relative')
    
    #iterator = ModelIterator(max_k=10, backward_forward=True, max_passes=9)
    iterator = ModelIterator(max_k=len(libs[irrep].terms), backward_forward=False, max_passes=1)
    thres = Threshold(threshold_type='jump', gamma=1.5, delta=1e-10, n_terms=None)
    #thres = Threshold(threshold_type='information', ic=AIC)
    
    opts = {'scaler': scaler, 'initializer': init, 'residual': res,
            'model_iterator': iterator, 'threshold': thres}
    opts['verbose'] = False
    opts['inhomog'] = False
    opts['inhomog_col'] = None
    reg_opts_list.append(opts)

eqs, lambdas, reg_results, derived_eqs, excluded_terms = interleave_identify([libs[i] for i in srd.irreps], 
reg_opts_list, threshold=2e-7, experimental=True)
#, max_equations=10)

--- WORKING ON LIBRARY WITH IRREP Antisymmetric rank 2 AT COMPLEXITY 1 ---
--- WORKING ON LIBRARY WITH IRREP Symmetric trace-free rank 2 AT COMPLEXITY 1 ---
--- WORKING ON LIBRARY WITH IRREP Antisymmetric rank 2 AT COMPLEXITY 2 ---
--- WORKING ON LIBRARY WITH IRREP Symmetric trace-free rank 2 AT COMPLEXITY 2 ---
--- WORKING ON LIBRARY WITH IRREP Antisymmetric rank 2 AT COMPLEXITY 3 ---
--- WORKING ON LIBRARY WITH IRREP Symmetric trace-free rank 2 AT COMPLEXITY 3 ---
--- WORKING ON LIBRARY WITH IRREP Antisymmetric rank 2 AT COMPLEXITY 4 ---
[0.02 s]
Identified model: -1 · ∂α u_β · ∂γ u_γ + ∂α u_γ · ∂γ u_β = 0 (order 4, residual 3.55e-22)
[0.07 s]
Identified model: 1 · u_α · u_γ · ∂β u_γ + -1 · u_α · u_γ · ∂γ u_β + u_γ · u_γ · ∂α u_β = 0 (order 4, residual 5.98e-17)
[0.13 s]
Identified model: 1 · u_α · ∂β ∂γ u_γ + -1 · u_α · ∂γ² u_β + u_γ · ∂α ∂γ u_β = 0 (order 4, residual 2.33e-16)
[0.18 s]
Identified model: u_α · ∂γ² u_β + -0.0001 · ∂α ∂γ² u_β + 1 · ∂t ∂α u_β = 0 (order 4, residual 2.2

In [9]:
Q = libs[srd.irreps[1]].Q
terms = libs[srd.irreps[1]].terms
opts['scaler'].reset_inds(list(range(len(terms))))

IOI = [9, 26, 23, 35]
print([terms[i] for i in IOI])

[∂α p · u_β, u_α · ∂t u_β, u_α · ∂β ∂γ u_γ, u_γ · u_γ · ∂α u_β]


In [10]:
# coeffs = np.zeros(len(terms))
# coeffs[9] = 1
# coeffs[23] = -1e-4
# coeffs[26] = 1
# coeffs[35] = 1
# opts['scaler'].reset_inds([9, 23, 26, 35])
# reg_result = sparse_reg_bf(Q, **opts)
# #lambd = evaluate_model(Q, coeffs, opts['scaler'], opts['residual'])
# print(reg_result.xi[np.ix_([9, 23, 26, 35])], reg_result.lambd)
# #print(np.linalg.norm(Q[:, 33])/libs[irrep].col_weights[33])
# #print(np.linalg.norm(Q, axis=0)/libs[irrep].col_weights)

In [11]:
i=26
opts['inhomog'] = True
opts['inhomog_col'] = i
opts['verbose'] = False
opts['term_names'] = terms
print(terms[i])
reg_result = sparse_reg_bf(Q, **opts)
print(reg_result.xi, reg_result.lambd)
for i, x in enumerate(reg_result.xi):
    if x!=0:
        print('term', i, '--', reg_result.xi[i], '*', terms[i])

u_α · ∂t u_β
[-0.000e+00 -0.000e+00 -0.000e+00 -0.000e+00 -0.000e+00 -0.000e+00
 -0.000e+00 -0.000e+00 -0.000e+00  1.000e+00 -0.000e+00 -0.000e+00
 -0.000e+00 -0.000e+00 -0.000e+00 -0.000e+00 -0.000e+00 -0.000e+00
 -0.000e+00 -0.000e+00 -0.000e+00 -0.000e+00 -0.000e+00 -0.000e+00
 -0.000e+00 -1.003e-04  1.000e+00 -0.000e+00 -0.000e+00 -0.000e+00
 -0.000e+00 -0.000e+00  1.000e+00 -0.000e+00 -0.000e+00 -0.000e+00] 3.741790564828159e-08
term 9 -- 0.9999821478890607 * ∂α p · u_β
term 25 -- -0.00010031799314277177 * u_α · ∂γ² u_β
term 26 -- 1.0 * u_α · ∂t u_β
term 32 -- 0.9999770493046408 * u_α · u_γ · ∂γ u_β


In [12]:
libs[srd.irreps[1]].terms

[∂α ∂β p,
 ∂t ∂α ∂β p,
 ∂α p · ∂β p,
 p · ∂α ∂β p,
 ∂α u_β,
 ∂α ∂β ∂γ u_γ,
 ∂α ∂γ² u_β,
 ∂t ∂α u_β,
 ∂t² ∂α u_β,
 ∂α p · u_β,
 p · ∂α u_β,
 ∂t ∂α p · u_β,
 ∂t p · ∂α u_β,
 ∂α p · ∂t u_β,
 p · ∂t ∂α u_β,
 p · ∂α p · u_β,
 p · p · ∂α u_β,
 u_α · u_β,
 ∂γ u_α · ∂γ u_β,
 ∂α u_γ · ∂γ u_β,
 ∂α u_β · ∂γ u_γ,
 ∂α u_γ · ∂β u_γ,
 u_γ · ∂α ∂β u_γ,
 u_α · ∂β ∂γ u_γ,
 u_γ · ∂α ∂γ u_β,
 u_α · ∂γ² u_β,
 u_α · ∂t u_β,
 ∂t u_α · ∂t u_β,
 u_α · ∂t² u_β,
 p · u_α · u_β,
 ∂t p · u_α · u_β,
 p · u_α · ∂t u_β,
 u_α · u_γ · ∂γ u_β,
 u_α · u_β · ∂γ u_γ,
 u_α · u_γ · ∂β u_γ,
 u_γ · u_γ · ∂α u_β]

In [13]:
print(srd.scale_dict)
for irrep in srd.irreps:
    Q = srd.libs[irrep].Q
    [U, S, V] = np.linalg.svd(Q)
    print(np.linalg.norm(Q)/max(S))

{'p': {'mean': np.float64(0.08393930372103373), 'std': np.float64(0.08393930372103378)}, 'u': {'mean': np.float64(0.28155178895306154), 'std': np.float64(0.28155178895306104)}}
1.0006648889053673
1.0008522562096887


In [14]:
print([reg_result.sublibrary for reg_result in reg_results])

[[∂α u_β, ∂α ∂γ² u_β, ∂t ∂α u_β, ∂t² ∂α u_β, ∂α p · u_β, p · ∂α u_β, ∂t ∂α p · u_β, ∂t p · ∂α u_β, ∂α p · ∂t u_β, p · ∂t ∂α u_β, p · ∂α p · u_β, p · p · ∂α u_β, ∂α u_γ · ∂γ u_β, ∂α u_β · ∂γ u_γ, u_α · ∂β ∂γ u_γ, u_γ · ∂α ∂γ u_β, u_α · ∂γ² u_β, u_α · ∂t u_β, u_α · ∂t² u_β, p · u_α · ∂t u_β, u_α · u_γ · ∂γ u_β, u_α · u_γ · ∂β u_γ, u_γ · u_γ · ∂α u_β], [∂α u_β, ∂α ∂γ² u_β, ∂t ∂α u_β, ∂t² ∂α u_β, ∂α p · u_β, p · ∂α u_β, ∂t ∂α p · u_β, ∂t p · ∂α u_β, ∂α p · ∂t u_β, p · ∂t ∂α u_β, p · ∂α p · u_β, p · p · ∂α u_β, ∂α u_γ · ∂γ u_β, u_α · ∂β ∂γ u_γ, u_γ · ∂α ∂γ u_β, u_α · ∂γ² u_β, u_α · ∂t u_β, u_α · ∂t² u_β, p · u_α · ∂t u_β, u_α · u_γ · ∂γ u_β, u_α · u_γ · ∂β u_γ, u_γ · u_γ · ∂α u_β], [∂α u_β, ∂α ∂γ² u_β, ∂t ∂α u_β, ∂t² ∂α u_β, ∂α p · u_β, p · ∂α u_β, ∂t ∂α p · u_β, ∂t p · ∂α u_β, ∂α p · ∂t u_β, p · ∂t ∂α u_β, p · ∂α p · u_β, p · p · ∂α u_β, ∂α u_γ · ∂γ u_β, u_α · ∂β ∂γ u_γ, u_γ · ∂α ∂γ u_β, u_α · ∂γ² u_β, u_α · ∂t u_β, u_α · ∂t² u_β, p · u_α · ∂t u_β, u_α · u_γ · ∂γ u_β, u_γ · u_γ · ∂α u_β], 

In [15]:
from library import latexify

for irrep in srd.irreps:
    print(f"IRREP: {irrep}")
    print(latexify(str(srd.libs[irrep].terms)))
    #for term in srd.libs[irrep].terms:
    #    print(latexify(str(term))+",")

IRREP: Antisymmetric rank 2
[\partial_\alpha u_{\beta}, \partial_\alpha \partial_\gamma^2 u_{\beta}, \partial_t \partial_\alpha u_{\beta}, \partial_t^2 \partial_\alpha u_{\beta}, \partial_\alpha p \cdot u_{\beta}, p \cdot \partial_\alpha u_{\beta}, \partial_t \partial_\alpha p \cdot u_{\beta}, \partial_t p \cdot \partial_\alpha u_{\beta}, \partial_\alpha p \cdot \partial_t u_{\beta}, p \cdot \partial_t \partial_\alpha u_{\beta}, p \cdot \partial_\alpha p \cdot u_{\beta}, p \cdot p \cdot \partial_\alpha u_{\beta}, \partial_\alpha u_{\gamma} \cdot \partial_\gamma u_{\beta}, \partial_\alpha u_{\beta} \cdot \partial_\gamma u_{\gamma}, u_{\alpha} \cdot \partial_\beta \partial_\gamma u_{\gamma}, u_{\gamma} \cdot \partial_\alpha \partial_\gamma u_{\beta}, u_{\alpha} \cdot \partial_\gamma^2 u_{\beta}, u_{\alpha} \cdot \partial_t u_{\beta}, u_{\alpha} \cdot \partial_t^2 u_{\beta}, p \cdot u_{\alpha} \cdot \partial_t u_{\beta}, u_{\alpha} \cdot u_{\gamma} \cdot \partial_\gamma u_{\beta}, u_{\alp

In [16]:
U, S, V = np.linalg.svd(Q)
print(S/S[0])
print("First", V[:, -1])
print("Second", V[:, -2])
print("Third", V[:, -3])
print("Fourth", V[:, -4])

[1.000e+00 3.496e-02 1.796e-02 9.287e-03 7.832e-03 2.371e-03 1.979e-03
 1.558e-03 5.786e-04 4.623e-04 3.948e-04 2.132e-04 1.761e-04 1.575e-04
 1.220e-04 9.660e-05 7.583e-05 5.281e-05 4.970e-05 2.811e-05 2.247e-05
 1.613e-05 7.714e-06 6.403e-06 3.913e-06 3.244e-06 1.880e-08 9.168e-09
 6.074e-09 4.760e-09 5.562e-10 9.217e-11 9.866e-17 9.866e-17 9.866e-17
 9.866e-17]
First [ 1.817e-04 -1.271e-03 -3.665e-03 -3.028e-04 -6.459e-03 -2.551e-02
  9.911e-02  6.915e-02 -2.605e-02  5.223e-02  5.496e-02  6.492e-01
 -2.097e-01 -7.519e-02 -9.559e-02 -1.248e-03  3.059e-01  1.010e-01
  4.931e-02  1.232e-01 -2.592e-02 -3.310e-03  1.778e-02 -1.732e-02
  8.246e-03 -1.627e-02 -5.380e-02 -1.946e-01  2.605e-02  3.349e-02
 -5.287e-03  2.870e-01 -2.738e-02  2.863e-02 -4.896e-01  9.315e-02]
Second [ 1.385e-04  2.003e-03 -2.843e-03 -4.761e-04  1.284e-03 -1.858e-02
  4.685e-02  4.409e-02 -4.749e-04 -6.016e-02  2.323e-02  5.826e-01
  3.746e-01  1.490e-01 -1.019e-01  1.948e-01  2.318e-01 -8.928e-02
  1.516e-02  6.1