In [1]:
# render matplotlib figures with the interactive notebook backend
%matplotlib notebook
# autoreload mode 1: before each execution, reload only the modules
# that were registered through %aimport
%load_ext autoreload
%autoreload 1
# record which host and working directory this notebook was run from
!hostname
!pwd

dv004.bridges2.psc.edu
/ocean/projects/asc170022p/mtragoza/mre-pinn/notebooks


In [2]:
import sys, os
import numpy as np

# add the parent directory to the import path so that mre_pinn can be
# imported from there, then register it for autoreload
sys.path.append('..')
%aimport mre_pinn

# param_search is looked up in a directory two levels above this one;
# it is registered for autoreload as well
sys.path.append('../../param_search')
%aimport param_search
ps = param_search  # short alias used by the cells below

Using backend: pytorch



# FEM baseline evaluations

## Mesh, element, and PDE settings



In [37]:
# define the job template and name format
#
# `template` is a bash/SLURM batch script containing str.format-style
# placeholders ({frequency}, {pde_name}, ...). It is passed to ps.submit
# together with `name` and the parameter space; presumably ps.submit fills the
# placeholders once per parameter combination and derives {job_name} from
# `name` -- confirm against the param_search package.
#
# The backslash right after the opening quotes suppresses the leading newline,
# so the script starts with the shebang line. Each `\\` in this non-raw string
# becomes a single backslash, i.e. a shell line continuation.
#
# NOTE(review): the recorded job status output in this notebook shows
# "Lmod has detected the following error" in stderr; this presumably comes
# from the `module load MPI` line below -- confirm the module name on the
# target cluster.

template = '''\
#!/bin/bash
#SBATCH --job-name={job_name}
#SBATCH --account=bio170034p
#SBATCH --partition=BatComputer
#SBATCH --gres=gpu:rtx5000:1
#SBATCH --time=48:00:00
#SBATCH -o %J.stdout
#SBATCH -e %J.stderr
#SBATCH --mail-type=all

hostname
pwd
module load MPI
source activate MRE-PINN

python ../../../fem.py \\
    --data_root ../../../data/BIOQIC \\
    --data_name fem_box \\
    --frequency {frequency} \\
    --xyz_slice {xyz_slice} \\
    --pde_name {pde_name} \\
    --u_elem_type {u_elem_type} \\
    --mu_elem_type {mu_elem_type} \\
    --align_nodes {align_nodes} \\
    --savgol_filter {savgol_filter} \\
    --save_prefix {job_name}
'''
# job name format: one field per swept parameter, in the same order as the
# keyword arguments of the parameter space
name = 'fem_{frequency}_{xyz_slice}_{pde_name}_{u_elem_type}_{mu_elem_type}_{align_nodes}_{savgol_filter}'

# parameter grid swept by the experiment; every key matches a placeholder
# in the job template and in the job name format

param_space = ps.ParamSpace(**{
    'frequency': list(range(50, 101, 10)),
    'xyz_slice': ['2D'],
    'pde_name': ['helmholtz', 'hetero'],
    'u_elem_type': ['CG-1', 'CG-2'],
    'mu_elem_type': ['CG-1', 'DG-1', 'DG-0'],
    'align_nodes': [1],
    'savgol_filter': [0, 1],
})

# size of the parameter space (6 * 1 * 2 * 2 * 3 * 1 * 2 value combinations)
len(param_space)

144

In [38]:
# reload the modules registered with %aimport before using them
%autoreload
expt_name = '2022-09-09_FEM'

# NOTE: every execution of this cell calls ps.submit for the whole parameter
# space (presumably submitting one batch job per combination -- the recorded
# output shows 144 progress steps followed by a list of job ids), so avoid
# re-running it unintentionally.
jobs = ps.submit(template, name, param_space, work_dir=expt_name, verbose=True)
# save the returned job table to '<expt_name>.jobs' in the working directory
jobs.to_csv(f'{expt_name}.jobs')

# alternative to submitting: reload a previously saved job table.
# NOTE(review): `pd` is not imported in any visible cell -- add
# `import pandas as pd` before enabling the line below.
#jobs = pd.read_csv(f'{expt_name}.jobs')

100%|██████████| 144/144 [00:01<00:00, 139.74it/s]
[10942729, 10942730, 10942731, 10942732, 10942733, 10942734, 10942735, 10942736, 10942737, 10942738, 10942739, 10942740, 10942741, 10942742, 10942743, 10942744, 10942745, 10942746, 10942747, 10942748, 10942749, 10942750, 10942751, 10942752, 10942753, 10942754, 10942755, 10942756, 10942757, 10942758, 10942759, 10942760, 10942761, 10942762, 10942763, 10942764, 10942765, 10942766, 10942767, 10942768, 10942769, 10942770, 10942771, 10942772, 10942773, 10942774, 10942775, 10942776, 10942777, 10942778, 10942779, 10942780, 10942781, 10942782, 10942783, 10942784, 10942785, 10942786, 10942787, 10942788, 10942789, 10942790, 10942791, 10942792, 10942793, 10942794, 10942795, 10942796, 10942797, 10942798, 10942799, 10942800, 10942801, 10942802, 10942803, 10942804, 10942805, 10942806, 10942807, 10942808, 10942809, 10942810, 10942811, 10942812, 10942813, 10942814, 10942815, 10942816, 10942817, 10942818, 10942819, 10942820, 10942821, 10942822, 10942823

In [30]:
# query the submitted jobs and show only the columns of interest
status_cols = [
    'job_name', 'job_state', 'node_id',
    'runtime', 'stdout', 'stderr',
]
ps.status(jobs)[status_cols]

Unnamed: 0_level_0,job_name,job_state,node_id,runtime,stdout,stderr
job_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
10942588,fem_50_2D_helmholtz_CG-2_CG-1_1_1,,,,[240 rows x 3 columns],Lmod has detected the following error: The fol...
10942589,fem_50_2D_hetero_CG-2_CG-1_1_1,,,,[240 rows x 3 columns],Lmod has detected the following error: The fol...


In [36]:
# collect the metrics reported by every job
metrics = ps.metrics(jobs)

# sanity check: the highest iteration recorded for each job must be 0
# (presumably because a FEM solve is a single evaluation rather than an
# iterative training run -- confirm against fem.py)
max_iteration_per_job = metrics.groupby('job_name')['iteration'].max()
assert (max_iteration_per_job == 0).all()

# keep only the rows recorded at iteration 0
metrics = metrics[metrics['iteration'] == 0]

# experimental parameters
param_cols = [
    'frequency', 'pde_name', 'u_elem_type',
    'mu_elem_type', 'align_nodes', 'savgol_filter',
]
# metric identifiers
index_cols = ['variable_name', 'spatial_frequency_bin', 'spatial_region']
# metric values
metric_cols = ['mean_squared_abs_value', 'power_density', 'mean_abs_value']

# position of 'variable_name' among the group keys (parameters come first)
var_name_level = len(param_cols) + index_cols.index('variable_name')

# average the metric values per (parameters, identifiers) group, then move the
# variable name from the row index into the columns, which gives
# (metric, variable) column pairs
grouped_metrics = metrics.groupby(param_cols + index_cols, sort=False)
metrics = grouped_metrics[metric_cols].mean().unstack(level=[var_name_level])

def metric_map(t):
    """Turn a (metric_name, variable_name) pair into a flat column label.

    The label is '<variable_name>_<ABBR>', where ABBR abbreviates the metric:
    mean_squared_abs_value -> MSAV, mean_abs_value -> MAV,
    power_density -> SPD. Three substrings are then rewritten, in this order:
    'diff_MSAV' -> 'pred_MSAE', 'f_sum_MSAV' -> 'PDE_MSAE',
    'diff_MAV' -> 'pred_MAD'.

    Raises KeyError when metric_name is not one of the three known metrics.
    """
    metric_name, var_name = t
    abbreviations = {
        'mean_squared_abs_value': 'MSAV',
        'mean_abs_value': 'MAV',
        'power_density': 'SPD'
    }
    label = '{}_{}'.format(var_name, abbreviations[metric_name])

    # applied sequentially, in the listed order
    substitutions = (
        ('diff_MSAV', 'pred_MSAE'),
        ('f_sum_MSAV', 'PDE_MSAE'),
        ('diff_MAV', 'pred_MAD'),
    )
    for old, new in substitutions:
        label = label.replace(old, new)
    return label

# replace the (metric, variable) column pairs with flat, abbreviated labels
metrics.columns = list(map(metric_map, metrics.columns.to_flat_index()))
metrics

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,u_pred_MSAV,u_pred_MSAE,u_true_MSAV,lu_pred_MSAV,lu_pred_MSAE,Lu_pred_MSAV,f_trac_MSAV,PDE_MSAE,f_body_MSAV,mu_pred_MSAV,...,lu_pred_MAD,Lu_pred_MAV,f_trac_MAV,f_sum_MAV,f_body_MAV,mu_pred_MAV,mu_pred_MAD,mu_true_MAV,Mu_MAV,Mu_pred_MAD
frequency,pde_name,u_elem_type,mu_elem_type,align_nodes,savgol_filter,spatial_frequency_bin,spatial_region,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
50,helmholtz,CG-2,CG-1,1,1,all,all,1.556419e-10,4.383688e-16,1.556485e-10,0.0,0.161991,0.161991,0.0,0.0,0.0,15143990.0,...,,,,,,,,,,
50,helmholtz,CG-2,CG-1,1,1,all,0,,,,,,,,,,,...,0.033074,0.033074,0.0,0.0,0.0,2891.545379,514.550279,3000.0,3063.138853,974.909795
50,helmholtz,CG-2,CG-1,1,1,all,1,,,,,,,,,,,...,0.057061,0.057061,0.0,0.0,0.0,9856.299687,2939.706052,8904.931533,6491.038252,2711.02845
50,helmholtz,CG-2,CG-1,1,1,all,2,,,,,,,,,,,...,0.023534,0.023534,0.0,0.0,0.0,9512.630316,4390.645655,6519.868981,4310.24912,2422.943163
50,helmholtz,CG-2,CG-1,1,1,all,3,,,,,,,,,,,...,0.028201,0.028201,0.0,0.0,0.0,2591.294578,2775.70177,4229.971973,1646.857476,3286.711406
50,helmholtz,CG-2,CG-1,1,1,all,4,,,,,,,,,,,...,0.019453,0.019453,0.0,0.0,0.0,2887.84365,1258.084154,3263.802967,3776.386781,6419.128227
50,helmholtz,CG-2,CG-1,1,1,1.0,all,,,,,,,,,,,...,,,,,,,,,,
50,helmholtz,CG-2,CG-1,1,1,2.0,all,,,,,,,,,,,...,,,,,,,,,,
50,helmholtz,CG-2,CG-1,1,1,3.0,all,,,,,,,,,,,...,,,,,,,,,,
50,helmholtz,CG-2,CG-1,1,1,4.0,all,,,,,,,,,,,...,,,,,,,,,,


In [None]:
# wave field error (u_pred_MSAE) against each experimental parameter
fig = ps.plot(
    metrics.reset_index(), x=param_cols, y=['u_pred_MSAE'],
    height=3, width=2.25, legend=False, tight=True,
)

Pretty much all models fit the wave field to a very low error.

In [None]:
# PDE residual (PDE_MSAE) against each experimental parameter
fig = ps.plot(
    metrics.reset_index(), x=param_cols, y=['PDE_MSAE'],
    height=3, width=2.5, legend=False, tight=True,
)

There is much more variance in minimizing the PDE residual, and no clear trends jump out at first.

In [None]:
# same PDE residual plot, with the hue set by the PDE name
fig = ps.plot(
    metrics.reset_index(), x=param_cols, y=['PDE_MSAE'],
    hue='pde_name', height=4, width=2.5, tight=True,
)

Overall it seems that the Helmholtz PDE residual was easier to minimize than the heterogeneous PDE. There also appears to be a trend where the residuals are lower (for both PDEs) when omega0 is higher.

In [None]:
# stiffness error (mu_pred_MAD) against each experimental parameter
fig = ps.plot(
    metrics.reset_index(), x=param_cols, y=['mu_pred_MAD'],
    height=8, width=3, tight=True,
)

Here we are looking at the mean absolute difference (MAD) of the predicted stiffness in each of the regions. There is a very clear signal in the PDE name plot: using the heterogeneous PDE results in lower error in the predicted stiffness per region, compared to the Helmholtz PDE. There may be a trend in the omega0 and activ_fn plots as well, but it's less clear.

In [None]:
# stiffness error (mu_pred_MAD), with the hue set by the PDE name
fig = ps.plot(
    metrics.reset_index(), x=param_cols, y=['mu_pred_MAD'],
    hue='pde_name', height=4, width=3, tight=True,
)
# NOTE(review): the '1d_experiment_' filename prefix does not match this
# notebook's expt_name ('2022-09-09_FEM'); presumably carried over from another
# experiment -- confirm it does not overwrite that experiment's figure.
fig.savefig('1d_experiment_mu_pred_MAD.png', dpi=300, bbox_inches='tight')

This plot emphasizes a clear and statistically significant signal from using the heterogeneous PDE instead of the Helmholtz PDE, resulting in more accurate predicted stiffness.

In [None]:
# stiffness error expressed as a percentage of the true mean stiffness value
m = metrics.reset_index()
m['Mean % mu error (by region)'] = (m['mu_pred_MAD'] / m['mu_true_MAV']) * 100

fig = ps.plot(
    m, x=param_cols, y=['Mean % mu error (by region)'],
    hue='pde_name', height=4, width=3, tight=True,
)
# NOTE(review): the '1d_experiment_' filename prefix does not match this
# notebook's expt_name ('2022-09-09_FEM'); presumably carried over from another
# experiment -- confirm it does not overwrite that experiment's figure.
fig.savefig('1d_experiment_mu_pred_MAD_relative.png', dpi=300, bbox_inches='tight')

In [None]:
# Average every metric column per parameter combination.
# After reset_index() the frame also carries the identifier columns
# 'spatial_frequency_bin' and 'spatial_region', which mix the string 'all' with
# numbers. numeric_only=True excludes such non-numeric columns explicitly;
# without it, pandas >= 2.0 raises a TypeError when it tries to average them
# (older versions dropped them silently or with a FutureWarning).
agg = metrics.reset_index().groupby(param_cols).mean(numeric_only=True)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# linear fit of stiffness error against PDE residual, one fit per PDE name;
# sns.lmplot returns a seaborn grid object, which is what `fig` refers to here
fig = sns.lmplot(
    x='PDE_MSAE',
    y='mu_pred_MAD',
    hue='pde_name',
    data=agg.reset_index(),
)
# NOTE(review): the '1d_experiment_' filename prefix does not match this
# notebook's expt_name ('2022-09-09_FEM'); presumably carried over from another
# experiment -- confirm it does not overwrite that experiment's figure.
fig.savefig('1d_experiment_mu_pred_MAD_corr.png', dpi=200, bbox_inches='tight')

Even though using the heterogeneous PDE instead of Helmholtz consistently improves reconstruction quality, there is not a clear correlation between the PDE residual and the reconstruction quality. If anything, the Helmholtz residual is more strongly correlated with mu error, even though mu error is higher in absolute terms.

In [None]:
# Stiffness error for the heterogeneous-PDE runs only.
m = metrics.reset_index()

# The data is restricted to a single pde_name, so that column is constant and
# is left off the x axes. The previous `param_cols[1:]` slice instead dropped
# 'frequency' (the first entry of param_cols) and kept 'pde_name'.
hetero_x_cols = [col for col in param_cols if col != 'pde_name']

fig = ps.plot(
    m[m.pde_name == 'hetero'].copy(),
    x=hetero_x_cols,
    y=['mu_pred_MAD'],
    height=5.5, width=3,
    tight=True
)

There does not seem to be a relationship with the PDE distribution. So even though I was correct that the Helmholtz PDE seems worse than the heterogeneous PDE, the reason why does not seem to be related to the domain sampling distribution.