# Part 3: Take the priors calculated in Part 2 and build the MCMC chains for the TVC02 sites

Benoit Montpetit, CPS/CRD/ECCC, 2024  
Julien Meloche, CPS/CRD/ECCC, 2024  
Mike Brady, CPS/CRD/ECCC, 2024  

This notebook builds the MCMC model architecture around the Snow Microwave Radiative Transfer model ([SMRT](https://github.com/smrt-model/smrt); [Picard et al., 2018](https://gmd.copernicus.org/articles/11/2763/2018/)) and runs it for the Trail Valley Creek, January campaign sites ([Montpetit et al., 2024](https://egusphere.copernicus.org/preprints/2024/egusphere-2024-651/)), using the priors built from the radar equivalent snowpacks of SVS-2

In [1]:
from dask_jobqueue import PBSCluster
from dask.distributed import Client
import dask

In [2]:
cluster = PBSCluster()
cluster

0,1
Dashboard: http://10.102.8.51:42732/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.102.8.51:22977,Workers: 0
Dashboard: http://10.102.8.51:42732/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [3]:
# let's use 5 worker nodes for now
cluster.scale(jobs=10)

In [4]:
cluster

0,1
Dashboard: http://10.102.8.51:42732/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.102.8.51:22977,Workers: 0
Dashboard: http://10.102.8.51:42732/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [5]:
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: http://10.102.8.51:42732/status,

0,1
Dashboard: http://10.102.8.51:42732/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.102.8.51:22977,Workers: 0
Dashboard: http://10.102.8.51:42732/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [6]:
import os
os.environ['PYTENSOR_FLAGS']='blas__ldflags=-lmkl -lguide -lpthread, optimizer=fast_compile, exception_verbosity=high'
print(os.environ['PYTENSOR_FLAGS'])

import arviz as az
import numpy as np
import pymc as pm
import pytensor.tensor as pt
from pytensor.graph import Apply, Op
from pytensor import dprint
from matplotlib import pyplot as plt
import pytensor
import xarray as xr

print(f"Running on PyMC v{pm.__version__}")

%config InlineBackend.figure_format = 'retina'
az.style.use("arviz-darkgrid")

'''
Imports for running SMRT
'''
import pandas as pd
from smrt.core.globalconstants import PERMITTIVITY_OF_AIR
from smrt import sensor_list, make_model, make_snowpack, make_soil

from datetime import date

blas__ldflags=-lmkl -lguide -lpthread, optimizer=fast_compile, exception_verbosity=high
Running on PyMC v5.16.2


In [7]:
%config InlineBackend.figure_format = 'retina'
az.style.use("arviz-darkgrid")

In [8]:
from typing import Optional, Tuple

In [9]:
from constants import TVC02

sites = pd.DataFrame({'site':TVC02})
sites.replace({'RS':'RP'}, regex=True, inplace=True)
sites=list(sites.site.values)
sites.remove('SC02')

In [10]:
today = date.today()

N=1000
RANDOM_SEED = 58
rng = np.random.default_rng(RANDOM_SEED)
num_draws=5000
num_chains=7
num_cores=56
num_tunes=1000

In [11]:
#Initialize the IBA/DORT SMRT model
smrt_model = make_model("iba", "dort", rtsolver_options = {'error_handling':'nan', 'phase_normalization' : True, 
                                                           'diagonalization_method':'shur_forcedtriu'})

In [12]:
"""
Loading site specific data
"""
@dask.delayed
def load_site_date(site):

    df_sig0 = pd.read_pickle('/space/hall5/sitestore/eccc/crd/ccrp/users/rbm001/TVC_MCMC/data/UMass_TVC18-19_DB.pkl')

    #Optimized parameters of Montpetit et al., 2024
    df_params = pd.read_json('/space/hall5/sitestore/eccc/crd/ccrp/users/rbm001/TVC_MCMC/data/TVC_Ku-Band_MedSnowpit.json')
    
    #Snowpit data
    df_snow = pd.read_json('/space/hall5/sitestore/eccc/crd/ccrp/users/rbm001/TVC_MCMC/data/df_stat_pits.json')

    # UMASS KuBand data
    # df_sig0 = pd.read_pickle('../../data/UMass_TVC18-19_DB.pkl')
    df_sig0 = df_sig0.loc[(df_sig0.site_id==site) & (df_sig0.inc_mean<50) & (df_sig0.inc_mean>25)]
    df_sig0 = df_sig0.sample(n=4, weights=df_sig0.inc_mean)

    sig0_obs = 10*np.log10(df_sig0['slc0_sig0_filt'].values)
    
    #Optimized parameters of Montpetit et al., 2024
    # df_params = pd.read_json('../../data/TVC_Ku-Band_MedSnowpit.json')
    df_params = df_params.loc[df_params.site==site]
    epsrs = np.array([2.41, 3.82])
    epsr = epsrs[np.abs(epsrs-df_params.epsr_ku.values).argmin()]
    
    #Snowpit data
    # df_snow = pd.read_json('../../data/df_stat_pits.json')
    temperatures = df_snow[site].temperature        

    return np.array(temperatures), np.array(sig0_obs), np.array([epsr]), np.array(df_sig0.inc_mean.values)

In [13]:

"""
SMRT runner function
"""
@dask.delayed
def smrtSim(thickness_r, thickness_h, density_r, density_h, ssa_r, ssa_h, sigma, 
            inc_ang,#inc_ang1, inc_ang2, inc_ang3, inc_ang4, 
            temperature_r, temperature_h, epsr, sig0_obs):

    sensor  = sensor_list.active(13.285e9, inc_ang)

    sub = make_soil('geometrical_optics', 
                    permittivity_model = complex(epsr[0],0.74), 
                    mean_square_slope=0.01, 
                    temperature = temperature_h[0])

    # Creating the snowpack to simulate with the substrate
    sp = make_snowpack(thickness=np.array([thickness_r[0],thickness_h[0]]), 
                       microstructure_model='exponential',
                       density= [density_r[0], density_h[0]],
                       temperature= [temperature_r[0], temperature_h[0]],
                       ice_permittivity_model=None,
                       background_permittivity_model=PERMITTIVITY_OF_AIR,
                       liquid_water=0, salinity=0, 
                       corr_length = [0.74*4*(1-density_r[0]/917)/917/ssa_r[0],1.11*4*(1-density_h[0]/917)/917/ssa_h[0]],
                       substrate = sub)
    
    sigma_nought = smrt_model.run(sensor, sp)    

    #Returns the log-normal difference between the observations and the simulations
    return -0.5 * ((sig0_obs - sigma_nought.sigmaVV_dB()) / sigma) ** 2 - np.log(np.sqrt(2 * np.pi)) - np.log(sigma)
    

In [14]:

"""
PyTensor SMRT Class Operation
"""

# define a pytensor Op for our likelihood function
class SMRT(Op):
    @dask.delayed
    def make_node(self, h_r, h_h, density_r, density_h,
                  ssa_r, ssa_h, sigma, inc_ang,#inc_ang1,inc_ang2,inc_ang3,inc_ang4, 
                  temperature_r, temperature_h,epsr, data) -> Apply: 
        h_r=pt.as_tensor(h_r)
        h_h=pt.as_tensor(h_h)
        density_r=pt.as_tensor(density_r)
        density_h=pt.as_tensor(density_h)
        ssa_r=pt.as_tensor(ssa_r)
        ssa_h=pt.as_tensor(ssa_h)
        sigma=pt.as_tensor(sigma)
        # inc_ang1=pt.as_tensor(inc_ang1)
        # inc_ang2=pt.as_tensor(inc_ang2)
        # inc_ang3=pt.as_tensor(inc_ang3)
        # inc_ang4=pt.as_tensor(inc_ang4)
        inc_ang=pt.as_tensor(inc_ang)
        temperature_r=pt.as_tensor(temperature_r)
        temperature_h=pt.as_tensor(temperature_h)
        epsr=pt.as_tensor(epsr)
        data=pt.as_tensor(data)        

        inputs = [h_r, h_h, 
                  density_r, density_h,
                  ssa_r, ssa_h, 
                  sigma, 
                  inc_ang,
                  # inc_ang1,inc_ang2,inc_ang3,inc_ang4, 
                  temperature_r,temperature_h, epsr,
                  data,  
                 ]

        outputs = [data.type()]

        
        return Apply(self, inputs, outputs)
    @dask.delayed
    def perform(self, node: Apply, inputs: list[np.ndarray], outputs: list[list[None]]) -> None:
        
        # the method that is used when calling the Op
        h_r, h_h, density_r, density_h, ssa_r, ssa_h, sigma, inc_ang,temperature_r, temperature_h, epsr, sig0_obs = inputs

        # call the smrt function
        logl = smrtSim(h_r, h_h, density_r, density_h, ssa_r, ssa_h, sigma, inc_ang, temperature_r, temperature_h, epsr, sig0_obs)

        outputs[0][0] = np.asarray(logl)
        

In [15]:


"""
Randomizer function to generate prior predictives
"""
@dask.delayed
def random(
    h_r: np.ndarray | float,
    h_h: np.ndarray | float,
    density_r: np.ndarray | float,
    density_h: np.ndarray | float,
    ssa_r: np.ndarray | float,
    ssa_h: np.ndarray | float,
    sigma: np.ndarray | float,
    inc_ang: np.ndarray | float,
    # inc_ang1: np.ndarray | float,
    # inc_ang2: np.ndarray | float,
    # inc_ang3: np.ndarray | float,
    # inc_ang4: np.ndarray | float,
    temperature_r: np.ndarray | float,
    temperature_h: np.ndarray | float,
    epsr: np.ndarray | float,
    rng: Optional[np.random.Generator] = None,
    size : Optional[Tuple[int]]=None,
) -> list[np.ndarray] | float :
    return [rng.normal(loc=h_r, scale=1, size=size),
            rng.normal(loc=h_h, scale=1, size=size),
            rng.normal(loc=density_r, scale=1, size=size),
            rng.normal(loc=density_h, scale=1, size=size),
            rng.normal(loc=ssa_r, scale=1, size=size),
            rng.normal(loc=ssa_h, scale=1, size=size),
            rng.normal(loc=sigma, scale=1, size=size)]

"""
Custom distribution function for PyMC to call our Log-Like function
"""    
@dask.delayed
def custom_dist_loglike(sig0_obs, h_snow, h_frac, 
                        density_r, density_h,
                        ssa_r, ssa_h, sigma, 
                        inc_ang,
                        # inc_ang1,inc_ang2,inc_ang3,inc_ang4, 
                        temperature_r, temperature_h, epsr): 
    return sig0_sim(h_snow, h_frac,density_r, density_h,
                ssa_r, ssa_h, sigma, inc_ang,
                    # inc_ang1,inc_ang2,inc_ang3,inc_ang4, 
                  temperature_r, temperature_h, epsr, sig0_obs)


In [16]:
sig0_sim = SMRT()

In [19]:
@dask.delayed
def run_mcmc(site):

    """
    Run MCMC
    """

    temperatures, sig0_obs, epsr, inc_ang = load_site_date(site)

    # print(temperatures, sig0_obs, np.array([epsr]), inc_ang)
    
    outname = f'../../data/MCMC_Output_{site}_{today.strftime('%Y%m%d')}_{len(sig0_obs)}obs_Arctic_test'



    priors = xr.open_dataset('/space/hall5/sitestore/eccc/crd/ccrp/users/rbm001/TVC_MCMC/data/SVS-2_ArcticPriors.nc')

    # use PyMC to sample from log-likelihood
    with pm.Model() as mcmc_model:
    
        #Woolley et al., 2024
        h_r = pm.TruncatedNormal("Thickness_R", mu=priors.sel(property='thickness',grain_type='R')['mean'].values, 
                                 sigma=priors.sel(property='thickness',grain_type='R')['std'].values,
                                 lower=priors.sel(property='thickness',grain_type='R')['min'].values, 
                                 upper=priors.sel(property='thickness',grain_type='R')['max'].values)
        h_h = pm.TruncatedNormal("Thickness_H", mu=priors.sel(property='thickness',grain_type='H')['mean'].values, 
                                 sigma=priors.sel(property='thickness',grain_type='H')['std'].values, 
                                 lower=priors.sel(property='thickness',grain_type='H')['min'].values, 
                                 upper=priors.sel(property='thickness',grain_type='H')['max'].values)
        density_r = pm.TruncatedNormal('Density_R', mu=priors.sel(property='density',grain_type='R')['mean'].values, 
                                       sigma=priors.sel(property='density',grain_type='R')['std'].values, 
                                       lower=priors.sel(property='density',grain_type='R')['min'].values, 
                                       upper=priors.sel(property='density',grain_type='R')['max'].values)
        density_h = pm.TruncatedNormal('Density_H', mu=priors.sel(property='density',grain_type='H')['mean'].values, 
                                       sigma=priors.sel(property='density',grain_type='H')['std'].values, 
                                       lower=priors.sel(property='density',grain_type='H')['min'].values, 
                                       upper=priors.sel(property='density',grain_type='H')['max'].values)
        ssa_r = pm.TruncatedNormal('SSA_R', mu=priors.sel(property='ssa',grain_type='R')['mean'].values, 
                                   sigma=3*priors.sel(property='ssa',grain_type='R')['std'].values, 
                                   lower=priors.sel(property='ssa',grain_type='R')['min'].values, 
                                   upper=priors.sel(property='ssa',grain_type='R')['max'].values)
        ssa_h = pm.TruncatedNormal('SSA_H', mu=priors.sel(property='ssa',grain_type='H')['mean'].values, 
                                   sigma=3*priors.sel(property='ssa',grain_type='H')['std'].values, 
                                   lower=priors.sel(property='ssa',grain_type='H')['min'].values, 
                                   upper=priors.sel(property='ssa',grain_type='H')['max'].values)
        sigma = pm.TruncatedNormal('Sigma', mu=1, sigma=1, lower=0.3, upper=2)
    
        likelihood = pm.CustomDist('likelihood', h_r, h_h, 
                                              density_r, density_h,
                                              ssa_r, ssa_h,
                                              sigma,
                                              inc_ang,
                                              # inc_ang[0],inc_ang[1],inc_ang[2],inc_ang[3], 
                                              temperatures[0], temperatures[1], epsr,
                                              observed=sig0_obs, logp=custom_dist_loglike, random=random)
        # idata=pm.sample_prior_predictive(samples=num_tunes)

        idata=pm.sample(draws=num_draws,tune=num_tunes,chains=num_chains,cores=1, step=pm.step_methods.DEMetropolisZ(), blas_cores=num_cores, discard_tuned_samples=False)      
    
    idata.to_netcdf(outname + '.nc')

In [20]:
mcmc_runs=[]

for site in sites:

    mcmc_runs.append(run_mcmc(site))

In [21]:
dask.compute(mcmc_runs)

TypeError: Delayed objects of unspecified length are not iterable