# Part 2: Take Land Surface Model multi-layered snow profiles and reduce them to Microwave Equivalent Snowpacks

Benoit Montpetit, CPS/CRD/ECCC, 2024  
Julien Meloche, CPS/CRD/ECCC, 2024  
Mike Brady, CPS/CRD/ECCC, 2024  

This notebook takes multi-layered snowpacks from land surface models and aggregates the layers to a minimum number of layers relevant to microwave radiative transfer without compromising the geophysical properties of the snowpack.  
The methodology was developed by [Meloche et al. (2025)](https://doi.org/10.5194/tc-19-2949-2025).  
The input data originally comes from the Soil Vegetation Snow version 2 Land Surface Model [(SVS-2; Vionnet et al., In Prep)](NoLink), improved for Arctic snowpacks by [Woolley et al. (2024)](https://doi.org/10.5194/tc-18-5685-2024).

In [None]:
# custom functions defined in res_functions.py
from res_functions import compute_ke, avg_snow_sum_thick

In [None]:
from pathlib import Path
from sklearn.cluster import KMeans
import xarray as xr
import numpy as np
import pandas as pd

In [None]:
# Folder holding the input NetCDF files; the reduced outputs are written there too.
# The path is relative, so it resolves against the notebook's working directory.
DATA_DIR = Path('../Data')

# Process all Arctic Ensembles

In [None]:
# Load the Arctic SVS-2 ensemble output relevant to the TVC 2018/19 experiment.
# The dataset is indexed positionally along its 'ensemble' dimension in the cells below.
svs_arctic = xr.open_dataset(DATA_DIR / 'SVS-2_ArcticEnsembles_TVC02.nc')

In [None]:
# Show the dataset summary as the cell output (quick visual check of what was loaded).
svs_arctic

In [None]:
# Build the (ensemble, time, snow_layer) row index of the output table.
# Every date (index level 0) that keeps at least one complete row after dropna()
# gets exactly two rows: one per merged layer, numbered 0 and 1.
ensembles, times, layers = [], [], []

for i in range(svs_arctic.sizes['ensemble']):
    svs_arctic_temp = (
        svs_arctic.isel(ensemble=i)[
            ['SNODEN_ML','SNOMA_ML','SNODP','SNODOPT_ML','TSNOW_ML','SNOTYPE_ML']
        ]
        .to_dataframe()
        .dropna()
    )
    # Ensemble members without a single complete row contribute no index entries.
    if svs_arctic_temp.empty:
        continue
    for cur_date in svs_arctic_temp.index.get_level_values(0).unique():
        ensembles += [i, i]
        times += [cur_date, cur_date]
        layers += [0, 1]

# Empty (column-less) table; its columns are created by the next cell.
indexes = pd.MultiIndex.from_arrays(
    [ensembles, times, layers],
    names=('ensemble', 'time', 'snow_layer'),
)
svs_arctic_merged = pd.DataFrame({}, index=indexes)

In [None]:
# Create the output columns and fill the first two ensemble members (positions 0
# and 1); the loop cell that follows handles positions 2 and up.
#
# For every date of a member, the multi-layer profile is split into two groups by
# K-means clustering on (ke, height) and each group is collapsed into one layer
# with avg_snow_sum_thick (custom helper from res_functions).
for i in range(min(2, svs_arctic.sizes['ensemble'])):
    svs_arctic_temp = svs_arctic.isel(ensemble=i)[['SNODEN_ML','SNOMA_ML','SNODP','SNODOPT_ML','TSNOW_ML','SNOTYPE_ML']].to_dataframe().dropna()
    if svs_arctic_temp.empty:
        # Member has no complete rows, so it has no rows in svs_arctic_merged either.
        continue

    # ssa = 6 / (SNODOPT_ML * 917); 917 is presumably the density of ice in kg m-3
    # (TODO confirm). Non-positive diameters become NaN and are dropped right after.
    svs_arctic_temp['ssa'] = svs_arctic_temp['SNODOPT_ML'].apply(lambda x : 6./(x*917) if x > 0 else np.nan)
    svs_arctic_temp = svs_arctic_temp.dropna()
    # Layer thickness = SNOMA_ML / SNODEN_ML (second selected column over the first).
    svs_arctic_temp['thickness'] = svs_arctic_temp[['SNODEN_ML','SNOMA_ML']].apply(lambda x : x.iloc[1] / x.iloc[0], axis = 1)

    for cur_date in svs_arctic_temp.index.get_level_values(0).unique():
        # Height of each layer: thickness accumulated from the last layer of the
        # date back to the current one (reverse cumulative sum).
        svs_arctic_temp.loc[cur_date,'height'] = np.cumsum(svs_arctic_temp.loc[cur_date].thickness.values[::-1])[::-1]

        X = pd.DataFrame({ 'ke' : compute_ke(svs_arctic_temp.loc[cur_date,['thickness', 'TSNOW_ML', 'SNODEN_ML', 'ssa', 'height']]),
                           'height' : svs_arctic_temp.loc[cur_date].height})
        kmeans = KMeans(n_clusters=2, random_state=0, n_init="auto").fit(X)
        svs_arctic_temp.loc[cur_date,'label'] = kmeans.labels_

        # One row per cluster, in order of first appearance in the profile.
        merged_layers = svs_arctic_temp.loc[cur_date].groupby('label',
                                                              sort = False).apply(lambda x: avg_snow_sum_thick(x, method = 'thick'),
                                                                                  include_groups=False).rename_axis('snow_layer')

        if svs_arctic_merged.columns.empty:
            # The first result only defines the output columns. The join matches on
            # 'snow_layer' alone and therefore copies this single profile onto every
            # (ensemble, time) row, so blank those copies: rows that never receive
            # their own result must stay NaN instead of carrying another date's data.
            svs_arctic_merged = svs_arctic_merged.join(merged_layers)
            svs_arctic_merged = svs_arctic_merged.mask(svs_arctic_merged.notna())

        # Store this date's two merged layers, same assignment as in the loop cell.
        svs_arctic_merged.loc[i,cur_date] = merged_layers.values

In [None]:
# Reduce the remaining ensemble members (positions 2 and up) to two layers per date
# and store the result in the rows of svs_arctic_merged reserved for them.
for i in range(2, svs_arctic.sizes['ensemble']):
    svs_arctic_temp = (
        svs_arctic.isel(ensemble=i)[
            ['SNODEN_ML','SNOMA_ML','SNODP','SNODOPT_ML','TSNOW_ML','SNOTYPE_ML']
        ]
        .to_dataframe()
        .dropna()
    )
    # Nothing to do for members without any complete row.
    if svs_arctic_temp.empty:
        continue

    # ssa = 6 / (SNODOPT_ML * 917); rows with a non-positive diameter get NaN
    # and are removed by the dropna() that follows.
    svs_arctic_temp['ssa'] = svs_arctic_temp['SNODOPT_ML'].apply(
        lambda d_opt: 6./(d_opt*917) if d_opt > 0 else np.nan
    )
    svs_arctic_temp = svs_arctic_temp.dropna()
    # thickness = SNOMA_ML / SNODEN_ML, computed row by row.
    svs_arctic_temp['thickness'] = svs_arctic_temp[['SNODEN_ML','SNOMA_ML']].apply(
        lambda row: row.iloc[1] / row.iloc[0], axis=1
    )

    for cur_date in svs_arctic_temp.index.get_level_values(0).unique():
        # Reverse cumulative sum of thickness gives the height of each layer.
        layer_thickness = svs_arctic_temp.loc[cur_date].thickness.values
        svs_arctic_temp.loc[cur_date, 'height'] = np.cumsum(layer_thickness[::-1])[::-1]

        # Cluster the layers of this date into two groups on (ke, height).
        ke = compute_ke(svs_arctic_temp.loc[cur_date, ['thickness', 'TSNOW_ML', 'SNODEN_ML', 'ssa', 'height']])
        X = pd.DataFrame({'ke': ke, 'height': svs_arctic_temp.loc[cur_date].height})
        kmeans = KMeans(n_clusters=2, random_state=0, n_init="auto").fit(X)
        svs_arctic_temp.loc[cur_date, 'label'] = kmeans.labels_

        # Collapse each cluster into one layer; clusters keep their order of
        # first appearance (sort=False).
        merged_layers = (
            svs_arctic_temp.loc[cur_date]
            .groupby('label', sort=False)
            .apply(lambda grp: avg_snow_sum_thick(grp, method='thick'), include_groups=False)
            .rename_axis('snow_layer')
        )
        svs_arctic_merged.loc[i, cur_date] = merged_layers.values

In [None]:
# Convert the merged table to an xarray Dataset; the index levels
# (ensemble, time, snow_layer) are expected to become the Dataset dimensions.
svs_arctic_res = xr.Dataset.from_dataframe(svs_arctic_merged)

In [None]:
# Write the reduced Arctic snowpacks to DATA_DIR; the '_MES' suffix marks the reduced output.
svs_arctic_res.to_netcdf(DATA_DIR / 'SVS-2_ArcticEnsembles_TVC02_MES.nc')

# Process the default outputs

In [None]:
# Load the Default SVS-2 ensemble output relevant to the TVC 2018/19 experiment.
# The dataset is indexed positionally along its 'ensemble' dimension in the cells below.
svs_default = xr.open_dataset(DATA_DIR / 'SVS-2_DefaultEnsembles_TVC02.nc')

In [None]:
# Build the (ensemble, time, snow_layer) row index of the output table.
# Every date (index level 0) that keeps at least one complete row after dropna()
# gets exactly two rows: one per merged layer, numbered 0 and 1.
ensembles, times, layers = [], [], []

for i in range(svs_default.sizes['ensemble']):
    svs_default_temp = (
        svs_default.isel(ensemble=i)[
            ['SNODEN_ML','SNOMA_ML','SNODP','SNODOPT_ML','TSNOW_ML','SNOTYPE_ML']
        ]
        .to_dataframe()
        .dropna()
    )
    # Ensemble members without a single complete row contribute no index entries.
    if svs_default_temp.empty:
        continue
    for cur_date in svs_default_temp.index.get_level_values(0).unique():
        ensembles += [i, i]
        times += [cur_date, cur_date]
        layers += [0, 1]

# Empty (column-less) table; its columns are created by the next cell.
indexes = pd.MultiIndex.from_arrays(
    [ensembles, times, layers],
    names=('ensemble', 'time', 'snow_layer'),
)
svs_default_merged = pd.DataFrame({}, index=indexes)

In [None]:
# Create the output columns and fill the first two ensemble members (positions 0
# and 1); the loop cell that follows handles positions 2 and up.
#
# For every date of a member, the multi-layer profile is split into two groups by
# K-means clustering on (ke, height) and each group is collapsed into one layer
# with avg_snow_sum_thick (custom helper from res_functions).
for i in range(min(2, svs_default.sizes['ensemble'])):
    svs_default_temp = svs_default.isel(ensemble=i)[['SNODEN_ML','SNOMA_ML','SNODP','SNODOPT_ML','TSNOW_ML','SNOTYPE_ML']].to_dataframe().dropna()
    if svs_default_temp.empty:
        # Member has no complete rows, so it has no rows in svs_default_merged either.
        continue

    # ssa = 6 / (SNODOPT_ML * 917); 917 is presumably the density of ice in kg m-3
    # (TODO confirm). Non-positive diameters become NaN and are dropped right after.
    svs_default_temp['ssa'] = svs_default_temp['SNODOPT_ML'].apply(lambda x : 6./(x*917) if x > 0 else np.nan)
    svs_default_temp = svs_default_temp.dropna()
    # Layer thickness = SNOMA_ML / SNODEN_ML (second selected column over the first).
    svs_default_temp['thickness'] = svs_default_temp[['SNODEN_ML','SNOMA_ML']].apply(lambda x : x.iloc[1] / x.iloc[0], axis = 1)

    for cur_date in svs_default_temp.index.get_level_values(0).unique():
        # Height of each layer: thickness accumulated from the last layer of the
        # date back to the current one (reverse cumulative sum).
        svs_default_temp.loc[cur_date,'height'] = np.cumsum(svs_default_temp.loc[cur_date].thickness.values[::-1])[::-1]

        X = pd.DataFrame({ 'ke' : compute_ke(svs_default_temp.loc[cur_date,['thickness', 'TSNOW_ML', 'SNODEN_ML', 'ssa', 'height']]),
                           'height' : svs_default_temp.loc[cur_date].height})
        kmeans = KMeans(n_clusters=2, random_state=0, n_init="auto").fit(X)
        svs_default_temp.loc[cur_date,'label'] = kmeans.labels_

        # One row per cluster, in order of first appearance in the profile.
        merged_layers = svs_default_temp.loc[cur_date].groupby('label',
                                                               sort = False).apply(lambda x: avg_snow_sum_thick(x, method = 'thick'),
                                                                                   include_groups=False).rename_axis('snow_layer')

        if svs_default_merged.columns.empty:
            # The first result only defines the output columns. The join matches on
            # 'snow_layer' alone and therefore copies this single profile onto every
            # (ensemble, time) row, so blank those copies: rows that never receive
            # their own result must stay NaN instead of carrying another date's data.
            svs_default_merged = svs_default_merged.join(merged_layers)
            svs_default_merged = svs_default_merged.mask(svs_default_merged.notna())

        # Store this date's two merged layers, same assignment as in the loop cell.
        svs_default_merged.loc[i,cur_date] = merged_layers.values

In [None]:
# Reduce the remaining ensemble members (positions 2 and up) to two layers per date
# and store the result in the rows of svs_default_merged reserved for them.
for i in range(2, svs_default.sizes['ensemble']):
    svs_default_temp = (
        svs_default.isel(ensemble=i)[
            ['SNODEN_ML','SNOMA_ML','SNODP','SNODOPT_ML','TSNOW_ML','SNOTYPE_ML']
        ]
        .to_dataframe()
        .dropna()
    )
    # Nothing to do for members without any complete row.
    if svs_default_temp.empty:
        continue

    # ssa = 6 / (SNODOPT_ML * 917); rows with a non-positive diameter get NaN
    # and are removed by the dropna() that follows.
    svs_default_temp['ssa'] = svs_default_temp['SNODOPT_ML'].apply(
        lambda d_opt: 6./(d_opt*917) if d_opt > 0 else np.nan
    )
    svs_default_temp = svs_default_temp.dropna()
    # thickness = SNOMA_ML / SNODEN_ML, computed row by row.
    svs_default_temp['thickness'] = svs_default_temp[['SNODEN_ML','SNOMA_ML']].apply(
        lambda row: row.iloc[1] / row.iloc[0], axis=1
    )

    for cur_date in svs_default_temp.index.get_level_values(0).unique():
        # Reverse cumulative sum of thickness gives the height of each layer.
        layer_thickness = svs_default_temp.loc[cur_date].thickness.values
        svs_default_temp.loc[cur_date, 'height'] = np.cumsum(layer_thickness[::-1])[::-1]

        # Cluster the layers of this date into two groups on (ke, height).
        ke = compute_ke(svs_default_temp.loc[cur_date, ['thickness', 'TSNOW_ML', 'SNODEN_ML', 'ssa', 'height']])
        X = pd.DataFrame({'ke': ke, 'height': svs_default_temp.loc[cur_date].height})
        kmeans = KMeans(n_clusters=2, random_state=0, n_init="auto").fit(X)
        svs_default_temp.loc[cur_date, 'label'] = kmeans.labels_

        # Collapse each cluster into one layer; clusters keep their order of
        # first appearance (sort=False).
        merged_layers = (
            svs_default_temp.loc[cur_date]
            .groupby('label', sort=False)
            .apply(lambda grp: avg_snow_sum_thick(grp, method='thick'), include_groups=False)
            .rename_axis('snow_layer')
        )
        svs_default_merged.loc[i, cur_date] = merged_layers.values

In [None]:
# Convert the merged table to an xarray Dataset; the index levels
# (ensemble, time, snow_layer) are expected to become the Dataset dimensions.
svs_default_res = xr.Dataset.from_dataframe(svs_default_merged)

In [None]:
# Write the reduced Default snowpacks to DATA_DIR; the '_MES' suffix marks the reduced output.
svs_default_res.to_netcdf(DATA_DIR / 'SVS-2_DefaultEnsembles_TVC02_MES.nc')

# Process top 30 Arctic Ensembles

In [None]:
# Load the Top 30 Arctic SVS-2 ensemble output relevant to the TVC 2018/19 experiment.
# NOTE: this rebinds `svs_arctic`, replacing the full Arctic ensemble loaded earlier.
svs_arctic = xr.open_dataset(DATA_DIR / 'SVS-2_ArcticTop30Ensembles_TVC02.nc')

In [None]:
# Build the (ensemble, time, snow_layer) row index of the output table.
# Every date (index level 0) that keeps at least one complete row after dropna()
# gets exactly two rows: one per merged layer, numbered 0 and 1.
ensembles, times, layers = [], [], []

for i in range(svs_arctic.sizes['ensemble']):
    svs_arctic_temp = (
        svs_arctic.isel(ensemble=i)[
            ['SNODEN_ML','SNOMA_ML','SNODP','SNODOPT_ML','TSNOW_ML','SNOTYPE_ML']
        ]
        .to_dataframe()
        .dropna()
    )
    # Ensemble members without a single complete row contribute no index entries.
    if svs_arctic_temp.empty:
        continue
    for cur_date in svs_arctic_temp.index.get_level_values(0).unique():
        ensembles += [i, i]
        times += [cur_date, cur_date]
        layers += [0, 1]

# Empty (column-less) table; its columns are created by the next cell.
indexes = pd.MultiIndex.from_arrays(
    [ensembles, times, layers],
    names=('ensemble', 'time', 'snow_layer'),
)
svs_arctic_merged = pd.DataFrame({}, index=indexes)

In [None]:
# Create the output columns and fill the first two ensemble members (positions 0
# and 1); the loop cell that follows handles positions 2 and up.
#
# For every date of a member, the multi-layer profile is split into two groups by
# K-means clustering on (ke, height) and each group is collapsed into one layer
# with avg_snow_sum_thick (custom helper from res_functions).
for i in range(min(2, svs_arctic.sizes['ensemble'])):
    svs_arctic_temp = svs_arctic.isel(ensemble=i)[['SNODEN_ML','SNOMA_ML','SNODP','SNODOPT_ML','TSNOW_ML','SNOTYPE_ML']].to_dataframe().dropna()
    if svs_arctic_temp.empty:
        # Member has no complete rows, so it has no rows in svs_arctic_merged either.
        continue

    # ssa = 6 / (SNODOPT_ML * 917); 917 is presumably the density of ice in kg m-3
    # (TODO confirm). Non-positive diameters become NaN and are dropped right after.
    svs_arctic_temp['ssa'] = svs_arctic_temp['SNODOPT_ML'].apply(lambda x : 6./(x*917) if x > 0 else np.nan)
    svs_arctic_temp = svs_arctic_temp.dropna()
    # Layer thickness = SNOMA_ML / SNODEN_ML (second selected column over the first).
    svs_arctic_temp['thickness'] = svs_arctic_temp[['SNODEN_ML','SNOMA_ML']].apply(lambda x : x.iloc[1] / x.iloc[0], axis = 1)

    for cur_date in svs_arctic_temp.index.get_level_values(0).unique():
        # Height of each layer: thickness accumulated from the last layer of the
        # date back to the current one (reverse cumulative sum).
        svs_arctic_temp.loc[cur_date,'height'] = np.cumsum(svs_arctic_temp.loc[cur_date].thickness.values[::-1])[::-1]

        X = pd.DataFrame({ 'ke' : compute_ke(svs_arctic_temp.loc[cur_date,['thickness', 'TSNOW_ML', 'SNODEN_ML', 'ssa', 'height']]),
                           'height' : svs_arctic_temp.loc[cur_date].height})
        kmeans = KMeans(n_clusters=2, random_state=0, n_init="auto").fit(X)
        svs_arctic_temp.loc[cur_date,'label'] = kmeans.labels_

        # One row per cluster, in order of first appearance in the profile.
        merged_layers = svs_arctic_temp.loc[cur_date].groupby('label',
                                                              sort = False).apply(lambda x: avg_snow_sum_thick(x, method = 'thick'),
                                                                                  include_groups=False).rename_axis('snow_layer')

        if svs_arctic_merged.columns.empty:
            # The first result only defines the output columns. The join matches on
            # 'snow_layer' alone and therefore copies this single profile onto every
            # (ensemble, time) row, so blank those copies: rows that never receive
            # their own result must stay NaN instead of carrying another date's data.
            svs_arctic_merged = svs_arctic_merged.join(merged_layers)
            svs_arctic_merged = svs_arctic_merged.mask(svs_arctic_merged.notna())

        # Store this date's two merged layers, same assignment as in the loop cell.
        svs_arctic_merged.loc[i,cur_date] = merged_layers.values

In [None]:
# Reduce the remaining ensemble members (positions 2 and up) to two layers per date
# and store the result in the rows of svs_arctic_merged reserved for them.
for i in range(2, svs_arctic.sizes['ensemble']):
    svs_arctic_temp = (
        svs_arctic.isel(ensemble=i)[
            ['SNODEN_ML','SNOMA_ML','SNODP','SNODOPT_ML','TSNOW_ML','SNOTYPE_ML']
        ]
        .to_dataframe()
        .dropna()
    )
    # Nothing to do for members without any complete row.
    if svs_arctic_temp.empty:
        continue

    # ssa = 6 / (SNODOPT_ML * 917); rows with a non-positive diameter get NaN
    # and are removed by the dropna() that follows.
    svs_arctic_temp['ssa'] = svs_arctic_temp['SNODOPT_ML'].apply(
        lambda d_opt: 6./(d_opt*917) if d_opt > 0 else np.nan
    )
    svs_arctic_temp = svs_arctic_temp.dropna()
    # thickness = SNOMA_ML / SNODEN_ML, computed row by row.
    svs_arctic_temp['thickness'] = svs_arctic_temp[['SNODEN_ML','SNOMA_ML']].apply(
        lambda row: row.iloc[1] / row.iloc[0], axis=1
    )

    for cur_date in svs_arctic_temp.index.get_level_values(0).unique():
        # Reverse cumulative sum of thickness gives the height of each layer.
        layer_thickness = svs_arctic_temp.loc[cur_date].thickness.values
        svs_arctic_temp.loc[cur_date, 'height'] = np.cumsum(layer_thickness[::-1])[::-1]

        # Cluster the layers of this date into two groups on (ke, height).
        ke = compute_ke(svs_arctic_temp.loc[cur_date, ['thickness', 'TSNOW_ML', 'SNODEN_ML', 'ssa', 'height']])
        X = pd.DataFrame({'ke': ke, 'height': svs_arctic_temp.loc[cur_date].height})
        kmeans = KMeans(n_clusters=2, random_state=0, n_init="auto").fit(X)
        svs_arctic_temp.loc[cur_date, 'label'] = kmeans.labels_

        # Collapse each cluster into one layer; clusters keep their order of
        # first appearance (sort=False).
        merged_layers = (
            svs_arctic_temp.loc[cur_date]
            .groupby('label', sort=False)
            .apply(lambda grp: avg_snow_sum_thick(grp, method='thick'), include_groups=False)
            .rename_axis('snow_layer')
        )
        svs_arctic_merged.loc[i, cur_date] = merged_layers.values

In [None]:
# Convert the merged table to an xarray Dataset; the index levels
# (ensemble, time, snow_layer) are expected to become the Dataset dimensions.
svs_arctic_res = xr.Dataset.from_dataframe(svs_arctic_merged)

In [None]:
# Write the reduced Top 30 Arctic snowpacks to DATA_DIR; the '_MES' suffix marks the reduced output.
svs_arctic_res.to_netcdf(DATA_DIR / 'SVS-2_ArcticTop30Ensembles_TVC02_MES.nc')

# Process top 30 default ensembles

In [None]:
# Load the Top 30 Default SVS-2 ensemble output relevant to the TVC 2018/19 experiment.
# NOTE: this rebinds `svs_default`, replacing the full Default ensemble loaded earlier.
svs_default = xr.open_dataset(DATA_DIR / 'SVS-2_DefaultTop30Ensembles_TVC02.nc')

In [None]:
# Build the (ensemble, time, snow_layer) row index of the output table.
# Every date (index level 0) that keeps at least one complete row after dropna()
# gets exactly two rows: one per merged layer, numbered 0 and 1.
ensembles, times, layers = [], [], []

for i in range(svs_default.sizes['ensemble']):
    svs_default_temp = (
        svs_default.isel(ensemble=i)[
            ['SNODEN_ML','SNOMA_ML','SNODP','SNODOPT_ML','TSNOW_ML','SNOTYPE_ML']
        ]
        .to_dataframe()
        .dropna()
    )
    # Ensemble members without a single complete row contribute no index entries.
    if svs_default_temp.empty:
        continue
    for cur_date in svs_default_temp.index.get_level_values(0).unique():
        ensembles += [i, i]
        times += [cur_date, cur_date]
        layers += [0, 1]

# Empty (column-less) table; its columns are created by the next cell.
indexes = pd.MultiIndex.from_arrays(
    [ensembles, times, layers],
    names=('ensemble', 'time', 'snow_layer'),
)
svs_default_merged = pd.DataFrame({}, index=indexes)

In [None]:
# Create the output columns and fill the first two ensemble members (positions 0
# and 1); the loop cell that follows handles positions 2 and up.
#
# For every date of a member, the multi-layer profile is split into two groups by
# K-means clustering on (ke, height) and each group is collapsed into one layer
# with avg_snow_sum_thick (custom helper from res_functions).
for i in range(min(2, svs_default.sizes['ensemble'])):
    svs_default_temp = svs_default.isel(ensemble=i)[['SNODEN_ML','SNOMA_ML','SNODP','SNODOPT_ML','TSNOW_ML','SNOTYPE_ML']].to_dataframe().dropna()
    if svs_default_temp.empty:
        # Member has no complete rows, so it has no rows in svs_default_merged either.
        continue

    # ssa = 6 / (SNODOPT_ML * 917); 917 is presumably the density of ice in kg m-3
    # (TODO confirm). Non-positive diameters become NaN and are dropped right after.
    svs_default_temp['ssa'] = svs_default_temp['SNODOPT_ML'].apply(lambda x : 6./(x*917) if x > 0 else np.nan)
    svs_default_temp = svs_default_temp.dropna()
    # Layer thickness = SNOMA_ML / SNODEN_ML (second selected column over the first).
    svs_default_temp['thickness'] = svs_default_temp[['SNODEN_ML','SNOMA_ML']].apply(lambda x : x.iloc[1] / x.iloc[0], axis = 1)

    for cur_date in svs_default_temp.index.get_level_values(0).unique():
        # Height of each layer: thickness accumulated from the last layer of the
        # date back to the current one (reverse cumulative sum).
        svs_default_temp.loc[cur_date,'height'] = np.cumsum(svs_default_temp.loc[cur_date].thickness.values[::-1])[::-1]

        X = pd.DataFrame({ 'ke' : compute_ke(svs_default_temp.loc[cur_date,['thickness', 'TSNOW_ML', 'SNODEN_ML', 'ssa', 'height']]),
                           'height' : svs_default_temp.loc[cur_date].height})
        kmeans = KMeans(n_clusters=2, random_state=0, n_init="auto").fit(X)
        svs_default_temp.loc[cur_date,'label'] = kmeans.labels_

        # One row per cluster, in order of first appearance in the profile.
        merged_layers = svs_default_temp.loc[cur_date].groupby('label',
                                                               sort = False).apply(lambda x: avg_snow_sum_thick(x, method = 'thick'),
                                                                                   include_groups=False).rename_axis('snow_layer')

        if svs_default_merged.columns.empty:
            # The first result only defines the output columns. The join matches on
            # 'snow_layer' alone and therefore copies this single profile onto every
            # (ensemble, time) row, so blank those copies: rows that never receive
            # their own result must stay NaN instead of carrying another date's data.
            svs_default_merged = svs_default_merged.join(merged_layers)
            svs_default_merged = svs_default_merged.mask(svs_default_merged.notna())

        # Store this date's two merged layers, same assignment as in the loop cell.
        svs_default_merged.loc[i,cur_date] = merged_layers.values

In [None]:
# Reduce the remaining ensemble members (positions 2 and up) to two layers per date
# and store the result in the rows of svs_default_merged reserved for them.
for i in range(2, svs_default.sizes['ensemble']):
    svs_default_temp = (
        svs_default.isel(ensemble=i)[
            ['SNODEN_ML','SNOMA_ML','SNODP','SNODOPT_ML','TSNOW_ML','SNOTYPE_ML']
        ]
        .to_dataframe()
        .dropna()
    )
    # Nothing to do for members without any complete row.
    if svs_default_temp.empty:
        continue

    # ssa = 6 / (SNODOPT_ML * 917); rows with a non-positive diameter get NaN
    # and are removed by the dropna() that follows.
    svs_default_temp['ssa'] = svs_default_temp['SNODOPT_ML'].apply(
        lambda d_opt: 6./(d_opt*917) if d_opt > 0 else np.nan
    )
    svs_default_temp = svs_default_temp.dropna()
    # thickness = SNOMA_ML / SNODEN_ML, computed row by row.
    svs_default_temp['thickness'] = svs_default_temp[['SNODEN_ML','SNOMA_ML']].apply(
        lambda row: row.iloc[1] / row.iloc[0], axis=1
    )

    for cur_date in svs_default_temp.index.get_level_values(0).unique():
        # Reverse cumulative sum of thickness gives the height of each layer.
        layer_thickness = svs_default_temp.loc[cur_date].thickness.values
        svs_default_temp.loc[cur_date, 'height'] = np.cumsum(layer_thickness[::-1])[::-1]

        # Cluster the layers of this date into two groups on (ke, height).
        ke = compute_ke(svs_default_temp.loc[cur_date, ['thickness', 'TSNOW_ML', 'SNODEN_ML', 'ssa', 'height']])
        X = pd.DataFrame({'ke': ke, 'height': svs_default_temp.loc[cur_date].height})
        kmeans = KMeans(n_clusters=2, random_state=0, n_init="auto").fit(X)
        svs_default_temp.loc[cur_date, 'label'] = kmeans.labels_

        # Collapse each cluster into one layer; clusters keep their order of
        # first appearance (sort=False).
        merged_layers = (
            svs_default_temp.loc[cur_date]
            .groupby('label', sort=False)
            .apply(lambda grp: avg_snow_sum_thick(grp, method='thick'), include_groups=False)
            .rename_axis('snow_layer')
        )
        svs_default_merged.loc[i, cur_date] = merged_layers.values

In [None]:
# Convert the merged table to an xarray Dataset; the index levels
# (ensemble, time, snow_layer) are expected to become the Dataset dimensions.
svs_default_res = xr.Dataset.from_dataframe(svs_default_merged)

In [None]:
# Write the reduced Top 30 Default snowpacks to DATA_DIR; the '_MES' suffix marks the reduced output.
svs_default_res.to_netcdf(DATA_DIR / 'SVS-2_DefaultTop30Ensembles_TVC02_MES.nc')