# Density and layering analysis
*Josh King, Environment and Climate Change Canada, 2019*

Analysis of the SMP-derived snow density and layering products.


In [None]:
import os
import string
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib import gridspec
plt.rcParams["font.family"] = "Times New Roman"
import pickle
from scipy.stats import mode
import geopandas as gpd
from shapely.geometry import Point
from geostatsmodels import utilities, variograms

# Plot settings
# Font sizes shared by every figure in this notebook.
axis_value_size = 12  # passed as tick-label and legend font size in Figures 7 and 8
axis_label_size = 14  # passed as axis-label/title font size (also tick labels in Figures 9 and 10)

In [None]:
# Import classified SMP profiles and density
site_path = './output/sites'


def _list_site_files(tag):
    """Return sorted paths of the regular files in ``site_path`` whose name contains ``tag``."""
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the concatenated frames reproducible between runs and machines.
    paths = (os.path.join(site_path, name) for name in sorted(os.listdir(site_path)))
    return [path for path in paths
            if tag in os.path.basename(path) and os.path.isfile(path)]


summary_files = _list_site_files('Summary')
data_files = _list_site_files('Data')

# NOTE: pd.read_pickle unpickles arbitrary objects; only point site_path at
# files produced by this project's own processing.
data_list = [pd.read_pickle(filename) for filename in data_files]
data_df = pd.concat(data_list, axis=0, ignore_index=True)

summary_list = [pd.read_pickle(filename) for filename in summary_files]
summary_df = pd.concat(summary_list, axis=0, ignore_index=True)

# The campaign is the part of the site name before the first underscore
campaign_name = [x.split('_')[0] for x in summary_df['site_name'].values]
summary_df['campaign_name'] = np.array(campaign_name)

# Remove bad measurement rows; these slipped through but have no penetration
bad_files = ['S34M0399', 'S34M0401']
# Re-number the rows after filtering so the index stays 0..n-1; frames built
# later with a fresh default index are concatenated column-wise against this
# one and would otherwise be matched to the wrong profiles.
summary_df = summary_df[~summary_df['file_name'].isin(bad_files)].reset_index(drop=True)
summary_df.head(1)

In [None]:
# Total number of density measurements
smp_sample_spacing = 0.0025  # vertical distance represented by one density estimate, in m
total_rho_meas = len(data_df)
total_smp_thickness = total_rho_meas * smp_sample_spacing  # in m
print('Total density measurements: %i' % total_rho_meas)
# Message typo fixed ("vectical" -> "vertical")
print('Total vertical profile: %0.2f' % total_smp_thickness)

In [None]:
# Site level statistics
# summary_df carries string columns (file_name, ice_type, campaign_name);
# numeric_only=True averages just the numeric ones. Without it, pandas >= 2.0
# raises a TypeError, while older releases silently dropped those columns.
# NOTE(review): only the last expression of a notebook cell is rendered, so
# this table is computed but not shown unless it is displayed explicitly.
np.round(summary_df.groupby('site_name').mean(numeric_only=True), 3)

# Mean density by campaign and ice type
summary_df.groupby(['campaign_name', 'ice_type'])['mean_weighted_density'].mean()

In [None]:
# Count transitions between layer classifications as a proxy for the
# number of layers within each SMP profile

# Position of each layer-type label in the (r, f, h) count vector
_LAYER_COLUMN = {'R': 0, 'F': 1, 'H': 2}


def _count_layers(labels):
    """Count the contiguous runs of each layer type in one profile.

    Parameters
    ----------
    labels : sequence of str
        Per-sample layer labels ('R', 'F' or 'H'), in the order they are
        stored in ``data_df``.

    Returns
    -------
    numpy.ndarray
        Integer counts ``[rounded, faceted, depth hoar]``; all zeros when
        ``labels`` is empty.

    Raises
    ------
    KeyError
        If a layer starts with a label other than 'R', 'F' or 'H'.
    """
    labels = np.asarray(labels, dtype=object)
    counts = np.zeros(3, dtype=int)
    if labels.size == 0:
        return counts
    # A layer starts at sample 0 and at every sample whose label differs from
    # the sample before it. (The previous version sampled the first layer at
    # index 1, which miscounted a one-sample top layer and failed on
    # one-sample profiles.)
    layer_top = np.insert(np.flatnonzero(labels[1:] != labels[:-1]) + 1, 0, 0)
    for label in labels[layer_top]:
        counts[_LAYER_COLUMN[label]] += 1
    return counts


# Group the labels once instead of masking data_df for every profile
labels_by_file = {
    file_name: group.values
    for file_name, group in data_df.groupby('file_name', sort=False)['layer_label']
}
no_labels = np.array([], dtype=object)
layer_rows = [_count_layers(labels_by_file.get(file_name, no_labels))
              for file_name in summary_df['file_name']]

# Build the frame in one go (DataFrame.append was removed in pandas 2.0) and
# reuse summary_df's index: summary_df may have gaps in its index after bad
# rows were dropped, and the column-wise concat below aligns on index labels.
layer_df = pd.DataFrame(layer_rows, columns=['r', 'f', 'h'], index=summary_df.index)
layer_df['l_total'] = layer_df.sum(axis=1)
summary_layers = pd.concat([summary_df, layer_df], axis=1)

In [None]:
# Layer count summary by layer type, campaign and ice type
# Descriptive statistics of the total layer count per ice type, to 1 decimal
total_layers_by_ice = summary_layers.groupby(['ice_type'])['l_total']
total_layers_by_ice.describe().round(1)

In [None]:
# Descriptive statistics of the per-type layer counts for each ice type.
# Several columns must be selected with a list; the bare-tuple form
# ['r', 'f', 'h'] is rejected by pandas >= 2.0.
np.round(summary_layers.groupby(['ice_type'])[['r', 'f', 'h']].describe(), 1)

In [None]:
# Descriptive statistics of the total layer count per campaign and ice type
total_layers_by_campaign = summary_layers.groupby(['campaign_name', 'ice_type'])['l_total']
total_layers_by_campaign.describe().round(1)

In [None]:
# Paper figure 9 with caption
# ' Number of layers characterized within SMP profiles separated by layer-type classification. 
#   Layers were counted where transitions between layer-type classifications were found in the SMP profiles.'

# Unit-width bins from zero up to the largest total layer count found on MYI
data_min = 0
bin_size = 1
data_max = summary_layers.loc[summary_layers['ice_type'] == 'MYI', 'l_total'].max().astype(int)
bins_layers = range(data_min, data_max + bin_size, bin_size)

# Top row: FYI, bottom row: MYI; one column per layer type
f, axes = plt.subplots(2, 3, sharey=True, figsize=(13, 8))
(ax1, ax2, ax3), (ax4, ax5, ax6) = axes

for ax in axes.flat:
    ax.tick_params(axis='both', which='major', labelsize=axis_label_size)
    ax.set_xlim(0, 8)

hist_kws = dict(bins=bins_layers,
                edgecolor="grey",
                alpha=1, grid=False,
                color='darkgrey', density=True)

panels = zip(('r', 'f', 'h'), (ax1, ax2, ax3), (ax4, ax5, ax6))
for column, fyi_ax, myi_ax in panels:
    summary_layers[summary_layers['ice_type'] == 'MYI'][column].hist(ax=myi_ax, **hist_kws)
    summary_layers[summary_layers['ice_type'] == 'FYI'][column].hist(ax=fyi_ax, **hist_kws)

for ax, title in zip((ax1, ax2, ax3), ('Rounded', 'Faceted', 'Depth hoar')):
    ax.set_title(title, fontsize=axis_label_size)

# Only the middle bottom panel carries the shared x label
ax5.set_xlabel('Number of layers [#]', fontsize=axis_label_size)
ax1.set_ylabel('Probability density on FYI', fontsize=axis_label_size)
ax4.set_ylabel('Probability density on MYI', fontsize=axis_label_size)

f.savefig('./output/figures/Fig09_Layers_lowres.png', format='png')

In [None]:
# Weighted mean density
# Each ice type is weighted by its share of the profiles that have a bulk
# density. The share is taken from the per-group counts themselves so the
# weights always sum to 1; the previous denominator, summary_df.count()[0],
# was the non-null count of whichever column came first and relied on a
# positional lookup into a label-indexed Series (deprecated in pandas 2.x).
# The two forms agree whenever neither column contains missing values.
density_by_ice = summary_df.groupby('ice_type')['mean_weighted_density']
group_counts = density_by_ice.count()
count_per = group_counts / group_counts.sum()
weighted_mean_density = (density_by_ice.mean() * count_per).sum()
# NOTE(review): this is a count-weighted average of the per-group standard
# deviations, not the standard deviation of the pooled sample.
weighted_std_density = (density_by_ice.std() * count_per).sum()
print(np.round(weighted_mean_density))
print(np.round(weighted_std_density))

In [None]:
# Descriptive statistics over every row of summary_df (all ice types and campaigns)
summary_df.describe()

In [None]:
# FYI Stats
# Descriptive statistics restricted to profiles flagged as first-year ice
on_fyi = summary_df["ice_type"].eq("FYI")
summary_df.loc[on_fyi].describe()

In [None]:
# MYI Stats
# Descriptive statistics restricted to profiles flagged as multiyear ice
on_myi = summary_df["ice_type"].eq("MYI")
summary_df.loc[on_myi].describe()

In [None]:
# MYI Eureka
# Descriptive statistics for multiyear-ice profiles of the Eureka campaign
myi_eureka = summary_df["ice_type"].eq("MYI") & summary_df["campaign_name"].eq("Eureka")
summary_df.loc[myi_eureka].describe()

In [None]:
# MYI Alert
# Descriptive statistics for multiyear-ice profiles of the Alert campaign
myi_alert = summary_df["ice_type"].eq("MYI") & summary_df["campaign_name"].eq("Alert")
summary_df.loc[myi_alert].describe()

In [None]:
# Figure 7 with caption
# 'Bulk density derived from SMP profiles collected on first year (FYI, n = 403) and multiyear (MYI, n = 211) sea ice (Left). 
#  Automated profile classification was used to separate the high vertical resolution (2.5 mm) estimates of snow density and produce
#  layer-type distributions for rounded, faceted and depth hoar classifications (Right).'

# Histogram bins size
common_bin_all = range(50,450, 10)
common_bin_dens = range(50,450, 15)

fig = plt.figure(figsize=(14, 5))
gs = gridspec.GridSpec(1, 4, width_ratios=[1.8, 1, 1, 1])

hist_kws = dict(histtype="stepfilled",
                grid=False,
                edgecolor="black",
                density=True,
                linewidth=1.25)


def _plot_density_panel(ax, column, bins, xlabel, myi_alpha=0.75):
    """Overlay the FYI (grey) and MYI (blue) histograms of ``column`` on ``ax``.

    The axes are always passed explicitly; the previous version drew the FYI
    rounded-density histogram on whatever axes happened to be current.
    """
    fyi_values = summary_df[summary_df["ice_type"] == "FYI"][column]
    myi_values = summary_df[summary_df["ice_type"] == "MYI"][column]

    fyi_values.hist(bins=bins, color='grey', alpha=1,
                    ax=ax, label='FYI', **hist_kws)
    myi_values.hist(bins=bins, color='deepskyblue', alpha=myi_alpha,
                    ax=ax, label='MYI', **hist_kws)

    ax.set_xlabel(xlabel, fontsize=axis_label_size)
    ax.set_xlim(150, 450)
    ax.tick_params(axis='both', which='major', labelsize=axis_value_size)
    ax.ticklabel_format(axis='y', style='sci', scilimits=(1, 5), useMathText=False)


# The labels are raw strings: '\m' is not a valid escape sequence and triggers
# a warning in current Python releases. The rendered text is unchanged.
ax0 = plt.subplot(gs[0])
_plot_density_panel(ax0, 'mean_weighted_density', common_bin_all,
                    r'Bulk density [kg m$\mathregular{^{-3}}$]', myi_alpha=0.8)
ax0.set_ylabel('Probability density', fontsize=axis_label_size)

ax1 = plt.subplot(gs[1])
_plot_density_panel(ax1, 'density_r', common_bin_dens,
                    r'Rounded [kg m$\mathregular{^{-3}}$]')

ax2 = plt.subplot(gs[2])
_plot_density_panel(ax2, 'density_f', common_bin_dens,
                    r'Faceted [kg m$\mathregular{^{-3}}$]')

ax3 = plt.subplot(gs[3])
_plot_density_panel(ax3, 'density_h', common_bin_dens,
                    r'Depth hoar [kg m$\mathregular{^{-3}}$]')
# Only the last panel shows the FYI/MYI legend
ax3.legend(loc=0, fontsize=axis_value_size)

plt.tight_layout()

# Panel letters a), b), c), d) in the upper-left corner of each axes
axs = [ax0, ax1, ax2, ax3]
for n, ax in enumerate(axs):
    ax.text(0.02, 0.92, string.ascii_lowercase[n]+')', transform=ax.transAxes,
            size=20, weight='bold')

fig.savefig('./output/figures/Fig07_Densitydist_lowres.png', format='png')


In [None]:
# Figure 8 with caption
# 'Fractional snowpack composition by rounded, faceted, and depth hoar layer types derived 
#  from the SMP transect profiles on first year (FYI) and multiyear (MYI) sea ice.'

# 20 equal-width bins covering the full 0-1 range. np.arange(0, 1, 0.05) ends
# at 0.95, which silently left every fraction above 0.95 out of the
# histograms (and out of their normalisation).
common_bin_vol = np.linspace(0, 1, 21)
f, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=True, figsize=(13, 5))

hist_kws = dict(bins=common_bin_vol,
                histtype="stepfilled",
                grid=False,
                edgecolor="black",
                density=True,
                linewidth=1.25)

fyi_profiles = summary_df[summary_df["ice_type"] == "FYI"]
myi_profiles = summary_df[summary_df["ice_type"] == "MYI"]

# NOTE(review): the x axis spans 0-1 while the labels read [%]; confirm which
# unit is intended before changing either.
panels = [(ax1, 'fraction_r', 'Rounded fraction [%]'),
          (ax2, 'fraction_f', 'Faceted fraction [%]'),
          (ax3, 'fraction_h', 'Depth hoar fraction [%]')]

for ax, column, xlabel in panels:
    fyi_profiles[column].hist(color='grey', ax=ax, label='FYI', **hist_kws)
    myi_profiles[column].hist(color='deepskyblue', alpha=0.75, ax=ax,
                              label='MYI', **hist_kws)
    ax.set_xlabel(xlabel, fontsize=axis_label_size)
    ax.tick_params(axis='both', which='major', labelsize=axis_value_size)
    ax.set_xlim(0, 1)

ax1.set_ylabel('Probability density', fontsize=axis_label_size)
ax1.set_ylim(0, 7)  # the y axis is shared, so this applies to all three panels
ax3.legend(loc=0, fontsize=axis_value_size)

f.savefig('./output/figures/Fig08_Fractional_lowres.png', format='png')

# Spatial analysis

In [None]:
# One shapely Point per profile, built as (longitude, latitude)
coords = list(map(Point, summary_df.longitude, summary_df.latitude))

In [None]:
# Attach the point geometries and tag them with EPSG:4326
# NOTE(review): the {'init': ...} CRS form is deprecated in recent
# pyproj/geopandas releases; confirm axis-order handling before modernising it.
summary_gdf = gpd.GeoDataFrame(summary_df, geometry=coords)
summary_gdf.crs = {'init' :'epsg:4326'}

# Reproject to EPSG:32616 and expose the projected coordinates as plain columns.
# NOTE(review): despite its name, summary_wgs84 holds the reprojected frame.
summary_wgs84 = summary_gdf.to_crs({'init': 'epsg:32616'})
projected_points = summary_wgs84.geometry
summary_wgs84['x'] = np.array([point.x for point in projected_points])
summary_wgs84['y'] = np.array([point.y for point in projected_points])

# Discard every row that has a missing value in any column
summary_wgs84.dropna(inplace=True)

In [None]:
def spatial_correlation(data, lags, tol):
    """Estimate the correlation of a variable between point pairs at each lag.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array whose column 2 holds the variable of interest. The array is
        handed to ``utilities.pairwise`` to obtain pairwise distances
        (presumably from the coordinate columns 0 and 1 -- confirm against
        geostatsmodels).
    lags : iterable of float
        Separation distances at which pairs are selected.
    tol : float
        Tolerance passed to ``variograms.lagindices`` for every lag.

    Returns
    -------
    tuple of (list, list)
        The correlation estimate per lag, and the number of pairs per lag.
    """
    distances = utilities.pairwise(data)
    values = data[:, 2]

    correlations = []
    pair_counts = []
    for lag in lags:
        pairs = variograms.lagindices(distances, lag, tol)
        head = values[pairs[:, 0]]
        tail = values[pairs[:, 1]]
        pair_counts.append(len(head))

        # Remove each side's own mean before forming the products
        head_dev = head - np.mean(head)
        tail_dev = tail - np.mean(tail)

        # Cross-product sum normalised by the head side's sum of squares only
        covariance_sum = np.sum(head_dev * tail_dev)
        head_square_sum = np.sum(head_dev ** 2)
        correlations.append(covariance_sum / head_square_sum)

    return correlations, pair_counts

In [None]:
# Spatial analysis settings
# We set a tolerance of +-5 m to match the SMP GPS accuracy
gps_accuracy = 5
bins = 1 # in m
min_bin = gps_accuracy
max_dist = 100 # Not enough data beyond this
tol = gps_accuracy
lags = np.arange(min_bin, max_dist, bins)


def _fraction_points(ice_type, column):
    """Return an array with columns (x, y, ``column``) for one ice type."""
    subset = summary_wgs84[summary_wgs84['ice_type'] == ice_type]
    return np.array(subset[['x', 'y', column]])


# Format input
fyi_h = _fraction_points('FYI', 'fraction_h')
fyi_f = _fraction_points('FYI', 'fraction_f')
fyi_r = _fraction_points('FYI', 'fraction_r')

myi_h = _fraction_points('MYI', 'fraction_h')
myi_f = _fraction_points('MYI', 'fraction_f')
myi_r = _fraction_points('MYI', 'fraction_r')

h_corr_fyi, n_h_fyi = spatial_correlation(fyi_h, lags, tol)
f_corr_fyi, n_f_fyi = spatial_correlation(fyi_f, lags, tol)
r_corr_fyi, n_r_fyi = spatial_correlation(fyi_r, lags, tol)
print('Avg # pairs on FYI: %i' % np.asarray(n_h_fyi).mean())


h_corr_myi, n_h_myi = spatial_correlation(myi_h, lags, tol)
# Pair-count name fixed (was the typo n_f_myii) to match its siblings
f_corr_myi, n_f_myi = spatial_correlation(myi_f, lags, tol)
r_corr_myi, n_r_myi = spatial_correlation(myi_r, lags, tol)
print('Avg # pairs on MYI: %i' % np.asarray(n_h_myi).mean())

In [None]:
# Figure 10 with caption
# 'Spatial auto-correlation by layer-type composition on FYI and MYI as estimated from classified SMP profiles. 
#  Dotted lines show assumed correlation at length scales less than 1 m where geolocation uncertainty of the profiles precludes analysis.'

f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(15, 5))

line_kws = dict(linestyle='-',
                linewidth=1.5)

# (correlation curve, colour, legend label), listed depth hoar -> faceted -> rounded
fyi_curves = [(h_corr_fyi, "black"), (f_corr_fyi, "teal"), (r_corr_fyi, "green")]
myi_curves = [(h_corr_myi, "black", 'Depth hoar'),
              (f_corr_myi, "teal", 'Faceted'),
              (r_corr_myi, "green", 'Round')]

# Missing data where distances are within GPS noise: join (0, 1) to the first
# estimated point with a dotted segment
for corr, colour in fyi_curves:
    ax1.plot([0, min_bin], [1, corr[0]], color=colour, linestyle=':')

# Plot correlation
for corr, colour in fyi_curves:
    ax1.plot(lags, corr, color=colour, **line_kws)

for corr, colour, _ in myi_curves:
    ax2.plot([0, min_bin], [1, corr[0]], color=colour, linestyle=':')

# Drawn rounded -> faceted -> depth hoar so the legend lists them in that order
for corr, colour, name in reversed(myi_curves):
    ax2.plot(lags, corr, label=name, color=colour, **line_kws)

ax2.legend(fontsize=axis_label_size)

for ax, title in ((ax1, "First-year ice"), (ax2, "Multi-year ice")):
    ax.set_title(title, fontsize=axis_label_size)
    ax.tick_params(axis='both', which='major', labelsize=axis_label_size)
    ax.set_ylim(-.4, 1)
    ax.set_xlim(0, max_dist)
    ax.set_xlabel("Horizontal distance [m]", fontsize=axis_label_size)

ax1.set_ylabel("Correlation [-]", fontsize=axis_label_size)

f.savefig('./output/figures/Fig10_Scales_lowres.png', format='png')

In [None]:
# Lag at which correlation is maxed
# The arg-max is a position within h_corr_fyi; index `lags` with it so the
# cell reports the separation distance itself rather than the array position.
# nanargmax ignores lags for which no correlation could be computed.
lags[np.nanargmax(h_corr_fyi)]

In [None]:
# Max correlation beyond the 1 m.
# Largest depth-hoar correlation estimate on FYI, to two decimals
peak_h_corr_fyi = np.max(h_corr_fyi)
np.round(peak_h_corr_fyi, 2)

In [None]:
# Distance to 0 for faceted layers on MYI
# Positions where the correlation has dropped to zero or below; the first one
# is mapped through `lags` to give a distance rather than an array position.
# NaN is reported when the curve never reaches zero (previously an IndexError).
zero_crossings = np.flatnonzero(np.asarray(f_corr_myi) <= 0)
lags[zero_crossings[0]] if zero_crossings.size else np.nan