Explore some comparisons between manually-delineated end-of-summer snow lines and the results from our automated delineation

In [1]:
import rasterio as rio
import numpy as np
import os
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
import geopandas as gpd
import gc
import shapely
import xarray as xr
from rasterio.mask import mask
import importlib

import sys
sys.path.append('../')
import snowFun

In [2]:
# ---- folder and file locations ----
# root of the AGVA project directory; everything else is resolved relative to it
folder_AGVA = os.path.join('C:',os.sep,'Users','lzell','OneDrive - Colostate','Desktop',"AGVA")

# manual validation inputs (hand-traced snowlines and the per-glacier "best image" tables)
folder_validation = os.path.join(folder_AGVA, 'Validation')
folder_validationlines = os.path.join(folder_validation, 'Snowline Traces')
folder_best_images = os.path.join(folder_validation, 'Best Images')

# glacier masks
# NOTE: 'Derived products' is spelled with a lowercase 'p' here but as 'Derived Products' below;
# the two only resolve to the same directory on a case-insensitive filesystem
folder_mask = os.path.join(folder_AGVA, 'Derived products', 'S2', 'Masks')

# RGI outlines
path_rgi = os.path.join(folder_AGVA, 'RGI', "rgi_2km_o3regions", "rgi_2km_o3regions.shp")
rgi_gdf = gpd.read_file(path_rgi)

# choose whether to use the 'temporary' products from the validation subfolder (truthy)
# or the 'master' copy (falsy)
validation_folder_specific = 1

# both variants share the same layout below their parent folder:
#   <parent>/Annual AAs        -> annual accumulation-area products
#   <parent>/Annual AAs/csv    -> annual ELA tables
s2_products = os.path.join(folder_AGVA, 'Derived Products', 'S2')
aa_parent = os.path.join(s2_products, 'validation') if validation_folder_specific else s2_products
annual_aa_folder = os.path.join(aa_parent, 'Annual AAs')
annual_ela_folder = os.path.join(annual_aa_folder, 'csv')

In [3]:
# open list of validation glaciers
all_validation_df = pd.read_csv(os.path.join(folder_AGVA, 'Validation', 'Validation Glaciers.csv'))
rgis_to_analyze = all_validation_df['RGIId'].values

# look up the area of each validation glacier in the RGI table.
# Indexing the table once by RGIId gives one scalar per glacier (NaN when an id is not in
# the RGI table) instead of a one-element array per row, so the column sorts numerically.
area_by_rgi = rgi_gdf[['RGIId', 'Area']].drop_duplicates(subset='RGIId').set_index('RGIId')['Area']
areas = area_by_rgi.reindex(rgis_to_analyze).values

# make df
rgis_to_analyze_df = pd.DataFrame({"RGIId":rgis_to_analyze, 'Area':areas})

# sort however you want (glaciers without a known area end up last)
rgis_to_analyze_df = rgis_to_analyze_df.sort_values('Area')

# grab rgi names, now in sorted order
rgis_to_analyze = rgis_to_analyze_df['RGIId'].values

# get list of files in best images folder
# (sorted, because os.listdir gives no ordering guarantee and a later cell takes the first match)
best_images_files = sorted(os.listdir(folder_best_images))

print(len(rgis_to_analyze_df))
best_images_files

45


['RGI60-01.00557_2_noname.csv',
 'RGI60-01.00565_2_Spur_Glacier.csv',
 'RGI60-01.00570_2_Gulkana_Glacier.csv',
 'RGI60-01.00787_6_noname.csv',
 'RGI60-01.01104_6_Lemon_Creek_Glacier.csv',
 'RGI60-01.01270_6_noname.csv',
 'RGI60-01.01390_6_Taku_Glacier.csv',
 'RGI60-01.01666_4_noname.csv',
 'RGI60-01.01743_4_Sherman_Glacier.csv',
 'RGI60-01.02584_6_noname.csv',
 'RGI60-01.03215_6_noname.csv',
 'RGI60-01.03379_6_noname.csv',
 'RGI60-01.03741_6_Great_Glacier.csv',
 'RGI60-01.05007_6_noname.csv',
 'RGI60-01.05078_6_noname.csv',
 'RGI60-01.08989_4_Eklutna_Glacier.csv',
 'RGI60-01.09162_4_Wolverine_Glacier.csv',
 'RGI60-01.09216_4_noname.csv',
 'RGI60-01.09624_4_noname.csv',
 'RGI60-01.09656_4_Langdon_Glacier.csv',
 'RGI60-01.09798_4_Bainbridge_Glacier.csv',
 'RGI60-01.10255_4_noname.csv',
 'RGI60-01.10910_4_noname.csv',
 'RGI60-01.12165_2_noname.csv',
 'RGI60-01.12186_2_noname.csv',
 'RGI60-01.12548_2_noname.csv',
 'RGI60-01.13462_5_noname.csv',
 'RGI60-01.13483_5_noname.csv',
 'RGI60-01.13

### Let's plot the manually-mapped snowlines for each year on top of the automated end-of-summer snow cover product from each year

In [5]:
# For each validation glacier, build a per-year table that puts the automated end-of-summer
# product (date, ELA, AAR, off-glacier flag) next to the same quantities derived from the
# manually traced snowline, and write that table to csv.

# years covered by the comparison (they also appear in the automated product's file name)
years = range(2018, 2023)

# placeholder stored when a year has no usable manual snowline
no_data = -9999

for glacier_n, rgi_i in enumerate(rgis_to_analyze):

    # NOTE: this filter restricts the run to a single glacier; remove it to process all of them
    if rgi_i!='RGI60-01.08989': continue

    # grab the rgi outline that these lines correspond to
    rgi_i_gdf = rgi_gdf[rgi_gdf['RGIId']==rgi_i].to_crs("EPSG:3338")
    ga = rgi_i_gdf['Area'].values[0]

    print(f"{glacier_n+1} of {len(rgis_to_analyze)} - {rgi_i} {ga}")

    # open glacier mask
    glacier_mask = xr.open_dataset(os.path.join(folder_mask, f"S2_{rgi_i}_mask.nc"), chunks='auto').glacier
    if ga>500:
        # for the largest glaciers, coarsen the mask by a factor of 3 in x and y
        glacier_mask = glacier_mask.coarsen({"x":3, "y":3}, boundary="trim").median(skipna=True).astype('uint8')
    glacier_pixels = np.nansum(glacier_mask)

    # set file name/path of the manual snowline traces
    name = f'{rgi_i[:5]}_{rgi_i[6:8]}_{rgi_i[9:]}.geojson'
    line_path = os.path.join(folder_validationlines, name)

    # decide which columns you want to drop
    bad_cols = ['id', 'SLCFlag', 'cloudShadowFlag', 'notes', 'otherImageFlag', 'partialPickFlag']
    snowlines_gdf = gpd.read_file(line_path).drop(bad_cols, axis=1).to_crs("EPSG:3338")

    # add year as column ('Year' becomes the index below, 'y2' stays available for filtering)
    snowlines_gdf['Year'] = [int(date[:4]) for date in snowlines_gdf['Date']]
    snowlines_gdf['y2'] = snowlines_gdf['Year']

    # in some instances there will be multiple entries for a single date (when the user went back to edit a previous image's snowline)
    # in this case, we only want to keep the entry that has the full record (so the one with the latest index)
    snowlines_gdf = snowlines_gdf[~snowlines_gdf.duplicated(subset='Date', keep='last')]

    # clip to rgi outline (buffer inwards 10m to ensure we don't hit the edges)
    snowlines_gdf = snowlines_gdf.clip(rgi_i_gdf.buffer(-10))
    snowlines_gdf = snowlines_gdf.set_index('Year')

    # open best images file (first file whose name starts with this rgi id), one row per year
    fname = [f for f in best_images_files if f.startswith(rgi_i)][0]
    best_images_df = pd.read_csv(os.path.join(folder_best_images, fname))[:len(years)]
    best_images_df['Year'] = [int(date[:4]) for date in best_images_df['Year']]
    best_images_df = best_images_df.set_index('Year')

    # open the automated annual product for this glacier
    annual_ela_path = os.path.join(annual_ela_folder, f"S2_{rgi_i}_{years[0]}_{years[-1]}_annual_AAs.csv")
    annual_ela_df = pd.read_csv(annual_ela_path)
    annual_ela_df['Year'] = [int(date[:4]) for date in annual_ela_df['date']]
    annual_ela_df = annual_ela_df.set_index('Year')
    # flags stored as the strings 'True'/'False' are mapped to '1'/'0' so the column can be cast to int
    annual_ela_df = annual_ela_df.replace({'False':'0', 'True':'1'})

    # create df to save all the info, one row per year
    save_df = pd.DataFrame({'Year':np.arange(years.start, years.stop)}).set_index('Year')

    # add columns from automated dataset (aligned on the Year index)
    save_df['date_auto'] = annual_ela_df['date']
    save_df['ELA_auto'] = annual_ela_df['ela']
    save_df['AAR_auto'] = annual_ela_df['aar'].round(4)
    save_df['off_glacier_auto'] = annual_ela_df['off_glacier'].astype(int)

    elas_manual = []
    aars_manual = []

    # get manual ela for each year
    for year in years:

        # grab the manual validation product for this year
        snowline_y = snowlines_gdf[snowlines_gdf['y2']==year]

        # default to the placeholder; overwritten below when a usable snowline exists
        ela_manual_y = no_data
        aar_manual_y = no_data

        # get manual snowline info if it's there
        if len(snowline_y)>0:
            # open the time-varying dem for this year (only needed when a snowline exists)
            xr_dem = snowFun.get_year_DEM(rgi_i_gdf.geometry, year, smoothed=0)
            if ga>500:
                # match the coarsened mask grid used for the largest glaciers
                xr_dem = xr_dem.sel({"x":glacier_mask.x, "y":glacier_mask.y})
            xr_dem = xr.where(xr_dem<=0, np.nan, xr_dem)[0]

            # sample dem at 20m increments along each snowline
            all_zs = snowFun.sample_dem_along_line(snowline_y, xr_dem, increment=20)

            # get 50 percentile of elevation points
            median_z = np.nanpercentile(all_zs, 50)

            # the percentile is NaN when no valid elevation was sampled; int(NaN) would raise,
            # so such a year keeps the placeholder values
            if np.isfinite(median_z):
                ela_manual_y = int(median_z)

                # calculate the AAR based on this ELA
                aar_manual_y = round(np.nansum(xr.where(xr_dem>=ela_manual_y, 1, 0))/glacier_pixels, 4)

        # add elas to list (same order as the rows of save_df)
        elas_manual.append(ela_manual_y)
        aars_manual.append(aar_manual_y)

    # add columns for manual dataset
    save_df['date_manual'] = snowlines_gdf['Date']
    save_df['ELA_manual'] = elas_manual
    save_df['AAR_manual'] = aars_manual
    save_df['off_glacier_manual'] = best_images_df['All Ablation']

    # save the comparison table
    out_path = os.path.join(folder_AGVA, 'Validation', 'comparison', f'{rgi_i}.csv')
    save_df.to_csv(out_path)

save_df.head()

35 of 45 - RGI60-01.08989 29.395


Unnamed: 0_level_0,date_auto,ELA_auto,AAR_auto,off_glacier_auto,date_manual,ELA_manual,AAR_manual,off_glacier_manual
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018,2018-09-15,1440.0,0.2294,0,2018-09-14,1523,0.2921,0
2019,2019-09-02,1920.0,0.2202,0,2019-08-30,1508,0.3268,0
2020,2020-08-27,1740.0,0.2845,0,2020-08-27,1501,0.3412,0
2021,2021-09-11,1650.0,0.1456,0,2021-09-11,1543,0.251,0
2022,2022-09-14,1430.0,0.5168,0,2022-08-10,1477,0.3894,0


In [58]:
# inspect the off-glacier flag column of the automated product table currently in memory
annual_ela_df.loc[:, 'off_glacier']

Year
2018    False
2019     True
2020    -9999
2021    -9999
2022    False
Name: off_glacier, dtype: object

In [None]:
### make another figure comparing manual and automated ela
# Reads both ELAs from the comparison table built above (save_df): one manual and one
# automated value per year. Years where either value is missing (NaN) or holds the -9999
# placeholder are left out.
# TODO: the manual ELA is stored as a single median per year, so no horizontal error bars are
# drawn; store low/high percentiles of the sampled elevations in the loop above to restore them.
ela_pairs = save_df[['ELA_manual', 'ELA_auto']].astype(float).replace(-9999, np.nan).dropna()
xs = ela_pairs['ELA_manual'].values
ys = ela_pairs['ELA_auto'].values

# initiate figure
fig,axs = plt.subplots(figsize=(5,5),)

# plot 1-1 line spanning the full range of both datasets (skipped when there is nothing to plot)
if len(xs)>0:
    low = min(xs.min(), ys.min())
    high = max(xs.max(), ys.max())
    axs.plot([low, high], [low, high], c='black', linestyle='dashed')

# plot points
axs.scatter(xs, ys, zorder=5)

# edits
axs.set_xlabel('Manual ELA')
axs.set_ylabel('Automated ELA')
plt.tight_layout()
axs.set_aspect('equal')

In [None]:
# xr_dem = snowFun.get_year_DEM(rgi_i_gdf.geometry, 2019)
# xr_dem = xr.where(xr_dem<=0, np.nan, xr_dem)[0]
# xr_dem.plot()

In [None]:
# # make figure
# fig, axs = plt.subplots()
# axs.set_facecolor('gainsboro')
# rgi_i_gdf.plot(ax=axs, color='white')
# rgi_i_gdf.boundary.plot(ax=axs, edgecolor='black', linewidth=1)
# snowlines_gdf_clip.plot(ax=axs, column='Year', cmap="RdPu", vmin=2017, vmax=2022, legend=True)
# plt.tight_layout()