In [1]:
import os
import rasterio as rio
import numpy as np
import shapely
import pyproj
import geopandas as gpd
import matplotlib.pyplot as plt
import rioxarray as riox
import rasterio as rio
import xarray as xr
import netCDF4
from osgeo import gdal
import pandas as pd
from datetime import datetime
import dask.array

import sys
sys.path.append('../')
import snowFun

In [2]:
# Folder and file paths used throughout the notebook.
# NOTE(review): folder_AGVA is an absolute, machine-specific path; anyone running
# this on another machine has to edit it here.
folder_AGVA = os.path.join('C:',os.sep,'Users','lzell','OneDrive - Colostate','Desktop',"AGVA")
folder_dems = os.path.join(folder_AGVA, "DEMs", "time_varying_DEMs", "10m")
folder_class = os.path.join(folder_AGVA, 'classified images', 'S2_Classified_Cloudmasked_Merged')
folder_cloud = os.path.join(folder_AGVA, 'classified images', 'S2_Cloud_Merged')
folder_meta = os.path.join(folder_AGVA, "classified images", "meta csv", "S2")
folder_mask = os.path.join(folder_AGVA, 'Derived products', 'S2', 'Masks')

# Open the RGI glacier outlines.
# The geometry column is kept on purpose: later cells reproject the outlines
# (to_crs) and pass the geometry on to snowFun. The former `drop='geometry'`
# keyword is not a documented read_file argument (it was only forwarded to the
# underlying I/O engine), so it has been removed.
path_rgi = os.path.join(folder_AGVA, 'RGI', "rgi_2km_o3regions", "rgi_2km_o3regions.shp")
rgi_gdf = gpd.read_file(path_rgi)

In [4]:
### choose if you want to do only the 45 validation glaciers
validation_only = 1

# open list of validation glaciers
all_validation_df = pd.read_csv(os.path.join(folder_AGVA, 'Validation', 'Validation Glaciers.csv'))

# select which output folder (and therefore which set of glaciers) to work with
if validation_only:
    folder_save = os.path.join(folder_AGVA, 'Derived products', 'S2', 'Validation')
else:
    folder_save = os.path.join(folder_AGVA, 'Derived products', 'S2')

# get the list of rgis to analyze from the average-AA tif names.
# Characters 3:17 of each file name are taken as the RGI id (e.g. "RGI60-01.10910"
# in "S2_RGI60-01.10910_..."). The unique ids are sorted so the starting order is
# reproducible from run to run (a bare set has no guaranteed order).
rgis_to_analyze = sorted({ f[3:17] for f in os.listdir(os.path.join(folder_save, "Average AAs")) if f[-3:]=='tif' })

# get the glacier area for each rgi as a scalar.
# `.values[0]` (rather than `.values`) keeps the 'Area' column numeric instead of
# a column of 1-element arrays; it raises IndexError here if an id is not in rgi_gdf.
areas = [rgi_gdf.loc[rgi_gdf['RGIId']==rgiid, 'Area'].values[0] for rgiid in rgis_to_analyze]

# make df
rgis_to_analyze_df = pd.DataFrame({"RGIId":rgis_to_analyze, 'Area':areas})

# sort however you want (stable sort: glaciers with equal area keep their id order)
rgis_to_analyze_df = rgis_to_analyze_df.sort_values('Area', kind='stable')

# grab rgi names, now ordered from smallest to largest glacier
rgis_to_analyze = rgis_to_analyze_df['RGIId'].values

print(len(rgis_to_analyze_df))

45


### First step: for each glacier, compute the snow cover fraction of each elevation band in the average AA product

In [6]:
# For every glacier: open its mask, average-AA raster and 2020 DEM, count snow and
# observed pixels per elevation band, and write one csv per glacier.

# make sure the output folder exists before any csv is written
folder_band_scfs = os.path.join(folder_save, 'Average AAs', 'Band SCFs')
os.makedirs(folder_band_scfs, exist_ok=True)

for i in range(len(rgis_to_analyze)):

    # subset rgi to single outline, by choosing rgiid or rgi name
    rgiid = rgis_to_analyze[i]

    # check if we've already run this glacier. if so, skip
    out_path = os.path.join(folder_band_scfs, f"{rgiid}.csv")
    if os.path.exists(out_path):
        continue

    # look the glacier up once; reused for both the area and the geometry
    rgi_row = rgi_gdf[rgi_gdf['RGIId']==rgiid]

    # quickly grab glacier area
    ga = rgi_row['Area'].values[0]

    # print progress
    print(f"Starting {i+1} of {len(rgis_to_analyze)}: {rgiid}  {ga} km2")

    # grab just this rgi geometry and info
    rgi_single = rgi_row.to_crs("EPSG:3338")
    single_geometry = rgi_single.geometry

    # define the coarsen scale. Always assigned, so a value left over from a
    # previous (larger) glacier can never be picked up by accident.
    if ga>1000:
        scale=5
    elif ga>500:
        scale=3
    else:
        scale=1

    # open glacier mask
    glacier_mask = xr.open_dataset(os.path.join(folder_mask, f"S2_{rgiid}_mask.nc"), chunks='auto').glacier

    # large glaciers (>500 km2) are processed on a coarsened grid
    if scale>1:
        glacier_mask = glacier_mask.coarsen({"x":scale, "y":scale}, boundary="trim").median(skipna=True).astype('uint8')

    # open the average AA map
    path_open = os.path.join(folder_save, 'Average AAs', f"S2_{rgiid}_2018_2022_average_AA_final.tif")
    snow = riox.open_rasterio(path_open)

    # if 3d, rename "band" to "time". otherwise if 2d add a time dimension
    if len(snow.shape)==3:
        snow = snow.rename({'band':'time'})
    else:
        snow = snow.expand_dims(dim='time', axis=0)
        snow['time'] = [1]

    # make to nan, 0, 1 (instead of 0,1,2,3) (0 and 3 to np.nan, 1 to 0, 2 to 1)
    snow = xr.where(snow.isin([0,3]), np.nan, snow-1)

    # open dem for year 2020
    xr_dem = snowFun.get_year_DEM(single_geometry, 2020, smoothed=0)

    # for coarsened glaciers, pick the DEM cells at the snow raster's coordinates
    # NOTE(review): .sel needs exact coordinate matches, so this assumes the
    # average-AA raster lies on a subset of the DEM grid — confirm.
    if scale>1:
        xr_dem = xr_dem.sel({"x":snow.x, "y":snow.y})

    # shave off edges to make sure dem, mask match.
    # The y slices run max -> min, i.e. they assume y is stored in descending order.
    xr_dem = xr_dem.sel(x=slice( min(glacier_mask.x.values), max(glacier_mask.x.values) ), y=slice(max(glacier_mask.y.values),min(glacier_mask.y.values)))
    glacier_mask = glacier_mask.sel(x=slice( min(xr_dem.x.values), max(xr_dem.x.values) ), y=slice(max(xr_dem.y.values),min(xr_dem.y.values)))
    snow = snow.sel(x=slice( min(xr_dem.x.values), max(xr_dem.x.values) ), y=slice(max(xr_dem.y.values),min(xr_dem.y.values)))

    # extract number of snow pixels in bands from each time step
    # (the result is expected to carry 'total_pixels' and '1' columns, renamed below)
    snow_df = snowFun.extract_band_SCA(snow, xr_dem, glacier_mask, step=10)
    snow_df = snow_df.rename(columns={'total_pixels':"total_pixels_possible", '1':'snow'})

    # extract number of pixels observed in bands from each time step
    # (a pixel counts as observed when its snow value is not nan)
    obs_df = snowFun.extract_band_SCA( xr.where(snow>=0, 1, 0) , xr_dem, glacier_mask, step=10)
    obs_df = obs_df.rename(columns={'total_pixels':"total_pixels_possible", '1':'observed'})

    # add snow_df to obs_df
    obs_df['snow'] = snow_df['snow']

    # save to csv
    obs_df.to_csv(out_path, index=False)

print('Done!')

Starting 1 of 45: RGI60-01.10910  2.084 km2
Starting 2 of 45: RGI60-01.00787  2.126 km2
Starting 3 of 45: RGI60-01.23606  2.344 km2
Starting 4 of 45: RGI60-01.15253  2.551 km2
Starting 5 of 45: RGI60-01.03379  2.578 km2
Starting 6 of 45: RGI60-01.16719  2.681 km2
Starting 7 of 45: RGI60-01.17321  2.88 km2
Starting 8 of 45: RGI60-01.13462  3.206 km2
Starting 9 of 45: RGI60-01.13483  3.216 km2
Starting 10 of 45: RGI60-01.02584  3.441 km2
Starting 11 of 45: RGI60-01.03215  3.998 km2
Starting 12 of 45: RGI60-01.01666  4.243 km2
Starting 13 of 45: RGI60-01.12548  4.314 km2
Starting 14 of 45: RGI60-01.13930  4.404 km2
Starting 15 of 45: RGI60-01.09624  4.487 km2
Starting 16 of 45: RGI60-01.15516  4.764 km2
Starting 17 of 45: RGI60-01.21721  6.422 km2
Starting 18 of 45: RGI60-01.10255  7.262 km2
Starting 19 of 45: RGI60-01.12165  7.969 km2
Starting 20 of 45: RGI60-01.05007  9.216 km2
Starting 21 of 45: RGI60-01.01104  9.528 km2
Starting 22 of 45: RGI60-01.12186  11.05 km2
Starting 23 of 45: R

### Second step: use those per-band snow cover fractions to calculate the ELA of each glacier

In [7]:
def _first_run_start(flags, run_length):
    """Return the index of the first band that starts a run of accumulation bands.

    Parameters
    ----------
    flags : array-like of 0/1
        Per-band accumulation flag (1 = accumulation, 0 = ablation), ordered the
        same way as the bands in the table it came from.
    run_length : int
        Number of consecutive accumulation bands required.

    Returns
    -------
    float
        Position of the first band that is followed by `run_length - 1` more
        accumulation bands, or np.nan when no such run exists (including when
        there are fewer than `run_length` bands).
    """
    flags = np.asarray(flags, dtype=float)
    if flags.size < run_length:
        return np.nan
    # 'valid' sliding sum: element k is flags[k] + ... + flags[k + run_length - 1],
    # so the output index is directly the start of the window
    window_sums = np.convolve(flags, np.ones(run_length), mode='valid')
    starts = np.flatnonzero(window_sums == run_length)
    return float(starts[0]) if starts.size else np.nan


all_obs = []
for i, rgiid in enumerate(rgis_to_analyze):

    # quickly grab glacier area
    ga = rgi_gdf[rgi_gdf['RGIId']==rgiid]['Area'].values[0]

    # define scale (must match the coarsening used when the band csv was produced)
    if ga>1000:
        scale=5
    elif ga>500:
        scale=3
    else:
        scale=1

    # print progress
    print(f"Starting {i+1} of {len(rgis_to_analyze)}: {rgiid}  {ga} km2")

    # open the csv holding the elevation-band scfs
    open_path = os.path.join(folder_save, 'Average AAs', 'Band SCFs', f"{rgiid}.csv")
    scf_df = pd.read_csv(open_path)

    # calculate fraction of each band that was observed and that was snow (relative to observed).
    # fillna(0) is applied to the ratio (not to the denominator), so undefined
    # fractions such as 0/0 in empty bands become 0
    scf_df['observed_frac'] = (scf_df['observed']/scf_df['total_pixels_possible']).fillna(0)
    scf_df['total_snow_frac'] = (scf_df['snow']/scf_df['total_pixels_possible']).fillna(0)
    scf_df['snow_frac'] = (scf_df['snow']/scf_df['observed']).fillna(0)

    ## we need to remove elevation bands that have essentially no observations.
    good_bands = (scf_df['observed']>50)

    df_to_use = scf_df[good_bands].reset_index()

    # snow fractions >=0.5 become 1 (accumulation zone), everything <0.5 becomes 0 (ablation).
    # np.where builds a new array, so the dataframe column is left untouched
    np_accumulation = np.where(df_to_use['snow_frac'].values>=0.5, 1.0, 0.0)

    # find the first band that starts 5 accumulation bands in a row;
    # if there is none, relax the requirement to 4 and then to 3 bands
    final_ela = np.nan
    for run_length in (5, 4, 3):
        final_ela = _first_run_start(np_accumulation, run_length)
        if not np.isnan(final_ela):
            break

    # translate final_ela (a band position) to elevation
    if df_to_use.empty:
        # no band has enough observations: the ela is undefined for this glacier,
        # and the off-glacier flag is left at 0 because nothing was observed
        off_glacier_flag = 0
        ela_value = np.nan
    elif np.isnan(final_ela):
        # we still have no ela (the entire glacier is ablation), so we put the last
        # (assumed highest) usable elevation band as the ela and flag it separately
        off_glacier_flag = 1
        ela_value = int(df_to_use['z_min'].values[-1])
    else:
        off_glacier_flag = 0
        ela_value = int(df_to_use['z_min'].iloc[int(final_ela)])

    # calculate fraction observed, aar
    final_observed_frac = round(np.nansum(scf_df['observed'])/np.nansum(scf_df['total_pixels_possible']),4)
    final_aar = round(np.nansum(scf_df['snow'])/np.nansum(scf_df['observed']),4)

    # pixel counts to km2: each pixel is (scale*10 m) x (scale*10 m)
    total_area = np.nansum(scf_df['total_pixels_possible'])*scale*scale*10*10 / (1000*1000) #km2
    total_observed = np.nansum(scf_df['observed'])*scale*scale*10*10 / (1000*1000) #km2
    total_snow = np.nansum(scf_df['snow'])*scale*scale*10*10 / (1000*1000) #km2

    # format all the data for this glacier to save
    final_obs = {"RGIId":rgiid,
                 "ela":ela_value,
                 "aar":final_aar,
                 "off_glacier":off_glacier_flag,
                 "total_area":round(total_area,4),
                 "total_observed":round(total_observed,4),
                 "total_snow":round(total_snow,4)}

    # save to list
    all_obs.append(final_obs)

# format final df
all_obs_df = pd.DataFrame(all_obs)

# save
out_path = os.path.join(folder_save, 'Average AAs', 'final_glacier_stats.csv')
all_obs_df.to_csv(out_path, index=False)

print('Done!')

Starting 1 of 45: RGI60-01.10910  2.084 km2
Starting 2 of 45: RGI60-01.00787  2.126 km2
Starting 3 of 45: RGI60-01.23606  2.344 km2
Starting 4 of 45: RGI60-01.15253  2.551 km2
Starting 5 of 45: RGI60-01.03379  2.578 km2
Starting 6 of 45: RGI60-01.16719  2.681 km2
Starting 7 of 45: RGI60-01.17321  2.88 km2
Starting 8 of 45: RGI60-01.13462  3.206 km2
Starting 9 of 45: RGI60-01.13483  3.216 km2
Starting 10 of 45: RGI60-01.02584  3.441 km2
Starting 11 of 45: RGI60-01.03215  3.998 km2
Starting 12 of 45: RGI60-01.01666  4.243 km2
Starting 13 of 45: RGI60-01.12548  4.314 km2
Starting 14 of 45: RGI60-01.13930  4.404 km2
Starting 15 of 45: RGI60-01.09624  4.487 km2
Starting 16 of 45: RGI60-01.15516  4.764 km2
Starting 17 of 45: RGI60-01.21721  6.422 km2
Starting 18 of 45: RGI60-01.10255  7.262 km2
Starting 19 of 45: RGI60-01.12165  7.969 km2
Starting 20 of 45: RGI60-01.05007  9.216 km2
Starting 21 of 45: RGI60-01.01104  9.528 km2
Starting 22 of 45: RGI60-01.12186  11.05 km2
Starting 23 of 45: R