In [1]:
import os
import rasterio as rio
import numpy as np
import shapely
import pyproj
import geopandas as gpd
import matplotlib.pyplot as plt
import rioxarray as riox
import rasterio as rio
import xarray as xr
import netCDF4
from osgeo import gdal
import pandas as pd
from datetime import datetime
import dask.array

import sys
sys.path.append('../')
import snowFun

In [2]:
# define folder and file paths
# The project root defaults to the original OneDrive location. Set the AGVA_FOLDER
# environment variable to point the notebook at a copy of the data stored elsewhere.
folder_AGVA = os.environ.get(
    'AGVA_FOLDER',
    os.path.join('C:',os.sep,'Users','lzell','OneDrive - Colostate','Desktop',"AGVA"),
)

# input folders, all relative to the project root
folder_dems = os.path.join(folder_AGVA, "DEMs", "time_varying_DEMs", "10m")
folder_class = os.path.join(folder_AGVA, 'classified images', 'S2_Classified_Cloudmasked_Merged')
folder_cloud = os.path.join(folder_AGVA, 'classified images', 'S2_Cloud_Merged')
folder_meta = os.path.join(folder_AGVA, "classified images", "meta csv", "S2")

# open rgi (glacier outlines; the cells below only read its 'RGIId' and 'Area' columns)
# NOTE(review): `drop` is not a documented geopandas.read_file parameter; it is forwarded
# to the underlying I/O engine. If the goal is to skip reading geometry, the documented
# option is ignore_geometry=True -- confirm before changing, since that returns a plain
# pandas DataFrame instead of a GeoDataFrame.
path_rgi = os.path.join(folder_AGVA, 'RGI', "rgi_2km_o3regions", "rgi_2km_o3regions.shp")
rgi_gdf = gpd.read_file(path_rgi, drop='geometry')

In [8]:
### choose if you want to do only the 45 validation glaciers
validation_only = 1

# open list of validation glaciers
all_validation_df = pd.read_csv(os.path.join(folder_AGVA, 'Validation', 'Validation Glaciers.csv'))

### get list of all the glaciers for which we have calculated the snow covered fractions
# pick the output folder for this run; the per-band snow-cover-fraction CSVs always
# live in its 'Band SCFs' subfolder
s2_products = os.path.join(folder_AGVA, 'Derived products', 'S2')
folder_save = os.path.join(s2_products, 'Validation') if validation_only else s2_products
folder_sca = os.path.join(folder_save, 'Band SCFs')

# load rgi names that have been saved to the band SCF folder.
# characters 3-16 of each csv file name are the RGI id (files are named "S2_<RGIId>_<kind>.csv");
# sorted so the starting order is the same on every run
rgi_ids = sorted({name[3:17] for name in os.listdir(folder_sca) if name.endswith('csv')})

# look up one scalar glacier area per rgi id (first match wins if an id is duplicated)
area_by_rgiid = pd.Series(rgi_gdf['Area'].to_numpy(), index=rgi_gdf['RGIId'].to_numpy())
area_by_rgiid = area_by_rgiid[~area_by_rgiid.index.duplicated(keep='first')]

# make df
rgis_to_analyze_df = pd.DataFrame({
    "RGIId": rgi_ids,
    'Area': area_by_rgiid.reindex(rgi_ids).to_numpy(),
})

# ids with no outline in rgi_gdf cannot be processed downstream, so report and drop them
missing_area = rgis_to_analyze_df['Area'].isna()
if missing_area.any():
    print(f"Skipping {int(missing_area.sum())} ids not found in rgi_gdf: "
          f"{rgis_to_analyze_df.loc[missing_area, 'RGIId'].tolist()}")
    rgis_to_analyze_df = rgis_to_analyze_df[~missing_area]

# sort however you want (smallest glaciers first)
rgis_to_analyze_df = rgis_to_analyze_df.sort_values('Area')

# grab rgi names, in processing order
rgis_to_analyze = rgis_to_analyze_df['RGIId'].values

print(len(rgis_to_analyze_df))

43


In [21]:
# ---- run settings ----------------------------------------------------------------
# years covered by the band tables (each year has its own total_pixels_<year> column)
YEARS = [2018, 2019, 2020, 2021, 2022]
# glaciers with an area above this (km2) are skipped
MAX_GLACIER_AREA_KM2 = 300
# a band is classed as accumulation when its snow fraction is at least this
SNOW_FRACTION_THRESHOLD = 0.5
# the ELA is the lowest band that starts this many accumulation bands in a row;
# the run lengths are tried in this order until one gives an answer
ELA_RUN_LENGTHS = (5, 4, 3)
# if truthy, assume every unobserved pixel in bands above the median band elevation is snow
infill = 1


def _first_run_start(accumulation, run_length):
    """Return, per date, the first band index that starts a run of accumulation bands.

    Parameters
    ----------
    accumulation : array-like of 0/1, shape (n_bands, n_dates)
        1 where a band is classed as accumulation on a date. Rows are assumed to be
        ordered from the lowest band upward, so "first" means "lowest".
    run_length : int
        Number of consecutive accumulation bands (starting at the returned band and
        going up) required.

    Returns
    -------
    numpy.ndarray of float, shape (n_dates,)
        Row index of the first qualifying band, or nan where a date has no such run
        (including when the glacier has fewer than `run_length` bands).
    """
    accumulation = np.asarray(accumulation).astype(int)
    n_bands, n_dates = accumulation.shape
    first = np.full(n_dates, np.nan)
    if run_length < 1 or n_bands < run_length:
        return first

    # window sums via a cumulative sum: run_sums[k] = accumulation[k:k+run_length].sum().
    # np.convolve(mode='same') is avoided here because it returns an array of length
    # max(len(signal), len(kernel)), which misaligns glaciers with very few bands.
    csum = np.concatenate(
        [np.zeros((1, n_dates), dtype=int), np.cumsum(accumulation, axis=0)], axis=0
    )
    run_sums = csum[run_length:] - csum[:-run_length]
    is_start = run_sums == run_length

    # argmax gives the first True per column; columns with no True stay nan
    has_start = is_start.any(axis=0)
    first[has_start] = np.argmax(is_start, axis=0)[has_start]
    return first


for i, rgiid in enumerate(rgis_to_analyze):

    # quickly grab glacier area, and skip the very large glaciers
    ga = rgi_gdf[rgi_gdf['RGIId']==rgiid]['Area'].values[0]
    if ga > MAX_GLACIER_AREA_KM2:
        continue

    # print progress
    print(f"\nStarting {i+1} of {len(rgis_to_analyze)}: {rgiid}  {ga} km2")

    # open the per-date observation table for this glacier; it is merged onto the
    # ELA table at the end, keyed on 'date'
    obs_df_path = os.path.join(folder_save, 'Daily AAs', 'observed', f"S2_{rgiid}_observed.csv")
    obs_df = pd.read_csv(obs_df_path).rename(columns={'Date': 'date'})

    # open the per-band tables: one row per elevation band, one column per date,
    # holding snow pixel counts and observed pixel counts respectively
    df_snow = pd.read_csv(os.path.join(folder_sca, f"S2_{rgiid}_snow.csv"))
    df_observed = pd.read_csv(os.path.join(folder_sca, f"S2_{rgiid}_observed.csv"))

    # every column that is not band metadata is a date column (named 'YYYY-...')
    meta_columns = ['z_min', 'z_max'] + [f"total_pixels_{y}" for y in YEARS]
    data_cols = df_snow.columns.difference(meta_columns)
    cols_by_year = {y: [d for d in data_cols if int(d[:4]) == y] for y in YEARS}

    # get high, low, mid elevation
    zmin = np.nanmin(df_snow['z_min'])
    zmax = np.nanmax(df_snow['z_max'])
    zmid = np.nanmedian(df_snow['z_min'])
    upper_eles = (df_snow['z_min'] > zmid)
    lower_eles = (df_snow['z_min'] <= zmid)

    # create df with how much of each band was NOT observed (total pixels that year
    # minus observed pixels). Bands at or below the median elevation are zeroed so
    # that the infill further down only ever touches the upper half of the glacier.
    df_not_observed = df_observed.copy()
    for y, cols_y in cols_by_year.items():
        if not cols_y:
            continue
        not_observed_y = df_observed[cols_y].rsub(df_observed[f"total_pixels_{y}"], axis=0)
        not_observed_y.loc[lower_eles] = 0
        df_not_observed[cols_y] = not_observed_y

    # fraction of the entire glacier surface that is observed on each date (before infill)
    # NOTE(review): the denominator is the 2018 pixel total for every year, although the
    # per-band totals differ between years -- confirm this is intended.
    df_observed_frac_glacier = df_observed[data_cols].sum(axis=0) / df_observed['total_pixels_2018'].sum()

    # optionally, assume everything that wasn't observed in the upper bands is snow:
    # the unobserved pixels are added to both the snow and the observed counts
    if infill:
        df_snow[data_cols] = df_snow[data_cols] + df_not_observed[data_cols]
        df_observed[data_cols] = df_observed[data_cols] + df_not_observed[data_cols]

    # per-band fractions on each date. Done one year at a time because the total
    # pixel count of each band changes from year to year.
    df_observed_frac = df_observed[data_cols].copy()
    df_snow_frac = df_snow[data_cols].copy()
    for y, cols_y in cols_by_year.items():
        if not cols_y:
            continue
        total_pixels_y = df_snow[f"total_pixels_{y}"]
        # snow fraction = snow pixels / observed pixels (0 where nothing was observed)
        df_snow_frac[cols_y] = df_snow[cols_y].divide(df_observed[cols_y]).fillna(0)
        # observed fraction = observed pixels / total pixels in the band that year
        df_observed_frac[cols_y] = df_observed[cols_y].divide(total_pixels_y, axis=0).fillna(0)

    # calculate the glacier-wide aar on each date (based on observable surface)
    df_aars = df_snow[data_cols].sum(axis=0) / df_observed[data_cols].sum(axis=0)

    # transition to numpy: bands with snow fraction >= threshold become 1 (accumulation),
    # everything else becomes 0 (ablation)
    np_accumulation = (df_snow_frac.to_numpy() >= SNOW_FRACTION_THRESHOLD).astype(int)

    # the ela band on each date is the lowest band that starts 5 accumulation bands in
    # a row; dates without such a run fall back to 4 in a row, then 3
    final_elas = np.full(np_accumulation.shape[1], np.nan)
    for run_length in ELA_RUN_LENGTHS:
        still_missing = np.isnan(final_elas)
        if not still_missing.any():
            break
        final_elas[still_missing] = _first_run_start(np_accumulation, run_length)[still_missing]

    # lastly, if we still have no ela (the entire glacier is ablation) we'll put the
    # highest elevation band as the ela, and flag those dates separately
    off_glacier_flag = np.isnan(final_elas)
    final_elas[off_glacier_flag] = np_accumulation.shape[0] - 1

    # link ela band # to band elevation (lower edge of the band)
    bands_zs = df_snow['z_min'].to_numpy()

    # now that we have our elas for each time step, format into a df with date, ela, aar
    ela_df = pd.DataFrame({'date': data_cols})
    ela_df['ela'] = bands_zs[final_elas.astype(int)]
    ela_df['fraction_observed_slopemasked'] = df_observed_frac_glacier.values.round(4)
    ela_df['aar'] = df_aars.values.round(4)
    ela_df['off_glacier'] = off_glacier_flag

    # add in the columns of the per-date observation table
    ela_df = ela_df.merge(obs_df, how='left', on='date')

    # save, creating the output folder on first use
    out_path = os.path.join(folder_save, "ELAs", f"{rgiid}_ELAs.csv")
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    ela_df.to_csv(out_path, index=False)

    # the annual-maximum ELA selection that used to follow is disabled
    continue
#     # throw out anything that has <XYZ% observed
#     ela_df_good = ela_df[ela_df['fraction_observed']>0.8]
    
#     # now for each year, get the maximum ela
#     annual_best = []
#     for y in [2018,2019,2020,2021,2022]:
        
#         # decide which of the columns you are going to want to save
#         cols_to_save = ['date', 'ela', 'fraction_observed', 'aar', 'off_glacier']
        
#         # subset to this year. note we only consider obs in the july-november time frame
#         elas_this_year = ela_df_good[ (ela_df_good['date']>=f'{y}-07-01') & (ela_df_good['date']<f'{y}-12-01')]
        
#         # if len=0, we have no good data so we save a null point
#         n_obs = len(elas_this_year)
#         if n_obs==0:
#             annual_best.append(pd.Series({"date":f"{y}-01-01", "ela":-9999, 'fraction_observed':-9999, 'aar':-9999, 'off_glacier':-9999}))
        
#         # else we decide which the best of the elas is and save that info
#         else:
            
#             # find the maximum ela
#             ela_max = np.nanmax(elas_this_year['ela'])

#             # get all the dates that had this ela
#             ela_max_all = elas_this_year[elas_this_year['ela']==ela_max]

#             # we'll take the one that has the minimum aar to be the 'best' estimate
#             # for example, if the ela is well above the glacier, there are likely many dates with the same ela found
#             ela_max_best = ela_max_all.loc[ela_max_all['aar'].idxmax()]
            
#             # gotta make sure that this is not an outlier in ELA or AAR
            
#             # save this data
#             annual_best.append(ela_max_best[cols_to_save])
            
#     ### save all the data here
    
#     # function to format metadata and attributes
#     def format_xr_to_save(xr_da):
#         xr_da.attrs["res"] = (10,10)
#         xr_da.attrs["crs"] = "EPSG:3338"
#         xr_da.attrs["transform"] = [10,0,0,0,-10,0]
#         xr_da.attrs["_FillValue"] = 0
#         xr_da.attrs["long_name"] = rgiid
#         xr_da.attrs["description"] = "0: nan, 1: ablation, 2: accumulation"
#         xr_da.name = "accumulation_area"

#         xr_da.x.attrs["units"] = "meters"
#         xr_da.y.attrs["units"] = "meters"
#         xr_da.x.attrs["long_name"] = 'x'
#         xr_da.y.attrs["long_name"] = 'y'

#         return xr_da
    
#     # set folder paths, etc...
#     path_df_all = os.path.join(folder_save, 'Annual AAs', 'csv', f"S2_{rgiid}_2018_2022_annual_AAs.csv")
#     path_xr_all = os.path.join(folder_save, 'Annual AAs', f"S2_{rgiid}_2018_2022_annual_AAs.nc")
#     path_xr_avg = os.path.join(folder_save, 'Average AAs', f"S2_{rgiid}_2018_2022_average_AA.nc")
#     path_tif_avg = os.path.join(folder_save, 'Average AAs', f"S2_{rgiid}_2018_2022_average_AA.tif")
    
#     # one year at a time, get the snow distributions
#     all_maps = []
#     for s in annual_best:
        
#         # get date
#         d = s['date']
        
#         # skip the null years
#         if d[5:7]=='01': continue
        
#         # open the snow distribution map
#         if ga>500:
#             path_open = os.path.join(folder_save, 'Daily AAs', f"S2_{rgiid}_{d[:4]}_daily_AAs_shadowed_coarse_smoothed.nc")
#         else:
#             path_open = os.path.join(folder_save, 'Daily AAs', f"S2_{rgiid}_{d[:4]}_daily_AAs_shadowed_smoothed.nc")
        
#         snow = xr.open_dataset(path_open, chunks={'time':1})['class'].sel(time=d)
#         all_maps.append(snow)
    
#     # format to save maps as long as there is usable imagery
#     if len(all_maps)>0:
#         all_maps = xr.concat(all_maps, dim='time')
#         average_map = all_maps.median('time', skipna=True).astype('uint8')#+glacier_mask
#         save_xr_all = format_xr_to_save(all_maps.astype('uint8'))
#         save_xr_avg = format_xr_to_save(average_map)

#         # specify compression/encoding
#         encoding = {"accumulation_area":{"zlib": True}}#, "spatial_ref":{"zlib": False}}

#         # save
#         save_xr_all.to_netcdf(path_xr_all, encoding=encoding)
#         save_xr_avg.to_netcdf(path_xr_avg, encoding=encoding)
#         save_xr_avg.rio.to_raster(raster_path=path_tif_avg, encoding=encoding)

#     # always save the csv
#     annual_best = pd.DataFrame(annual_best)
#     annual_best.to_csv(path_df_all, index=False) # table with annual end-of-summer ela, aar, date, fraction observed

# reached once the loop over rgis_to_analyze has finished (every glacier processed or skipped)
print('All done!')


Starting 1 of 43: RGI60-01.10910  2.084 km2

Starting 2 of 43: RGI60-01.00787  2.126 km2

Starting 3 of 43: RGI60-01.23606  2.344 km2

Starting 4 of 43: RGI60-01.15253  2.551 km2

Starting 5 of 43: RGI60-01.03379  2.578 km2

Starting 6 of 43: RGI60-01.16719  2.681 km2

Starting 7 of 43: RGI60-01.17321  2.88 km2

Starting 8 of 43: RGI60-01.13462  3.206 km2

Starting 9 of 43: RGI60-01.13483  3.216 km2

Starting 10 of 43: RGI60-01.02584  3.441 km2

Starting 11 of 43: RGI60-01.03215  3.998 km2

Starting 12 of 43: RGI60-01.01666  4.243 km2

Starting 13 of 43: RGI60-01.12548  4.314 km2

Starting 14 of 43: RGI60-01.13930  4.404 km2

Starting 15 of 43: RGI60-01.09624  4.487 km2

Starting 16 of 43: RGI60-01.15516  4.764 km2

Starting 17 of 43: RGI60-01.21721  6.422 km2

Starting 18 of 43: RGI60-01.10255  7.262 km2

Starting 19 of 43: RGI60-01.12165  7.969 km2

Starting 20 of 43: RGI60-01.05007  9.216 km2

Starting 21 of 43: RGI60-01.01104  9.528 km2

Starting 22 of 43: RGI60-01.12186  11.05 km

In [24]:
# ela_df is reassigned on every pass of the processing loop, so this previews the
# table of whichever glacier the loop handled last (re-run the loop before trusting it)
ela_df.head(50)

Unnamed: 0,date,ela,fraction_observed_slopemasked,aar,off_glacier,observed_initial,terrain_shadows,observed_after_smoothing
0,2018-05-06,400.0,0.9886,0.9203,False,0.977,0.0027,0.977
1,2018-05-14,410.0,0.9857,0.8987,False,0.0987,0.0019,0.9727
2,2018-05-16,410.0,0.9872,0.8971,False,0.972,0.0018,0.9758
3,2018-05-19,410.0,0.9872,0.8971,False,0.5223,0.0016,0.9758
4,2018-05-29,440.0,0.803,0.8682,False,0.7631,0.0012,0.813
5,2018-05-31,440.0,0.803,0.8682,False,0.1431,0.0012,0.813
6,2018-06-05,440.0,0.8782,0.9018,False,0.6293,0.0011,0.8849
7,2018-06-08,480.0,0.7087,0.9576,False,0.0701,0.001,0.6986
8,2018-06-10,490.0,0.8017,0.9421,False,0.281,0.001,0.8145
9,2018-06-13,760.0,0.978,0.7897,False,0.7136,0.0009,0.9702


In [12]:
# df_not_observed is a loop variable: this shows the per-band unobserved pixel counts
# for the last glacier the processing loop handled
df_not_observed.head()

Unnamed: 0,z_min,z_max,total_pixels_2018,total_pixels_2019,total_pixels_2020,total_pixels_2021,total_pixels_2022,2018-05-07,2018-05-09,2018-05-12,...,2022-10-03,2022-10-10,2022-10-13,2022-10-23,2022-10-25,2022-11-04,2022-11-12,2022-11-17,2022-11-24,2022-11-29
0,10.0,20.0,159.0,161.0,163.0,158.0,151.0,13.0,13.0,13.0,...,20.0,20.0,147.0,151.0,151.0,151.0,151.0,29.0,9.0,11.0
1,20.0,30.0,244.0,244.0,243.0,246.0,249.0,26.0,26.0,26.0,...,45.0,43.0,227.0,249.0,249.0,249.0,249.0,32.0,10.0,4.0
2,30.0,40.0,391.0,387.0,378.0,375.0,374.0,49.0,49.0,49.0,...,35.0,35.0,351.0,374.0,374.0,374.0,374.0,12.0,0.0,7.0
3,40.0,50.0,459.0,460.0,453.0,464.0,459.0,80.0,80.0,80.0,...,4.0,4.0,428.0,459.0,459.0,459.0,459.0,6.0,6.0,16.0
4,50.0,60.0,501.0,499.0,519.0,505.0,510.0,90.0,89.0,89.0,...,0.0,0.0,456.0,508.0,508.0,510.0,510.0,7.0,6.0,14.0


In [13]:
# df_observed is a loop variable (last glacier handled); when infill is on, its date
# columns have already had the unobserved counts added by the processing loop
df_observed.head()

Unnamed: 0,z_min,z_max,total_pixels_2018,total_pixels_2019,total_pixels_2020,total_pixels_2021,total_pixels_2022,2018-05-07,2018-05-09,2018-05-12,...,2022-10-03,2022-10-10,2022-10-13,2022-10-23,2022-10-25,2022-11-04,2022-11-12,2022-11-17,2022-11-24,2022-11-29
0,10.0,20.0,159.0,161.0,163.0,158.0,151.0,146.0,146.0,146.0,...,131.0,131.0,4.0,0.0,0.0,0.0,0.0,122.0,142.0,140.0
1,20.0,30.0,244.0,244.0,243.0,246.0,249.0,218.0,218.0,218.0,...,204.0,206.0,22.0,0.0,0.0,0.0,0.0,217.0,239.0,245.0
2,30.0,40.0,391.0,387.0,378.0,375.0,374.0,342.0,342.0,342.0,...,339.0,339.0,23.0,0.0,0.0,0.0,0.0,362.0,374.0,367.0
3,40.0,50.0,459.0,460.0,453.0,464.0,459.0,379.0,379.0,379.0,...,455.0,455.0,31.0,0.0,0.0,0.0,0.0,453.0,453.0,443.0
4,50.0,60.0,501.0,499.0,519.0,505.0,510.0,411.0,412.0,412.0,...,510.0,510.0,54.0,2.0,2.0,0.0,0.0,503.0,504.0,496.0
