In [1]:
import os
import rasterio as rio
import numpy as np
import shapely
import pyproj
import geopandas as gpd
import matplotlib.pyplot as plt
import rioxarray as riox
import rasterio as rio
import xarray as xr
import netCDF4
from osgeo import gdal
import pandas as pd
from datetime import datetime
import snowFun
import dask.array
# %matplotlib widget

# define folder and file paths
# NOTE: folder_AGVA is a machine-specific root; every other path below is built from it
folder_AGVA = os.path.join('C:',os.sep,'Users','lzell','OneDrive - Colostate','Desktop',"AGVA")
folder_dems = os.path.join(folder_AGVA, "DEMs", "time_varying_DEMs", "10m")
folder_class = os.path.join(folder_AGVA, 'classified images', 'S2_Classified_Cloudmasked_Merged')
folder_cloud = os.path.join(folder_AGVA, 'classified images', 'S2_Cloud_Merged')
folder_meta = os.path.join(folder_AGVA, "classified images", "meta csv", "S2")
folder_sca = os.path.join(folder_AGVA, 'Derived products', 'S2', 'Band SCFs')

# open rgi (attribute table only)
path_rgi = os.path.join(folder_AGVA, 'RGI', "01_rgi60_Alaska", "01_rgi60_Alaska.shp")
# `drop` is not a geopandas.read_file option; `ignore_geometry=True` is the documented way to
# skip the geometries. The result is then a plain pandas DataFrame of the attribute columns.
rgi_gdf = gpd.read_file(path_rgi, ignore_geometry=True)

In [2]:
### build the sorted list of glaciers for which snow covered fractions have been calculated
# each file name in folder_sca is sliced at [3:17] to get the rgi id; the 'merged.vrt' entry is skipped.
# a set removes the duplicates that arise when several files share the same id
rgis_to_analyze = sorted({ fname[3:17] for fname in os.listdir(folder_sca) if fname != 'merged.vrt' })

In [3]:
# optional filter: only this glacier is processed (set to None to process every glacier)
# TODO (carried over from earlier runs): need to go back to glaciers 241, 277, 280, 290, 291
only_rgiid = "RGI60-01.09162"


def first_run_start(accumulation, run_length):
    """
    For every column (date), find the lowest row (elevation band) that starts a run of
    `run_length` consecutive accumulation bands.

    Parameters
    ----------
    accumulation : 2d array-like (bands x dates) holding 1 (accumulation), 0 (ablation) or nan
    run_length : int, number of consecutive accumulation bands required

    Returns
    -------
    1d float array with one entry per column: the row index where the first such run
    starts, or nan when the column contains no such run.
    """
    # kernel of `run_length` ones followed by `run_length-1` zeros: with mode='same' this sums
    # each band with the (run_length-1) bands that follow it along the first axis
    kernel = np.concatenate([np.ones(run_length, dtype=int), np.zeros(run_length-1, dtype=int)])
    run_sums = np.apply_along_axis(lambda x: np.convolve(x, kernel, mode='same'), axis=0, arr=accumulation)

    # bands where the full run is accumulation
    is_start = run_sums==run_length

    # argmax returns the first True in each column (and 0 when there is none, fixed just below)
    first = np.argmax(is_start, axis=0).astype(float)

    # make sure that if a column has no qualifying run then we put nan, to allow filling in
    first[~is_start.any(axis=0)] = np.nan
    return first


for i, rgiid in enumerate(rgis_to_analyze):

    # subset to a single glacier if requested
    if (only_rgiid is not None) and (rgiid != only_rgiid): continue

    # quickly grab glacier area
    ga = rgi_gdf[rgi_gdf['RGIId']==rgiid]['Area']
    
    # print progress
    print(f"\nStarting {i+1} of {len(rgis_to_analyze)}: {rgiid}  {ga.values[0]} km2")
    
    # open the snow fraction dataframes
    df_snow = pd.read_csv(os.path.join(folder_sca, f"S2_{rgiid}_snow.csv")).rename(columns={'Unnamed: 0':"band_n"}).set_index('band_n')
    df_observed = pd.read_csv(os.path.join(folder_sca, f"S2_{rgiid}_observed.csv")).rename(columns={'Unnamed: 0':"band_n"}).set_index('band_n')

    # define the columns that we will be manipulating (ie not the z_min, z_max, etc...)
    meta_columns = ["z_min", "z_max", "total_pixels"]
    data_cols = df_snow.columns.difference(meta_columns)
    
    # create series to link ela band # to band elevation (midpoint of each band)
    bands_zs = (df_snow['z_min']+df_snow['z_max'])/2
    
    # calculate the fraction of each band that was observed in each date
    df_observed_frac = df_observed[data_cols].divide(df_observed["total_pixels"], axis=0)
    
    # calculate fraction of each band that was snow (divide n_snow by n_observed)
    df_snow_frac = df_snow[data_cols].divide(df_observed[data_cols])
    
    # calculate the fraction of the entire glacier surface that is visible on each date
    df_observed_frac_glacier = df_observed[data_cols].sum(axis=0) / df_observed['total_pixels'].sum()
    
    # calculate the aar on each date (based on observable surface)
    df_aars = df_snow[data_cols].sum(axis=0) / df_observed[data_cols].sum(axis=0)
    # there are instances where a large portion of the ablation zone is nan (mostly debris cover). we should do
    # some interpolation or something before calculating aar. eg fill in accumulation above the ela, ablation below
    
    # snow fractions >=0.5 become 1 (accumulation zone), everything <0.5 becomes 0 (ablation)
    # nan entries satisfy neither comparison, so they stay nan
    np_accumulation = df_snow_frac.copy()
    np_accumulation[np_accumulation>=0.5] = 1
    np_accumulation[np_accumulation<0.5] = 0
    
    # for each date, find the lowest elevation band that starts 5 accumulation bands in a row (or 4,3,2,1)
    # each run length uses its own kernel (previously the 2- and 1-band cases reused the 3-band kernel/results)
    first_elas = { n: first_run_start(np_accumulation, n) for n in (5, 4, 3, 2, 1) }
    
    # get the final elas, by first taking from 5, then 4, then 3
    # (the 2- and 1-band results are computed for inspection only and are not used as fallbacks)
    final_elas = first_elas[5].copy()
    for run_length in (4, 3):
        missing = np.isnan(final_elas)
        final_elas[missing] = first_elas[run_length][missing]
    
    # lastly, if we still have no ela (the entire glacier is ablation) we'll put the highest elevation band as the ela
    # we should find some way to flag this, to make it more clear
    final_elas[np.isnan(final_elas)] = np_accumulation.shape[0]-1
    
    # now that we have our elas for each time step, lets format into a df with date, ela
    # final_elas holds 0-based row positions; +1 converts to the band_n label used to index bands_zs
    # NOTE(review): this assumes band_n labels start at 1 - confirm against the band csvs
    ela_df = pd.DataFrame({'date':data_cols, "ela_band":(final_elas+1).astype(int)})
    ela_df['ela'] = [bands_zs[int(band_pos+1)] for band_pos in final_elas]
    ela_df['fraction_observed'] = df_observed_frac_glacier.values
    ela_df['aar'] = df_aars.values
    
    # decide which of the columns you are going to want to save
    cols_to_save = ['date', 'ela', 'fraction_observed', 'aar']
    
    # now for each year, get the maximum ela
    annual_best = []
    for y in [2018,2019,2020,2021,2022]:
        
        # subset to this year. note we only consider obs in the july-november time frame
        # (dates are compared as strings against 'YYYY-MM-DD' bounds)
        elas_this_year = ela_df[ (ela_df['date']>=f'{y}-07-01') & (ela_df['date']<f'{y}-12-01')]
        
        # if there are no observations we have no good data, so we save a null point
        # (n_obs is an int; the previous len(n_obs) raised a TypeError)
        n_obs = len(elas_this_year)
        if n_obs==0:
            annual_best.append(pd.Series({"date":f"{y}-01-01", "ela":-9999, 'fraction_observed':-9999, 'aar':-9999}))
            continue
        
        # else we decide which the best of the elas is and save that info
        # find the maximum ela
        ela_max = np.nanmax(elas_this_year['ela'])

        # get all the dates that had this ela
        ela_max_all = elas_this_year[elas_this_year['ela']==ela_max]

        # of those dates, take the one with the highest aar as the 'best' estimate
        # for example, if the ela is well above the glacier, there are likely many dates with the same ela found
        # NOTE(review): the original comment said "minimum aar" while the code selects the maximum (idxmax);
        # behavior is left unchanged here - confirm which one is intended
        ela_max_best = ela_max_all.loc[ela_max_all['aar'].idxmax()]
        
        # TODO: make sure that this is not an outlier in ELA or AAR
        
        # save this data
        annual_best.append(ela_max_best[cols_to_save])

            
    ### save all the data here
    
    # function to format metadata and attributes
    def format_xr_to_save(xr_da):
        """
        Set the attributes, name and x/y coordinate attributes on xr_da before saving.
        The object is modified in place and also returned. long_name is taken from the
        rgiid of the glacier currently being processed.
        """
        xr_da.attrs["res"] = (10,10)
        xr_da.attrs["crs"] = "EPSG:3338"
        xr_da.attrs["transform"] = [10,0,0,0,-10,0]
        xr_da.attrs["_FillValue"] = 0
        xr_da.attrs["long_name"] = rgiid
        xr_da.attrs["description"] = "0: nan, 1: ablation, 2: accumulation"
        xr_da.name = "accumulation_area"

        xr_da.x.attrs["units"] = "meters"
        xr_da.y.attrs["units"] = "meters"
        xr_da.x.attrs["long_name"] = 'x'
        xr_da.y.attrs["long_name"] = 'y'

        return xr_da
    
    # set folder paths, etc...
    folder_save = os.path.join(folder_AGVA, 'Derived products', 'S2')
    path_df_all = os.path.join(folder_save, 'csv', f"S2_{rgiid}_2018_2022_annual_AAs.csv")
    path_xr_all = os.path.join(folder_save, 'Annual AAs', f"S2_{rgiid}_2018_2022_annual_AAs.nc")
    path_xr_avg = os.path.join(folder_save, 'Average AAs', f"S2_{rgiid}_2018_2022_average_AA.nc")
    path_tif_avg = os.path.join(folder_save, 'Average AAs', f"S2_{rgiid}_2018_2022_average_AA.tif")
    
    # one year at a time, get the snow distributions
    all_maps = []
    for s in annual_best:
        
        # get date
        d = s['date']
        
        # skip the null years (they were stamped with a january date above; real obs are july-november)
        if d[5:7]=='01': continue
        
        # open the snow distribution map for the chosen date
        path_open = os.path.join(folder_save, 'Daily AAs', f"S2_{rgiid}_{d[:4]}_daily_AAs_smoothed.nc")
        snow = xr.open_dataset(path_open, chunks={'time':1})['class'].sel(time=d)
        all_maps.append(snow)
    
    # format to save maps as long as there is usable imagery
    if len(all_maps)>0:
        all_maps = xr.concat(all_maps, dim='time')
        average_map = all_maps.median('time', skipna=True).astype('uint8')
        save_xr_all = format_xr_to_save(all_maps.astype('uint8'))
        save_xr_avg = format_xr_to_save(average_map)

        # specify compression/encoding
        encoding = {"accumulation_area":{"zlib": True}}

        # save
        save_xr_all.to_netcdf(path_xr_all, encoding=encoding)
        save_xr_avg.to_netcdf(path_xr_avg, encoding=encoding)
        save_xr_avg.rio.to_raster(raster_path=path_tif_avg, encoding=encoding)

    # always save the csv
    annual_best = pd.DataFrame(annual_best)
    annual_best.to_csv(path_df_all, index=False) # table with annual end-of-summer ela, aar, date, fraction observed

print('All done!')


Starting 1 of 483: RGI60-01.01546  6.078 km2

Starting 2 of 483: RGI60-01.01560  3.87 km2

Starting 3 of 483: RGI60-01.01573  2.809 km2

Starting 4 of 483: RGI60-01.01589  2.757 km2

Starting 5 of 483: RGI60-01.01602  4.242 km2

Starting 6 of 483: RGI60-01.01606  2.629 km2

Starting 7 of 483: RGI60-01.01607  9.649 km2

Starting 8 of 483: RGI60-01.01623  4.46 km2

Starting 9 of 483: RGI60-01.01625  2.233 km2

Starting 10 of 483: RGI60-01.01632  2.834 km2

Starting 11 of 483: RGI60-01.01637  2.718 km2

Starting 12 of 483: RGI60-01.01644  2.189 km2

Starting 13 of 483: RGI60-01.01647  2.783 km2

Starting 14 of 483: RGI60-01.01648  11.497 km2

Starting 15 of 483: RGI60-01.01649  3.919 km2

Starting 16 of 483: RGI60-01.01666  4.243 km2

Starting 17 of 483: RGI60-01.01670  3.755 km2

Starting 18 of 483: RGI60-01.01671  2.138 km2

Starting 19 of 483: RGI60-01.01681  2.417 km2

Starting 20 of 483: RGI60-01.01682  3.876 km2

Starting 21 of 483: RGI60-01.01684  3.631 km2

Starting 22 of 483: RG


Starting 174 of 483: RGI60-01.09571  4.47 km2

Starting 175 of 483: RGI60-01.09581  25.839 km2

Starting 176 of 483: RGI60-01.09592  14.225 km2

Starting 177 of 483: RGI60-01.09599  3.891 km2

Starting 178 of 483: RGI60-01.09606  5.624 km2

Starting 179 of 483: RGI60-01.09608  12.677 km2

Starting 180 of 483: RGI60-01.09616  3.042 km2

Starting 181 of 483: RGI60-01.09618  91.801 km2

Starting 182 of 483: RGI60-01.09619  3.46 km2

Starting 183 of 483: RGI60-01.09624  4.487 km2

Starting 184 of 483: RGI60-01.09635  17.437 km2

Starting 185 of 483: RGI60-01.09638  4.728 km2

Starting 186 of 483: RGI60-01.09639  377.247 km2

Starting 187 of 483: RGI60-01.09641  13.525 km2

Starting 188 of 483: RGI60-01.09647  5.33 km2

Starting 189 of 483: RGI60-01.09656  13.791 km2

Starting 190 of 483: RGI60-01.09658  5.242 km2

Starting 191 of 483: RGI60-01.09664  11.962 km2

Starting 192 of 483: RGI60-01.09672  4.319 km2

Starting 193 of 483: RGI60-01.09674  3.109 km2

Starting 194 of 483: RGI60-01.09


Starting 344 of 483: RGI60-01.11240  3.263 km2

Starting 345 of 483: RGI60-01.17637  3.807 km2

Starting 346 of 483: RGI60-01.17659  2.503 km2

Starting 347 of 483: RGI60-01.17681  7.526 km2

Starting 348 of 483: RGI60-01.17711  2.401 km2

Starting 349 of 483: RGI60-01.17720  2.409 km2

Starting 350 of 483: RGI60-01.17726  4.334 km2

Starting 351 of 483: RGI60-01.17748  7.195 km2

Starting 352 of 483: RGI60-01.17761  307.536 km2

Starting 353 of 483: RGI60-01.17767  2.628 km2

Starting 354 of 483: RGI60-01.17769  2.267 km2

Starting 355 of 483: RGI60-01.17771  24.315 km2

Starting 356 of 483: RGI60-01.17774  171.454 km2

Starting 357 of 483: RGI60-01.17780  3.404 km2

Starting 358 of 483: RGI60-01.17782  98.233 km2

Starting 359 of 483: RGI60-01.17784  14.773 km2

Starting 360 of 483: RGI60-01.17795  6.899 km2

Starting 361 of 483: RGI60-01.17797  2.249 km2

Starting 362 of 483: RGI60-01.17798  3.719 km2

Starting 363 of 483: RGI60-01.17803  198.091 km2

Starting 364 of 483: RGI60-01.

In [13]:
# lastly, go through all the saved csvs, make a master csv with rgiid, median ela

# rebuild the output folder here so this cell does not rely on a variable that is only
# defined inside the per-glacier loop of the previous cell
folder_save = os.path.join(folder_AGVA, 'Derived products', 'S2')
folder_csv = os.path.join(folder_save, 'csv')

# sorted so that the row order does not depend on the order os.listdir happens to return
all_files = sorted( f for f in os.listdir(folder_csv) if f.endswith('.csv') )

all_rgis = []
all_elas = []
all_aars = []

for f in all_files:
    
    # open
    df = pd.read_csv(os.path.join(folder_csv, f))
    
    # get rgi (characters 3-16 of the file name)
    all_rgis.append( f[3:17] )
    
    # get rid of the 'bad' years (null years are stored with ela=-9999)
    df_valid = df[df['ela']>0]
    
    # get median ela, aar. pandas' median skips nans and returns nan for an empty selection,
    # without the "Mean of empty slice" RuntimeWarning that np.nanmedian emits in that case
    all_elas.append( df_valid['ela'].median() )
    all_aars.append( df_valid['aar'].median() )

# save everything
bulk_df = pd.DataFrame({'rgiid':all_rgis, 'ela':all_elas, 'aar':all_aars}) 
bulk_df.head()
# bulk_df.to_csv(os.path.join(folder_save, 'Region_04_2018_2022.csv'), index=False)

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Unnamed: 0,rgiid,ela,aar
0,RGI60-01.01546,1750.0,0.090094
1,RGI60-01.01560,1470.0,0.114236
2,RGI60-01.01573,1380.0,0.10088
3,RGI60-01.01589,1290.0,0.066725
4,RGI60-01.01602,1300.0,0.403913
