In [1]:
import os
import rasterio as rio
import numpy as np
import shapely
import pyproj
import geopandas as gpd
import matplotlib.pyplot as plt
import rioxarray as riox
import rasterio as rio
import xarray as xr
import netCDF4
from osgeo import gdal
import pandas as pd
import snowFun
import dask.array
import importlib
importlib.reload(snowFun)
# %matplotlib widget

# define folder and file paths
folder_AGVA = os.path.join('C:',os.sep,'Users','lzell','OneDrive - Colostate','Desktop',"AGVA")
folder_dems = os.path.join(folder_AGVA, "DEMs", "time_varying_DEMs", "10m")
folder_class = os.path.join(folder_AGVA, 'classified images', 'S2_Classified_Cloudmasked_Merged')
folder_cloud = os.path.join(folder_AGVA, 'classified images', 'S2_Cloud_Merged')
folder_meta = os.path.join(folder_AGVA, "classified images", "meta csv", "S2")

In [2]:
# open rgi
path_rgi = os.path.join(folder_AGVA, 'RGI', "01_rgi60_Alaska", "01_rgi60_Alaska.shp")
rgi_gdf = gpd.read_file(path_rgi)

In [3]:
# subset rgi to single outline, by choosing rgiid or rgi name
# Wolverine RGIId: RGI60-01.09162
# Gulkana RGIId: RGI60-01.00570

rgiid = "RGI60-01.01731" # really big glacier
# rgiid = "RGI60-01.09162" # wolverine
rginame = 0

# get the rgi id for the given glacier name, if a name has been given
if rginame:
    rgiid = rgi_gdf[rgi_gdf['Name']==rginame]['RGIId'].values[0]

# grab just this rgi geometry and info
rgi_single = rgi_gdf[rgi_gdf['RGIId']==rgiid].to_crs("EPSG:3338")
single_geometry = rgi_single.geometry

# single_geometry = single_geometry.buffer(-100) #what if we buffer out the exterior 100 meters of the glacier

## open the raw classification data, merge by date, mask out shadow and cloud areas, remove dates with no usable imagery, and then resave

In [4]:
# open the glacier classification, clipping to the glacier outline. note we use dask with "auto" chunks
# we will rechunk later
file_name = f"S2_{rgiid}_2018-01-01_2023-01-01"
xr_class = riox.open_rasterio(os.path.join(folder_class, f"{file_name}.tif"), chunks='auto').rio.clip(single_geometry, from_disk=True, drop=True).chunk()

# load metadata csv, convert date to datetimes
meta_fp = os.path.join(folder_meta, f"{file_name}.csv")
meta_df = pd.read_csv(meta_fp)

# format time axis to pandas datetime, like xarray wants
datetimes = pd.to_datetime([f"{str(i)[:4]}-{str(i)[4:6]}-{str(i)[6:]}" for i in meta_df['date']])
xr_class = xr_class.rename({"band":"time"})
xr_class['time'] = datetimes

# create quick binary glacier mask of 0 and 1
glacier_mask = xr_class.max(dim='time')
glacier_mask = (glacier_mask>0)#.astype(int)

In [None]:
# merge images on same day
xr_class = xr_class.where(xr_class<20, 0).groupby('time').max('time')

# get these merged dates
datetimes_merged = xr_class.time.values

### create binary mask of useable and unuseable data, use it to mask the xr_class to only this area
bad_classes = [5] #class 5 is shadow. we will call these area unusable
good_classes = [1,2,3,4,6] #snow,firn,ice,debris,water are usable areas
# usable = xr.where( xr_class.isin(good_classes), 1, 0)

# count total number of pixels on the glacier surface
glacier_pixels = glacier_mask.sum().values 

# now we can mask out unusable areas in each time step
xr_class = xr.where(xr_class.isin(good_classes), xr_class, 0) #.sel(time=good_times) # note we subset to only the good days

# count useable pixels on each day
count_usable_by_time = np.count_nonzero(xr_class, axis=(1,2))

# calculate percent of the glacier surface that is usable on each day
percent_usable_by_time = count_usable_by_time/glacier_pixels

# now lets throw out days where there is essentially no usable data
good_times = (percent_usable_by_time>0.05)

# print(xr_class)
xr_class = xr_class.sel(time=good_times)
# print(xr_class.shape)

# at this point, xr_class is 0 off-glacier, 0 in shadow/cloud, and 1-6 in usable areas
# save indidivual years at this point
folder_save = os.path.join(folder_AGVA, 'Derived products', 'S2')

# save one year at a time
for y in [2018,2019,2020,2021,2022]:
    print(y)
    path_save = os.path.join(folder_save, 'Daily AAs', f"S2_{rgiid}_{y}_daily_AAs.nc")

    save_xr = xr_class.sel(time=slice(f"{y-1}-12-01", f"{y+1}-01-31")).astype('uint8').rename('class')

    # specify compression/encoding
    encoding = {"class":{"zlib": True}}#, "spatial_ref":{"zlib": False}}

    # save
    save_xr.to_netcdf(path_save, encoding=encoding)

### Then we'll have to load each year back in to do the rolling calculation of snow, and we can resave over the same file (or resave to new file, if we're not concerned about storage space)

In [None]:
# doing rolling smoothing one year at a time
for y in [2018,2019,2020,2021,2022]:
    print(y)
    
    # open the data with each day being its own chunk
    path_open = os.path.join(folder_save, 'Daily AAs', f"S2_{rgiid}_{y}_daily_AAs.nc")
    snow = xr.open_dataset(path_open, chunks={'time': 1, 'y': 'auto', 'x': 'auto'})
    
    # make 1=snow, 0=ablation, nan=cloud,shadow,off-glacier
    snow = snow.where(snow!=0, np.nan).where(snow<=1, 0)

    # create new empty dataarray with same x/y shape but 1-day frequency, using dask
    time_values = pd.to_datetime(snow.time.values)
    new_time_values = pd.date_range(start=time_values.min(), end=time_values.max(), freq='D')

    # define chunk size for our data. make each pixel its own chunk (all obs of that pixel through time)
    xy_chunk = 100
    chunks_new_flat = (1, len(snow.y), len(snow.x)) # initially we will have each day be its own chunk
#     chunks_new_flat = {'time':30, 'y':len(snow.y), 'x':len(snow.x)}
#     chunks_new_long = (len(new_time_values), xy_chunk, xy_chunk) # before doing rolling() over time axis we will rechunk

    # make nan daily cadence array
    snow2 = dask.array.full((len(new_time_values), len(snow.y), len(snow.x)), np.nan, chunks=chunks_new_flat)# overlapping={'time': 15})
    snow2 = xr.DataArray(snow2, coords={'time': new_time_values, 'y': snow.y, 'x': snow.x},
                                dims=('time', 'y', 'x'))

    # insert observations into this
    snow2 = xr.merge([snow2, snow.rename({'class':'snow'})], compat='override').snow.combine_first(snow2)#.chunk(chunks_new_long)

    # now use rolling window mean and then extract the original good obs
    snow = (snow2.rolling(time=15, min_periods=1, center=False).mean(skipna=True)).loc[dict(time=time_values)]

    # we can drop snow2 at this point I think
    snow2=None
    
    # make to 0, 1, nan
    snow = xr.where(snow.isnull(), np.nan, xr.where(snow>=0.5, 1, 0))
    
    # apply a 2d-convolution filter to smooth out the snow distributions
    snow_x = snow.rolling({'x':3}, min_periods=1, center=True).sum(skipna=True)
    snow_x = snow_x.rolling({'y':3}, min_periods=1, center=True).sum(skipna=True)
    norm_x = (snow.notnull()).rolling({'x':3}, min_periods=1, center=True).sum(skipna=True)
    norm_x = norm_x.rolling({'y':3}, min_periods=1, center=True).sum(skipna=True)

    snow = snow_x/norm_x # this show what fraction of the 3x3 box around each pixel is snow
    snow = xr.where(snow.isnull(), 0, xr.where(snow>=0.5, 2, 1))#.astype('uint8') #nodata=0, ablation=1, snow=2
    snow = xr.where(glacier_mask==1, snow, 0) # make sure the off-glacier stuff goes to 0  
    
    snow_x=None
    norm_x=None
    
    # calculate % useable data on each date, % snow by date
    percent_all_by_time = (xr.where(snow>0, 1, 0).sum(dim=['x','y'])/glacier_pixels).load()
#     percent_snow_by_time = xr.where(snow>1, 1, 0).sum(dim=['x','y'], skipna=True)/xr.where(snow>=0,1,0).sum(dim=['x','y'])
    
    # filter out the dates that have less than x% usable data
    usable_thresh = 0.85
    snow = snow.where((percent_all_by_time>usable_thresh), drop=True)

    ### make nodata=0, ablation=1, snow=2
#     snow = xr.where(snow.isnull(), 0, snow+1)
    
    ### save
    path_save = os.path.join(folder_save, 'Daily AAs', f"S2_{rgiid}_{y}_daily_AAs_smoothed.nc")
    save_xr = snow.sel(time=slice(f"{y}-01-01", f"{y}-12-31")).astype('uint8').rename('class')

    # specify compression/encoding
    encoding = {"class":{"zlib": True}}#, "spatial_ref":{"zlib": False}}

    # save
    save_xr.to_netcdf(path_save, encoding=encoding)

### now open each year again, calculate elas, save final products

In [8]:
importlib.reload(snowFun)
csv_data = []
# xr_data = []
chunks = {'time':-1, 'y':50, 'x':50}
# chunks = {'time':1, 'y':-1, 'x':-1}
# chunks = {'time':-1, 'y':10, 'x':10}

for y in [2018]: #2018,2019,2020,2021,2022
    print(y)
    
    # open the data. think about how you want to chunk it
    folder_save = os.path.join(folder_AGVA, 'Derived products', 'S2')
    path_open = os.path.join(folder_save, 'Daily AAs', f"S2_{rgiid}_{y}_daily_AAs_smoothed.nc")
#     snow = xr.open_dataset(path_open, chunks={'time':1, 'y':'auto', 'x':'auto'})['class'] # nodata=0, ablation=1, snow=2
    snow = xr.open_dataset(path_open, chunks=chunks)['class'] # nodata=0, ablation=1, snow=2
    
    # make to 0, 1, nan
    snow = snow.astype(float)-1
    snow = snow.where(snow>=0, np.nan)
    
    # open dem
    xr_dem = snowFun.get_year_DEM(single_geometry, 2013)
    
#     print(xr_dem.shape)
#     print(glacier_mask.shape)
#     xr_dem.plot()
#     continue
    
    # calculate the "ideal" ela-aar relationship
    ideal_ELAs = snowFun.idealized_ELA_AAR(xr_dem, glacier_mask)
    
    print('starting ELAs')
    
    # extract ELAs from each time step
    glacier_ELAs = snowFun.get_the_ELAs(snow, xr_dem, glacier_mask, step=20, width=1, p_snow=0.5)
    # this now has, time, aar, ela_ideal, z(list), and ela(single) as columns
    continue
    print("initial ELAs made")
#     print(glacier_ELAs)
    
    # lets add aar on to the df as well\
    ### this is calculated in previous function, so let's get it to be returned in glacier_ELAs
#     glacier_ELAs['aar'] = (xr.where(snow==1, 1, 0).sum(dim=['x','y'], skipna=True)) / ((snow.notnull()).sum(dim=['x','y']))
    
    # if ela is above the glacier then we get 9999. below and we get -1
    # we can change these to the glacier min or max if we want (buffered by 1)
    z_min = np.nanmin(xr_dem.where(xr_dem>0))
    z_max = np.nanmax(xr_dem)
    glacier_ELAs = glacier_ELAs.replace({'ela': {-1:z_min, 9999:z_max} })
    
    # lets use this aar-ela relationship to root out bad observations
    # for each aar we observed, see what the ideal ela would be
    glacier_ELAs['aar_round'] = glacier_ELAs['aar'].round(2)
    glacier_ELAs['ela_ideal'] = [ ideal_ELAs[ideal_ELAs['aar'].round(2)==i]['ela'].values[0] for i in glacier_ELAs['aar_round'] ]

    # how about we incorporate a little error and see the range of elas we could expect
    error_allowed = 0.2
    glacier_ELAs['ela_ideal_min'] = [ ideal_ELAs[ideal_ELAs['aar'].round(2)==round(min(i+error_allowed,1),2)]['ela'].values[0] for i in glacier_ELAs['aar_round'] ]
    glacier_ELAs['ela_ideal_max'] = [ ideal_ELAs[ideal_ELAs['aar'].round(2)==round(max(i-error_allowed,0),2)]['ela'].values[0] for i in glacier_ELAs['aar_round'] ]
    glacier_ELAs['quality'] = [1 if (row['ela_ideal_min'] <= row['ela'] <= row['ela_ideal_max']) else 0 for idx,row in glacier_ELAs.iterrows()]

    ###### need to think of better ways to do this. We
    ### get rolling median of quality ela obs in prior x days
    x_days = 30

    # define function to do this
    def get_rolling_median(df_obs, col_name, n_days, min_periods=1, center=False, closed='left'):
        temp_df = df_obs[['time',col_name]].set_index('time')
        medians = temp_df.rolling(f'{n_days}D', min_periods=min_periods, center=center, closed=closed).median()
        return medians

    # subset df to good obs
    glacier_ELAs_good = glacier_ELAs[glacier_ELAs['quality']==1].copy()
    test = get_rolling_median(glacier_ELAs_good, 'ela', x_days, center=False)
    glacier_ELAs_good['ela_rolling'] = test.values
    glacier_ELAs_good['ela_diff'] = glacier_ELAs_good['ela']-glacier_ELAs_good['ela_rolling']
    
    # lets say that an ela that is 400 or more meter above the rolling median is bad
    new_df = glacier_ELAs_good[glacier_ELAs_good['ela_diff']<400]
    
    print("final elas made")
    
    ### so now from this, lets extract the snow distribution at the end of each year
    df = new_df.copy()
    df['time_index'] = df['time']
    df = df.set_index('time_index')
    
    # subset to just the months of interest
    df_subset = df.loc[f'{y}-07-01':f'{y}-11-01']
    
    # if we have valid obs, then we take the highest ela
    if len(df_subset)>0:
        
        # get the row that has max ela within this time frame
        idx = df_subset['ela'].idxmax()
        ela_max = df_subset.loc[idx]
        ela_max['quality'] = 1

        # get the snow map that is on this date
        snow_map = snow.sel(time=ela_max['time'].to_pydatetime())

        # append to save
        csv_data.append(ela_max)
#         xr_data.append(snow_map)
    
    else: # if we have no valid obs, then set a null row in the csv data and carry on
        csv_data.append(pd.Series({"time":f"01/01/{y}", "ela":-9999, 'aar':-9999, 'quality':0})) # append row of empty/null values to the df
        

save = 1
if save:
    # first we save the csv with dates, elas, aars, etc...
    # then we use that info to select the snow cover product from each year and save it
    

    # set folder paths, etc...
    folder_save = os.path.join(folder_AGVA, 'Derived products', 'S2')
    # path_df_all = os.path.join(folder_save, 'csv', f"S2_{rgiid}_2018_2022_annual_AAs.csv")
    # path_xr_all = os.path.join(folder_save, 'Annual AAs', f"S2_{rgiid}_2018_2022_annual_AAs.nc")
    # path_xr_avg = os.path.join(folder_save, 'Average AAs', f"S2_{rgiid}_2018_2022_average_AA.nc")
    # path_tif_avg = os.path.join(folder_save, 'Average AAs', f"S2_{rgiid}_2018_2022_average_AA.tif")
    path_df_all = os.path.join(folder_save, 'temp', f"S2_{rgiid}_2018_2022_annual_AAs.csv")
    path_xr_all = os.path.join(folder_save, 'temp', f"S2_{rgiid}_2018_2022_annual_AAs.nc")
    path_xr_avg = os.path.join(folder_save, 'temp', f"S2_{rgiid}_2018_2022_average_AA.nc")
    path_tif_avg = os.path.join(folder_save, 'temp', f"S2_{rgiid}_2018_2022_average_AA.tif")

    ### format and save csv
    max_elas = pd.DataFrame(csv_data)
    max_elas.to_csv(path_df_all, index=False, columns=['time','ela','aar']) # table with annual end-of-summer ela, aar, 

    # see how many of the years had a useable product
    n_usable = max_elas['quality'].sum()

    ### format and save maps, if we have valid observations
    # function to format metadata and attributes
    def format_xr_to_save(xr_da):
        xr_da.attrs["res"] = (10,10)
        xr_da.attrs["crs"] = "EPSG:3338"
        xr_da.attrs["transform"] = [10,0,0,0,-10,0]
        xr_da.attrs["_FillValue"] = 0
        xr_da.attrs["long_name"] = rgiid
        xr_da.attrs["description"] = "0: nan, 1: ablation, 2: accumulation"
        xr_da.name = "accumulation_area"

        xr_da.x.attrs["units"] = "meters"
        xr_da.y.attrs["units"] = "meters"
        xr_da.x.attrs["long_name"] = 'x'
        xr_da.y.attrs["long_name"] = 'y'

        return xr_da

    if n_usable>0:
        
        dates = max_elas['time']
        dates = [d.to_pydatetime() for d in dates if d.month!="1"] # toss out the "bad ones"
        
        all_maps = []
        
        for d in dates:
            y = d.year
            print(y)
        
            # reopen the daily snow data
            path_open = os.path.join(folder_save, 'Daily AAs', f"S2_{rgiid}_{y}_daily_AAs_smoothed.nc")
            snow = xr.open_dataset(path_open, chunks={'time':1})['class'].sel(time=d)
            all_maps.append(snow)
        
        # format to save
        all_maps = xr.concat(all_maps, dim='time')
        average_map = all_maps.median('time', skipna=True).astype('uint8')+glacier_mask
        save_xr_all = format_xr_to_save(all_maps.astype('uint8')+glacier_mask)
        save_xr_avg = format_xr_to_save(average_map)
        
        # specify compression/encoding
        encoding = {"accumulation_area":{"zlib": True}}#, "spatial_ref":{"zlib": False}}

        # save
        save_xr_all.to_netcdf(path_xr_all, encoding=encoding)
        save_xr_avg.to_netcdf(path_xr_avg, encoding=encoding)
        save_xr_avg.rio.to_raster(raster_path=path_tif_avg, encoding=encoding)

2018
starting ELAs
1
Execution time: 136.26163959503174 seconds
2
3
4
5
5.1
5.2
6
