In [1]:
import os
import rasterio as rio
import numpy as np
import shapely
import pyproj
import geopandas as gpd
import matplotlib.pyplot as plt
import rioxarray as riox
import rasterio as rio
import xarray as xr
import netCDF4
from osgeo import gdal
import pandas as pd
from datetime import timedelta
from datetime import datetime
import dask.array

import sys
sys.path.append('../')
import snowFun

In [2]:
# --- project folders -------------------------------------------------------
# Everything lives under the AGVA project root on the local drive.
folder_AGVA = os.path.join('C:', os.sep, *['Users','lzell','OneDrive - Colostate','Desktop',"AGVA"])

# The three classified-image folders share this parent directory.
_classified_root = os.path.join(folder_AGVA, 'classified images')
folder_class = os.path.join(_classified_root, 'S2_Classified_Cloudmasked_Merged')
folder_cloud = os.path.join(_classified_root, 'S2_Cloud_Merged')
folder_meta = os.path.join(_classified_root, "meta csv", "S2")

# DEMs and the per-glacier mask products
folder_dems = os.path.join(folder_AGVA, "DEMs", "time_varying_DEMs", "10m")
folder_mask = os.path.join(folder_AGVA, 'Derived products', 'S2', 'Masks')

# --- RGI outlines ----------------------------------------------------------
# Read the RGI shapefile into a GeoDataFrame.
path_rgi = os.path.join(folder_AGVA, 'RGI', "rgi_2km_o3regions", "rgi_2km_o3regions.shp")
rgi_gdf = gpd.read_file(path_rgi)

In [3]:
### choose if you want to do only the 45 validation glaciers
validation_only = 1

# load rgi names that have been saved to the classified folder.
# The id is taken as characters 3-16 of each file name (the [3:17] slice) and the
# merged VRT is skipped. Sorted so the order is reproducible from run to run.
rgis_folder = sorted({ fname[3:17] for fname in os.listdir(folder_class) if fname!='merged.vrt' })

# open list of validation glaciers
all_validation_df = pd.read_csv(os.path.join(folder_AGVA, 'Validation', 'Validation Glaciers.csv'))

# get rgi names for given o2 region
rgis_o2 = rgi_gdf[rgi_gdf['O2Region']=='4']['RGIId'].values

# select which rgis to analyze
if validation_only:
    rgis_to_analyze = sorted( set(rgis_folder).intersection(all_validation_df['RGIId'].values) )
else:
    # rgis_to_analyze = ["RGI60-01.09162"] # just a single rgi
    rgis_to_analyze = rgis_folder # everything that is available
#     rgis_to_analyze = list( set(rgis_folder).intersection(set(rgis_o2)) ) # all the rgis in the folder than are in this o2region

# lookup table RGIId -> Area. Indexing this gives ONE scalar per glacier, whereas
# rgi_gdf[...]['Area'].values gives a 1-element array per glacier (an object
# column that sorts poorly) or an empty array when the id is not in the shapefile.
area_by_rgi = rgi_gdf.drop_duplicates('RGIId').set_index('RGIId')['Area']

# glaciers without an outline cannot be processed downstream, so report and skip them
missing_rgis = [ r for r in rgis_to_analyze if r not in area_by_rgi.index ]
if missing_rgis:
    print(f"Skipping {len(missing_rgis)} rgis that are not in the RGI shapefile: {missing_rgis}")
    rgis_to_analyze = [ r for r in rgis_to_analyze if r in area_by_rgi.index ]

# make df with one row per glacier and its (scalar) area
rgis_to_analyze_df = pd.DataFrame({"RGIId":rgis_to_analyze, 'Area':area_by_rgi.loc[rgis_to_analyze].values})

# sort however you want (stable, so equal areas keep their alphabetical order)
rgis_to_analyze_df = rgis_to_analyze_df.sort_values('Area', kind='stable')

# grab rgi names, now ordered from smallest to largest glacier
rgis_to_analyze = rgis_to_analyze_df['RGIId'].values


print(len(rgis_to_analyze_df))
# print(rgis_to_analyze[:10])
# print(rgis_to_analyze_df[:10])

45


In [8]:
# Temporal smoothing of the daily snow/ablation classification, one glacier and
# one year at a time. For every usable date the classification is averaged with
# the other usable dates inside a short window, re-thresholded, and the fraction
# of the glacier observed after smoothing is written back to the observed csv.
skip = 0

# smoothing window: each date is combined with every usable observation from
# `days_minus` days before it up to `days_plus` days after it
days_plus = 3
days_minus = 7

for i in range(len(rgis_to_analyze)):
#     if i>0: continue

    # subset rgi to single outline, by choosing rgiid or rgi name
    rgiid = rgis_to_analyze[i]
#     if rgiid!="RGI60-01.21721": continue

    # quickly grab glacier area
    ga = rgi_gdf[rgi_gdf['RGIId']==rgiid]['Area'].values[0]

    # NOTE: while this filter is active, every `ga>500` ("coarse") branch below
    # is unreachable; those branches only matter once the filter is lifted
    if ga>300: continue

    # set folder
    if validation_only:
        folder_save = os.path.join(folder_AGVA, 'Derived products', 'S2', 'Validation')
    else:
        folder_save = os.path.join(folder_AGVA, 'Derived products', 'S2')
    folder_daily = os.path.join(folder_save, 'Daily AAs')

    # glaciers above 500 (area units as stored in the RGI 'Area' column) use the
    # coarsened products, whose file names carry a "_coarse" tag
    is_coarse = ga>500
    coarse_tag = "_coarse" if is_coarse else ""

    # check if this glacier has been run already, skip if so (check currently disabled)
    temp_path = os.path.join(folder_daily, f"S2_{rgiid}_2022_daily_AAs_shadowed{coarse_tag}_smoothed.nc")
#     if os.path.exists(temp_path):
#         continue

    # print progress
    print(f"\nStarting {i+1} of {len(rgis_to_analyze)}: {rgiid}  {ga} km2")

    # grab just this rgi geometry and info
    rgi_single = rgi_gdf[rgi_gdf['RGIId']==rgiid].to_crs("EPSG:3338")
    single_geometry = rgi_single.geometry

    # single_geometry = single_geometry.buffer(-100) #what if we buffer out the exterior 100 meters of the glacier

    # define the coarsen scale (only needed, and only defined, for coarse glaciers)
    if ga>1000:
        scale=5
    elif ga>500:
        scale=3

    # open glacier mask
    glacier_mask = xr.open_dataset(os.path.join(folder_mask, f"S2_{rgiid}_mask.nc"), chunks='auto').glacier

    if is_coarse:
        glacier_mask = glacier_mask.coarsen({"x":scale, "y":scale}, boundary="trim").median(skipna=True).astype('uint8')

    # sum of the mask, used below as the denominator of the observed fraction
    glacier_pixels = glacier_mask.sum().values

    # open the list of the % observed on each date, add a column which will hold post-smoothing %
    obs_df_path = os.path.join(folder_daily, 'observed', f"S2_{rgiid}_observed.csv")
    obs_df = pd.read_csv(obs_df_path)
    obs_df['observed_after_smoothing'] = np.zeros(len(obs_df))

    # get the dates that have >5% observed
    usable_dates = obs_df[obs_df['observed_initial']>0.05]['Date']

    # doing rolling smoothing one year at a time
    for y in [2018,2019,2020,2021,2022]:
#         if y!=2018: continue
        if ga>50: print(y)

        # pick the file and chunking depending on glacier size
        path_open = os.path.join(folder_daily, f"S2_{rgiid}_{y}_daily_AAs_shadowed{coarse_tag}.nc")
        if is_coarse:
            open_kwargs = {'chunks':{'time':1}}
        elif ga>150:
            open_kwargs = {'chunks':{'time':10}}
        else:
            open_kwargs = {} # if small glacier, we dont need to chunk

        # subset usable dates to this year ('Date' strings start with the year)
        usable_dates_y = [ d for d in usable_dates.values if d[:4]==str(y) ]

        # the context manager closes the yearly file once this year is finished,
        # so file handles do not pile up over glaciers and years
        with xr.open_dataset(path_open, **open_kwargs) as snow_raw:

            # if 'coarse' comes in as 0(nodata), 1(ablation), 2(snow)
            # if not coarse, comes in as 0(nodata), 1(snow), 2-5(ablation)
            # we want to make 1=snow, 0=ablation, nan=cloud,shadow,off-glacier
            if is_coarse:
                snow = xr.where(snow_raw==0, np.nan, xr.where(snow_raw==1, 0, 1))
            else:
                snow = snow_raw.where(snow_raw!=0, np.nan).where(snow_raw<=1, 0)

            # subset to the "good" dates (remove those with no usable observations)
            snow = snow.sel(time=usable_dates_y)

            # get list of the date of every observation
            time_values = pd.to_datetime(snow.time.values)

            # now iterate through each date. get all obs inside the smoothing
            # window around it and average them through time
            all_smoothed = []
            for date in time_values:

                # offsets of every observation relative to this date
                diffs = time_values-date

                # dates from `days_minus` days before to `days_plus` days after (inclusive)
                in_window = (diffs>=-pd.Timedelta(days=days_minus)) & (diffs<=pd.Timedelta(days=days_plus))
                good_dates = time_values[in_window].values

                # select obs from these dates, take average
                if len(good_dates)==1: # we have to treat it differently if there is only 1 "good date"
                    # the slice selection already keeps a length-1 time dimension
                    smoothed = snow.sel(time = slice(good_dates[0], good_dates[-1]))['class']
                else:
                    # the mean drops the time dimension, so add it back for this date
                    smoothed = snow.sel(time = slice(good_dates[0], good_dates[-1])).mean(dim='time', skipna=True).expand_dims(time=[date])['class']

                # fix to 0(nodata), 1(ablation), 2(snow): snow where at least half
                # of the valid observations in the window were snow
                smoothed = xr.where(smoothed.isnull(), 0, xr.where(smoothed>=0.5, 2, 1)).astype('uint8')

                # count what fraction of glacier is observed now, add to df
                observed_today = (xr.where(smoothed>0, 1, 0).sum(dim=['x','y'])/glacier_pixels).values
                obs_df.loc[obs_df['Date'] == str(date)[:10], 'observed_after_smoothing'] = observed_today.round(4)

                # save this to list
                all_smoothed.append(smoothed)

            # now at the end concat them all together, sort by date
            if len(all_smoothed)>0:
                snow2 = xr.concat(all_smoothed, dim='time').sortby('time')

                ### save
                path_save = os.path.join(folder_daily, f"S2_{rgiid}_{y}_daily_AAs_shadowed{coarse_tag}_smoothed.nc")

                save_xr = snow2.sel(time=slice(f"{y}-01-01", f"{y}-12-31")).astype('uint8').rename('class')

                # specify compression/encoding
                encoding = {"class":{"zlib": True}}#, "spatial_ref":{"zlib": False}}

                # save (currently disabled: only the observed csv below is updated).
                # keep this inside the `with` block so the source file is still open
#                 save_xr.to_netcdf(path_save, encoding=encoding)

        # save csv holding the % observed (rewritten after every year so partial progress is kept)
        obs_df.to_csv(obs_df_path, index=False)

    print("Smoothed daily AAs made")

print("Done!")
#     if ga>500:
#         for y in (2018,2019,2020,2021,2022):
#             os.remove(os.path.join(folder_save, f'temp{y}.nc'))

    


Starting 1 of 45: RGI60-01.10910  2.084 km2
Smoothed daily AAs made

Starting 2 of 45: RGI60-01.00787  2.126 km2
Smoothed daily AAs made

Starting 3 of 45: RGI60-01.23606  2.344 km2
Smoothed daily AAs made

Starting 4 of 45: RGI60-01.15253  2.551 km2
Smoothed daily AAs made

Starting 5 of 45: RGI60-01.03379  2.578 km2
Smoothed daily AAs made

Starting 6 of 45: RGI60-01.16719  2.681 km2
Smoothed daily AAs made

Starting 7 of 45: RGI60-01.17321  2.88 km2
Smoothed daily AAs made

Starting 8 of 45: RGI60-01.13462  3.206 km2
Smoothed daily AAs made

Starting 9 of 45: RGI60-01.13483  3.216 km2
Smoothed daily AAs made

Starting 10 of 45: RGI60-01.02584  3.441 km2
Smoothed daily AAs made

Starting 11 of 45: RGI60-01.03215  3.998 km2
Smoothed daily AAs made

Starting 12 of 45: RGI60-01.01666  4.243 km2
Smoothed daily AAs made

Starting 13 of 45: RGI60-01.12548  4.314 km2
Smoothed daily AAs made

Starting 14 of 45: RGI60-01.13930  4.404 km2
Smoothed daily AAs made

Starting 15 of 45: RGI60-01.0

In [None]:
# usable = xr.where(snow2==0,0,1).sum(dim='time')#.to_array()

# v1 = xr.where(snow2==2,1,0).sum(dim='time')/usable#.to_array()
# v2 = xr.where(snow2==1,1,0).sum(dim='time')/usable#.to_array()

# v = ((v1+1)/(v2+1))/glacier_mask
# v3 = xr.where(v>1,1,0)/glacier_mask

# fig,axs = plt.subplots(1,4, figsize=(15,5))
# v1.plot(ax=axs[0])
# v2.plot(ax=axs[1])
# usable.plot(ax=axs[2])
# v.plot(ax=axs[3])

# for ax in axs:
#     ax.axis('equal')