In [None]:
import os
import rasterio as rio
import numpy as np
import shapely
import pyproj
import geopandas as gpd
import matplotlib.pyplot as plt
import rioxarray as riox
import rasterio as rio
import xarray as xr
import netCDF4
from osgeo import gdal
import pandas as pd
from datetime import datetime
import snowFun
import dask.array
# %matplotlib widget

# ---- folder and file paths ----
# Root of the AGVA project tree; every other folder below is built from it.
folder_AGVA = os.path.join('C:', os.sep, 'Users', 'lzell', 'OneDrive - Colostate', 'Desktop', "AGVA")

# Shared parent of the classified-image, cloud and metadata folders.
_classified_root = os.path.join(folder_AGVA, 'classified images')

# DEM inputs (the "10m" sub-folder of the time-varying DEMs).
folder_dems = os.path.join(folder_AGVA, "DEMs", "time_varying_DEMs", "10m")

# S2 classified (cloud-masked, merged) images, merged cloud images, and metadata csvs.
folder_class = os.path.join(_classified_root, 'S2_Classified_Cloudmasked_Merged')
folder_cloud = os.path.join(_classified_root, 'S2_Cloud_Merged')
folder_meta = os.path.join(_classified_root, "meta csv", "S2")

# Per-glacier mask files among the derived S2 products.
folder_mask = os.path.join(folder_AGVA, 'Derived products', 'S2', 'Masks')

# read the Alaska RGI outlines shapefile into a GeoDataFrame
path_rgi = os.path.join(
    os.path.join(folder_AGVA, 'RGI', "01_rgi60_Alaska"),
    "01_rgi60_Alaska.shp",
)
rgi_gdf = gpd.read_file(path_rgi)

In [None]:
### build the list of glaciers (RGI ids) that the analysis below will iterate over

# RGI ids of every outline whose O2Region attribute is '4'
in_o2_region = rgi_gdf['O2Region'] == '4'
rgis_o2 = rgi_gdf[in_o2_region]['RGIId'].values

# RGI ids that already have files in the classified folder: characters 3..16 of each
# file name hold the id (the 'merged.vrt' mosaic is not a per-glacier file, so skip it)
rgis_folder = list({fname[3:17] for fname in os.listdir(folder_class) if fname != 'merged.vrt'})

# choose which rgis to analyze (alternatives kept for convenience):
# rgis_to_analyze = ["RGI60-01.09162"] # just a single rgi
# rgis_to_analyze = rgis_folder # everything that is available
# default: all the rgis in the folder that are also in this o2 region, in sorted order
rgis_to_analyze = sorted(set(rgis_folder) & set(rgis_o2))

In [None]:
# ---- run configuration ----
# 1-based positions (within rgis_to_analyze) of the glaciers to process on this run.
# Set to None to process every glacier in rgis_to_analyze.
# Progress notes carried over from earlier runs: aggregated products still need
# 277, 280, 290; 241, 277, 280 and 405, 406, 407, 408, 420 were flagged to revisit.
glaciers_to_run = [241, 277, 280, 290]

# years for which daily products are smoothed (one file per glacier per year)
years_to_smooth = [2018, 2019, 2020, 2021, 2022]

# length of the backward-looking smoothing window, in days
SMOOTHING_WINDOW_DAYS = 15

# folder holding the derived S2 products; daily files live in its 'Daily AAs' sub-folder
folder_save = os.path.join(folder_AGVA, 'Derived products', 'S2')


def _smooth_daily_classes(snow_ds, window_days):
    """Temporally smooth the 'class' variable of a daily classification dataset.

    For every observation date, all observations from up to `window_days` days
    earlier (inclusive of the date itself) are averaged per pixel, ignoring NaN,
    and the mean is re-coded to an integer class.

    Parameters
    ----------
    snow_ds : xarray.Dataset
        Dataset with a 'time' coordinate and a 'class' data variable. Input values
        equal to 0 are treated as no-data, 1 as snow, and anything above 1 as
        ablation (per the recoding below).
    window_days : int
        Number of days to look back from each observation.

    Returns
    -------
    xarray.DataArray or None
        uint8 array with dims including 'time', sorted by time, coded
        0 (no data), 1 (ablation), 2 (snow). None when the dataset holds no
        time steps, so the caller can skip it instead of crashing in concat.
    """
    # make 1=snow, 0=ablation, nan=cloud,shadow,off-glacier
    # (both conditions are evaluated against the untouched input values)
    binary = snow_ds.where(snow_ds != 0, np.nan).where(snow_ds <= 1, 0)

    obs_times = pd.to_datetime(binary.time.values)
    if len(obs_times) == 0:
        return None

    window = pd.Timedelta(days=window_days)

    smoothed_steps = []
    for obs_time in obs_times:
        # dates falling within the window that ends on (and includes) this observation
        in_window = obs_times[(obs_times >= obs_time - window) & (obs_times <= obs_time)]

        # per-pixel fraction of valid observations in the window that were snow
        snow_fraction = binary.sel(time=in_window).mean(dim='time', skipna=True)['class']

        # fix to 0(nodata), 1(ablation), 2(snow): majority vote with ties going to snow
        recoded = xr.where(snow_fraction.isnull(), 0, xr.where(snow_fraction >= 0.5, 2, 1))
        smoothed_steps.append(recoded.astype('uint8').expand_dims(time=[obs_time]))

    # stitch the per-date results back into one time series
    return xr.concat(smoothed_steps, dim='time').sortby('time')


# ---- iterate through glaciers, smoothing one year at a time ----
for glacier_number, rgiid in enumerate(rgis_to_analyze, start=1):

    # optionally restrict the run to a hand-picked subset of glaciers
    if glaciers_to_run is not None and glacier_number not in glaciers_to_run:
        continue

    # quickly grab glacier area (only used for the progress message)
    ga = rgi_gdf.loc[rgi_gdf['RGIId'] == rgiid, 'Area']
    print(f"\nStarting {glacier_number} of {len(rgis_to_analyze)}: {rgiid}  {ga.values[0]} km2")

    for y in years_to_smooth:
        print(y)

        path_open = os.path.join(folder_save, 'Daily AAs', f"S2_{rgiid}_{y}_daily_AAs.nc")
        path_save = os.path.join(folder_save, 'Daily AAs', f"S2_{rgiid}_{y}_daily_AAs_smoothed.nc")

        # open the data with each day being its own chunk; the context manager
        # guarantees the file handle is released before moving to the next file
        with xr.open_dataset(path_open, chunks={'time': 1, 'y': -1, 'x': -1}) as snow:
            snow2 = _smooth_daily_classes(snow, SMOOTHING_WINDOW_DAYS)
            if snow2 is None:
                print(f"  no observations found in {path_open}, skipping")
                continue

            # keep only this calendar year and make sure the variable is named 'class'
            save_xr = snow2.sel(time=slice(f"{y}-01-01", f"{y}-12-31")).astype('uint8').rename('class')

            # specify compression/encoding
            encoding = {"class": {"zlib": True}}

            # save (must happen while the source file is still open: the data is lazy)
            save_xr.to_netcdf(path_save, encoding=encoding)

    print("Smoothed daily AAs made")
    