In [1]:
import os
import rasterio as rio
import numpy as np
import shapely
import pyproj
import geopandas as gpd
import matplotlib.pyplot as plt
import rioxarray as riox
import rasterio as rio
import xarray as xr
import netCDF4
from osgeo import gdal
import pandas as pd
from datetime import datetime
import snowFun
import dask.array
# %matplotlib widget

# define folder and file paths
# The project root defaults to the original OneDrive location. Set the AGVA_DIR
# environment variable to point the notebook at a copy of the data somewhere else
# (another machine / drive) without editing this cell. An unset or empty AGVA_DIR
# falls back to the default, so existing behaviour is unchanged.
_default_folder_AGVA = os.path.join('C:',os.sep,'Users','lzell','OneDrive - Colostate','Desktop',"AGVA")
folder_AGVA = os.environ.get("AGVA_DIR") or _default_folder_AGVA

# everything below is derived from the project root
folder_dems = os.path.join(folder_AGVA, "DEMs", "time_varying_DEMs", "10m")
folder_class = os.path.join(folder_AGVA, 'classified images', 'S2_Classified_Cloudmasked_Merged')
folder_cloud = os.path.join(folder_AGVA, 'classified images', 'S2_Cloud_Merged')
folder_meta = os.path.join(folder_AGVA, "classified images", "meta csv", "S2")
folder_mask = os.path.join(folder_AGVA, 'Derived products', 'S2', 'Masks')

# read the RGI Alaska outlines shapefile into a GeoDataFrame
rgi_path_parts = ('RGI', "01_rgi60_Alaska", "01_rgi60_Alaska.shp")
path_rgi = os.path.join(folder_AGVA, *rgi_path_parts)
rgi_gdf = gpd.read_file(path_rgi)

In [2]:
### build the list of glaciers (RGI ids) that the processing loop will iterate over
# RGI ids whose O2Region attribute is '4'
in_o2_region = rgi_gdf['O2Region'] == '4'
rgis_o2 = rgi_gdf.loc[in_o2_region, 'RGIId'].values

# RGI ids that already have products in the classified folder: the id is taken
# as characters [3:17] of each file name; the 'merged.vrt' entry is ignored
rgis_folder = list({fname[3:17] for fname in os.listdir(folder_class) if fname != 'merged.vrt'})

# choose what to analyze (alternatives kept for convenience)
# rgis_to_analyze = ["RGI60-01.09162"] # just a single rgi
# rgis_to_analyze = rgis_folder # everything that is available
# default: ids that are both in the folder and in this o2 region, in sorted order
rgis_to_analyze = sorted(set(rgis_folder) & set(rgis_o2))
# print(rgis_to_analyze)

In [16]:
# For each selected glacier: load the merged classification stack, put it on a
# datetime axis, merge same-day scenes, zero everything that is not a usable
# class, drop dates with almost no usable data, and write one netcdf per year.
skip = 0

# --- settings for this run ---------------------------------------------------
ONLY_THESE = [280, 290]                     # 1-based positions in rgis_to_analyze to process
PIXEL_SIZE_M = 10                           # pixel edge length (m) used to turn RGI area into a pixel count
MIN_USABLE_FRACTION = 0.05                  # dates at or below this usable fraction are discarded
YEARS_TO_SAVE = [2018,2019,2020,2021,2022]

### classes that define useable and unuseable data
bad_classes = [5] #class 5 is shadow. we will call these area unusable
good_classes = [1,2,3,4,6] #snow,firn,ice,debris,water are usable areas

# output folder and netcdf encoding are identical for every glacier, so build them once
folder_save = os.path.join(folder_AGVA, 'Derived products', 'S2')
folder_daily = os.path.join(folder_save, 'Daily AAs')
os.makedirs(folder_daily, exist_ok=True)    # to_netcdf will not create a missing directory
encoding = {"class":{"zlib": True}}#, "spatial_ref":{"zlib": False}}

for i, rgiid in enumerate(rgis_to_analyze):

    # options for skipping
#     if (i+1)<296: continue #everything done to here
#     if (i+1)>400: continue #everything done to here
#     if (i+1) not in [405,406,407,408,420, 291]: continue
    if (i+1) not in ONLY_THESE: continue
#     if rgiid == "RGI60-01.09162": skip=0
#     if skip: continue

    # this glacier's RGI record and its area (km2)
    rgi_row = rgi_gdf[rgi_gdf['RGIId']==rgiid]
    ga = rgi_row['Area']
    # pull the scalar out of the one-row Series: calling int()/float() directly on a
    # Series is deprecated in pandas and will eventually raise TypeError
    glacier_area_km2 = ga.values[0]

    # print progress
    print(f"\nStarting {i+1} of {len(rgis_to_analyze)}: {rgiid}  {glacier_area_km2} km2")

    # grab just this rgi geometry and info, reprojected to EPSG:3338
    rgi_single = rgi_row.to_crs("EPSG:3338")
    single_geometry = rgi_single.geometry

    # single_geometry = single_geometry.buffer(-100) #what if we buffer out the exterior 100 meters of the glacier

    # open glacier mask
    # NOTE(review): glacier_mask is not used anywhere below and is never closed, so every
    # processed glacier leaves a file handle open. Kept as-is in case later cells rely on
    # it -- confirm, then either remove it or close it.
    glacier_mask = xr.open_dataset(os.path.join(folder_mask, f"S2_{rgiid}_mask.nc"), chunks='auto')

    # open the classification data (lazily, in 1000x1000 spatial chunks holding all bands)
    file_name = f"S2_{rgiid}_2018-01-01_2023-01-01"
    xr_class = riox.open_rasterio(os.path.join(folder_class, f"{file_name}.tif")).chunk({'band':-1, 'y':1000, 'x':1000})#.rio.clip(single_geometry, from_disk=True, drop=True).chunk()

    # load metadata csv; its 'date' column supplies one date per band
    meta_fp = os.path.join(folder_meta, f"{file_name}.csv")
    meta_df = pd.read_csv(meta_fp)

    # format time axis to pandas datetime, like xarray wants
    # (each date is split as YYYY | MM | rest and re-joined with dashes)
    datetimes = pd.to_datetime([f"{str(d)[:4]}-{str(d)[4:6]}-{str(d)[6:]}" for d in meta_df['date']])
    xr_class = xr_class.rename({"band":"time"})
    xr_class['time'] = datetimes

    # zero out every value that is not below 20
    xr_class = xr_class.where(xr_class<20, 0)

    # merge images on same day, if there are repeated dates (per-pixel maximum)
    if not datetimes.is_unique:
        xr_class = xr_class.groupby('time').max('time')

    # get these merged dates
    datetimes_merged = xr_class.time.values

    # count total number of pixels on the glacier surface, based on the glacier rgi area
    # (km2 -> m2, divided by the area of one pixel)
    glacier_pixels = int(glacier_area_km2 * (1000*1000) / (PIXEL_SIZE_M*PIXEL_SIZE_M))
#     glacier_pixels = glacier_mask.sum().values

    # now we can mask out unusable areas in each time step
    xr_class = xr.where(xr_class.isin(good_classes), xr_class, 0)

    # fraction (0-1, despite the name) of the glacier surface that is usable on each day
    percent_usable_by_time = (xr.where(xr_class>0, 1, 0).sum(dim=['x','y'])/glacier_pixels)

    # now lets throw out days where there is essentially no usable data
    good_times = (percent_usable_by_time>MIN_USABLE_FRACTION)
    xr_class = xr_class.sel(time=good_times)

    # at this point, xr_class is 0 off-glacier, 0 in shadow/cloud, and 1-6 in usable areas

    # save one year at a time; each file spans Dec 1 of the previous year through
    # Jan 31 of the following year, so consecutive files overlap
    for y in YEARS_TO_SAVE:
        print(y)
        path_save = os.path.join(folder_daily, f"S2_{rgiid}_{y}_daily_AAs.nc")

        save_xr = xr_class.sel(time=slice(f"{y-1}-12-01", f"{y+1}-01-31")).astype('uint8').rename('class')

        # save (zlib-compressed 'class' variable)
        save_xr.to_netcdf(path_save, encoding=encoding)

    print("Initial daily AAs made")


Starting 280 of 483: RGI60-01.10575  449.57 km2
2018
2019
2020
2021
2022
Initial daily AAs made

Starting 290 of 483: RGI60-01.10689  773.873 km2
2018
2019
2020
2021
2022
Initial daily AAs made
