In [1]:
import os
import rasterio as rio
import numpy as np
import shapely
import pyproj
import geopandas as gpd
import matplotlib.pyplot as plt
import rioxarray as riox
import rasterio as rio
import xarray as xr
import netCDF4
from osgeo import gdal
import pandas as pd
from datetime import datetime
import dask.array

import sys
sys.path.append('../')
import snowFun

In [2]:
# define folder and file paths
# Root of the AGVA project tree. Defaults to the original workstation location; set the
# AGVA_FOLDER environment variable to run on another machine without editing this cell.
folder_AGVA = os.environ.get(
    'AGVA_FOLDER',
    os.path.join('C:',os.sep,'Users','lzell','OneDrive - Colostate','Desktop',"AGVA"),
)

# every other location is expressed relative to the project root
folder_dems = os.path.join(folder_AGVA, "DEMs", "time_varying_DEMs", "10m")
folder_class = os.path.join(folder_AGVA, 'classified images', 'S2_Classified_Cloudmasked_Merged')
folder_cloud = os.path.join(folder_AGVA, 'classified images', 'S2_Cloud_Merged')
folder_meta = os.path.join(folder_AGVA, "classified images", "meta csv", "S2")   # per-glacier metadata csv files
folder_mask = os.path.join(folder_AGVA, 'Derived products', 'S2', 'Masks')       # per-glacier mask NetCDF files
folder_shadow = os.path.join(folder_AGVA, 'classified images', "S2_Shadows")     # per-glacier multi-band GeoTIFFs

# open rgi (glacier outlines; the RGIId, Area and O2Region columns are used below)
path_rgi = os.path.join(folder_AGVA, 'RGI', "rgi_2km_o3regions", "rgi_2km_o3regions.shp")
rgi_gdf = gpd.read_file(path_rgi)

In [3]:
### choose if you want to do only the 45 validation glaciers
validation_only = False

# RGI ids that have a raster in the shadow folder. File names start with "S2_" followed by
# the 14-character RGIId, hence the [3:17] slice; 'merged.vrt' is not a per-glacier file.
# Sorting makes the processing order reproducible between runs.
rgis_folder = sorted({ f[3:17] for f in os.listdir(folder_shadow) if f!='merged.vrt' })

# open list of validation glaciers
all_validation_df = pd.read_csv(os.path.join(folder_AGVA, 'Validation', 'Validation Glaciers.csv'))

# get rgi names for given o2 region
rgis_o2 = rgi_gdf[rgi_gdf['O2Region']=='4']['RGIId'].values

# select which rgis to analyze
if validation_only:
    rgis_to_analyze = sorted( set(rgis_folder).intersection(all_validation_df['RGIId'].values) )
else:
    rgis_to_analyze = rgis_folder # everything that is available
    # alternatives:
    # rgis_to_analyze = ["RGI60-01.09162"] # just a single rgi
    # rgis_to_analyze = sorted( set(rgis_folder).intersection(rgis_o2) ) # only rgis in this o2region

# Look up every glacier area in one pass. Indexing by RGIId gives one scalar per glacier
# (instead of a one-element array) and avoids rescanning rgi_gdf for each id.
area_by_rgi = rgi_gdf[['RGIId', 'Area']].drop_duplicates('RGIId').set_index('RGIId')['Area']

# make df
rgis_to_analyze_df = pd.DataFrame({
    "RGIId": rgis_to_analyze,
    'Area': area_by_rgi.reindex(rgis_to_analyze).values,
})

# ids without an area in rgi_gdf cannot be processed downstream: report them and drop them
no_area = rgis_to_analyze_df['Area'].isna()
if no_area.any():
    print(f"Skipping {int(no_area.sum())} RGIId(s) without an Area in rgi_gdf: "
          f"{rgis_to_analyze_df.loc[no_area, 'RGIId'].tolist()}")
    rgis_to_analyze_df = rgis_to_analyze_df[~no_area]

# sort however you want (stable, so equal areas keep their alphabetical order)
rgis_to_analyze_df = rgis_to_analyze_df.sort_values('Area', kind='stable')

# grab rgi names
rgis_to_analyze = rgis_to_analyze_df['RGIId'].values

print(len(rgis_to_analyze_df))

3031


In [4]:
# Export, for every glacier in rgis_to_analyze, one NetCDF per year holding the
# May-November images of its shadow raster (variable name 'class', dtype uint8).

# years to export; the LAST one doubles as the "already processed" marker checked below,
# so it must stay the last file written for a glacier
years_to_save = [2018, 2019, 2020, 2021, 2022]

# the output location does not depend on the glacier: resolve it (and create it) once
if validation_only:
    folder_save = os.path.join(folder_AGVA, 'Derived products', 'S2', 'Validation')
else:
    folder_save = os.path.join(folder_AGVA, 'Derived products', 'S2')
folder_save_shadows = os.path.join(folder_save, 'Shadows')
os.makedirs(folder_save_shadows, exist_ok=True)

for i, rgiid in enumerate(rgis_to_analyze):

    # quickly grab glacier area (drives the dask and logging decisions below)
    ga = rgi_gdf[rgi_gdf['RGIId']==rgiid]['Area'].values[0]

    # check if this glacier has been run already, skip if so
    temp_path = os.path.join(folder_save_shadows, f"S2_{rgiid}_{years_to_save[-1]}_daily_shadows.nc")
    if os.path.exists(temp_path):
        continue

    print(f"\nStarting {i+1} of {len(rgis_to_analyze)}: {rgiid}  {ga} km2")

    # open the classification data. only chunk (use dask) if the glacier is "big"
    file_name = f"S2_{rgiid}_2018-01-01_2023-01-01"
    path_tif = os.path.join(folder_shadow, f"{file_name}.tif")
    if ga>150:
        xr_class = riox.open_rasterio(path_tif, chunks={'band':10})
    else:
        xr_class = riox.open_rasterio(path_tif)

    # load metadata csv; its 'date' column holds YYYYMMDD values
    meta_fp = os.path.join(folder_meta, f"{file_name}.csv")
    meta_df = pd.read_csv(meta_fp)

    # format time axis to pandas datetime, like xarray wants
    datetimes = pd.to_datetime([ f"{str(d)[:4]}-{str(d)[4:6]}-{str(d)[6:]}" for d in meta_df['date'] ])

    # subset to just may-november
    datetimes = datetimes[ (datetimes.month>=5) & (datetimes.month<=11) ]

    # The dates are attached to the bands purely by position, so the two counts must agree.
    # Fail with a message that names the glacier instead of an opaque xarray size error.
    n_bands = xr_class.sizes['band']
    if len(datetimes) != n_bands:
        raise ValueError(
            f"{rgiid}: raster has {n_bands} bands but the metadata csv lists "
            f"{len(datetimes)} May-November dates; cannot label the time axis"
        )

    # label each band with its acquisition date
    xr_class = xr_class.rename({"band":"time"})
    xr_class['time'] = datetimes

    if len(datetimes)!=len(datetimes.unique()):
        # merge images on same day, if there are repeated dates (groupby also sorts by date)
        xr_class = xr_class.groupby('time').min('time')
    elif not datetimes.is_monotonic_increasing:
        # the label-based slice below needs a sorted time index
        xr_class = xr_class.sortby('time')

    # at this point, xr_class is 0 off-glacier, 0 in shadow/cloud, and 1-6 in usable areas
    # save one year at a time
    for y in years_to_save:
        if ga>200: print(y)

        path_save = os.path.join(folder_save_shadows, f"S2_{rgiid}_{y}_daily_shadows.nc")

        # new method we only use months 5-11
        save_xr = xr_class.sel(time=slice(f"{y}-05-01", f"{y}-11-30")).astype('uint8').rename('class')

        # specify compression/encoding
        encoding = {"class":{"zlib": True}}

        # save
        save_xr.to_netcdf(path_save, encoding=encoding)

    print("done")
print("All done!")

All done!
