In [1]:
import os
import rasterio as rio
import numpy as np
import shapely
import pyproj
import geopandas as gpd
import matplotlib.pyplot as plt
import rioxarray as riox
import rasterio as rio
import xarray as xr
import netCDF4
from osgeo import gdal
import pandas as pd
from datetime import datetime
import dask.array
import importlib

import sys
sys.path.append('../')
import snowFun

In [2]:
# define folder and file paths
# Root of the AGVA project tree. Set the AGVA_DIR environment variable to run
# this notebook on another machine; when it is unset the original workstation
# path is used, so existing runs behave exactly as before.
folder_AGVA = os.environ.get(
    'AGVA_DIR',
    os.path.join('C:',os.sep,'Users','lzell','OneDrive - Colostate','Desktop',"AGVA"),
)

# inputs: DEMs, classified/cloud imagery and per-image metadata
folder_dems = os.path.join(folder_AGVA, "DEMs", "time_varying_DEMs", "10m")
folder_class = os.path.join(folder_AGVA, 'classified images', 'S2_Classified_Cloudmasked_Merged')
folder_cloud = os.path.join(folder_AGVA, 'classified images', 'S2_Cloud_Merged')
folder_meta = os.path.join(folder_AGVA, "classified images", "meta csv", "S2")

# derived products read by the later cells (per-glacier mask and slope netCDFs)
folder_mask = os.path.join(folder_AGVA, 'Derived products', 'S2', 'Masks')
folder_slope = os.path.join(folder_AGVA, 'Derived products', 'S2', 'Slopes')

# open rgi (glacier outlines shapefile; later cells use its 'RGIId', 'Area' and 'O2Region' columns)
path_rgi = os.path.join(folder_AGVA, 'RGI', "rgi_2km_o3regions", "rgi_2km_o3regions.shp")
rgi_gdf = gpd.read_file(path_rgi)

In [3]:
### choose if you want to do only the 45 validation glaciers
validation_only = 1

# load rgi names that have been saved to the classified folder
# (characters 3..16 of each file name hold the RGI id; the merged VRT is not a glacier file)
rgis_folder = list({fname[3:17] for fname in os.listdir(folder_class) if fname != 'merged.vrt'})

# open list of validation glaciers
all_validation_df = pd.read_csv(os.path.join(folder_AGVA, 'Validation', 'Validation Glaciers.csv'))

# get rgi names for given o2 region (only needed for the optional o2-region selection below)
rgis_o2 = rgi_gdf[rgi_gdf['O2Region']=='4']['RGIId'].values

# select which rgis to analyze
if validation_only:
    # validation glaciers that also have classified imagery on disk
    rgis_to_analyze = list( set(rgis_folder).intersection(set(all_validation_df['RGIId'].values)) )
else:
    # everything that is available. Alternatives:
    #   rgis_to_analyze = ["RGI60-01.09162"]                                  # just a single rgi
    #   rgis_to_analyze = list( set(rgis_folder).intersection(set(rgis_o2)) ) # only this o2 region
    rgis_to_analyze = rgis_folder

# get glacier area for each rgi as a plain scalar.
# The previous version stored `.values` (a NumPy array per glacier), which made the
# 'Area' column hold arrays and left the sort below relying on array comparison.
# Ids missing from the RGI table get NaN, which sort_values places last.
area_by_rgi = rgi_gdf.drop_duplicates(subset='RGIId').set_index('RGIId')['Area']
areas = [area_by_rgi.get(rgi, np.nan) for rgi in rgis_to_analyze]

# make df
rgis_to_analyze_df = pd.DataFrame({"RGIId":rgis_to_analyze, 'Area':areas})

# sort however you want (smallest glaciers first)
rgis_to_analyze_df = rgis_to_analyze_df.sort_values('Area')

# grab rgi names, now in sorted order
rgis_to_analyze = rgis_to_analyze_df['RGIId'].values

print(len(rgis_to_analyze_df))

45


In [4]:
skip = 0  # NOTE(review): not read anywhere in this cell; kept in case another cell uses it
importlib.reload(snowFun)  # pick up edits to the local helper module without restarting the kernel

# ---- settings shared by every glacier ------------------------------------------------
years_to_run = [2018, 2019, 2020, 2021, 2022]
min_area = 300   # glaciers smaller than this (km2, per the progress message) are skipped
max_slope = 25   # DEM cells steeper than this are set to 0 (presumably degrees; confirm against the slope product)

# outputs go to a separate folder when only the validation glaciers are processed
if validation_only:
    folder_save = os.path.join(folder_AGVA, 'Derived products', 'S2', 'Validation')
else:
    folder_save = os.path.join(folder_AGVA, 'Derived products', 'S2')

# running list of glacier/years that could not be processed
problem_file = os.path.join(folder_AGVA, 'Derived products', 'S2', 'problems.txt')


def log_problem(entry, path=problem_file):
    """Append `entry` to the problems file unless it is already listed there.

    The file is created on first use (the previous inline version raised
    FileNotFoundError if it did not exist yet and never closed the read handle).
    """
    already_logged = ""
    if os.path.exists(path):
        with open(path) as file:
            already_logged = file.read()
    if entry not in already_logged:
        with open(path, 'a') as file:
            file.write(entry)


def sort_cols(df, years=years_to_run):
    """Return `df` with the per-year total_pixels_<year> columns moved to the front.

    Only the years actually present in `df` are used, so a glacier with a skipped
    year no longer raises KeyError.
    """
    first_cols = [f"total_pixels_{yr}" for yr in years if f"total_pixels_{yr}" in df.columns]
    other_cols = [c for c in df.columns.tolist() if c not in first_cols]
    return df[first_cols + other_cols]


for i, rgiid in enumerate(rgis_to_analyze):

    # quickly grab glacier area
    ga = rgi_gdf[rgi_gdf['RGIId']==rgiid]['Area'].values[0]

    if ga<min_area: continue

    # To skip glaciers that were already processed, test here for
    # os.path.join(folder_save, 'Band SCFs', f"S2_{rgiid}_observed.csv") and `continue`.

    # print progress
    print(f"\nStarting {i+1} of {len(rgis_to_analyze)}: {rgiid}  {ga} km2")

    # grab just this rgi geometry and info, reprojected to EPSG:3338
    rgi_single = rgi_gdf[rgi_gdf['RGIId']==rgiid].to_crs("EPSG:3338")
    single_geometry = rgi_single.geometry

    # very large glaciers are processed on a coarsened grid; define the coarsen scale
    coarse = ga>500
    scale = (5 if ga>1000 else 3) if coarse else None

    # open glacier mask
    glacier_mask = xr.open_dataset(os.path.join(folder_mask, f"S2_{rgiid}_mask.nc"), chunks='auto').glacier

    # open glacier slope
    glacier_slope = xr.open_dataset(os.path.join(folder_slope, f"S2_{rgiid}_slope.nc"), chunks='auto').slope

    if coarse:
        glacier_mask = glacier_mask.coarsen({"x":scale, "y":scale}, boundary="trim").median(skipna=True).astype('uint8')

    # we'll store everything in a single csv (all years): one table per year, joined at the end
    snow_dfs = []
    obs_dfs = []

    # open the list of the % observed on each date
    obs_df_path = os.path.join(folder_save, 'Daily AAs', 'observed', f"S2_{rgiid}_observed.csv")
    obs_df = pd.read_csv(obs_df_path)

    # get the dates that have >30% initial observed. we'll drop all others and not send them to have scfs calculated
    usable_dates = obs_df[obs_df['observed_initial']>0.3]['Date']

    for y in years_to_run:

        print(f'starting SCAs {y}')

        if coarse:
            path_open = os.path.join(folder_save, 'Daily AAs', f"S2_{rgiid}_{y}_daily_AAs_shadowed_coarse_smoothed.nc")
        else:
            path_open = os.path.join(folder_save, 'Daily AAs', f"S2_{rgiid}_{y}_daily_AAs_shadowed_smoothed.nc")

        # if this path doesn't exist, add it to list of known problem glaciers/years
        if not os.path.exists(path_open):
            print('uhoh')
            log_problem(f"{rgiid}_{y}\n")
            continue

        # chunking depends on glacier size; with min_area=300 the unchunked branch is
        # only reached if that threshold is lowered
        if coarse:
            snow = xr.open_dataset(path_open, chunks={'time':1, 'y':-1, 'x':-1})['class']
        elif ga>150:
            snow = xr.open_dataset(path_open, chunks={'time':10, 'y':-1, 'x':-1})['class'] # nodata=0, ablation=1, snow=2
        else:
            snow = xr.open_dataset(path_open)['class']

        # make snow to nan, 0, 1 (instead of 0,1,2)
        snow = xr.where(snow==0, np.nan, snow-1)

        # open dem
        xr_dem = snowFun.get_year_DEM(single_geometry, y, smoothed=0)

        # if problems with dem, skip
        if len(xr_dem.x)==0 or len(xr_dem.y)==0:
            log_problem(f"{rgiid} dem\n")
            continue

        # apply max_slope to dem. anything greater than max_slope goes to 0
        xr_dem = xr_dem.where(glacier_slope<=max_slope, 0)

        # coarsen dem by sampling it at the (already coarse) snow grid coordinates
        if coarse:
            xr_dem = xr_dem.sel({"x":snow.x, "y":snow.y})

        # put the dem on the mask grid, then shave off edges so dem, mask and snow match.
        # (A stray `glacier_mask = glacier_mask.reindex` used to sit here; it replaced the
        # mask with a bound method and broke every later use, so it has been removed.)
        xr_dem = xr_dem.reindex_like(glacier_mask, method='nearest')
        x_extent = slice( min(xr_dem.x.values), max(xr_dem.x.values) )
        y_extent = slice( max(xr_dem.y.values), min(xr_dem.y.values) )  # max -> min: assumes y is stored descending

        # per-year trimmed copies, so one year's trimming never leaks into the next
        mask_y = glacier_mask.sel(x=x_extent, y=y_extent)
        snow = snow.sel(x=x_extent, y=y_extent)

        # subset to only the usable dates this year (dates are strings starting with the year)
        usable_dates_y = [ d for d in usable_dates.values if d[:4]==str(y) ]
        snow = snow.sel(time=usable_dates_y)

        # extract number of snow pixels in bands from each time step
        snow_counts = snowFun.extract_band_SCA(snow, xr_dem, mask_y, step=10)
        snow_counts = snow_counts.rename(columns={'total_pixels':f"total_pixels_{y}"})
        snow_dfs.append(snow_counts)

        # extract number of pixels observed in bands from each time step
        # (any non-NaN class, i.e. ablation or snow, counts as observed)
        obs_counts = snowFun.extract_band_SCA( xr.where(snow>=0, 1, 0) , xr_dem, mask_y, step=10)
        obs_counts = obs_counts.rename(columns={'total_pixels':f"total_pixels_{y}"})
        obs_dfs.append(obs_counts)

    if len(snow_dfs)>0:
        # concat all the years together on the elevation band index and save one csv
        # for SNOW and one for OBSERVED
        for label, yearly_dfs in (("snow", snow_dfs), ("observed", obs_dfs)):
            result_df = pd.concat([df.set_index(['z_min','z_max']) for df in yearly_dfs], axis=1, join='outer', sort=True).fillna(0)
            result_df = sort_cols(result_df)

            # save to csv
            path_save = os.path.join(folder_save, 'Band SCFs', f"S2_{rgiid}_{label}.csv")
            result_df.to_csv(path_save, index=True)

    else:
        print(f"No data for {rgiid}")

print("Done!")
        


Starting 44 of 45: RGI60-01.16558  343.098 km2
starting SCAs 2018
starting SCAs 2019
starting SCAs 2020
starting SCAs 2021
starting SCAs 2022

Starting 45 of 45: RGI60-01.01390  521.396 km2
starting SCAs 2018
starting SCAs 2019
starting SCAs 2020
starting SCAs 2021
starting SCAs 2022
Done!
