In [1]:
import os
import rasterio as rio
import numpy as np
import shapely
import pyproj
import geopandas as gpd
import matplotlib.pyplot as plt
import rioxarray as riox
import rasterio as rio
import xarray as xr
import netCDF4
from osgeo import gdal
import pandas as pd
from datetime import datetime
import dask.array
import importlib

import sys
sys.path.append('../')
import snowFun

In [2]:
# define folder and file paths
folder_AGVA = os.path.join('C:',os.sep,'Users','lzell','OneDrive - Colostate','Desktop',"AGVA")
folder_dems = os.path.join(folder_AGVA, "DEMs", "time_varying_DEMs", "10m")
folder_class = os.path.join(folder_AGVA, 'classified images', 'S2_Classified_Cloudmasked_Merged')
folder_cloud = os.path.join(folder_AGVA, 'classified images', 'S2_Cloud_Merged')
folder_meta = os.path.join(folder_AGVA, "classified images", "meta csv", "S2")
folder_mask = os.path.join(folder_AGVA, 'Derived products', 'S2', 'Masks')
folder_slope = os.path.join(folder_AGVA, 'Derived products', 'S2', 'Slopes')
folder_save = os.path.join(folder_AGVA, 'Derived products', 'S2')
folder_debris = os.path.join(folder_AGVA, 'debris cover', 'raster')

# open rgi
path_rgi = os.path.join(folder_AGVA, 'RGI', "rgi_2km_o3regions", "rgi_2km_o3regions.shp")
rgi_gdf = gpd.read_file(path_rgi)

### open the validated big glacier dates
path_open = os.path.join(folder_save, 'Big Glacier Check', "first_look.csv")
df_dates = pd.read_csv(path_open)

In [3]:
df_dates

Unnamed: 0,RGIId,Area,2018,2019,2020,2021,2022
0,RGI60-01.17423,[512.357],1,2019-07-18,2020-08-11,1,1
1,RGI60-01.13538,[514.803],2018-09-05,2019-08-09,2020-08-28,1,1
2,RGI60-01.01390,[521.396],2018-09-29,1,2020-09-15,1,1
3,RGI60-01.03377,[523.786],1,1,1,2021-08-23,1
4,RGI60-01.27108,[534.228],2018-09-26,1,2020-09-05,1,1
5,RGI60-01.20796,[549.286],2018-09-12,2019-09-04,2020-09-16,2021-08-22,2022-09-28
6,RGI60-01.16545,[582.83],2018-09-20,1,1,1,2022-08-12
7,RGI60-01.16121,[592.219],1,1,2020-09-11,2021-09-04,1
8,RGI60-01.04375,[612.706],1,2019-08-09,1,1,2022-09-12
9,RGI60-01.26738,[718.416],1,1,1,1,1


In [4]:
# iterate though each of the large glaciers
for idx,row in df_dates.iterrows():
#     if idx>0: break
    row_save=row
     
    # grab rgiid
    rgiid = row['RGIId']
    print(idx, len(df_dates), rgiid)

    # open the originally IDed annual best data, add year column
    path_open_original = os.path.join(folder_save, 'Annual AAs', 'csv', f"S2_{rgiid}_2018_2022_annual_AAs.csv")
    ela_df_orig = pd.read_csv(path_open_original)
    ela_df_orig['year'] = [ int(d[:4]) for d in ela_df_orig['date'] ]
    
    # open the csv holding entire ela timeseries
    path_open = os.path.join(folder_save, "ELAs", f"{rgiid}_ELAs.csv")
    full_ela_df = pd.read_csv(path_open)
    
    # open glacier mask
    glacier_mask = xr.open_dataset(os.path.join(folder_mask, f"S2_{rgiid}_mask.nc"), chunks='auto').glacier
    
    # open debris products
    path_debris = os.path.join(folder_debris, f"{rgiid}_debris.tif")
    debris = riox.open_rasterio(path_debris)
    
    # open glacier slope
    glacier_slope = xr.open_dataset(os.path.join(folder_slope, f"S2_{rgiid}_slope.nc"), chunks='auto').slope
    max_slope = 25
    
    # create binary mask for what is usable slopes
    slope_mask = xr.where(glacier_slope<=max_slope, 1, 0)
    
    # make sure dimensions match up
    slope_mask = slope_mask.reindex_like(glacier_mask, method='nearest')
    
    # function to format metadata and attributes
    def format_xr_to_save(xr_da):
        xr_da.attrs["res"] = (10,10)
        xr_da.attrs["crs"] = "EPSG:3338"
        xr_da.attrs["transform"] = [10,0,0,0,-10,0]
        xr_da.attrs["_FillValue"] = 0
        xr_da.attrs["long_name"] = rgiid
        xr_da.attrs["description"] = "0: off glacier, no data. 1: ablation. 2: accumulation. 3: on glacier, no data"
        xr_da.name = "accumulation_area"

        xr_da.x.attrs["units"] = "meters"
        xr_da.y.attrs["units"] = "meters"
        xr_da.x.attrs["long_name"] = 'x'
        xr_da.y.attrs["long_name"] = 'y'

        return xr_da
    
    # lists that will hold updated csv data
    all_dates = []
    all_elas = []
    all_observed_after_smoothings=[]
    all_aars=[]
    all_off_glaciers=[]
    
    ### then go through each year
    all_maps = [] # list to save all obs
    for y in [2018,2019,2020,2021,2022]:
#         if y!=2018: continue

        print(y)
            
        # get the date of the best snow cover observation
        best_date = row[str(y)]
        if best_date=='1':
            best_date = ela_df_orig[ela_df_orig['year']==y]['date'].values[0]
        
        # grab the ela, etc... obs from this date
        full_data_best_date = full_ela_df[full_ela_df['date']==best_date]
        
        # we are going to open the original 10 m resolution snow cover data and reprocess/filter/smooth just this date
        path_open = os.path.join(folder_save, 'Daily AAs', f"S2_{rgiid}_{y}_daily_AAs_shadowed.nc")
        snow = xr.open_dataset(path_open, chunks={'time':1})
        
        # we want to make 1=snow, 0=ablation, nan=cloud,shadow,off-glacier
        snow = snow.where(snow!=0, np.nan).where(snow<=1, 0)
        
        # get list of the date of every observation
        time_values = pd.to_datetime(snow.time.values)
        
        # calculate how many days separate each of time_values and best_date
        diffs = time_values-pd.to_datetime(best_date)
        
        # select just the days within the correct range
        days_plus = 3
        days_minus = 7
        good_dates = time_values[(diffs>=f"-{days_minus}d") & (diffs<=f"{days_plus}d")].values
        
        # select obs from these dates, take average
        if len(good_dates)==1: # we have to treat it differently if there is only 1 "good date"
            snow_out = snow.sel(time = slice(good_dates[0], good_dates[-1]))['class']

        else:
            snow_out = snow.sel(time = slice(good_dates[0], good_dates[-1])).mean(dim='time', skipna=True).expand_dims(time=[pd.to_datetime(best_date)])['class']
            
        # fix to 0(nodata), 1(ablation), 2(snow)
        snow_out = xr.where(snow_out.isnull(), 0, xr.where(snow_out>=0.5, 2, 1)).astype('uint8')#.expand_dims(time=[date])
        
        # add debris cover where we have no data
        snow_out = xr.where( (debris==1) & (snow_out==0), 1, snow_out)

        # add to list to save
        all_maps.append(snow_out.copy())
        
        # last formatting
        snow_out = format_xr_to_save(snow_out).squeeze()
        
        # define path to save, save
        out_path = os.path.join(folder_save, 'Big Glacier Check', 'reprocessed', f"S2_{rgiid}_{y}_AA_reprocessed.tif")
        snow_out.rio.to_raster(raster_path=out_path, compress='DEFLATE')
        
        ### update csv info with the new date
        obs_new = full_data_best_date['observed_after_smoothing'].values[0]
        aar_new = full_data_best_date['aar'].values[0]
        ela_new = full_data_best_date['ela'].values[0]
        off_new = full_data_best_date['off_glacier'].values[0]
        
        all_dates.append(best_date)
        all_elas.append(ela_new) 
        all_off_glaciers.append(off_new)
        all_observed_after_smoothings.append(obs_new) 
        all_aars.append(aar_new)
    
    # update csv and save
    # ela observed_after_smoothing aar off_glacier
    ela_df_orig['date'] = all_dates 
    ela_df_orig['ela'] = all_elas 
    ela_df_orig['observed_after_smoothing'] = all_observed_after_smoothings 
    ela_df_orig['aar'] = all_aars 
    ela_df_orig['off_glacier'] = all_off_glaciers 
    ela_df_orig.to_csv(path_open_original, index=False)
    
    # save 5-year average product
    if len(all_maps)>0:
        print('all')
        print()
        
        # define save paths
        path_xr_all = os.path.join(folder_save, 'Big Glacier Check', 'reprocessed', f"S2_{rgiid}_2018_2022_annual_AAs_reprocessed.nc")
        path_tif_avg = os.path.join(folder_save, 'Big Glacier Check', 'reprocessed', f"S2_{rgiid}_2018_2022_average_AA_reprocessed.tif")
        
        # compute average map
        all_maps = xr.concat(all_maps, dim='time')
        all_maps = all_maps.where(all_maps.isin([1,2]), np.nan)
        average_map = all_maps.median('time', skipna=True).astype('uint8')#+glacier_mask
        
        save_xr_all = format_xr_to_save(all_maps.astype('uint8'))
        save_xr_avg = format_xr_to_save(average_map)

        # specify compression/encoding
        encoding = {"accumulation_area":{"zlib": True}}
        
        # save
        save_xr_all.to_netcdf(path_xr_all, encoding=encoding)
        save_xr_avg.rio.to_raster(raster_path=path_tif_avg, compress='DEFLATE')
print("Done!")

0 22 RGI60-01.17423
2018
2019
2020
2021
2022
all

1 22 RGI60-01.13538
2018
2019
2020
2021
2022
all

2 22 RGI60-01.01390
2018
2019
2020
2021
2022
all

3 22 RGI60-01.03377
2018
2019
2020
2021
2022
all

4 22 RGI60-01.27108
2018
2019
2020
2021
2022
all

5 22 RGI60-01.20796
2018
2019
2020
2021
2022
all

6 22 RGI60-01.16545
2018
2019
2020
2021
2022
all

7 22 RGI60-01.16121
2018
2019
2020
2021
2022
all

8 22 RGI60-01.04375
2018
2019
2020
2021
2022
all

9 22 RGI60-01.26738
2018
2019
2020
2021
2022
all

10 22 RGI60-01.14883
2018
2019
2020
2021
2022
all

11 22 RGI60-01.10689
2018
2019
2020
2021
2022
all

12 22 RGI60-01.23649
2018
2019
2020
2021
2022
all

13 22 RGI60-01.17614
2018
2019
2020
2021
2022
all

14 22 RGI60-01.17183
2018
2019
2020
2021
2022
all

15 22 RGI60-01.14683
2018
2019
2020
2021
2022
all

16 22 RGI60-01.15769
2018
2019
2020
2021
2022
all

17 22 RGI60-01.16201
2018
2019
2020
2021
2022
all

18 22 RGI60-01.17566
2018
2019
2020
2021
2022
all

19 22 RGI60-01.14443
2018
2019
2020
2021


In [5]:
row_save

RGIId    RGI60-01.13696
Area         [3362.656]
2018         2018-09-30
2019         2019-08-11
2020         2020-09-12
2021                  1
2022                  1
Name: 21, dtype: object

In [6]:
ela_df_orig

Unnamed: 0,date,ela,observed_after_smoothing,aar,off_glacier,year
0,2018-09-30,1440.0,0.9185,0.4686,False,2018
1,2019-08-11,1480.0,0.9509,0.4106,False,2019
2,2020-09-12,1480.0,0.9527,0.4544,False,2020
3,2021-08-30,1060.0,0.8846,0.5223,False,2021
4,2022-09-04,1190.0,0.8306,0.5367,False,2022


In [7]:
full_data_best_date

Unnamed: 0,date,ela,fraction_observed_slopemasked,aar,off_glacier,observed_initial,terrain_shadows,observed_after_smoothing
173,2022-09-04,1190.0,0.9291,0.5367,False,0.7939,0.0369,0.8306


In [8]:
snow_out

Unnamed: 0,Array,Chunk
Bytes,68.23 MiB,68.23 MiB
Shape,"(9968, 7177)","(9968, 7177)"
Dask graph,1 chunks in 24 graph layers,1 chunks in 24 graph layers
Data type,uint8 numpy.ndarray,uint8 numpy.ndarray
"Array Chunk Bytes 68.23 MiB 68.23 MiB Shape (9968, 7177) (9968, 7177) Dask graph 1 chunks in 24 graph layers Data type uint8 numpy.ndarray",7177  9968,

Unnamed: 0,Array,Chunk
Bytes,68.23 MiB,68.23 MiB
Shape,"(9968, 7177)","(9968, 7177)"
Dask graph,1 chunks in 24 graph layers,1 chunks in 24 graph layers
Data type,uint8 numpy.ndarray,uint8 numpy.ndarray


In [9]:
smoothed.squeeze().plot()

NameError: name 'smoothed' is not defined

In [None]:
snow_out.squeeze().plot()