# Extract and save landscape characteristics associated with SNOTEL stations & CSO obs

In [1]:
# NOTE: the original import order is kept on purpose -- the star imports are
# order-sensitive (a later import shadows names pulled in by an earlier one).
from paths import *
import richdem as rd
from osgeo import gdal
import rasterio as rio
import numpy as np
from scipy import ndimage
import requests
import geopandas as gpd
from Depth2SWE import swe_calc
import pandas as pd
from SM_tools import *
import xarray as xr
from affine import Affine
# used by the evaluation-station selection cells
import math
import random
from sklearn.cluster import KMeans

In [2]:
# Assimilation periods, one (start, end) pair per water year.
water_years = [
    ('2018-10-01', '2019-09-30'),
    ('2019-10-01', '2020-09-30'),
]
# Parallel lists of period start / end dates consumed by the cells below.
st = [start for start, _ in water_years]
ed = [end for _, end in water_years]

In [3]:
# function to extract and save landscape characteristics 
# associated with SNOTEL stations
# NLCD land-cover code -> "SM" class code (table taken verbatim from the
# original reassignment statements; SM presumably = SnowModel -- confirm).
_NLCD_TO_SM = {
    11: 24, 12: 20,
    21: 21, 22: 21, 23: 21, 24: 21,
    31: 18,
    41: 2, 42: 1, 43: 6,
    51: 6, 52: 6,
    71: 12, 72: 12, 73: 12, 74: 12,
    81: 23, 82: 22,
    90: 9, 95: 9,
}


def _classify_aspect(aspect, slope, flat_slope=0.5):
    '''
    Collapse an aspect raster into the 4-direction key: 0=N, 2=E, 4=S, 6=W, 8=flat.

    Cells whose aspect falls outside 0-360 keep their raw value, and every
    cell with slope < flat_slope is labelled flat (8).
    '''
    aspect = np.asarray(aspect)
    # work on a copy so the aspect raster itself is not overwritten
    classes = np.array(aspect, copy=True)
    classes[(aspect >= 0) & (aspect <= 45)] = 0
    classes[(aspect > 45) & (aspect <= 135)] = 2
    classes[(aspect > 135) & (aspect <= 225)] = 4
    classes[(aspect > 225) & (aspect <= 315)] = 6
    classes[(aspect > 315) & (aspect <= 360)] = 0
    # NOTE(review): slope is computed with attrib='slope_riserun', so 0.5 is a
    # rise/run ratio (a 50% grade) -- confirm this is the intended "flat" cutoff.
    classes[np.asarray(slope) < flat_slope] = 8
    return classes


def _nlcd_to_sm(lc):
    '''
    Reassign an NLCD land-cover raster to SM classes; unmapped codes become NaN.
    '''
    # np.full (not np.empty) so unmapped / nodata cells are NaN rather than
    # whatever bytes happened to be in the freshly allocated buffer
    sm = np.full(np.shape(lc), np.nan)
    for nlcd_code, sm_code in _NLCD_TO_SM.items():
        sm[lc == nlcd_code] = sm_code
    return sm


def extract_meta(gdf,mod_proj,dem_path,lc_path):
    '''
    Sample landscape characteristics at each point and add them to gdf.

    gdf = geodataframe of point locations in the domain (SNOTEL stations or
          CSO observations); it is modified in place and also returned

    mod_proj = projection of the modeling domain

    dem_path = path to digital elevation model of domain

    lc_path = path to nlcd landcover data of domain

    Columns added: x, y (coordinates in mod_proj), dem_elev, slope, aspect
    (0=N, 2=E, 4=S, 6=W, 8=flat), lc (SM class, NaN where the NLCD code has
    no mapping) and tc (std of elevation in a 3x3 window).

    Both rasters are assumed to be in mod_proj -- TODO confirm. Points that
    fall outside a raster are not checked for.
    '''
    projected = gdf.to_crs(mod_proj)
    xs = projected.geometry.x
    ys = projected.geometry.y

    #add x y values to gdf
    gdf['x'] = xs
    gdf['y'] = ys

    #build list of coordinates from point geodataframe
    xy = list(map(list, zip(xs, ys)))

    #-----------------------------------------------------------
    #ELEVATION
    # open the DEM once: read the band, sample it, and get the row/col of
    # every point; the context manager closes the file handle afterwards
    with rio.open(dem_path) as dem_src:
        elevation = dem_src.read(1)
        gdf['dem_elev'] = [sample[0] for sample in dem_src.sample(xy)]
        rows, cols = rio.transform.rowcol(dem_src.transform, xs, ys)

    #-----------------------------------------------------------
    #SLOPE
    rda = rd.rdarray(elevation, no_data=-9999)
    slope = rd.TerrainAttribute(rda, attrib='slope_riserun')
    gdf['slope'] = np.asarray(slope)[rows, cols]

    #-----------------------------------------------------------
    #ASPECT
    aspect = rd.TerrainAttribute(rda, attrib='aspect')
    gdf['aspect'] = _classify_aspect(aspect, slope)[rows, cols]

    #-----------------------------------------------------------
    #LANDCOVER
    with rio.open(lc_path) as lc_src:
        lc = lc_src.read(1)
        # index the land-cover raster with its own transform so the lookup
        # stays correct even if its grid is not identical to the DEM grid
        lc_rows, lc_cols = rio.transform.rowcol(lc_src.transform, xs, ys)
    gdf['lc'] = _nlcd_to_sm(lc)[lc_rows, lc_cols]

    #-----------------------------------------------------------
    #TERRAIN COMPLEXITY
    # std of elevation in a 3x3 window; request float output explicitly,
    # otherwise the result takes the DEM dtype and is truncated for integer DEMs
    tc = ndimage.generic_filter(elevation, np.std, size=3, output=np.float64)
    gdf['tc'] = tc[rows, cols]

    return gdf

In [4]:
# function to extract point index from gridded data

def point_index_from_grid(gdf,dem_path):
    '''
    Find the grid cell nearest to each point and store its indices on gdf.

    gdf = geodataframe of point locations; modified in place and returned

    dem_path = path to the raster that defines the grid

    Columns added:
      x_idx = index of the nearest value in the raster's x coordinate
      y_idx = index of the nearest value in the raster's y coordinate after
              flipping it (to align with cartesian coordinates)
    '''
    # NOTE(review): xr.open_rasterio is deprecated in newer xarray releases;
    # confirm the pinned xarray version still provides it.
    da = xr.open_rasterio(dem_path)
    try:
        # presumably drops a leading '+init=' from the CRS string -- verify
        grid_crs = da.crs[6:]
        grid_x = da.x.values
        # flip y values to align with cartesian coordinates
        grid_y = np.flip(da.y.values)
    finally:
        da.close()

    # put point data into projection of gridded data
    projected = gdf.to_crs(grid_crs)
    # positional arrays, so the lookup does not depend on gdf having a 0..n-1 index
    point_x = projected.geometry.x.values
    point_y = projected.geometry.y.values

    # argmin returns the first index of the minimum distance
    gdf['x_idx'] = [int(np.abs(grid_x - px).argmin()) for px in point_x]
    gdf['y_idx'] = [int(np.abs(grid_y - py).argmin()) for py in point_y]
    return gdf

# SNOTEL

In [5]:
# Attach landscape characteristics, then grid indices, to the SNOTEL stations.
# Each step is fed the result of the previous one, so the cell does not rely
# on the helpers modifying the global `gdf` in place.
snotel_gdf = extract_meta(gdf,mod_proj,dem_path,lc_path)
snotel_gdf = point_index_from_grid(snotel_gdf,dem_path)
# save
out = assimPath + 'all_snotel_meta.geojson'
snotel_gdf.to_file(out, driver='GeoJSON')



/nfs/attic/dfh/Aragon2/CSOassim/WY/all_snotel_meta.geojson: No such file or directory
driver GeoJSON does not support creation option ENCODING


### Get SWE time series for SNOTEL
**TODO:** this step needs to be fixed; the cell below is commented out until then.

In [11]:
# for i in range(1):
#     swe = get_swe(snotel_gdf,st[i], ed[i])
# #     # save 
# #     out = assimPath + 'snotel_swe_'+st[i][0:4]+'_'+ed[i][0:4]+'.geojson'
# #     snotel_gdf.to_file(out, driver='GeoJSON')

# #     out = outpath + 'SNOTEL_data_SWEDmeters2018-09-01_2019-09-30.csv'
# # stn_swe.to_csv(out)
# #     get_swe(gdf,st, ed)
# swe

# Evaluation stations

In [6]:
# fixed seed so the cluster assignment (and therefore the evaluation /
# assimilation split) is the same on every run
CLUSTER_SEED = 42

# create dataframe of variables to cluster over
# path = assimPath + 'all_snotel_meta.geojson'
# snotel_gdf = gpd.read_file(path)
data = snotel_gdf[["dem_elev","slope","lc","tc","longitude","aspect", "latitude"]]

# NOTE(review): the variables are clustered on their raw scales and the
# categorical lc / aspect codes are treated as numbers -- confirm this is intended.

#number of clusters is 1/3 of the total sample size
clusters = math.ceil(len(data)/3)

#run kmeans
kmeans = KMeans(n_clusters = clusters, random_state = CLUSTER_SEED)
kmeans.fit(data)

#add cluster assignment to snotel gdf
snotel_gdf['cluster'] = kmeans.labels_

In [7]:
#randomly sample 1 station from each cluster
# seeded generator so the same evaluation stations are drawn on every run
rng = random.Random(42)
sample = []
for value in np.unique(snotel_gdf.cluster):
    # plain list of index labels: choice() on a pandas Index can fail on
    # Python versions that truth-test the sequence
    members = snotel_gdf.index[snotel_gdf.cluster == value].tolist()
    sample.append(rng.choice(members))

#create evaluation gdf
# `sample` holds index labels, so select by label rather than by position
samp = snotel_gdf.loc[sample,:]

# save
out = assimPath + 'eval_snotel_meta.geojson'
samp.to_file(out, driver='GeoJSON')

/nfs/attic/dfh/Aragon2/CSOassim/WY/eval_snotel_meta.geojson: No such file or directory
driver GeoJSON does not support creation option ENCODING


In [8]:
# Assimilation sites: every SNOTEL station that was not held out for evaluation,
# re-indexed from 0.
is_eval_site = snotel_gdf.code.isin(samp.code)
snotel_assim_sites = snotel_gdf[~is_eval_site].reset_index(drop=True)
# write the assimilation sites to disk
out = assimPath + 'assim_snotel_sites.geojson'
snotel_assim_sites.to_file(out, driver='GeoJSON')

/nfs/attic/dfh/Aragon2/CSOassim/WY/assim_snotel_sites.geojson: No such file or directory
driver GeoJSON does not support creation option ENCODING


# CSO

In [9]:
def get_cso(st, ed, Bbox):
    '''
    Download CSO observations for a period and prepare them for assimilation.

    st = first day of the period, 'YYYY-MM-DD'

    ed = last day of the period (inclusive), 'YYYY-MM-DD'

    Bbox = dict with keys 'lonmin', 'latmax', 'lonmax', 'latmin'

    Returns a geodataframe of the observations with the landscape columns
    added by extract_meta plus Y, M, D, LON, LAT, H (depth*10), swe
    (swe_calc output / 1000) and doy.

    Raises requests.HTTPError if the API request is not successful.
    '''
    import warnings

    #Issue CSO API observations request and load the results into a GeoDataFrame
    params = {
      "bbox": f"{Bbox['lonmin']},{Bbox['latmax']},{Bbox['lonmax']},{Bbox['latmin']}",
      "start_date": st,
      "end_date": ed,
      "format": "geojson",
      "limit": 5000,
    }

    csodata_resp = requests.get("https://api.communitysnowobs.org/observations",
                                params=params, timeout=60)
    # fail loudly on an HTTP error instead of trying to parse an error page
    csodata_resp.raise_for_status()
    csodatajson = csodata_resp.json()
    #turn into geodataframe
    gdf = gpd.GeoDataFrame.from_features(csodatajson, crs=stn_proj)

    if len(gdf) >= params["limit"]:
        warnings.warn('CSO request returned as many records as the limit allows; '
                      'the result may be truncated')

    # Filter on parsed timestamps: comparing the raw strings drops every
    # observation made on the end date ('2019-09-30T...' sorts after '2019-09-30').
    obs_time = pd.to_datetime(gdf['timestamp'], utc=True)
    period_start = pd.to_datetime(st, utc=True)
    period_end = pd.to_datetime(ed, utc=True).normalize() + pd.Timedelta(days=1)
    mask = (obs_time >= period_start) & (obs_time < period_end)
    gdf = gdf.loc[mask].reset_index(drop=True)
    obs_time = obs_time.loc[mask].reset_index(drop=True)
    print('Total number of CSO in daimain = ',len(gdf))

    ingdf = extract_meta(gdf,mod_proj,dem_path,lc_path)

    #need to format data for Hs_to_SWE conversion
    ingdf['Y'] = obs_time.dt.year
    ingdf['M'] = obs_time.dt.month
    ingdf['D'] = obs_time.dt.day
    ingdf["LON"] = ingdf.geometry.x
    ingdf["LAT"] = ingdf.geometry.y

    #convert snow depth to mm to input into density function
    ingdf['H'] = ingdf.depth*10

    #Hs to SWE
    SWE,DOY = swe_calc(ingdf.Y.values,ingdf.M.values,ingdf.D.values,ingdf.H.values,ingdf.LAT.values,ingdf.LON.values)

    #convert swe to m to input into SM
    ingdf['swe']=SWE/1000
    ingdf['doy']=DOY
    return ingdf

# Download, annotate and save the CSO observations for each assimilation period.
for start_date, end_date in zip(st, ed):

    csogdf = get_cso(start_date, end_date, Bbox)
    # index the CSO observations themselves on the model grid; passing the
    # global `gdf` here would write the SNOTEL stations to the CSO files
    csogdf = point_index_from_grid(csogdf,dem_path)

    # save
    out = assimPath + 'all_cso_'+start_date[0:4]+'_'+end_date[0:4]+'_meta.geojson'
    csogdf.to_file(out, driver='GeoJSON')

Total number of CSO in daimain =  310


/nfs/attic/dfh/Aragon2/CSOassim/WY/all_cso_2018_2019_meta.geojson: No such file or directory
driver GeoJSON does not support creation option ENCODING


Total number of CSO in daimain =  72


/nfs/attic/dfh/Aragon2/CSOassim/WY/all_cso_2019_2020_meta.geojson: No such file or directory
driver GeoJSON does not support creation option ENCODING
