# Extract and save landscape characteristics associated with SNOTEL stations & CSO obs

In [1]:
from paths import *
import richdem as rd
from osgeo import gdal
import rasterio as rio
import numpy as np
from scipy import ndimage
import requests
import geopandas as gpd
from Depth2SWE import swe_calc
import pandas as pd
from SM_tools import *
import xarray as xr
from affine import Affine

In [2]:
# set the years that will be assimilated
# st[i] and ed[i] are the start and end date strings (YYYY-MM-DD) of period i;
# the two lists must stay the same length because later cells index them together
st = ['2018-10-01','2019-10-01']
ed = ['2019-09-30','2020-09-30']

In [2]:
# NOTE(review): this cell replaces the two-period st/ed lists assigned in the
# cell just above, so only 2018-10-01 .. 2019-09-30 is processed when the
# notebook is run top to bottom -- confirm which definition is intended
st = ['2018-10-01']
ed = ['2019-09-30']

In [3]:
# function to extract and save landscape characteristics
# associated with point observations (SNOTEL stations or CSO obs)

# NLCD land-cover code -> SM land-cover class used by extract_meta
_NLCD_TO_SM_CLASS = {
    11: 24, 12: 20,
    21: 21, 22: 21, 23: 21, 24: 21,
    31: 18,
    41: 2, 42: 1, 43: 6,
    51: 6, 52: 6,
    71: 12, 72: 12, 73: 12, 74: 12,
    81: 23, 82: 22,
    90: 9, 95: 9,
}


def extract_meta(gdf,mod_proj,dem_path,lc_path):
    '''
    Sample elevation, slope, aspect class, land-cover class and terrain
    complexity at every point of gdf and store them as new columns.

    gdf = point geodataframe (e.g. SNOTEL stations in the domain).
          It is modified IN PLACE and also returned; columns added:
          x, y, dem_elev, slope, aspect, lc, tc

    mod_proj = projection of the modeling domain (passed to gdf.to_crs)

    dem_path = path to digital elevation model of domain

    lc_path = path to nlcd landcover data of domain

    returns the same gdf object with the extra columns

    Notes
    -----
    * aspect / lc are class codes stored as floats (no integer cast is applied).
    * NLCD codes that are not in the NLCD -> SM table give lc = NaN.
    * Row/column indices are computed from the DEM transform and reused for
      the land-cover raster, i.e. both rasters are assumed to share one grid
      -- TODO confirm for every domain.
    '''

    # points in the projection of the modeling domain (gdf itself keeps its CRS)
    pts = gdf.to_crs(mod_proj)

    #add projected x y values to the gdf
    gdf['x'] = pts.geometry.x
    gdf['y'] = pts.geometry.y

    #build list of coordinates from point geodataframe
    xy = list(map(list, zip(pts.geometry.x, pts.geometry.y)))

    #-----------------------------------------------------------
    #ELEVATION
    # open the DEM once: read the band, sample it at the points and get the
    # row/col of every point; the context manager closes the file afterwards
    with rio.open(dem_path) as src:
        elevation = src.read(1)
        gdf['dem_elev'] = [sample[0] for sample in src.sample(xy)]
        rows, cols = rio.transform.rowcol(
            src.transform, pts.geometry.centroid.x, pts.geometry.centroid.y)

    #-----------------------------------------------------------
    #SLOPE
    # NOTE(review): the rdarray is built without a geotransform and with a
    # hard-coded no_data of -9999 -- confirm richdem's default cell size is
    # what the slope values and the 0.5 "flat" threshold below expect
    rda = rd.rdarray(elevation, no_data=-9999)
    slope = rd.TerrainAttribute(rda, attrib='slope_riserun')
    #sample slope array
    gdf['slope'] = slope[rows,cols]

    #-----------------------------------------------------------
    #ASPECT
    aspect_class = rd.TerrainAttribute(rda, attrib='aspect')

    #4-aspect key
    #0=N, 2=E, 4=S, 6=W, 8=flat
    # the array is recoded in place; this is safe because every class code
    # written (0..8) lies below the lower bound of all later bins
    # (an 8-sector variant would use 45 degree bins centred on N, NE, E, ...
    #  with codes 0..7 and 8=flat)
    aspect_class[(aspect_class>=0) & (aspect_class<=45)] = 0
    aspect_class[(aspect_class>45) & (aspect_class<=135)] = 2
    aspect_class[(aspect_class>135) & (aspect_class<=225)] = 4
    aspect_class[(aspect_class>225) & (aspect_class<=315)] = 6
    aspect_class[(aspect_class>315) & (aspect_class<=360)] = 0
    aspect_class[slope < 0.5] = 8

    #sample aspect array
    gdf['aspect'] = aspect_class[rows,cols]

    #-----------------------------------------------------------
    #LANDCOVER
    with rio.open(lc_path) as lc_src:
        lc = lc_src.read(1)

    # reassign lc from NLCD to SM classes
    # start from NaN (not np.empty) so unmapped NLCD codes are flagged
    # instead of carrying whatever happened to be in memory
    sm_class = np.full(lc.shape, np.nan)
    for nlcd_code, sm_code in _NLCD_TO_SM_CLASS.items():
        sm_class[lc == nlcd_code] = sm_code

    #sample lc
    gdf['lc'] = sm_class[rows,cols]

    #-----------------------------------------------------------
    #TERRAIN COMPLEXITY
    # standard deviation of elevation in a 3x3 window; generic_filter returns
    # the input dtype by default, so request float output to avoid truncating
    # the result when the DEM is stored as integers.
    # NOTE: no-data cells are not masked and will inflate tc near DEM gaps
    tc = ndimage.generic_filter(elevation, np.std, size=3, output=np.float64)
    # sample tc
    gdf['tc'] = tc[rows,cols]

    return gdf

In [4]:
# function to extract point index from gridded data

def point_index_from_grid(gdf,dem_path):
    '''
    Find, for every point in gdf, the index of the nearest grid cell centre
    of the raster at dem_path.

    gdf = point geodataframe; modified IN PLACE and returned with two new
          columns:
            x_idx = index into the raster x coordinates
            y_idx = index into the raster y coordinates after flipping them,
                    so it counts from the opposite end of the y axis than the
                    raster rows do

    dem_path = path to the raster that defines the grid

    returns the same gdf object with x_idx / y_idx added
    '''
    # only the cell-centre coordinates and the CRS string are needed, so pull
    # them out and let the context manager release the file.
    # NOTE(review): xr.open_rasterio is deprecated in newer xarray releases
    with xr.open_rasterio(dem_path) as da:
        grid_x = da.x.values
        # flip y values to align with cartesian coordinates
        grid_y = np.flip(da.y.values)
        # drop the first 6 characters of the CRS string
        # (presumably a '+init=' prefix -- TODO confirm)
        grid_crs = da.crs[6:]

    # put point data into projection of gridded data
    new = gdf.to_crs(grid_crs)

    # positional arrays: works for any gdf index, not only 0..n-1
    px = new.geometry.x.to_numpy()
    py = new.geometry.y.to_numpy()

    # argmin returns the first index of the minimum distance
    x_idx = [np.argmin(np.abs(xv - grid_x)) for xv in px]
    y_idx = [np.argmin(np.abs(yv - grid_y)) for yv in py]

    gdf['x_idx'] = x_idx
    gdf['y_idx'] = y_idx
    return gdf

# SNOTEL

In [5]:
# add landscape characteristics, then grid indices, to every SNOTEL station
# (gdf, mod_proj, dem_path, lc_path and dataPath are presumably provided by the
#  star imports at the top of the notebook -- verify)
snotel_gdf = extract_meta(gdf,mod_proj,dem_path,lc_path)
# chain on the returned frame rather than relying on gdf having been
# modified in place by the call above
snotel_gdf = point_index_from_grid(snotel_gdf,dem_path)
# save
out = dataPath + 'all_snotel_meta.geojson'
snotel_gdf.to_file(out, driver='GeoJSON')



/nfs/attic/dfh/Aragon2/CSOassim/WY/all_snotel_meta.geojson: No such file or directory
driver GeoJSON does not support creation option ENCODING


### get swe timeseries for SNOTEL
**TODO:** this section still needs to be fixed — the SWE time-series retrieval in the next cell is currently commented out.

In [11]:
# for i in range(1):
#     swe = get_swe(snotel_gdf,st[i], ed[i])
# #     # save 
# #     out = assimPath + 'snotel_swe_'+st[i][0:4]+'_'+ed[i][0:4]+'.geojson'
# #     snotel_gdf.to_file(out, driver='GeoJSON')

# #     out = outpath + 'SNOTEL_data_SWEDmeters2018-09-01_2019-09-30.csv'
# # stn_swe.to_csv(out)
# #     get_swe(gdf,st, ed)
# swe

# Evaluation stations

In [6]:
from sklearn.cluster import KMeans
import math
import random

# fixed seed so the cluster assignment (and therefore the evaluation /
# assimilation split built from it) is the same on every run
KMEANS_SEED = 42

# create dataframe of variables to cluster over
# path = assimPath + 'all_snotel_meta.geojson'
# snotel_gdf = gpd.read_file(path)
# NOTE(review): the features are clustered unscaled and the lc / aspect class
# codes are treated as plain numbers -- confirm this is intended
data = snotel_gdf[["dem_elev","slope","lc","tc","longitude","aspect", "latitude"]]

#number of cluster is 1/3 of the total sample size
clusters = math.ceil(len(data)/3)

#run kmeans
kmeans = KMeans(n_clusters = clusters, random_state = KMEANS_SEED)
kmeans.fit(data,y=None)

#add cluster assignment to snotel gdf
snotel_gdf['cluster'] = kmeans.labels_

In [7]:
#randomly sample 1 station from each cluster
# dedicated seeded generator: the same evaluation stations are drawn on every
# run and the global `random` state is left untouched
eval_rng = random.Random(42)
sample = []
for value in np.unique(snotel_gdf.cluster):
    sample.append(eval_rng.choice(snotel_gdf.index[snotel_gdf.cluster == value]))

#create evaluation gdf
# `sample` holds index LABELS, so select with .loc; positional .iloc would
# pick the wrong rows whenever the index is not 0..n-1
samp = snotel_gdf.loc[sample,:]

# save
out = dataPath + 'eval_snotel_meta.geojson'
samp.to_file(out, driver='GeoJSON')

/nfs/attic/dfh/Aragon2/CSOassim/WY/eval_snotel_meta.geojson: No such file or directory
driver GeoJSON does not support creation option ENCODING


In [8]:
# assimilation sites = every SNOTEL station whose code is not among the
# evaluation stations in samp; renumber the rows from 0
snotel_assim_sites = (
    snotel_gdf
    .loc[~snotel_gdf.code.isin(samp.code)]
    .reset_index(drop=True)
)
# save sites
out = dataPath + 'assim_snotel_sites.geojson'
snotel_assim_sites.to_file(out, driver='GeoJSON')

/nfs/attic/dfh/Aragon2/CSOassim/WY/assim_snotel_sites.geojson: No such file or directory
driver GeoJSON does not support creation option ENCODING


# CSO

In [14]:
def get_cso(st, ed, Bbox):
    '''
    Download CSO snow-depth observations for a bounding box and period,
    attach landscape characteristics and convert depth to SWE.

    st, ed = start / end date strings in 'YYYY-MM-DD' form; observations on
             both days are kept

    Bbox = dict with keys 'lonmin', 'latmax', 'lonmax', 'latmin'

    returns a geodataframe of the observations with the columns added by
    extract_meta plus Y, M, D, LON, LAT, H, swe and doy

    Uses the notebook-level names stn_proj, mod_proj, dem_path and lc_path.
    Raises requests.HTTPError if the API answers with an error status.
    '''
    #Issue CSO API observations request and load the results into a GeoDataFrame
    # NOTE: the request is capped at 5000 records; larger result sets are
    # presumably cut off by the API -- verify for busy domains
    params = {
      "bbox": f"{Bbox['lonmin']},{Bbox['latmax']},{Bbox['lonmax']},{Bbox['latmin']}",
      "start_date": st,
      "end_date": ed,
      "format": "geojson",
      "limit": 5000,
    }

    # timeout so a stalled connection cannot hang the notebook; fail loudly on
    # HTTP errors instead of trying to parse an error page as GeoJSON
    csodata_resp = requests.get("https://api.communitysnowobs.org/observations",
                                params=params, timeout=60)
    csodata_resp.raise_for_status()
    csodatajson = csodata_resp.json()
    #turn into geodataframe
    gdf = gpd.GeoDataFrame.from_features(csodatajson, crs=stn_proj)

    # keep observations whose (UTC) calendar day lies in [st, ed].
    # Compare day against day: comparing the full timestamp string with a bare
    # date would drop every observation made on the end date
    obs_day = pd.to_datetime(gdf['timestamp'], utc=True).dt.strftime('%Y-%m-%d')
    mask = (obs_day >= st) & (obs_day <= ed)
    gdf = gdf.loc[mask].reset_index(drop=True)
    print('Total number of CSO in domain = ',len(gdf))

    ingdf = extract_meta(gdf,mod_proj,dem_path,lc_path)

    #need to format data for Hs_to_SWE conversion
    # parse without a hand-written format string: the timestamps carry
    # fractional seconds and a 'Z' suffix that '%Y-%m-%dT%H:%M:%S' does not cover
    obs_time = pd.to_datetime(ingdf['timestamp'], utc=True)
    ingdf['Y'] = obs_time.dt.year
    ingdf['M'] = obs_time.dt.month
    ingdf['D'] = obs_time.dt.day
    # geometry is still in the station projection here (extract_meta does not
    # reproject gdf itself), so x / y are longitude / latitude
    ingdf["LON"] = ingdf.geometry.x
    ingdf["LAT"] = ingdf.geometry.y

    #convert snow depth to mm to input into density function
    # (depth is presumably reported in cm -- TODO confirm)
    ingdf['H'] = ingdf.depth*10

    #Hs to SWE
    SWE,DOY = swe_calc(ingdf.Y.values,ingdf.M.values,ingdf.D.values,ingdf.H.values,ingdf.LAT.values,ingdf.LON.values)

    #convert swe to m to input into SM
    ingdf['swe']=SWE/1000
    ingdf['doy']=DOY
    return ingdf


In [6]:
# fetch, index and save the CSO observations of every period in st / ed
for i in range(len(st)):

    cgdf = get_cso(st[i], ed[i], Bbox)
    csogdf = point_index_from_grid(cgdf,dem_path)
    # write each period inside the loop: cgdf / csogdf are overwritten on the
    # next iteration, so saving afterwards would keep only the last period
    out = dataPath + 'all_cso_'+st[i][0:4]+'_'+ed[i][0:4]+'_meta.geojson'
    csogdf.to_file(out, driver='GeoJSON')
# display the observations of the last period
csogdf

Total number of CSO in daimain =  310


Unnamed: 0,geometry,id,author,depth,source,timestamp,elevation,x,y,dem_elev,...,Y,M,D,LON,LAT,H,swe,doy,x_idx,y_idx
0,POINT (-110.59782 43.67017),D3gyiOmE,Katie O’Connell,10.00,MountainHub,2019-04-25T02:25:30.037Z,2131.137695,532422.643976,4.835319e+06,2128,...,2019,4,25,-110.597819,43.670172,100.0,0.033626,207,452,1452
1,POINT (-110.59854 43.67067),SEJG7WBM,Katie O’Connell,72.00,MountainHub,2019-04-25T02:24:32.498Z,2127.995361,532364.144681,4.835374e+06,2124,...,2019,4,25,-110.598541,43.670671,720.0,0.245734,207,451,1452
2,POINT (-110.59358 43.67015),Vn9QgQA7,Leanne,27.94,MountainHub,2019-04-24T15:56:24.335Z,2146.224609,532764.130403,4.835319e+06,2147,...,2019,4,24,-110.593583,43.670154,279.4,0.094851,206,455,1452
3,POINT (-110.59122 43.67052),xlFXLIST,Leanne,25.40,MountainHub,2019-04-24T15:53:28.932Z,2156.758057,532954.088693,4.835360e+06,2154,...,2019,4,24,-110.591224,43.670517,254.0,0.086550,206,457,1452
4,POINT (-110.59858 43.66764),4EMEMrPA,Colton Lewer,60.00,MountainHub,2019-04-24T15:53:19.864Z,2188.988770,532362.586657,4.835038e+06,2178,...,2019,4,24,-110.598580,43.667641,600.0,0.203867,206,451,1449
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305,POINT (-110.94137 43.78968),RlB3wF6G,Fred Most,56.00,SnowPilot,2018-11-12T16:39:00.000Z,2760.414795,504716.859665,4.848515e+06,2811,...,2018,11,12,-110.941374,43.789676,560.0,0.117115,43,175,1584
306,POINT (-110.95167 43.47857),6+xKcZFv,Chris McCollister,68.00,SnowPilot,2018-11-09T00:20:00.000Z,2794.276123,503908.588273,4.813962e+06,2805,...,2018,11,9,-110.951670,43.478566,680.0,0.137963,40,167,1238
307,POINT (-110.85020 43.60553),7MKrMQAz,Josh Pope,95.00,SnowPilot,2018-11-08T17:30:00.000Z,2773.990234,512089.610379,4.828072e+06,2786,...,2018,11,8,-110.850197,43.605528,950.0,0.193702,39,248,1379
308,POINT (-110.85548 43.60229),BBqIOtV3,Mike Rheam,65.00,SnowPilot,2018-11-08T17:00:00.000Z,2794.865967,511663.718663,4.827712e+06,2861,...,2018,11,8,-110.855482,43.602289,650.0,0.134792,39,244,1376


In [7]:
# save
# NOTE: `i` and `csogdf` are whatever the most recently run loop left behind,
# so this cell writes a single file for the period st[i] .. ed[i]
# (file name: all_cso_<start year>_<end year>_meta.geojson under dataPath)
out = dataPath + 'all_cso_'+st[i][0:4]+'_'+ed[i][0:4]+'_meta.geojson'
csogdf.to_file(out, driver='GeoJSON')

In [8]:
# display the CSO geodataframe that was just written to disk
csogdf

Unnamed: 0,geometry,id,author,depth,source,timestamp,elevation,x,y,dem_elev,...,Y,M,D,LON,LAT,H,swe,doy,x_idx,y_idx
0,POINT (-110.59782 43.67017),D3gyiOmE,Katie O’Connell,10.00,MountainHub,2019-04-25T02:25:30.037Z,2131.137695,532422.643976,4.835319e+06,2128,...,2019,4,25,-110.597819,43.670172,100.0,0.033626,207,452,1452
1,POINT (-110.59854 43.67067),SEJG7WBM,Katie O’Connell,72.00,MountainHub,2019-04-25T02:24:32.498Z,2127.995361,532364.144681,4.835374e+06,2124,...,2019,4,25,-110.598541,43.670671,720.0,0.245734,207,451,1452
2,POINT (-110.59358 43.67015),Vn9QgQA7,Leanne,27.94,MountainHub,2019-04-24T15:56:24.335Z,2146.224609,532764.130403,4.835319e+06,2147,...,2019,4,24,-110.593583,43.670154,279.4,0.094851,206,455,1452
3,POINT (-110.59122 43.67052),xlFXLIST,Leanne,25.40,MountainHub,2019-04-24T15:53:28.932Z,2156.758057,532954.088693,4.835360e+06,2154,...,2019,4,24,-110.591224,43.670517,254.0,0.086550,206,457,1452
4,POINT (-110.59858 43.66764),4EMEMrPA,Colton Lewer,60.00,MountainHub,2019-04-24T15:53:19.864Z,2188.988770,532362.586657,4.835038e+06,2178,...,2019,4,24,-110.598580,43.667641,600.0,0.203867,206,451,1449
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305,POINT (-110.94137 43.78968),RlB3wF6G,Fred Most,56.00,SnowPilot,2018-11-12T16:39:00.000Z,2760.414795,504716.859665,4.848515e+06,2811,...,2018,11,12,-110.941374,43.789676,560.0,0.117115,43,175,1584
306,POINT (-110.95167 43.47857),6+xKcZFv,Chris McCollister,68.00,SnowPilot,2018-11-09T00:20:00.000Z,2794.276123,503908.588273,4.813962e+06,2805,...,2018,11,9,-110.951670,43.478566,680.0,0.137963,40,167,1238
307,POINT (-110.85020 43.60553),7MKrMQAz,Josh Pope,95.00,SnowPilot,2018-11-08T17:30:00.000Z,2773.990234,512089.610379,4.828072e+06,2786,...,2018,11,8,-110.850197,43.605528,950.0,0.193702,39,248,1379
308,POINT (-110.85548 43.60229),BBqIOtV3,Mike Rheam,65.00,SnowPilot,2018-11-08T17:00:00.000Z,2794.865967,511663.718663,4.827712e+06,2861,...,2018,11,8,-110.855482,43.602289,650.0,0.134792,39,244,1376


In [16]:
# count the CSO observations available per domain and period
# (this cell reassigns the notebook-level st / ed lists)
st = ['2016-10-01','2017-10-01','2018-10-01','2019-10-01','2020-10-01']
ed = ['2017-09-30','2018-09-30','2019-09-30','2020-09-30','2021-09-30']
domain = ['OR','WY','UT','CA','WA','CO']
# NOTE(review): get_cso samples the rasters at the notebook-level dem_path /
# lc_path whatever the value of D -- confirm they match each domain
for i, (ST, ED) in enumerate(zip(st, ed)):
    for j, D in enumerate(domain):
        # bounding box of the current domain
        Bbox = domains[D]['Bbox']
        print(D, ST, ED)
        # only the count printed inside get_cso is of interest here; cgdf is
        # overwritten on every pass
        cgdf = get_cso(ST, ED, Bbox)

OR 2016-10-01 2017-09-30
Total number of CSO in domain =  2
WY 2016-10-01 2017-09-30
Total number of CSO in domain =  23
UT 2016-10-01 2017-09-30
Total number of CSO in domain =  46
CA 2016-10-01 2017-09-30
Total number of CSO in domain =  129
WA 2016-10-01 2017-09-30
Total number of CSO in domain =  0
CO 2016-10-01 2017-09-30
Total number of CSO in domain =  233
OR 2017-10-01 2018-09-30
Total number of CSO in domain =  87
WY 2017-10-01 2018-09-30
Total number of CSO in domain =  175
UT 2017-10-01 2018-09-30
Total number of CSO in domain =  76
CA 2017-10-01 2018-09-30
Total number of CSO in domain =  182
WA 2017-10-01 2018-09-30
Total number of CSO in domain =  13
CO 2017-10-01 2018-09-30
Total number of CSO in domain =  327
OR 2018-10-01 2019-09-30
Total number of CSO in domain =  187
WY 2018-10-01 2019-09-30
Total number of CSO in domain =  310
UT 2018-10-01 2019-09-30
Total number of CSO in domain =  117
CA 2018-10-01 2019-09-30
Total number of CSO in domain =  167
WA 2018-10-01 201