## NEON data

Next: check other networks shown in http://nationalsoilmoisture.com/

### This notebook requires that SMCalVal_NEON_setup has been run successfully

### This notebook performs the following:
- Loads previously generated siteList.json file
- Extracts NISAR retrieval dates and values from each track
- Extracts in situ data values for each NISAR date
- Populates new json file, one for each site, including the 5 stations and all overlapping tracks

### Notes


### Cite provisional data as:
NEON (National Ecological Observatory Network). Soil water content and water salinity (DP1.00094.001), provisional data. Dataset accessed from https://data.neonscience.org/data-products/DP1.00094.001 on Month, Day, Year. Data archived at [your DOI].

### Cite Released data as:
NEON (National Ecological Observatory Network). Soil water content and water salinity (DP1.00094.001), RELEASE-2024. https://doi.org/10.48443/a8vy-y813. Dataset accessed from https://data.neonscience.org/data-products/DP1.00094.001/RELEASE-2024 on Month, Day, Year.

In [1]:
import os
import glob
from pathlib import Path
import requests
import json
from osgeo import gdal,osr,ogr
import datetime
import pandas as pd
import geopandas as gpd
import shapely
import numpy as np
import h5py
import csv
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
from matplotlib.dates import DateFormatter
import matplotlib.path as mpltPath
import re
from utilsCalVal import EASEconvert
import setParams as p
import wget

In [2]:
neonPath      = '/scratch/rlohman/SMCalVal/NEON/'
neonFile      = neonPath+'siteList.json'

if not(os.path.isfile(neonFile)):
    print('Need to run SMCalVal_NEON_setup to generate track/frame/site file')

startDate     = datetime.datetime(2021,4,1)
endDate       = datetime.datetime(2024,4+1,1)+datetime.timedelta(microseconds=-1)

In [3]:
with open(neonFile) as f:
    sites = json.load(f)
    print(sites)

[{'siteCode': 'ABBY', 'lat': 45.762439, 'lon': -122.330317, 'EASEGridRowIndex': 10319, 'EASEGridColIndex': 27796, 'lulc': 1, 'dem': 2.2055230140686035, 'crop': 255, 'web': 'https://www.neonscience.org/field-sites/abby', 'tracks': [34, 71, 172, 106, 34], 'frames': [24, 65, 65, 25, 25], 'HVpos': '001.502', 'depths': -0.16, 'framecount': 5}, {'siteCode': 'ABBY', 'lat': 45.762439, 'lon': -122.330317, 'EASEGridRowIndex': 10319, 'EASEGridColIndex': 27796, 'lulc': 1, 'dem': 2.2055230140686035, 'crop': 255, 'web': 'https://www.neonscience.org/field-sites/abby', 'tracks': [34, 71, 172, 106, 34], 'frames': [24, 65, 65, 25, 25], 'HVpos': '002.502', 'depths': -0.16, 'framecount': 5}, {'siteCode': 'ABBY', 'lat': 45.762439, 'lon': -122.330317, 'EASEGridRowIndex': 10319, 'EASEGridColIndex': 27796, 'lulc': 1, 'dem': 2.2055230140686035, 'crop': 255, 'web': 'https://www.neonscience.org/field-sites/abby', 'tracks': [34, 71, 172, 106, 34], 'frames': [24, 65, 65, 25, 25], 'HVpos': '003.502', 'depths': -0.1

In [4]:
SERVER      = 'http://data.neonscience.org/api/v0/'
productCode = 'DP1.00094.001'
url         = SERVER+'products/'+productCode

#Request the url and convert to json
site_json   = requests.get(url).json()

For testing only - come up with list of dates from SAR acquisitions.  Here use Walnut Gulch, A

In [5]:
R4dir = '/home/jovyan/WalnutGulchA/Path62Frame620BeamFP66/R4/'
outFiles     = np.array(glob.glob(R4dir+('[0-9]'*8)+'.h5'))

# #find index of begin/end of dates, assume format same for all
tmp          = re.search(r'2[0-9]{7}',outFiles[0])
inds         = tmp.span()
dates        = np.array([datetime.datetime.strptime(x[inds[0]:inds[1]],'%Y%m%d') for x in outFiles])
fileDates    = np.array([x.strftime('%Y-%m') for x in dates])
sort_index   = np.argsort(dates)
gcovs        = outFiles[sort_index]
dates        = dates[sort_index]
print(str(len(dates))+' dates in list')

46 dates in list


In [13]:
print(st)

{'siteCode': 'SRER', 'availableMonths': ['2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12', '2018-01', '2018-02', '2018-03', '2018-04', '2018-05', '2018-06', '2018-07', '2018-08', '2018-09', '2018-10', '2018-11', '2018-12', '2019-01', '2019-02', '2019-03', '2019-04', '2019-05', '2019-06', '2019-07', '2019-08', '2019-09', '2019-10', '2019-11', '2019-12', '2020-01', '2020-02', '2020-03', '2020-04', '2020-05', '2020-06', '2020-07', '2020-08', '2020-09', '2020-10', '2020-11', '2020-12', '2021-01', '2021-02', '2021-03', '2021-04', '2021-05', '2021-06', '2021-07', '2021-08', '2021-09', '2021-10', '2021-11', '2021-12', '2022-01', '2022-02', '2022-03', '2022-04', '2022-05', '2022-06', '2022-07', '2022-08', '2022-09', '2022-10', '2022-11', '2022-12', '2023-01', '2023-02', '2023-03', '2023-04', '2023-05', '2023-06', '2023-07', '2023-08', '2023-09', '2023-10', '2023-11', '2023-12', '2024-01', '2024-02', '2024-03', '2024-04', '2024-05', '2024-06', '2024-07', '2024-08', '2024-09', '2

In [11]:
for site in sites:
    if site['siteCode'] in ['SRER']: #near Walnut Gulch
        siteID   = [i for i,x in enumerate([sub['siteCode'] in [site['siteCode']] for sub in site_json['data']['siteCodes'] ] ) if x][0]
        st       = site_json['data']['siteCodes'][siteID] 
        # months   = [datetime.datetime.strptime(x, '%Y-%m') for x in st['availableMonths']] #convert from YYYY-MM to datetime
        urls     = st['availableDataUrls']
        for i in range(len(dates)):  
        for u in urls:
            if fileDates[i] in u:
                s2=requests.get(u).json()
                for file in s2['data']['files']:
                    u2 = file['url']
                    if 'basic' in u2 and site['HVpos']+'.030' in u2 and 'variables' not in u2:
                        print(u2)


https://storage.googleapis.com/neon-publication/NEON.DOM.SITE.DP1.00094.001/SRER/20220701T000000--20220801T000000/basic/NEON.D14.SRER.DP1.00094.001.001.502.030.SWS_30_minute.2022-07.basic.20221211T001137Z.csv
https://storage.googleapis.com/neon-publication/NEON.DOM.SITE.DP1.00094.001/SRER/20230701T000000--20230801T000000/basic/NEON.D14.SRER.DP1.00094.001.001.502.030.SWS_30_minute.2023-07.basic.20230809T231529Z.csv
https://storage.googleapis.com/neon-publication/NEON.DOM.SITE.DP1.00094.001/SRER/20211201T000000--20220101T000000/basic/NEON.D14.SRER.DP1.00094.001.001.502.030.SWS_30_minute.2021-12.basic.20221210T225155Z.csv
https://storage.googleapis.com/neon-publication/NEON.DOM.SITE.DP1.00094.001/SRER/20230301T000000--20230401T000000/basic/NEON.D14.SRER.DP1.00094.001.001.502.030.SWS_30_minute.2023-03.basic.20230403T231728Z.csv
https://storage.googleapis.com/neon-publication/NEON.DOM.SITE.DP1.00094.001/SRER/20221201T000000--20230101T000000/basic/NEON.D14.SRER.DP1.00094.001.001.502.030.SWS_

KeyError: 'data'

In [12]:
print(s2)

{'message': 'API rate limit exceeded'}


In [None]:
fig,axes = plt.subplots(2,2,figsize=(15,15))
#lulc
m = Basemap(projection='merc',llcrnrlat=min(lats)-5,urcrnrlat=max(lats)+1,llcrnrlon=min(lons)-5,urcrnrlon=max(lons)+5,lat_ts=35,resolution=None,ax=axes[0,0])
m.shadedrelief()
cmap=matplotlib.colors.ListedColormap(('darkgreen','orange','yellow','hotpink','red','gray','whitesmoke','mediumblue','darkcyan','limegreen','wheat'))
cb=m.scatter(lons,lats,c=lulcs,marker='^',vmin=0.5,vmax=11.5,cmap=cmap,latlon=True)
axes[0,0].set_title('lulc')
plt.colorbar(cb,ax=axes[0,0],ticks=np.unique(lulcs),orientation='horizontal')

#dem std. deviation within 1km grid cell
m = Basemap(projection='merc',llcrnrlat=min(lats)-5,urcrnrlat=max(lats)+1,llcrnrlon=min(lons)-5,urcrnrlon=max(lons)+5,lat_ts=35,resolution=None,ax=axes[0,1])
m.shadedrelief()
c=m.scatter(lons,lats,c=dems,marker='^',latlon=True)
axes[0,1].set_title('demstd')
plt.colorbar(c,ax=axes[0,1],orientation='horizontal')

#dem std. deviation within 1km grid cell
m = Basemap(projection='merc',llcrnrlat=min(lats)-5,urcrnrlat=max(lats)+1,llcrnrlon=min(lons)-5,urcrnrlon=max(lons)+5,lat_ts=35,resolution=None,ax=axes[1,0])
m.shadedrelief()
c=m.scatter(lons,lats,c=vrange,marker='^',latlon=True)
axes[1,0].set_title('# diff depth levels')
plt.colorbar(c,ax=axes[1,0],orientation='horizontal')

#number of frames covering each site
m = Basemap(projection='merc',llcrnrlat=min(lats)-5,urcrnrlat=max(lats)+1,llcrnrlon=min(lons)-5,urcrnrlon=max(lons)+5,lat_ts=35,resolution=None,ax=axes[1,1])
m.shadedrelief()
c=m.scatter(lons,lats,c=framecount,vmax=5,marker='^',latlon=True)
axes[1,1].set_title('#frames')
plt.colorbar(c,ax=axes[1,1],orientation='horizontal')

plt.show()


In [None]:
with open(neonPath+'siteList.json', 'w') as file:
  json.dump(sites, file)

In [None]:
allGood=True
for NEONpt in range(len(set(siteCodes))):
    urls     = site_json['data']['siteCodes'][NEONpt]['availableDataUrls']
    
    #get sensor position file for first date, "basic" version
    s2=requests.get(urls[0]).json()
    for file in s2['data']['files']:
        url = file['url']
        if 'basic' in url and 'sensor_positions' in url:
            data = pd.read_csv(url)    
    x=[]
    y=[]
    for i in range(1,6):
        a=data.loc[data['HOR.VER']==(0.501+i)]
        x.append(a['xOffset'].values[0])
        y.append(a['yOffset'].values[0])
    
    ezc         = ezcs[NEONpt]
    ezr         = ezrs[NEONpt]
    
    map_scale_m = 200.17900466991
    cols        = 173520
    rows        = 73080
    col0        = (cols-1)/2
    row0        = (rows-1)/2
    x0          = (ezc-col0)*map_scale_m+np.array(x)
    y0          = (row0-ezr)*map_scale_m+np.array(y)
    
    ezr0        = row0-y0/map_scale_m
    ezc0        = col0+x0/map_scale_m
    
    difr        = np.ceil(ezr - 0.5)-np.ceil(ezr0 - 0.5)
    difc       = np.ceil(ezc - 0.5)-np.ceil(ezc0 - 0.5)
    
    if np.sum(np.abs(difr))+np.sum(np.abs(difc))>0:
        print(siteCodes[NEONpt])
        allGood=False
if allGood:
    print('All 5 stations within same EASEGRID2.0 cell as main lat/lon, for all sites')

In [None]:
#Show overlap frames for one site
siteid    = 1
tfdb0     = tfdb.iloc[tfdb.sindex.query(shapely.Point(lons[siteid],lats[siteid]), predicate="intersects")]
tfdb0.boundary.plot()
plt.plot(lons[siteid],lats[siteid],'r*')
plt.title(siteCodes[siteid])
plt.show()

In [None]:
#Just for reference - this doesn't easily save the track/frame (csv, variable# of values)
sites = pd.DataFrame(
    {'siteCode': siteCodes,
     'lats': lats,
     'lons': lons,
     'EASEGridRowIndex': ezrs,
     'EASEGridColumnIndex': ezcs,
     'lulc': lulcs,
     'crop': crops,
     'demstd': dems,
     'web':sitePages
    })
sites.to_csv(neonPath+'siteList.csv')