In [1]:
import rasterio as rio 
import gdal
import math
import requests
import zipfile
import os
import pandas as pd 
import geopandas as gpd
import numpy as np
import subprocess

In [2]:
def round1000(x):
    ''' 
    round1000(x):
    Rounds value to nearest 1000
    '''
    return(1000 * math.floor(x / 1000))

In [3]:
@np.vectorize
def is_on_boundary(x, y):
    '''
    Returns: 
    0 if false,
    'hor' if on horizaontal boundary,
    'ver' if on verticle boudry
    '''
    answer = 0
    r = 20
    extent  = [x - r , x + r , y - r , y + r ]
    names   = ['xMin', 'xMax', 'yMin', 'yMax']
    extent = dict(zip(names, extent)) 

    # get tile coords
    tile_east =  round1000(x)
    tile_north =  round1000(y)

    answer = (f'{tile_east}_{tile_north}')
    
    # If the plot is on a horizontal tile boundary,
    # that boundary falls between yMax and yMin
    bound_below = round1000(extent['yMax'])
    if  bound_below >= extent['yMin']:
        answer = 'hor'

    # If the plot is on a vertical tile boundary,
    # that boundary falls between xMax and xMin
    bound_left = round1000(extent['xMax'])
    if  bound_left >= extent['xMin']:
        answer = 'ver'
        
    return(answer)


In [4]:
def get_plot_coords(plot_spdf):
    '''
    Takes base column array of plot entries, calculates coordinates
    of tiles and returns them as a column vector of strings
    '''
    x = plot_spdf['easting']
    y = plot_spdf['northing']
    boundary = is_on_boundary(x, y)
    return(boundary)

In [5]:
def download_plots_shp(data_path='/home/jovyan/data/all_plots/'):
    '''
    Downloads the NEON TOS plots data
    
    --------
    Parameters
    --------
    data_path path to which data will be downloaded
    '''
    # make data directory exists 
    os.makedirs(data_path, exist_ok=True)

    handle = requests.get(url='https://data.neonscience.org/api/v0/documents/All_NEON_TOS_Plots_V8')
    
    with open(data_path + 'All_NEON_TOS_Plots_V8.zip', 'wb') as f:
        f.write(handle.content)
    
    with zipfile.ZipFile(data_path + 'All_NEON_TOS_Plots_V8.zip', 'r') as zip_ref:
        zip_ref.extractall(data_path)

    NEON_all_plots = gpd.read_file(f'{data_path}All_NEON_TOS_Plots_V8/All_NEON_TOS_Plot_Polygons_V8.shp')
    
    return(NEON_all_plots)

In [6]:
NEON_all_plots = download_plots_shp()

In [77]:
sitecodes = ['BART', 'TEAK', 'HARV']

In [82]:
def define_sites_of_interest(sitecodes, cull_boundary_plots=True):
    NEON_all_plots = download_plots_shp()
    for sitecode in sitecodes:
        # find all base plots for the sitecode
        base_plots_SPDF = NEON_all_plots.loc[(NEON_all_plots.siteID == sitecode) & (NEON_all_plots.subtype == 'basePlot')]
        # make a dataframe of plot coordinates
        coord_df =  pd.DataFrame()
        coord_df['plotID'] = base_plots_SPDF.plotID
        coord_df['coord_String'] = get_plot_coords(base_plots_SPDF)
        # Remove plots that cross a mosaic tile boundary.
        # Maybe not necessary if we are using EPTs
        # and cloud based tiled tifs?
        if cull_boundary_plots:
            coord_df = coord_df.loc[(coord_df.coord_String != 'hor') & (coord_df.coord_String != 'ver')]
        # count how many plots are in each mosaic tile
        coord_count = coord_df.groupby('coord_String')['plotID'].apply(list)
        return(coord_count)

In [83]:
coord_count = define_sites_of_interest(sitecodes)

In [10]:
def download_cyverse_iput(files_dict, iput_path, username):
    '''
    Downloads and saves data to iROD server using cyverse
    using icomands.  A connection must be established using
    iinit before this can be used. For more info see:
    https://cyverse-2020-neon-aop-workshop.readthedocs-hosted.com/en/latest/step4.html

    --------
    Parameters
    --------
    files_dict - a dictionary with file names as keys and api urls as values
    iput_path  - path on the server where files are to be stored
    username   - cyverse userneame    
    '''
    for fname, url in files_dict.items():
        # make sure target directory exists on server
        
        
        
        # download 
        response = requests.get(url)
        with open(f'data/{fname}', 'wb') as f:
            f.write(response.content)
        # copy to server
        cmd = f'iput -KPf {fname} /iplant/home/{username}/data/{fname}'
        answer = subprocess.call(cmd, shell=True)
        # verify transfer
        if 'ERROR' in answer:
            print(answer)
        
        # remove local file

In [11]:
def download_local(files_dict, savedir, username=None):
    '''
    saves files into savedir.
    username only exists to make the signature match
    that of download_cyverse_iput
    '''
    savedir.rstrip('/')
    for fname, url in files_dict.items():
        # make sure target directory exists 
        os.makedirs(savedir, exist_ok=True)       
        # download 
        response = requests.get(url)
        with open(f'{savedir}/{fname}', 'wb') as f:
            f.write(response.content)

In [114]:
def get_AOP_from_API(coord_count, sitecodes, productcodes, daterange = 'most recent', download_func=download_local, username=None, savedir='data'):
    '''
    Downloads files from the NEON AOP API.
    
    --------
    Parameters
    --------
    coord_count   - list of local UTM coordinates, as strings,  seperated by '_', 
                    like the output of define_sites_of_interest()
    
    sitecodes     - list of NEON sitecodes, e.g. ['BART', 'TEAK']
    
    productcodes  - list of NEON AOP product codes, e.g.
                    ['DP3.30006.001', 'DP3.30006.001'].
                    If codes are not for AOP products errors will result.
                    There is no exception handling built in for this case.
    
    daterange     - list of yyyy-mm dates for each desired month, e.g.
                    ['2019-08', '2019-09', '2019-10'], or 'most recent'
                    for most recent available month.
                   
    download_func - function specifying where the data should be saved.
                    Some functions are provided in this library (download_local 
                    and download_cyverse_iput)User defined functions must fit 
                    the signature func(files_dict, savedir, username), where:
                        - files_dict is a dictionary of the form 
                          {'filename' : 'download_url'}
                        - savedir specifies the directory where files will be saved 
                          (specified by the keyword argument 'savedir' (see below).)
                        - username if needed is is a username to access remote storage,
                          if not needed  is None. (this argument is need for 
                          download_cyverse_iput)
                    If not specified, defaults to download_local, see docstring of
                    download_local and download_cyverse_iput for more information.
    
    username      - Username for remote storage if needed. Defaults to None.

    savedir       - Path to directory where downloads will be saved.  
    '''    
    server = 'https://data.neonscience.org/api/v0/'
    for site in sitecodes:
        for product in productcodes:
            url = f'{server}sites/{site}'
            response = requests.get(url)
            data = response.json()['data']
            dates = data['dataProducts'][0]['availableMonths']
            if daterange == 'most recent':
                # get the most recent date
                dates = [max(dates)]
            else:
                try:
                    # get dates in the range
                    assert isinstance(daterange,list)
                    begin, terminate = min(daterange), max(daterange)
                    dates = [d  for d in dates if (d >= begin) and (d <= terminate)]                 
                except AssertionError:
                    print('daterange must be a list, e.g. [\'2020-10\', \'2019-10\']')
                    return(None)
            # determine the existing products for the dates 
            for date in dates:
                url = f'{server}data/{product}/{site}/{date}'
                response = requests.get(url)
                data = response.json()
                fnames = data['data']['files']
                files_dict = dict()
                plots_list = []
                for f in fnames:
                    for coord, plotIDs in coord_count.items():
                        if coord in f['name']:
                            files_dict[f['name']] = f['url']
                            plots_list.append(plotIDs) 
            # download the files
            try:
                download_func(files_dict, savedir, username)
            except Exception as e:
                print(f'This happened:\n\n{e}')
        print(f'Done downloading files to {savedir}') 
    files_df = pd.DataFrame.from_dict(files_dict, orient='index', columns=['url'])
    files_df['plotIDs'] = plots_list
    return(files_df, sitecodes)
                
   

In [116]:
files_df, sitecodes = get_AOP_from_API(coord_count, ['BART'],productcodes=['DP3.30006.001'] ,daterange=['2019-08', '2019-08'])

Done downloading files to data


In [117]:
files_df.head(3)

Unnamed: 0,url,plotIDs
NEON_D01_BART_DP3_316000_4881000_reflectance.h5,https://neon-aop-products.s3.data.neonscience....,"[BART_039, BART_040, BART_050, BART_041, BART_..."
NEON_D01_BART_DP3_314000_4881000_reflectance.h5,https://neon-aop-products.s3.data.neonscience....,"[BART_026, BART_006, BART_025]"
NEON_D01_BART_DP3_316000_4879000_reflectance.h5,https://neon-aop-products.s3.data.neonscience....,[BART_004]


In [135]:
def make_tile_csv(files_df, sitecodes, savedir='data'):
    part = '_'.join(sitecodes)
    filename = f'tile_list_{part}.csv'
    files_df.to_csv(filename)
    return(filename)

In [136]:
fname = make_tile_csv(files_df, sitecodes)

In [138]:
pd.read_csv(fname).head()

Unnamed: 0.1,Unnamed: 0,url,plotIDs
0,NEON_D01_BART_DP3_316000_4881000_reflectance.h5,https://neon-aop-products.s3.data.neonscience....,"['BART_039', 'BART_040', 'BART_050', 'BART_041..."
1,NEON_D01_BART_DP3_314000_4881000_reflectance.h5,https://neon-aop-products.s3.data.neonscience....,"['BART_026', 'BART_006', 'BART_025']"
2,NEON_D01_BART_DP3_316000_4879000_reflectance.h5,https://neon-aop-products.s3.data.neonscience....,['BART_004']
3,NEON_D01_BART_DP3_316000_4880000_reflectance.h5,https://neon-aop-products.s3.data.neonscience....,"['BART_016', 'BART_029']"
4,NEON_D01_BART_DP3_317000_4880000_reflectance.h5,https://neon-aop-products.s3.data.neonscience....,"['BART_018', 'BART_005']"


In [74]:
files_dict, sitecodes = get_AOP_from_API(coord_count, ['BART'],productcodes=['DP3.30015.001'] ,daterange=['2019-08', '2019-08'])

Done downloading files to data


In [92]:
for f in coord_count.index:
    print(f)

314000_4879000
314000_4880000
314000_4881000
315000_4879000
315000_4880000
316000_4879000
316000_4880000
316000_4881000
316000_4882000
317000_4878000
317000_4879000
317000_4880000
317000_4881000
318000_4879000
318000_4880000
318000_4881000


In [101]:
f_df = pd.DataFrame.from_dict(files_dict, orient='index', columns=['url'])

In [102]:
for coord in coord_count.index:

Unnamed: 0,url
NEON_D01_BART_DP1_317000_4880000_classified_point_cloud.shp,https://neon-aop-products.s3.data.neonscience....
NEON_D01_BART_DP1_317000_4881000_classified_point_cloud.shx,https://neon-aop-products.s3.data.neonscience....
NEON_D01_BART_DP1_315000_4880000_classified_point_cloud.shx,https://neon-aop-products.s3.data.neonscience....
NEON_D01_BART_DP1_318000_4879000_classified_point_cloud.prj,https://neon-aop-products.s3.data.neonscience....
NEON_D01_BART_DP1_318000_4880000_classified_point_cloud.kml,https://neon-aop-products.s3.data.neonscience....
...,...
NEON_D01_BART_DP1_317000_4879000_classified_point_cloud.kml,https://neon-aop-products.s3.data.neonscience....
NEON_D01_BART_DP1_317000_4878000_classified_point_cloud.kml,https://neon-aop-products.s3.data.neonscience....
NEON_D01_BART_DP1_318000_4879000_classified_point_cloud.shx,https://neon-aop-products.s3.data.neonscience....
NEON_D01_BART_DP1_317000_4879000_classified_point_cloud.prj,https://neon-aop-products.s3.data.neonscience....


In [103]:
df = pd.DataFrame({'age': [20, 32], 'state': ['NY', 'CA'], 'point': [64, 92]},
                  index=['Alice', 'Bob'])

In [112]:
for index, row in coord_count.items():
    print(index)
    print(row)
   

   

314000_4879000
['BART_012', 'BART_080', 'BART_079']
314000_4880000
['BART_066']
314000_4881000
['BART_026', 'BART_006', 'BART_025']
315000_4879000
['BART_001', 'BART_007']
315000_4880000
['BART_030', 'BART_031', 'BART_028', 'BART_062']
316000_4879000
['BART_004']
316000_4880000
['BART_016', 'BART_029']
316000_4881000
['BART_039', 'BART_040', 'BART_050', 'BART_041', 'BART_042', 'BART_034', 'BART_046', 'BART_047', 'BART_072', 'BART_073', 'BART_036', 'BART_032', 'BART_071', 'BART_051']
316000_4882000
['BART_019']
317000_4878000
['BART_002']
317000_4879000
['BART_013', 'BART_015']
317000_4880000
['BART_018', 'BART_005']
317000_4881000
['BART_074', 'BART_070', 'BART_033', 'BART_044', 'BART_037']
318000_4879000
['BART_023', 'BART_010']
318000_4880000
['BART_027', 'BART_024', 'BART_003']
318000_4881000
['BART_081']
