In [195]:
import rasterio as rio 
import gdal
import math
import requests
import zipfile
import os
import pandas as pd 
import geopandas as gpd
import numpy as np
import subprocess

In [2]:
def round1000(x):
    ''' 
    round1000(x):
    Rounds value to nearest 1000
    '''
    return(1000 * math.floor(x / 1000))

In [174]:
@np.vectorize
def is_on_boundary(x, y):
    '''
    Returns: 
    0 if false,
    'hor' if on horizaontal boundary,
    'ver' if on verticle boudry
    '''
    answer = 0
    r = 20
    extent  = [x - r , x + r , y - r , y + r ]
    names   = ['xMin', 'xMax', 'yMin', 'yMax']
    extent = dict(zip(names, extent)) 

    # get tile coords
    tile_east =  round1000(x)
    tile_north =  round1000(y)

    answer = (f'{tile_east}_{tile_north}')
    
    # If the plot is on a horizontal tile boundary,
    # that boundary falls between yMax and yMin
    bound_below = round1000(extent['yMax'])
    if  bound_below >= extent['yMin']:
        answer = 'hor'

    # If the plot is on a vertical tile boundary,
    # that boundary falls between xMax and xMin
    bound_left = round1000(extent['xMax'])
    if  bound_left >= extent['xMin']:
        answer = 'ver'
        
    return(answer)


In [72]:
def get_plot_coords(plot_spdf):
    '''
    Takes base column array of plot entries, calculates coordinates
    of tiles and returns them as a column vector of strings
    '''
    x = plot_spdf['easting']
    y = plot_spdf['northing']
    boundary = is_on_boundary(x, y)
    return(boundary)

In [20]:
def download_plots_shp(data_path='/home/jovyan/data/all_plots/'):
    '''
    Downloads the NEON TOS plots data
    
    --------
    Parameters
    --------
    data_path path to which data will be downloaded
    '''
    # make data directory exists 
    os.makedirs(data_path, exist_ok=True)

    handle = requests.get(url='https://data.neonscience.org/api/v0/documents/All_NEON_TOS_Plots_V8')
    
    with open(data_path + 'All_NEON_TOS_Plots_V8.zip', 'wb') as f:
        f.write(handle.content)
    
    with zipfile.ZipFile(data_path + 'All_NEON_TOS_Plots_V8.zip', 'r') as zip_ref:
        zip_ref.extractall(data_path)

    NEON_all_plots = gpd.read_file(f'{data_path}All_NEON_TOS_Plots_V8/All_NEON_TOS_Plot_Polygons_V8.shp')
    
    return(NEON_all_plots)

In [22]:
NEON_all_plots = download_plots_shp()

In [26]:
download_plots_shp()
sitecodes = ['BART', 'TEAK', 'HARV']

In [184]:
def define_sites_of_interest(sitecodes, cull_boundary_plots=True):
    for sitecode in sitecodes:
        # find all base plots for the sitecode
        base_plots_SPDF = NEON_all_plots.loc[(NEON_all_plots.siteID == sitecode) & (NEON_all_plots.subtype == 'basePlot')]
        # make a dataframe of plot coordinates
        coord_df =  pd.DataFrame()
        coord_df['plotID'] = base_plots_SPDF.plotID
        coord_df['coord_String'] = get_plot_coords(base_plots_SPDF)
        # Remove plots that cross a mosaic tile boundary.
        # Maybe not necessary if we are using EPTs
        # and cloud based tiled tifs?
        if cull_boundary_plots:
            coord_df = coord_df.loc[(coord_df.coord_String != 'hor') & (coord_df.coord_String != 'ver')]
        # count how many plots are in each mosaic tile
        coord_count = coord_df.groupby('coord_String').count()
        coord_count = list(coord_count.index)
        return(coord_count)

In [185]:
coord_count = define_sites_of_interest(sitecodes)

In [205]:
def download_cyverse_iput(files_dict, iput_path, username):
    '''
    Downloads and saves data to iROD server using cyverse
    using icomands.  A connection must be established using
    iinit before this can be used. For more info see:
    https://cyverse-2020-neon-aop-workshop.readthedocs-hosted.com/en/latest/step4.html

    --------
    Parameters
    --------
    files_dict - a dictionary with file names as keys and api urls as values
    iput_path  - path on the server where files are to be stored
    username   - cyverse userneame    
    '''
    for fname, url in files_dict.items():
        # make sure target directory exists on server
        
        
        
        # download 
        response = requests.get(url)
        with open(f'data/{fname}', 'wb') as f:
            f.write(response.content)
        # copy to server
        cmd = f'iput -KPf {fname} /iplant/home/{username}/data/{fname}'
        answer = subprocess.call(cmd, shell=True)
        # verify transfer
        if 'ERROR' in answer:
            print(answer)
        
        # remove local file

In [208]:
def download_local(files_dict, savedir, username=None):
    '''
    saves files into savedir.
    username only exists to make the signature match
    that of download_cyverse_iput
    '''
    savedir.rstrip('/')
    for fname, url in files_dict.items():
        # make sure target directory exists on server
        os.makedirs(savedir, exist_ok=True)       
        # download 
        response = requests.get(url)
        with open(f'{savedir}/{fname}', 'wb') as f:
            f.write(response.content)

In [213]:
def get_from_API(coord_count, sitecodes, productcodes=['DP1.30003.001'], daterange = 'most recent', download_func=download_local, username=None, savedir='data'):
    server = 'https://data.neonscience.org/api/v0/'
    for site in sitecodes:
        for product in productcodes:
            url = f'{server}sites/{site}'
            response = requests.get(url)
            data = response.json()['data']
            dates = data['dataProducts'][0]['availableMonths']
            if daterange == 'most recent':
                # get the most recent date
                dates = [max(dates)]
            else:
                try:
                    # get dates in the range
                    assert isinstance(daterange,list)
                    begin, terminate = min(daterange), max(daterange)
                    dates = [d  for d in dates if (d >= begin) and (d <= terminate)]                 
                except AssertionError:
                    print('daterange must be a list, e.g. [\'2020-10\', \'2019-10\']')
                    return(None)
            # determine the existing products for the dates 
            for date in dates:
                url = f'{server}data/{product}/{site}/{date}'
                response = requests.get(url)
                data = response.json()
                fnames = data['data']['files']
                file_name = dict()
                for f in fnames:
                    for coord in coord_count:
                        if coord in f['name']:
                            file_name[f['name']] = f['url']
            # download the files
            try:
                download_func(files_dict, savedir, username)
            except Exception as e:
                print(f'This happened:\n\n{e}')
        print(f'Done downloading files to {savedir}')     
                
                
   

In [214]:
get_from_API(coord_count, ['BART'], daterange=['2019-08', '2019-08'])

Done downloading files to data


In [210]:
ls

aop_sites_split_geojsons.ipynb  downloads.stdout.log  [0m[01;34mneon_sites[0m/
[01;34mdata[0m/                           [01;34mgeemap[0m/               [01;34mNEON_workshop[0m/
downloads.stderr.log            max_base_plots.ipynb


In [186]:
coord_count

['314000_4879000',
 '314000_4880000',
 '314000_4881000',
 '315000_4879000',
 '315000_4880000',
 '316000_4879000',
 '316000_4880000',
 '316000_4881000',
 '316000_4882000',
 '317000_4878000',
 '317000_4879000',
 '317000_4880000',
 '317000_4881000',
 '318000_4879000',
 '318000_4880000',
 '318000_4881000']