In [4]:
# Install the Ocean Networks Canada (ONC) client; pin a version (e.g. onc==<version>) for reproducible runs.
%pip install onc


Note: you may need to restart the kernel to use updated packages.


In [297]:
#imports
import os
from copy import copy
from datetime import datetime
from datetime import timedelta

import geopandas as gpd
import numpy as np
import onc
import pandas as pd
from onc.onc import ONC
from shapely.geometry import Polygon
from shapely.geometry import box
# Token: read the ONC API token from the environment so it is never committed
# with the notebook.
# NOTE(review): a token was previously hardcoded on this line; treat it as
# leaked and revoke/regenerate it in the ONC account settings.
_onc_token = os.environ.get("ONC_TOKEN")
if not _onc_token:
    raise RuntimeError(
        "Set the ONC_TOKEN environment variable to your Ocean Networks Canada API token."
    )
# NOTE: this rebinds the name `onc` from the imported module to the client
# instance; the functions below use `onc` as the client.
onc = ONC(_onc_token)


In [303]:
#function to read all the user inputs (location and date)

def onc_data(*args, **kwargs):
    """Look up ONC hydrophone data for a bounding box and a date range.

    Positional arguments are accepted for backward compatibility but ignored.

    Keyword arguments:
        min_time, max_time: ``datetime`` bounds of the query. Default to
            2000-01-01 and "now + 2 days" respectively.
        min_long, max_long, min_lat, max_lat: bounding box in degrees.
            Default to the whole globe (-180..180, -90..90).

    Returns:
        The DataFrame produced by ``onc_long_date_range`` when the date span
        exceeds 60 days, otherwise the one from ``onc_short_date_range``.
    """
    short_filters = {
        "deviceCategoryCode": "HYDROPHONE",
        "dataProductCode": "AD",
        "returnOptions": "all",
    }
    long_filters = {"deviceCategoryCode": "HYDROPHONE"}

    # `or` keeps the original "falsy means missing" rule for the time bounds.
    min_time = kwargs.get("min_time") or datetime(2000, 1, 1)
    max_time = kwargs.get("max_time") or (datetime.now() + timedelta(days=2))

    # Only the calendar date is sent; any time-of-day component is dropped.
    for filters in (short_filters, long_filters):
        filters["dateFrom"] = min_time.strftime("%Y-%m-%d")
        filters["dateTo"] = max_time.strftime("%Y-%m-%d")

    # The bounding box must come from the keyword arguments; previously these
    # names were looked up as globals and failed on a fresh kernel.
    # `is None` (not `or`) because 0 is a valid coordinate.
    def _bound(key, default):
        value = kwargs.get(key)
        return default if value is None else value

    min_long = _bound("min_long", -180)
    max_long = _bound("max_long", 180)
    min_lat = _bound("min_lat", -90)
    max_lat = _bound("max_lat", 90)

    if max_time - min_time > timedelta(days=60):
        # Per-file listing would take too long: only report which locations
        # have data and return their generic data-search URLs.
        df = onc_long_date_range(long_filters, min_long, max_long, min_lat, max_lat)
    else:
        # Short span: return one row per archive file with a direct URL.
        df = onc_short_date_range(
            short_filters, long_filters, min_long, max_long, min_lat, max_lat
        )

    return df

In [92]:
#function to get bounding boxes of locations with hydrophone data
def hydrophone_locations(temp_long):
    """Return the ONC locations matching ``temp_long`` as a GeoDataFrame.

    Args:
        temp_long: filter dict passed unchanged to ``onc.getLocations``.

    Returns:
        A GeoDataFrame with one row per returned location: every field of the
        location record plus a ``geometry`` column built from the record's
        ``bbox`` (``minLon``/``minLat``/``maxLon``/``maxLat``).
    """
    locations = onc.getLocations(temp_long)

    records = []
    for location in locations:
        bbox = location['bbox']
        records.append({
            **location,
            # shapely's box takes (minx, miny, maxx, maxy), i.e. lon before lat.
            'geometry': box(
                bbox['minLon'],
                bbox['minLat'],
                bbox['maxLon'],
                bbox['maxLat'],
            ),
        })

    return gpd.GeoDataFrame(records)


In [93]:

# Restrict the hydrophone locations to those inside a user-supplied bounding
# box and hand back both the matching rows and their location codes.
def subset_locations(temp_long, min_long, max_long, min_lat, max_lat):
    """Select hydrophone locations within a longitude/latitude window.

    Args:
        temp_long: filter dict forwarded to ``hydrophone_locations``.
        min_long, max_long: longitude limits of the window.
        min_lat, max_lat: latitude limits of the window.

    Returns:
        A ``(subset, location_codes)`` tuple: the rows selected through the
        frame's ``.cx`` coordinate indexer and their ``locationCode`` column.
    """
    all_sites = hydrophone_locations(temp_long)
    lon_window = slice(min_long, max_long)
    lat_window = slice(min_lat, max_lat)
    sites_in_window = all_sites.cx[lon_window, lat_window]
    return (sites_in_window, sites_in_window.locationCode)

In [300]:

def onc_long_date_range(temp_long, min_long, max_long, min_lat, max_lat):
    """Summarise hydrophone locations for a date range too long to list files.

    Args:
        temp_long: filter dict forwarded to ``subset_locations``.
        min_long, max_long, min_lat, max_lat: bounding box of interest.

    Returns:
        A DataFrame in the standardized column layout with one row per
        matching location (index taken from the location subset). Spatial and
        depth columns come from each location's ``bbox``; ``data_url`` is the
        location's ``dataSearchURL``; file, time and frequency columns are NaN.
        If no location matches, an empty DataFrame with the same columns.
    """
    print("Date range is too long for specific data url query from Ocean Networks Canada. However, the requested data exists at https://data.oceannetworks.ca/DataSearch")
    subset, _ = subset_locations(temp_long, min_long, max_long, min_lat, max_lat)

    columns = [
        'filename', 'min_time', 'max_time',
        'min_lat', 'max_lat', 'min_long', 'max_long',
        'min_freq', 'max_freq', 'min_depth', 'max_depth',
        'data_url',
    ]

    # Nothing intersects the bounding box: return an empty, correctly shaped
    # frame instead of failing on the missing bbox columns.
    if len(subset) == 0:
        return pd.DataFrame(columns=columns)

    # Expand the bbox dicts into columns. Going through a plain list avoids
    # label-based lookups on the (filtered, possibly non-zero-based) index.
    bbox = pd.DataFrame(subset['bbox'].tolist(), index=subset.index)

    # Build on the subset's index so every column is aligned explicitly.
    # 'filename' is kept as an all-NaN object column, which is what the
    # previous assignment order actually produced.
    newdf = pd.DataFrame(
        {
            'filename': pd.Series(np.nan, index=subset.index, dtype=object),
            'min_time': np.nan,
            'max_time': np.nan,
            'min_lat': bbox['minLat'],
            'max_lat': bbox['maxLat'],
            'min_long': bbox['minLon'],
            'max_long': bbox['maxLon'],
            'min_freq': np.nan,
            'max_freq': np.nan,
            'min_depth': bbox['minDepth'],
            'max_depth': bbox['maxDepth'],
            'data_url': subset['dataSearchURL'],
        },
        index=subset.index,
    )

    return newdf[columns]

In [301]:
def onc_short_date_range(temp_short, temp_long, min_long, max_long, min_lat, max_lat):
    """List individual hydrophone archive files for a short date range.

    Args:
        temp_short: filter dict for the archive-file query; copied per
            location, with ``locationCode`` added to the copy.
        temp_long: filter dict forwarded to ``subset_locations``.
        min_long, max_long, min_lat, max_lat: bounding box of interest.

    Returns:
        A DataFrame in the standardized column layout with one row per
        archive file across all matching locations (fresh 0..n-1 index).
        ``filename``/``min_time``/``max_time`` come from each file record's
        ``filename``/``dateFrom``/``dateTo``; ``data_url`` is a download URL;
        spatial, frequency and depth columns are NaN. If no files are found,
        an empty DataFrame with the same columns.
    """
    print("Extracting data urls from Ocean Networks Canada")
    _, location_code_list = subset_locations(temp_long, min_long, max_long, min_lat, max_lat)

    columns = [
        'filename', 'min_time', 'max_time',
        'min_lat', 'max_lat', 'min_long', 'max_long',
        'min_freq', 'max_freq', 'min_depth', 'max_depth',
        'data_url',
    ]

    # Collect every file record into one flat list so the frame is built once
    # instead of being grown by repeated concat.
    file_records = []
    for location_code in location_code_list:
        query = copy(temp_short)  # never mutate the caller's filter dict
        query['locationCode'] = location_code
        results = onc.archive.getListByLocation(query, allPages=True)
        for file in results['files'] or []:
            file_records.append({
                **file,
                # NOTE(review): this relies on private client helpers and puts
                # the API token in the URL, so it appears in any displayed or
                # saved output of this frame.
                "fullPath": f"{onc.archive._serviceUrl('archivefiles')}?token={onc.archive._config('token')}&method=getFile&filename={file['filename']}",
            })

    # No location had files: return an empty, correctly shaped frame instead
    # of falling through with no result defined.
    if not file_records:
        return pd.DataFrame(columns=columns)

    files = pd.DataFrame(file_records)
    newdf = pd.DataFrame({
        'filename': files['filename'],
        'min_time': files['dateFrom'],
        'max_time': files['dateTo'],
        'min_lat': np.nan,
        'max_lat': np.nan,
        'min_long': np.nan,
        'max_long': np.nan,
        'min_freq': np.nan,
        'max_freq': np.nan,
        'min_depth': np.nan,
        'max_depth': np.nan,
        'data_url': files['fullPath'],
    })

    return newdf[columns]

In [304]:

#standardized_column_output = ["filename", "min_time", "max_time", "min_lat", 
#"max_lat", "min_long", "max_long", "min_freq", "max_freq", "min_depth", "max_depth", "data_url"]
#test
input_dict = {"min_long": -150,
              "max_long": -100,
              "min_lat": 30,
              "max_lat": 60,
              "min_time": datetime(2020, 1, 1),
              "max_time": datetime(2020, 4, 2)
             }
# onc_data ignores positional arguments and builds its filter dicts itself,
# so only the keyword inputs are passed. The filter-dict names passed here
# before are not defined in any cell at module level.
df = onc_data(**input_dict)
pd.options.display.max_colwidth = 200
print(df)

Date range is too long for specific data url query from Ocean Networks Canada. However, the requested data exists at https://data.oceannetworks.ca/DataSearch
   filename  min_time  max_time    min_lat    max_lat    min_long    max_long  \
0       NaN       NaN       NaN  48.345773  48.345773 -126.157685 -126.157685   
2       NaN       NaN       NaN  48.699407  48.699407 -126.872425 -126.872425   
3       NaN       NaN       NaN  48.699407  48.699407 -126.872425 -126.872425   
4       NaN       NaN       NaN  50.020767  50.020767 -125.235350 -125.235350   
5       NaN       NaN       NaN  54.258806  54.258806 -130.430694 -130.430694   
6       NaN       NaN       NaN  49.043300  49.043300 -123.316108 -123.316108   
7       NaN       NaN       NaN  49.043300  49.043300 -123.316108 -123.316108   
8       NaN       NaN       NaN  49.043300  49.043300 -123.316108 -123.316108   
9       NaN       NaN       NaN  49.043300  49.043300 -123.316108 -123.316108   
10      NaN       NaN       NaN 