# This examples shows how to interact with the ONC API and additional features from `strawb`
The methods 

In [1]:
%load_ext autoreload
%autoreload 2
import pandas
import plotly.express as px
import numpy as np

import strawb

In [2]:
onc_downloader = strawb.ONCDownloader(showInfo=False, timeout=60)

# select a device: e.g. 'ONCMJB016' the STRAWb MiniJB; or a module: 'TUMLIDAR001', 'TUMMUONTRACKER001',..
filters = {'deviceCode': 'ONCMJB016',
           'dateFrom': '2021-10-11T00:00:00.000Z',
           'dateTo': '2021-10-12T00:00:00.000Z',
           # 'extension': 'hdf5'
          }

# More `filters` parameter are: 
# 'dataProductCode': 'SMRD'  # use onc_downloader.getDataProducts(...) (see below) to check for possible entries 
# 'extension': 'hdf5'  # use onc_downloader.getDataProducts(...) (see below) to check for possible entries

# 1. Get available files from the ONC server

## 1.1. Basic method from the ONC API (not recommended)
the returned dict has a list with filenames under 'files'

In [3]:
onc_downloader.getListByDevice(filters=filters, allPages=True)

{'next': None,
 'queryUrl': 'https://data.oceannetworks.ca/api/archivefiles?deviceCode=ONCMJB016&dateFrom=2021-10-11T00%3A00%3A00.000Z&dateTo=2021-10-12T00%3A00%3A00.000Z&token=0db751f8-9430-47af-bc11-ed6691b38e22&method=getListByDevice',
 'files': ['ONCMJB016_20211011T000000.000Z.txt']}

## 1.2. STRAWb method (recommended): 
### 1.2.1 Get the files as a Pandas DataFrame + a structured 'outPath'
The columns are information from the ONC DB. The column 'synced' shows if the file is available locally.

In [4]:
df_result = onc_downloader.get_files_structured(dev_codes=[filters['deviceCode']],
                                                date_from=filters['dateFrom'],
                                                date_to=filters['dateTo'],
#                                                 extensions=['txt', 'hdf5', 'hld']
                                               )

df_result  # show the pandas DataFrame

100%|██████████| 1/1 [00:00<00:00,  1.21devices/s, i=ONCMJB016]


Unnamed: 0_level_0,archiveLocation,archivedDate,compression,dataProductCode,dateFrom,dateTo,deviceCode,fileSize,filename,modifyDate,path,uncompressedFileSize,outPath,fullPath,synced
fullPath,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
/Users/kilian/vscode/strawb_package/raw_module_data/oncmjb016/2021_10/ONCMJB016_20211011T000000.000Z.txt,/NA_Archive_04,2021-10-12 03:55:53.941000+00:00,gz,LF,2021-10-11 00:00:00+00:00,2021-10-12 00:00:00+00:00,ONCMJB016,8585653,ONCMJB016_20211011T000000.000Z.txt,2021-10-12 03:55:57.241000+00:00,14/10/96,71139520,/Users/kilian/vscode/strawb_package/raw_module...,/Users/kilian/vscode/strawb_package/raw_module...,False
/Users/kilian/vscode/strawb_package/raw_module_data/oncmjb016/2021_10/ONCMJB016_20211012T000000.000Z.txt,/NA_Archive_04,2021-10-13 05:27:13.126000+00:00,gz,LF,2021-10-12 00:00:00+00:00,2021-10-13 00:00:00+00:00,ONCMJB016,8591088,ONCMJB016_20211012T000000.000Z.txt,2021-10-13 05:27:27.157000+00:00,14/17/51,71065440,/Users/kilian/vscode/strawb_package/raw_module...,/Users/kilian/vscode/strawb_package/raw_module...,False


### 1.2.2 Mask the resulting DataFrame this has more features as provided by the filters

In [5]:
mask = df_result['fileSize']<10e3
df_result[mask]  # show the filtered files

Unnamed: 0_level_0,archiveLocation,archivedDate,compression,dataProductCode,dateFrom,dateTo,deviceCode,fileSize,filename,modifyDate,path,uncompressedFileSize,outPath,fullPath,synced
fullPath,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1


# 2. Download files 

## 2.1 Basic method from the ONC API (not recommended -> #)

In [6]:
# onc_downloader.getDirectFiles(filters_or_result=filters)

## 2.2 STRAWb method (recommended)
use the mask and DataFrame from 1.2.1 and 1.2.2

In [7]:
onc_downloader.getDirectFiles(filters_or_result=df_result[mask])

Downloaded - Directory: /Users/kilian/vscode/strawb_package/raw_module_data; Files: 0; Size: 0 Bytes; Time: 0.000 seconds; Speed: 0 Bytes/s


## 2.3 STRAWb method (recommended, too)

In [8]:
# as standalone
onc_downloader.download_structured(dev_codes=[filters['deviceCode']],
                                   date_from=filters['dateFrom'],
                                   date_to=filters['dateTo'],
                                   max_file_size=10e3,
#                                  extensions=['txt', 'hdf5', 'hld']
                                  )

100%|██████████| 1/1 [00:00<00:00,  1.22devices/s, i=ONCMJB016]
In total: 0 files; exclude: 0; size to download: 0.00  bytes
Downloaded - Directory: /Users/kilian/vscode/strawb_package/raw_module_data; Files: 0; Size: 0 Bytes; Time: 0.000 seconds; Speed: 0 Bytes/s


Unnamed: 0_level_0,archiveLocation,archivedDate,compression,dataProductCode,dateFrom,dateTo,deviceCode,fileSize,filename,modifyDate,path,uncompressedFileSize,outPath,fullPath,synced
fullPath,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1


In [9]:
# based on the DataFrame
onc_downloader.download_structured(pd_result=df_result[mask])

In total: 0 files; exclude: 0; size to download: 0.00  bytes
Downloaded - Directory: /Users/kilian/vscode/strawb_package/raw_module_data; Files: 0; Size: 0 Bytes; Time: 0.000 seconds; Speed: 0 Bytes/s


Unnamed: 0_level_0,archiveLocation,archivedDate,compression,dataProductCode,dateFrom,dateTo,deviceCode,fileSize,filename,modifyDate,path,uncompressedFileSize,outPath,fullPath,synced
fullPath,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1


### Access the info about the download
works with all options from 2.

In [10]:
onc_downloader.result

{'downloadResults': [],
 'stats': {'totalSize': 0, 'downloadTime': 0, 'fileCount': 0}}

# 3. Explore the ONC DB
## 3.1 Get Data Products
possible dataProductCodes and dataProductName for a device

In [11]:
# get possible dataProductCodes and dataProductName for the device
onc_downloader.getDataProducts(filters={'deviceCode': filters['deviceCode'],
                                        # 'extension': 'hdf5',  # more filter options possible
                                        # 'dataProductCode': 'LF',
                                        }
                              )

[{'dataProductCode': 'LF',
  'dataProductName': 'Log File',
  'extension': 'txt',
  'hasDeviceData': True,
  'hasPropertyData': False,
  'helpDocument': 'https://wiki.oceannetworks.ca/display/DP/4'},
 {'dataProductCode': 'TSSD',
  'dataProductName': 'Time Series Scalar Data',
  'extension': 'json',
  'hasDeviceData': True,
  'hasPropertyData': False,
  'helpDocument': 'https://wiki.oceannetworks.ca/display/DP/1'},
 {'dataProductCode': 'TSSD',
  'dataProductName': 'Time Series Scalar Data',
  'extension': 'csv',
  'hasDeviceData': True,
  'hasPropertyData': False,
  'helpDocument': 'https://wiki.oceannetworks.ca/display/DP/1'},
 {'dataProductCode': 'TSSD',
  'dataProductName': 'Time Series Scalar Data',
  'extension': 'mat',
  'hasDeviceData': True,
  'hasPropertyData': False,
  'helpDocument': 'https://wiki.oceannetworks.ca/display/DP/1'},
 {'dataProductCode': 'TSSD',
  'dataProductName': 'Time Series Scalar Data',
  'extension': 'txt',
  'hasDeviceData': True,
  'hasPropertyData': Fal

## 3.2 Get Locations

In [12]:
result = onc_downloader.getLocations()

result

[{'deployments': 5,
  'locationName': 'Albert Head',
  'depth': -29.6,
  'bbox': {'maxDepth': -29.0,
   'maxLat': 48.38981,
   'maxLon': -123.48739,
   'minDepth': -30.0,
   'minLat': 48.38981,
   'minLon': -123.48785},
  'description': ' Albert Head is a community located in Metchosin, on the south coast of Vancouver Island.',
  'hasDeviceData': True,
  'lon': -123.487666,
  'locationCode': 'AL2H',
  'hasPropertyData': False,
  'lat': 48.38981,
  'dataSearchURL': 'http://data.oceannetworks.ca/DataSearch?location=AL2H'},
 {'deployments': 12,
  'locationName': 'Argus',
  'depth': -0.066956,
  'bbox': {'maxDepth': -0.066956,
   'maxLat': 48.584709,
   'maxLon': -123.517527,
   'minDepth': -0.066956,
   'minLat': 48.584709,
   'minLon': -123.517527},
  'description': 'This Remotely Operated Vehicle is operated by Ocean Exploration Trust. It is a support vehicle to Hercules. ',
  'hasDeviceData': True,
  'lon': -123.517527,
  'locationCode': 'ARG',
  'hasPropertyData': False,
  'lat': 48.5

In [13]:
# convert the result to a DataFrame
df = pandas.DataFrame(data=result)

# filter out locations which are located at 'Cascadia' Basin
mask = df.description.str.contains('Cascadia')
mask &= ~df.description.isnull()

# show the dataframe
df[mask]

Unnamed: 0,deployments,locationName,depth,bbox,description,hasDeviceData,lon,locationCode,hasPropertyData,lat,dataSearchURL
59,3,ODP 1027C,2656.666667,"{'maxDepth': 2658.0, 'maxLat': 47.756512, 'max...",ODP 1027C is a borehole site used to pinpoint...,True,-127.731334,CBC27,False,47.756471,http://data.oceannetworks.ca/DataSearch?locati...
783,26,ODP 1026,2657.526923,"{'maxDepth': 2663.0, 'maxLat': 47.762707, 'max...",ODP 1026 is a borehole site used to pinpoint ...,True,-127.758613,NC27,False,47.762591,http://data.oceannetworks.ca/DataSearch?locati...


In [14]:
# Style dataframe for plotting

# add location column to group data
df['location'] = 'Others'
df.loc[mask, 'location'] = 'Cascadia Basin'

# mask invalid parameters that `fig.update_geos(fitbounds="locations")` works
mask_invalid = df.lon.abs() > 180.
mask_invalid | df.lat.abs() > 90.

# detect positions where either 'lon', 'lat' or both is Nan
# must be `mask_invalid = ` here <-> pandas...
mask_invalid = mask_invalid | df[['lon', 'lat']].isnull().any(axis=1)

In [15]:
# Plot locations
fig = px.scatter_geo(df[~mask_invalid],
                    lat='lat',
                    lon='lon',
                    color='location',
                    hover_data=["locationName", 'locationCode'])

fig.update_geos(fitbounds="locations")

# to tune the plot
# fig.update_layout(
#     geo = dict(
#         #scope = 'north america',
#         showland = True,
#         #showlakes = True,
#         #showsubunits = True,
#         showcountries = True,
#         #showrivers=True, #rivercolor="Blue",
#         resolution = 110,  # either 110 or 50
#         #projection = dict(
#         #    type = "natural earth",
#         #    rotation_lon = -100
#         #),
#         lonaxis = dict(
#             showgrid = True,
#             gridwidth = .5,
#             #range= [ -140.0, -55.0 ],
#             dtick = 5
#         ),
#         lataxis = dict (
#             showgrid = True,
#             gridwidth = .5,
#             #range= [ 30.0, 90.0 ],
#             dtick = 5
#         )
#     ),
# )

fig.show()