In [45]:
# from siphon.simplewebservice.ndbc import NDBC
import pandas as pd
from erddapy import ERDDAP
from joblib import Parallel, delayed
import multiprocessing
import xarray as xr

Now using ERDDAP directly instead. Need to make sure to get:

Sensors:
* covered by below but need to maybe add:
    * More NDBC
    * More CO-OPS
    * PORTS (not in erddap) (or do separately)
    * Work on ACCESS sensor ingestion first to see how that works.

Platforms:
* how to access this data? ADCPs, etc

Other:
* HF Radar DONE
* sea ice (grids)

In [42]:
# user input
kw = {
    "min_lon": -99.0,
    "max_lon": -88.0,
    "min_lat": 20.0,
    "max_lat": 30.0,
    "min_time": "2016-07-10T00:00:00",#Z",
    "max_time": "2017-02-10T00:00:00",#Z"
}


# HF Radar

In [8]:
e = ERDDAP(server='https://coastwatch.pfeg.noaa.gov/erddap/', protocol='griddap')

In [56]:
standard_names = ['surface_eastward_sea_water_velocity','surface_northward_sea_water_velocity']

# search for datasets in our region of interest. We know for HF Radar which instutition we want 
# to search for so we don't need to specify other terms.
inst = 'Coastal Observing Research and Development Center, Scripps Institution of Oceanography'
url = e.get_search_url(search_for=inst, response="csv", **kw, items_per_page=10000)
print(url)

# Read in the search info to find the dataset_ids for datasets we will be reading data from.
df = pd.read_csv(url)
dataset_ids = df['Dataset ID']
print(dataset_ids.head())



https://coastwatch.pfeg.noaa.gov/erddap/search/advanced.csv?page=1&itemsPerPage=10000&protocol=griddap&cdm_data_type=(ANY)&institution=(ANY)&ioos_category=(ANY)&keywords=(ANY)&long_name=(ANY)&standard_name=(ANY)&variableName=(ANY)&minLon=-99.0&maxLon=-88.0&minLat=20.0&maxLat=30.0&minTime=1468108800.0&maxTime=1486684800.0&searchFor=Coastal+Observing+Research+and+Development+Center%2C+Scripps+Institution+of+Oceanography
0    ucsdHfrE1
1    ucsdHfrE2
2    ucsdHfrE6
Name: Dataset ID, dtype: object


In [None]:
# decide which datasets to actually use; presumably the highest res?
# maybe should depend on the model output?

In [57]:
def request(dataset_id):
    
    # In case the variable is named differently from the standard names, 
    # we back out the variable names here for each dataset. This also only 
    # returns those names for which there is data in the dataset.
    varnames = e.get_var_by_attr(
        dataset_id=dataset_id,
        standard_name=lambda v: v in standard_names
    )
    
    # the search terms that can be input for tabledap do not work for griddap
    # in erddapy currently. Instead, put together an opendap link and then 
    # narrow the dataset with xarray.
    e.dataset_id = dataset_id
    # get opendap link
    url = e.get_download_url(response='opendap')
    ds = xr.open_dataset(url).sel(latitude=slice(kw['min_lat'],kw['max_lat']), 
                                  longitude=slice(kw['min_lon'],kw['max_lon']), 
                                  time=slice(kw['min_time'],kw['max_time']))

    return (dataset_id, ds)

In [54]:
%%time
    
num_cores = multiprocessing.cpu_count()
downloads = Parallel(n_jobs=num_cores)(
    delayed(request)(dataset_id) for dataset_id in dataset_ids
)

dss = {dataset_id: ds for (dataset_id, ds) in downloads}

# (or can concat together the dss)
len(dss)

CPU times: user 73.3 ms, sys: 101 ms, total: 175 ms
Wall time: 22.9 s


3

# Sensors

New approach using erddapy to do this

In [626]:
e = ERDDAP(server="http://erddap.sensors.axds.co/erddap")

In [627]:
# Vars is the base names to use for searching but not full standard_names
Vars = ['sea_water_temperature','sea_water_practical_salinity','sea_water_speed','sea_water_velocity_to_direction','sea_surface_height']

# Get all possible related standard_names
url = e.get_categorize_url(
    categorize_by="standard_name",
    response="csv"
)

cats = pd.read_csv(url)["Category"]

# get one big list of all the standard_names to search for
standard_names = []
for Var in Vars:
    standard_names += [name for name in cats if name.startswith(Var)]
standard_names

['sea_water_temperature',
 'sea_water_temperature_quality_flag',
 'sea_water_practical_salinity',
 'sea_water_practical_salinity_quality_flag',
 'sea_water_speed',
 'sea_water_speed_quality_flag',
 'sea_water_velocity_to_direction',
 'sea_water_velocity_to_direction_quality_flag',
 'sea_surface_height',
 'sea_surface_height_above_sea_level',
 'sea_surface_height_above_sea_level_quality_flag',
 'sea_surface_height_amplitude_due_to_geocentric_ocean_tide',
 'sea_surface_height_amplitude_due_to_geocentric_ocean_tide_quality_flag',
 'sea_surface_height_quality_flag']

In [628]:
# find all the dataset ids which we will use to get the data
dataset_ids = []
for standard_name in standard_names:
    kw['standard_name'] = standard_name
    
    search_url = e.get_search_url(response="csv", **kw, items_per_page=10000)
    try:
        search = pd.read_csv(search_url)
    except:
        print('standard_name %s not found' % standard_name)
        
    dataset_ids.extend(search["Dataset ID"])
    
# only need a dataset id once since we will check them each for all standard_names
dataset_ids = list(set(dataset_ids))
len(dataset_ids)

standard_name sea_water_temperature_quality_flag not found
standard_name sea_water_practical_salinity_quality_flag not found
standard_name sea_water_speed_quality_flag not found
standard_name sea_water_velocity_to_direction_quality_flag not found
standard_name sea_surface_height not found
standard_name sea_surface_height_above_sea_level_quality_flag not found
standard_name sea_surface_height_amplitude_due_to_geocentric_ocean_tide_quality_flag not found
standard_name sea_surface_height_quality_flag not found


286

In [560]:
def request(dataset_id):
    
    # In case the variable is named differently from the standard names, 
    # we back out the variable names here for each dataset. This also only 
    # returns those names for which there is data in the dataset.
    varnames = e.get_var_by_attr(
        dataset_id=dataset_id,
        standard_name=lambda v: v in standard_names
    )

#     e.constraints = None
    e.protocol = "tabledap"
    e.variables = ["time","longitude", "latitude", "station"] + varnames
    # set the same time restraints as before
    e.constraints = {'time<=': kw['max_time'], 'time>=': kw['min_time'],}
    e.dataset_id = dataset_id
    # Drop cols and rows that are only NaNs.
    try:
        df = e.to_pandas(response="csvp", index_col=0, parse_dates=True).dropna(axis='index', how='all').dropna(axis='columns', how='all')
    except:
        print('no data to be read in for %s' % dataset_id)
        df = None
    return (dataset_id, df)

In [563]:
%%time
    
num_cores = multiprocessing.cpu_count()
downloads = Parallel(n_jobs=num_cores)(
    delayed(request)(dataset_id) for dataset_id in dataset_ids
)

dfs = {dataset_id: df for (dataset_id, df) in downloads}
# dfs
# (or can concat together the dfs)
len(dfs)

CPU times: user 1.11 s, sys: 549 ms, total: 1.66 s
Wall time: 1min


286

In [535]:
%%time

# downloads = []
# for dataset_id in dataset_ids[:5]:
    
    
#     downloads.append(request(dataset_id))
    
num_cores = multiprocessing.cpu_count()
downloads = Parallel(n_jobs=num_cores)(
    delayed(request)(dataset_id) for dataset_id in dataset_ids
)

dfs = {dataset_id: df for (dataset_id, df) in downloads}
# dfs
# (or can concat together the dfs)



CPU times: user 4.26 s, sys: 8.68 s, total: 12.9 s
Wall time: 5min 31s


In [536]:
len(dfs)

286

In [547]:
downloads[1]

('wmo_42927',
                            longitude (degrees_east)  latitude (degrees_north)  \
 time (UTC)                                                                      
 2017-03-05 13:35:00+00:00                   -88.032                    28.418   
 2017-03-05 13:15:00+00:00                   -88.032                    28.418   
 2017-03-05 12:55:00+00:00                   -88.032                    28.418   
 2017-03-05 12:14:00+00:00                   -88.032                    28.418   
 2017-03-05 11:54:00+00:00                   -88.032                    28.418   
 ...                                             ...                       ...   
 2016-09-25 17:41:00+00:00                   -88.032                    28.418   
 2016-09-25 17:21:00+00:00                   -88.032                    28.418   
 2016-09-25 17:01:00+00:00                   -88.032                    28.418   
 2016-09-25 16:41:00+00:00                   -88.032                    28.418   
 2

In [546]:
dfs['wmo_42927']

Unnamed: 0_level_0,longitude (degrees_east),latitude (degrees_north),station,sea_water_temperature_ocean (degree_Celsius)
time (UTC),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-03-05 13:35:00+00:00,-88.032,28.418,42927 - Noble Bully I - Mississippi Canyon 567,22.35
2017-03-05 13:15:00+00:00,-88.032,28.418,42927 - Noble Bully I - Mississippi Canyon 567,22.31
2017-03-05 12:55:00+00:00,-88.032,28.418,42927 - Noble Bully I - Mississippi Canyon 567,22.38
2017-03-05 12:14:00+00:00,-88.032,28.418,42927 - Noble Bully I - Mississippi Canyon 567,22.31
2017-03-05 11:54:00+00:00,-88.032,28.418,42927 - Noble Bully I - Mississippi Canyon 567,22.35
...,...,...,...,...
2016-09-25 17:41:00+00:00,-88.032,28.418,42927 - Noble Bully I - Mississippi Canyon 567,29.78
2016-09-25 17:21:00+00:00,-88.032,28.418,42927 - Noble Bully I - Mississippi Canyon 567,29.78
2016-09-25 17:01:00+00:00,-88.032,28.418,42927 - Noble Bully I - Mississippi Canyon 567,29.71
2016-09-25 16:41:00+00:00,-88.032,28.418,42927 - Noble Bully I - Mississippi Canyon 567,29.74


In [465]:

kw = {
    "standard_name": "sea_water_practical_salinity",
    "min_lon": -99.0,
    "max_lon": -88.0,
    "min_lat": 20.0,
    "max_lat": 30.0,
    "min_time": "2016-07-10T00:00:00Z",
    "max_time": "2017-02-10T00:00:00Z"
}


search_url = e.get_search_url(response="csv", **kw, items_per_page=10000)
# search_url
search = pd.read_csv(search_url)
search
# print(search["Dataset ID"][0])

# dataset_id = search["Dataset ID"][0]
# info_url = e.get_info_url(dataset_id=dataset_id, response="csv")
# info = pd.read_csv(info_url)
# info[100:150]

Unnamed: 0,griddap,Subset,tabledap,Make A Graph,wms,files,Title,Summary,FGDC,ISO 19115,Info,Background Info,RSS,Institution,Dataset ID
0,,,http://erddap.sensors.axds.co/erddap/tabledap/...,http://erddap.sensors.axds.co/erddap/tabledap/...,,,Aransas Wildlife Refuge (TCOON),Timeseries data from 'Aransas Wildlife Refuge ...,http://erddap.sensors.axds.co/erddap/metadata/...,http://erddap.sensors.axds.co/erddap/metadata/...,http://erddap.sensors.axds.co/erddap/info/noaa...,https://sensors.ioos.us/#metadata/57560/station,http://erddap.sensors.axds.co/erddap/rss/noaa_...,NOAA Center for Operational Oceanographic Prod...,noaa_nos_co_ops_8774230
1,,,http://erddap.sensors.axds.co/erddap/tabledap/...,http://erddap.sensors.axds.co/erddap/tabledap/...,,,"Baffin Bay, TX","Timeseries data from 'Baffin Bay, TX' (urn:ioo...",http://erddap.sensors.axds.co/erddap/metadata/...,http://erddap.sensors.axds.co/erddap/metadata/...,http://erddap.sensors.axds.co/erddap/info/noaa...,https://sensors.ioos.us/#metadata/45616/station,http://erddap.sensors.axds.co/erddap/rss/noaa_...,NOAA Center for Operational Oceanographic Prod...,noaa_nos_co_ops_8776604
2,,,http://erddap.sensors.axds.co/erddap/tabledap/...,http://erddap.sensors.axds.co/erddap/tabledap/...,,,"Barataria Bay N of Grand Isle, LA",Timeseries data from 'Barataria Bay N of Grand...,http://erddap.sensors.axds.co/erddap/metadata/...,http://erddap.sensors.axds.co/erddap/metadata/...,http://erddap.sensors.axds.co/erddap/info/gov_...,https://sensors.ioos.us/#metadata/24642/station,http://erddap.sensors.axds.co/erddap/rss/gov_u...,USGS National Water Information System (NWIS),gov_usgs_waterdata_07380251
3,,,http://erddap.sensors.axds.co/erddap/tabledap/...,http://erddap.sensors.axds.co/erddap/tabledap/...,,,"Barataria Bay near Grand Terre Island, LA",Timeseries data from 'Barataria Bay near Grand...,http://erddap.sensors.axds.co/erddap/metadata/...,http://erddap.sensors.axds.co/erddap/metadata/...,http://erddap.sensors.axds.co/erddap/info/gov_...,https://sensors.ioos.us/#metadata/23003/station,http://erddap.sensors.axds.co/erddap/rss/gov_u...,USGS National Water Information System (NWIS),gov_usgs_waterdata_291929089562600
4,,,http://erddap.sensors.axds.co/erddap/tabledap/...,http://erddap.sensors.axds.co/erddap/tabledap/...,,,"Barataria Pass at Grand Isle, LA",Timeseries data from 'Barataria Pass at Grand ...,http://erddap.sensors.axds.co/erddap/metadata/...,http://erddap.sensors.axds.co/erddap/metadata/...,http://erddap.sensors.axds.co/erddap/info/gov_...,https://sensors.ioos.us/#metadata/61772/station,http://erddap.sensors.axds.co/erddap/rss/gov_u...,USGS National Water Information System (NWIS),gov_usgs_waterdata_073802516
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61,,,http://erddap.sensors.axds.co/erddap/tabledap/...,http://erddap.sensors.axds.co/erddap/tabledap/...,,,"Texas Point, Sabine Pass","Timeseries data from 'Texas Point, Sabine Pass...",http://erddap.sensors.axds.co/erddap/metadata/...,http://erddap.sensors.axds.co/erddap/metadata/...,http://erddap.sensors.axds.co/erddap/info/noaa...,https://sensors.ioos.us/#metadata/57559/station,http://erddap.sensors.axds.co/erddap/rss/noaa_...,NOAA Center for Operational Oceanographic Prod...,noaa_nos_co_ops_8770822
62,,,http://erddap.sensors.axds.co/erddap/tabledap/...,http://erddap.sensors.axds.co/erddap/tabledap/...,,,"Trinity Rv at Wallisville, TX",Timeseries data from 'Trinity Rv at Wallisvill...,http://erddap.sensors.axds.co/erddap/metadata/...,http://erddap.sensors.axds.co/erddap/metadata/...,http://erddap.sensors.axds.co/erddap/info/gov_...,https://sensors.ioos.us/#metadata/32690/station,http://erddap.sensors.axds.co/erddap/rss/gov_u...,USGS National Water Information System (NWIS),gov_usgs_waterdata_08067252
63,,,http://erddap.sensors.axds.co/erddap/tabledap/...,http://erddap.sensors.axds.co/erddap/tabledap/...,,,"USS Lexington, TX","Timeseries data from 'USS Lexington, TX' (urn:...",http://erddap.sensors.axds.co/erddap/metadata/...,http://erddap.sensors.axds.co/erddap/metadata/...,http://erddap.sensors.axds.co/erddap/info/noaa...,https://sensors.ioos.us/#metadata/45612/station,http://erddap.sensors.axds.co/erddap/rss/noaa_...,NOAA Center for Operational Oceanographic Prod...,noaa_nos_co_ops_8775296
64,,,http://erddap.sensors.axds.co/erddap/tabledap/...,http://erddap.sensors.axds.co/erddap/tabledap/...,,,"Vermilion Bay near Cypremort Point, LA",Timeseries data from 'Vermilion Bay near Cypre...,http://erddap.sensors.axds.co/erddap/metadata/...,http://erddap.sensors.axds.co/erddap/metadata/...,http://erddap.sensors.axds.co/erddap/info/gov_...,https://sensors.ioos.us/#metadata/32721/station,http://erddap.sensors.axds.co/erddap/rss/gov_u...,USGS National Water Information System (NWIS),gov_usgs_waterdata_07387040


In [427]:
info[info['Attribute Name'] == 'defaultDataQuery']['Value'].values

array(['sea_surface_height_above_sea_level_geoid_mllw,air_temperature,sea_water_electrical_conductivity,sea_surface_height_amplitude_due_to_geocentric_ocean_tide_geoid_mllw,wind_speed_of_gust,sea_water_temperature,z,wind_speed,time,wind_from_direction,air_pressure,sea_water_practical_salinity&time>=max(time)-3days'],
      dtype=object)

In [436]:
info[info['Row Type'] == 'variable']['Variable Name'].values

array(['time', 'latitude', 'longitude', 'z', 'air_pressure',
       'sea_water_electrical_conductivity',
       'sea_water_practical_salinity', 'air_temperature',
       'sea_water_temperature',
       'sea_surface_height_amplitude_due_to_geocentric_ocean_tide_geoid_mllw',
       'sea_surface_height_above_sea_level_geoid_mllw',
       'wind_speed_of_gust', 'wind_speed', 'wind_from_direction',
       'station'], dtype=object)

In [419]:
e.get_var_by_attr(
    dataset_id=dataset_id,
    standard_name="sea_water_temperature"
)


['sea_water_temperature']

In [420]:
e.get_var_by_attr(
    dataset_id=dataset_id,
    standard_name="sea_surface_height_amplitude_due_to_geocentric_ocean_tide_geoid_mllw"
)


[]

In [421]:
e.get_var_by_attr(dataset_id, axis=lambda v: v in ["X", "Y", "Z", "T"])

['longitude', 'time', 'latitude', 'z']

In [430]:
e.get_var_by_attr(
    dataset_id=dataset_id,
    standard_name=lambda v: v in ["sea_water_practical_salinity","sea_water_temperature",'sea_surface_height',
 'sea_surface_height_above_sea_level',
 'sea_surface_height_above_sea_level_quality_flag',
 'sea_surface_height_amplitude_due_to_geocentric_ocean_tide',
 'sea_surface_height_amplitude_due_to_geocentric_ocean_tide_quality_flag',
 'sea_surface_height_quality_flag']
)


['sea_water_practical_salinity',
 'sea_surface_height_above_sea_level_geoid_mllw',
 'sea_surface_height_amplitude_due_to_geocentric_ocean_tide_geoid_mllw',
 'sea_water_temperature']

In [422]:
e.get_var_by_attr(
    dataset_id=dataset_id,
    standard_name="sea_water_practical_salinity"
)


['sea_water_practical_salinity']

In [445]:
url = e.get_search_url(search_for="tabs", response="csv")

df = pd.read_csv(url)

In [449]:
info_url = e.get_info_url(dataset_id='tabs_b', response="csv")
info = pd.read_csv(info_url)
info

Unnamed: 0,Row Type,Variable Name,Attribute Name,Data Type,Value
0,attribute,NC_GLOBAL,cdm_data_type,String,TimeSeries
1,attribute,NC_GLOBAL,cdm_timeseries_variables,String,"station,longitude,latitude"
2,attribute,NC_GLOBAL,contributor_email,String,"None,feedback@axiomdatascience.com"
3,attribute,NC_GLOBAL,contributor_name,String,Gulf of Mexico Coastal Ocean Observing System ...
4,attribute,NC_GLOBAL,contributor_role,String,"funder,processor"
...,...,...,...,...,...
202,attribute,station,ioos_category,String,Identifier
203,attribute,station,ioos_code,String,urn:ioos:station:com.axiomdatascience:57437
204,attribute,station,long_name,String,TABS Buoy B
205,attribute,station,short_name,String,urn:ioos:station:tabs:B


In [450]:
info[info['Row Type'] == 'variable']['Variable Name'].values

array(['time', 'latitude', 'longitude', 'z', 'air_pressure',
       'sea_water_electrical_conductivity',
       'sea_water_velocity_to_direction', 'sea_water_speed',
       'relative_humidity', 'sea_water_practical_salinity',
       'air_temperature', 'sea_water_temperature', 'wind_speed_of_gust',
       'wind_speed', 'wind_from_direction', 'station'], dtype=object)

In [452]:
url = e.get_categorize_url(
    categorize_by="standard_name",
    response="csv"
)

cats = pd.read_csv(url)["Category"]
print([name for name in cats if 'sea_surface_height' in name])
print([name for name in cats if 'sea_water_practical_salinity' in name])
print([name for name in cats if 'sea_water_temperature' in name])
print([name for name in cats if 'sea_water_speed' in name])
print([name for name in cats if 'sea_water_velocity_to_direction' in name])


['sea_surface_height', 'sea_surface_height_above_sea_level', 'sea_surface_height_above_sea_level_quality_flag', 'sea_surface_height_amplitude_due_to_geocentric_ocean_tide', 'sea_surface_height_amplitude_due_to_geocentric_ocean_tide_quality_flag', 'sea_surface_height_quality_flag']
['sea_water_practical_salinity', 'sea_water_practical_salinity_quality_flag']
['difference_between_sea_water_temperature_and_freezing_point', 'sea_water_temperature', 'sea_water_temperature_quality_flag']
['sea_water_speed', 'sea_water_speed_quality_flag']
['sea_water_velocity_to_direction', 'sea_water_velocity_to_direction_quality_flag']


In [494]:
# e = ERDDAP(server="http://erddap.sensors.axds.co/erddap")
e.constraints = None
e.protocol = "tabledap"
# e.variables = list(info[info['Row Type'] == 'variable']['Variable Name'].values)
e.variables = ["time","longitude", "latitude", "station", "sea_water_temperature"]#, "sea_water_practical_salinity",'sea_surface_height_above_sea_level_geoid_mllw',
#  'sea_surface_height_amplitude_due_to_geocentric_ocean_tide_geoid_mllw']
e.dataset_id = dataset_id
# Drop units in the first line and NaNs.
df = e.to_pandas(response="csvp").dropna(axis='index', how='all').dropna(axis='columns', how='all')
df

ValueError: I/O operation on closed file.

In [368]:
df

Unnamed: 0,longitude,latitude,sea_water_temperature
64471,-69.248,40.503,12.7
64473,-69.248,40.503,12.7
64475,-69.248,40.503,12.7
64477,-69.248,40.503,12.7
64479,-69.248,40.503,12.7
...,...,...,...
127974,-69.248,40.503,11.8
127976,-69.248,40.503,11.8
127978,-69.248,40.503,11.9
127980,-69.248,40.503,11.9


# What data does Axiom already have compared to what is out there

## NDBC

### Active

Get list of active stations. 

List also available here: https://www.ndbc.noaa.gov/activestations.xml

In [233]:
# https://unidata.github.io/siphon/latest/examples/ndbc/latest_request.html
df = NDBC.latest_observations()
stations_ndbc_active = df.station.values
df.head()

Unnamed: 0,station,latitude,longitude,wind_direction,wind_speed,wind_gust,wave_height,dominant_wave_period,average_wave_period,dominant_wave_direction,pressure,3hr_pressure_tendency,air_temperature,water_temperature,dewpoint,visibility,water_level_above_mean,time
0,14041,-8.0,55.0,229.0,1.0,,,,,,1011.2,,28.3,29.6,,,,2021-03-22 17:00:00+00:00
1,14047,-4.0,57.0,,,,,,,,,,30.6,30.4,,,,2021-03-22 17:00:00+00:00
2,22101,37.24,126.02,180.0,3.0,,,,,,,,5.2,5.2,,,,2021-03-22 17:00:00+00:00
3,22102,34.79,125.78,240.0,3.0,,,,,,,,8.1,8.1,,,,2021-03-22 17:00:00+00:00
4,22103,34.0,127.5,290.0,9.0,,,,,,,,10.4,14.3,,,,2021-03-22 17:00:00+00:00


### Full

Get list of all stations, which would have to be filtered to find appropriate historical data

In [144]:
url = 'https://www.ndbc.noaa.gov/data/stations/station_table.txt'
dfall = pd.read_table(url, sep='|', index_col=0)
dfall

Unnamed: 0_level_0,OWNER,TTYPE,HULL,NAME,PAYLOAD,LOCATION,TIMEZONE,FORECAST,NOTE
# STATION_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
#,,,,,,,,,
00922,DU,Slocum Glider,,OTN201 - 4800922,,"30.000 N 90.000 W (30&#176;0'0"" N 90&#176;0'0"" W)",E,,
00923,DU,Slocum Glider,,OTN200 - 4800923,,"30.000 N 90.000 W (30&#176;0'0"" N 90&#176;0'0"" W)",E,,
01500,R,Spray Glider,,SP031 - 3801500,,"30.000 N 90.000 W (30&#176;0'0"" N 90&#176;0'0"" W)",?,,
01502,UA,Slocum Glider,,Penobscot - 4801502,,"30.000 N 90.000 W (30&#176;0'0"" N 90&#176;0'0"" W)",E,,
...,...,...,...,...,...,...,...,...,...
ygnn6,EA,GLOS Weather Station,,"Niagara Coast Guard Station, NY",,"43.262 N 79.064 W (43&#176;15'42"" N 79&#176;3'...",E,,"For Great Lakes marine forecasts, select: <a h..."
ykrv2,PT,Water Level Observation Network,,"8637611 - York River East Rear Range Light, VA",,"37.251 N 76.342 W (37&#176;15'5"" N 76&#176;20'...",E,FZUS51.KAKQ,
yktv2,O,Water Level Observation Network,,"8637689 - Yorktown USCG Training Center, VA",,"37.227 N 76.479 W (37&#176;13'36"" N 76&#176;28...",E,FZUS51.KAKQ,
yrsv2,NR,NERRS Weather Station,,"Taskinas Creek, Chesapeake Bay Reserve, VA",,"37.414 N 76.712 W (37&#176;24'51"" N 76&#176;42...",E,,Water Quality data for this Reserve are availa...


Filter full list of NDBC stations to get those that would match with the descriptions of the active stations, and also limit to the US.

In [294]:
# descriptions used in active buoy list
descs = ['buoy', 'platform', 'tower', 'station', 'Water Level Observation Network']

# lon lat box to determine if in US
# [just west of AK stations, just east of Maine stations, just south of Hawaii, north of AK historial stations]
llbox = [-192, -65, 15.5, 76.5]

# loop over full list of NDBC stations and limit to those that seem to match
# those in the active list (avoiding listings like gliders)
# for id, ttype in dfall[['# STATION_ID ',' TTYPE ']].values:
stations_ndbc_all = []
for id, (loc, ttype) in zip(dfall.index.values[1:], dfall[[' LOCATION ', ' TTYPE ']].values[1:]):
    inList, toKeep = False, False
    # search for description in NDBC 
    for desc in descs:
        if desc in str(ttype):
            inList = True
    # filter by lon/lat
    if inList and ('N' in loc) and ('W' in loc):  # don't need any in southern hemisphere
#         print(loc)
        lat = float(loc.split('N')[0][:-1])
        lon = -float(loc.split('N')[1].split(' ')[1])
#         print(lon,lat)
        if (llbox[0]<lon<llbox[1]) and (llbox[2] < lat < llbox[3]):
            toKeep = True
#             print(id)
            stations_ndbc_all.append(id)
    

In [295]:
len(stations_ndbc_all)

606

### Axiom list

In [296]:
# agent id is 18 for NDBC
url2 = 'https://oikos.axds.co/rest/sensor/stations/by/agent/18'
ds2 = pd.read_json(url2)
ds2.set_index('id', inplace=True)
ds2.sourceLabel = [label.lower() for label in ds2.sourceLabel.values]
ds2.head()

Unnamed: 0_level_0,label,platformTypeId,sourceLabel,sourceUrl,isactive,submitToNdbc,addToThredds,wmoId,source,owner,otherAffiliations,lat,lon,elevation,active,visible
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
49357,"46247 - San Francisco Offshore, CA (180)",10,46247,https://www.ndbc.noaa.gov/station_page.php?sta...,False,False,False,,"{'id': 51, 'label': 'NetCDF', 'type': 'source'}","{'id': 18, 'label': 'NOAA National Data Buoy C...",[],37.752617,-122.83313,0,False,False
41916,"SHPF1 - SHP - Shell Point, FL",6,shpf1,https://www.ndbc.noaa.gov/station_page.php?sta...,True,False,False,SHPF1,"{'id': 18, 'label': 'National Data Buoy Center...","{'id': 144, 'label': 'USF CMS - Coastal Ocean ...","[{'id': 18, 'label': 'NOAA National Data Buoy ...",30.058,-84.29,0,True,True
41922,SSBN7 - Sunset Beach Nearshore Waves,6,ssbn7,https://www.ndbc.noaa.gov/station_page.php?sta...,True,False,False,SSBN7,"{'id': 18, 'label': 'National Data Buoy Center...","{'id': 154, 'label': 'UNCW - Coastal Ocean Res...","[{'id': 234, 'label': 'Southeast Coastal Ocean...",33.83,-78.48,0,True,True
57366,KATP - Green Canyon 787 / Atlantis (BP),6,katp,https://www.ndbc.noaa.gov/station_page.php?sta...,True,False,False,KATP,"{'id': 18, 'label': 'National Data Buoy Center...","{'id': 18, 'label': 'NOAA National Data Buoy C...","[{'id': 18, 'label': 'NOAA National Data Buoy ...",27.195,-90.027,0,True,True
15683,"PILA2 - Pilot Rock, AK",10,pila2,https://www.ndbc.noaa.gov/station_page.php?sta...,True,False,False,PILA2,"{'id': 18, 'label': 'National Data Buoy Center...","{'id': 18, 'label': 'NOAA National Data Buoy C...","[{'id': 18, 'label': 'NOAA National Data Buoy ...",59.742,-149.47,24,True,True


In [331]:
urlall = 'https://erddap.dataexplorer.oceanobservatories.org/erddap/tabledap/allDatasets.nc?datasetID%2Caccessible%2Cinstitution%2CdataStructure%2Ccdm_data_type%2Cclass%2Ctitle%2CminLongitude%2CmaxLongitude%2ClongitudeSpacing%2CminLatitude%2CmaxLatitude%2ClatitudeSpacing%2CminAltitude%2CmaxAltitude%2CminTime%2CmaxTime%2CtimeSpacing%2Cgriddap%2Csubset%2Ctabledap%2CMakeAGraph%2Csos%2Cwcs%2Cwms%2Cfiles%2Cfgdc%2Ciso19115%2Cmetadata%2CsourceUrl%2CinfoUrl%2Crss%2Cemail%2CtestOutOfDate%2CoutOfDate%2Csummary&minLongitude%3E=-99&maxLongitude%3C=-88&minLatitude%3E=20&maxLatitude%3E=31&minTime%3E=2010-01-01T00%3A00%3A00Z&maxTime%3C=2020-01-01T00%3A00%3A00Z.nc'

In [335]:
urlall = 'https://erddap.dataexplorer.oceanobservatories.org/erddap/tabledap/allDatasets.nc?minLongitude%2CmaxLongitude%2CminLatitude%2CmaxLatitude%2CminTime%2CmaxTime&minLongitude%3E=-99&maxLongitude%3C=-88&minLatitude%3E=20&maxLatitude%3E=31&minTime%3E=2010-01-01T00%3A00%3A00Z&maxTime%3C=2020-01-01T00%3A00%3A00Z.nc'

In [None]:
'https://erddap.sensors.ioos.us/erddap/tabledap/edu_ucsd_cdip_244.html'
'https://erddap.sensors.ioos.us/erddap/tabledap/edu_ucsd_cdip_244.html?time%2Csea_water_velocity_to_direction%2Csea_water_speed%2Csea_water_velocity_to_direction_qc_agg%2Csea_water_speed_qc_agg'
'https://erddap.sensors.ioos.us/erddap/tabledap/indian-river-lagoon-fort-pierce-.html?time%2Cair_pressure%2Cair_pressure_qc_agg'

In [339]:
'https://erddap.dataexplorer.oceanobservatories.org/erddap/tabledap/allDatasets.nc?accessible%2Cinstitution%2CdataStructure%2Ccdm_data_type%2Cclass%2CminLongitude%2CmaxLongitude%2CminLatitude%2CmaxLatitude%2CminTime%2CmaxTime%2CtestOutOfDate&accessible=%22public%22&minLongitude%3E=-99&maxLongitude%3C=-89&minLatitude%3E=20&maxLatitude%3C=30&minTime%3E=2010-01-01T00%3A00%3A00Z&maxTime%3C=2020-01-01T00%3A00%3A00Z&distinct().nc'

'https://erddap.dataexplorer.oceanobservatories.org/erddap/tabledap/allDatasets.nc?accessible%2Cinstitution%2CdataStructure%2Ccdm_data_type%2Cclass%2CminLongitude%2CmaxLongitude%2CminLatitude%2CmaxLatitude%2CminTime%2CmaxTime%2CtestOutOfDate&accessible=%22public%22&minLongitude%3E=-99&maxLongitude%3C=-89&minLatitude%3E=20&maxLatitude%3C=30&minTime%3E=2010-01-01T00%3A00%3A00Z&maxTime%3C=2020-01-01T00%3A00%3A00Z&distinct().nc'

In [348]:
urlall = 'https://erddap.sensors.ioos.us/erddap/tabledap/indian-river-lagoon-fort-pierce-.nc?time%2Cair_pressure%2Cair_pressure_qc_agg'

In [349]:
import xarray as xr
ds = xr.open_dataset(urlall)

OSError: [Errno -90] NetCDF: file not found: b'https://erddap.sensors.ioos.us/erddap/tabledap/indian-river-lagoon-fort-pierce-.nc?time%2Cair_pressure%2Cair_pressure_qc_agg'

In [353]:
url = 'https://erddap.dataexplorer.oceanobservatories.org/erddap/tabledap/allDatasets.nc?minLongitude=-99&maxLongitude=-89&minLatitude=20&maxLatitude=30&minTime=2010-01-01T00%3A00%3A00Z&maxTime%3C=2020-01-01T00%3A00%3A00Z'
ds = xr.open_dataset(url)

OSError: [Errno -75] NetCDF: Malformed or unexpected Constraint: b'https://erddap.dataexplorer.oceanobservatories.org/erddap/tabledap/allDatasets.nc?minLongitude=-99&maxLongitude=-89&minLatitude=20&maxLatitude=30&minTime=2010-01-01T00%3A00%3A00Z&maxTime%3C=2020-01-01T00%3A00%3A00Z'

In [347]:
url = 'https://erddap.sensors.ioos.us/erddap/tabledap/edu_ucsd_cdip_244.csv?time%2Csea_water_velocity_to_direction%2Csea_water_speed%2Csea_water_velocity_to_direction_qc_agg%2Csea_water_speed_qc_agg'
https://erddap.sensors.ioos.us/erddap/tabledap/edu_ucsd_cdip_132.csv?time%2Csea_water_velocity_to_direction%2Csea_water_speed%2Csea_water_velocity_to_direction_qc_agg%2Csea_water_speed_qc_agg
dft = pd.read_csv(url)
dft

Unnamed: 0,time,sea_water_velocity_to_direction,sea_water_speed,sea_water_velocity_to_direction_qc_agg,sea_water_speed_qc_agg
0,UTC,degrees,m.s-1,,
1,2019-02-16T01:03:45Z,,,,
2,2019-02-16T01:32:45Z,,,,
3,2019-02-16T01:33:45Z,,,,
4,2019-02-16T02:02:45Z,,,,
...,...,...,...,...,...
116294,2021-03-22T21:55:00Z,,,,
116295,2021-03-22T22:00:00Z,125.69230651855469,0.22499999403953552,1.0,1.0
116296,2021-03-22T22:05:00Z,,,,
116297,2021-03-22T22:10:00Z,116.02198028564453,0.2619999945163727,1.0,1.0


In [354]:
url = 'https://search.axds.co/v2/search?portalId=45&page=1&pageSize=10&type=sensor_station&geom=%7B%22type%22%3A%22Polygon%22%2C%22coordinates%22%3A%5B%5B%5B-134.69678953882718%2C33.10075517593025%5D%2C%5B-122.22549788393279%2C33.10075517593025%5D%2C%5B-122.22549788393279%2C43.67582565045436%5D%2C%5B-134.69678953882718%2C43.67582565045436%5D%2C%5B-134.69678953882718%2C33.10075517593025%5D%5D%5D%7D&startDateTime=1616140800&endDateTime=1616486399'

In [361]:
ds = pd.read_json(url, typ='series')
ds

ValueError: Expected object or value

In [371]:
import io
url = 'https://search.axds.co/v2/search?portalId=45&page=1&pageSize=10&type=sensor_station'
res = io.StringIO(url)
# result.decode('utf-8')
# pd.read_json(url)

In [531]:
nc4.date2num?

[0;31mDocstring:[0m
date2num(dates, units, calendar=None)

Return numeric time values given datetime objects. The units
of the numeric time values are described by the **units** argument
and the **calendar** keyword. The datetime objects must
be in UTC with no time-zone offset.  If there is a
time-zone offset in **units**, it will be applied to the
returned numeric values.

**dates**: A datetime object or a sequence of datetime objects.
The datetime objects should not include a time-zone offset. They
can be either native python datetime instances (which use
the proleptic gregorian calendar) or cftime.datetime instances.

**units**: a string of the form **<time units> since <reference time>**
describing the time units. **<time units>** can be days, hours, minutes,
seconds, milliseconds or microseconds. **<reference time>** is the time
origin. **months_since** is allowed *only* for the **360_day** calendar.

**calendar**: describes the calendar to be used in the time calculations.
All 

In [529]:
import netCDF4 as nc4
nc4.num2date(1000186399, units='seconds since 1970-01-01')

cftime.DatetimeGregorian(2001, 9, 11, 5, 33, 19, 0)

In [532]:
nc4.date2num(pd.Timestamp('2010-01-01'), units='seconds since 1970-01-01')

1262304000

In [539]:
import urllib
import json
# url = 'https://search.axds.co/v2/search?portalId=45&page=1&pageSize=1000&type=sensor_station&geom=%7B%22type%22%3A%22Polygon%22,%22coordinates%22%3A[[[-99,20],[-88,20],[-88,30],[-99,30],[-99,20]]]%7D&searchStartDateTime=1262304000&endDateTime=1616486399'
url = 'https://search.axds.co/v2/search?portalId=45&page=1&geom=%7B%22type%22%3A%22Polygon%22%2C%22coordinates%22%3A%5B%5B%5B-134.40150203577394%2C50.51342652633956%5D%2C%5B-126.91416037374725%2C50.51342652633956%5D%2C%5B-126.91416037374725%2C55.3541353102106%5D%2C%5B-134.40150203577394%2C55.3541353102106%5D%2C%5B-134.40150203577394%2C50.51342652633956%5D%5D%5D%7D&startDateTime=1584939600&endDateTime=1616561999&type=sensor_station&type=parameter_group&type=affiliate&pageSize=1'
# with specific variable
url = 'https://search.axds.co/v2/search?portalId=45&page=1&pageSize=10&type=sensor_station&geom=%7B%22type%22%3A%22Polygon%22%2C%22coordinates%22%3A%5B%5B%5B-134.40150203577394%2C50.51342652633956%5D%2C%5B-126.91416037374725%2C50.51342652633956%5D%2C%5B-126.91416037374725%2C55.3541353102106%5D%2C%5B-134.40150203577394%2C55.3541353102106%5D%2C%5B-134.40150203577394%2C50.51342652633956%5D%5D%5D%7D&startDateTime=1584939600&endDateTime=1616561999&tag=Parameter%20Group%3ATemperature%3A%20Water%20Temperature'
# with multiple variables
url = 'https://search.axds.co/v2/search?portalId=45&page=1&pageSize=10&type=sensor_station&geom=%7B%22type%22%3A%22Polygon%22%2C%22coordinates%22%3A%5B%5B%5B-100.52377679565701%2C16.29905101458183%5D%2C%5B-77.97366543708242%2C16.29905101458183%5D%2C%5B-77.97366543708242%2C32.84267363195431%5D%2C%5B-100.52377679565701%2C32.84267363195431%5D%2C%5B-100.52377679565701%2C16.29905101458183%5D%5D%5D%7D&startDateTime=1584939600&endDateTime=1616561999&tag=Parameter%20Group%3ATemperature%3A%20Water%20Temperature&tag=Parameter%20Group%3ACurrents'
result = urllib.request.urlopen(url)
result = result.readline()
res = result.decode('utf-8')
res = res[len('callback('):-1]
res
res = json.loads(res)
res
meta = pd.json_normalize(res)
meta

Unnamed: 0,error,accessMethods,types,results,totalHits,page,totalPages,searchTimeInMillis,totalTimeInMillis,portalStartDateTime,...,searchEndDateTime,successfulShards,failedShards,totalShards,shardSearchFailures,query,explanations,tags.Data Provider,tags.Parameter Group,tags.Affiliate
0,,[],"[{'id': 'sensor_station', 'label': 'Sensor Sta...","[{'data': {'hasNarrative': False, 'figures': [...",804,1,1,147,199,1918-10-12T01:00:00.000Z,...,2021-03-30T10:55:00.000Z,5,0,5,[],,,"[{'id': None, 'label': 'Amerada Hess Corporati...","[{'id': None, 'label': 'Atmospheric Pressure: ...","[{'id': None, 'label': 'Amerada Hess Corporati..."


In [540]:
meta.columns

Index(['error', 'accessMethods', 'types', 'results', 'totalHits', 'page',
       'totalPages', 'searchTimeInMillis', 'totalTimeInMillis',
       'portalStartDateTime', 'portalEndDateTime', 'searchStartDateTime',
       'searchEndDateTime', 'successfulShards', 'failedShards', 'totalShards',
       'shardSearchFailures', 'query', 'explanations', 'tags.Data Provider',
       'tags.Parameter Group', 'tags.Affiliate'],
      dtype='object')

In [541]:
meta[['portalStartDateTime', 'portalEndDateTime', 'searchStartDateTime',
       'searchEndDateTime']]

Unnamed: 0,portalStartDateTime,portalEndDateTime,searchStartDateTime,searchEndDateTime
0,1918-10-12T01:00:00.000Z,2021-03-30T17:53:00.000Z,1990-07-28T16:30:00.000Z,2021-03-30T10:55:00.000Z


In [495]:
meta[['portalStartDateTime', 'portalEndDateTime', 'searchStartDateTime',
       'searchEndDateTime']]

Unnamed: 0,portalStartDateTime,portalEndDateTime,searchStartDateTime,searchEndDateTime
0,1918-10-12T01:00:00.000Z,2021-03-30T16:54:00.000Z,1990-07-28T16:30:00.000Z,2021-03-30T10:55:00.000Z


In [459]:
len(meta['results'][0])

570

### Compare lists

These stations are on both the full NDBC list and Axiom's NDBC list

In [304]:
onbothlists = list(set(stations_ndbc_all) & set(list(ds2.sourceLabel.values)))

In [325]:
summary = '''There are:
             -%i buoys on the active NDBC buoy list,
             -%i buoys on the full NDBC list, 
             -%i buoys on the filtered full NDBC to try to catch only appropriate U.S. buoys, 
             -%i buoys on the Axiom NDBC list, and 
             -%i buoys that are on both the Axiom and filtered full NDBC list''' \
             % (len(stations_ndbc_active), len(dfall), len(stations_ndbc_all),
                len(ds2), len(onbothlists))
print(summary)

There are:
             -733 buoys on the active NDBC buoy list,
             -1935 buoys on the full NDBC list, 
             -606 buoys on the filtered full NDBC to try to catch only appropriate U.S. buoys, 
             -1001 buoys on the Axiom NDBC list, and 
             -236 buoys that are on both the Axiom and filtered full NDBC list


In [328]:
print('The following NDBC buoys are not on the Axiom list: ', list(set(stations_ndbc_all) - set(list(ds2.sourceLabel.values))))

The following NDBC buoys are not on the Axiom list:  ['tcbm2', 'scis1', 'atgm1', 'tcmw1', 'pxsc1', '41005', '42037', 'mcyf1', 'bdsp1', 'fptt2', '42006', 'neaw1', '46304', 'gtot2', 'sapf1', 'camm2', 'gctf1', 'mbet2', 'espp4', 'ppxc1', 'nwwh1', '42041', 'labl1', 'acyn4', 'amps3', 'gwpm6', 'dkcm6', 'mgpt2', 'eb31', '46416', 'ulra2', 'darts', 'pnlm4', '46037', 'pmoa2', 'optf1', '41021', 'msg10', 'pvdr1', 'pxac1', 'sblf1', 'eb43', 'nwcl1', 'chyw1', 'mzxc1', 'txpt2', 'mros1', 'psxc1', '44490', 'hrbm4', 'qptr1', 'lndc1', 'msg14', '46008', 'wahv2', '44038', '42009', 'crva2', 'jmpn7', '44026', 'pmaf1', '41011', '46flo', 'frvm3', 'kwhh1', 'tlbo3', 'dartl', 'dartn', 'smoc1', 'brhc3', 'oouh1', 'apcf1', 'cndo1', 'nfdf1', 'amrl1', 'bltm2', '42015', 'orin7', 'mnmm4', '41012', 'lpnm4', 'pfdc1', 'kptn6', 'fcgt2', 'bftn7', '44023', 'rplv2', 'cman4', 'tpaf1', 'ptit2', 'alxn6', 'casm1', 'dmbc1', 'pacf1', 'clbp4', '42025', 'capl1', 'mnpv2', 'frdf1', 'dpxc1', '41023', 'gom01', 'blif1', 'skcf1', '46107', 'sh

In [300]:
stations_ndbc_all

606

In [47]:
[('buoy' in str(ttype).lower()) or ('platform' in str(ttype).lower()) or ('station' in str(ttype).lower()) or 
 ('tower' in str(ttype).lower()) or ('Water Level Observation Network' in str(ttype).lower()) for ttype in allstations[' TTYPE ']].count(True)

1385

DON'T INCLUDE IF OUTSIDE US

In [125]:
stationlist = [('buoy' in str(ttype).lower()) or ('platform' in str(ttype).lower()) or ('station' in str(ttype).lower()) or 
 ('tower' in str(ttype).lower()) or ('Water Level Observation Network' in str(ttype).lower()) for ttype in allstations[' TTYPE ']]
stationlist = list(allstations.iloc[stationlist].index.values)
stationlist

['0y2w3',
 '13001',
 '13002',
 '13008',
 '13009',
 '13010',
 '14040',
 '14041',
 '14043',
 '14047',
 '15001',
 '15002',
 '15006',
 '15007',
 '15319',
 '18ci3',
 '18cy3',
 '20cm4',
 '21178',
 '21346',
 '21347',
 '21348',
 '21401',
 '21413',
 '21414',
 '21415',
 '21416',
 '21417',
 '21418',
 '21419',
 '21420',
 '21595',
 '21597',
 '21598',
 '21600',
 '21636',
 '21637',
 '21640',
 '21707',
 '21d13',
 '22101',
 '22102',
 '22103',
 '22104',
 '22105',
 '22106',
 '22107',
 '22108',
 '23001',
 '23003',
 '23004',
 '23006',
 '23007',
 '23008',
 '23009',
 '23010',
 '23011',
 '23012',
 '23013',
 '23014',
 '23015',
 '23016',
 '23017',
 '23020',
 '23217',
 '23218',
 '23219',
 '23220',
 '23223',
 '23225',
 '23226',
 '23227',
 '23228',
 '23401',
 '28401',
 '31001',
 '31002',
 '31003',
 '31004',
 '31005',
 '31006',
 '31007',
 '31051',
 '31052',
 '31053',
 '31201',
 '31260',
 '31261',
 '31478',
 '32012',
 '32066',
 '32067',
 '32068',
 '32069',
 '32301',
 '32302',
 '32401',
 '32402',
 '32403',
 '32404',


In [51]:
[station for station in allstations.index if ('buoy' in str(station[' TTYPE ']).lower()) or ('platform' in str(station[' TTYPE ']).lower()) or ('station' in str(station[' TTYPE ']).lower()) or 
 ('tower' in str(station[' TTYPE ']).lower()) or ('Water Level Observation Network' in str(station[' TTYPE ']).lower())]

TypeError: string indices must be integers

In [66]:
allstations.loc['00922'][' TTYPE ']

'Slocum Glider'

In [71]:
[station for station in allstations.index[1:] if 'buoy' in str(allstations.loc[station][' TTYPE '])]

['21413',
 '21414',
 '21415',
 '21416',
 '21417',
 '21418',
 '21419',
 '21420',
 '21d13',
 '32012',
 '32066',
 '32067',
 '32068',
 '32069',
 '32301',
 '32302',
 '32401',
 '32402',
 '32411',
 '32412',
 '32413',
 '32489',
 '32d12',
 '41001',
 '41002',
 '41003',
 '41004',
 '41005',
 '41006',
 '41007',
 '41008',
 '41009',
 '41010',
 '41011',
 '41012',
 '41013',
 '41015',
 '41016',
 '41017',
 '41018',
 '41021',
 '41022',
 '41023',
 '41025',
 '41035',
 '41036',
 '41040',
 '41041',
 '41043',
 '41044',
 '41046',
 '41047',
 '41048',
 '41049',
 '41060',
 '41061',
 '41420',
 '41421',
 '41424',
 '41425',
 '41a46',
 '41b41',
 '41d20',
 '41d21',
 '41s43',
 '41s46',
 '41x01',
 '41x24',
 '42001',
 '42002',
 '42003',
 '42004',
 '42005',
 '42006',
 '42007',
 '42009',
 '42012',
 '42015',
 '42016',
 '42017',
 '42018',
 '42019',
 '42020',
 '42025',
 '42035',
 '42036',
 '42037',
 '42038',
 '42039',
 '42040',
 '42041',
 '42042',
 '42053',
 '42054',
 '42055',
 '42056',
 '42057',
 '42058',
 '42059',
 '42060',


In [44]:
[allstations.loc[station.lower(),' TTYPE '] for station in df.station if station.lower() in allstations.index.values]

['Atlas Buoy',
 'Atlas Buoy',
 'Atlas Buoy',
 'Atlas Buoy',
 'Atlas Buoy',
 'Atlas Buoy',
 'Atlas Buoy',
 'Buoy',
 'Buoy',
 'Buoy',
 'Buoy',
 'Buoy',
 'Buoy',
 'Buoy',
 'Buoy',
 'Atlas Buoy',
 'Atlas Buoy',
 'Atlas Buoy',
 'Atlas Buoy',
 'Atlas Buoy',
 'Ocean Reference Station',
 '3-meter discus buoy',
 '3-meter foam buoy',
 '3-meter foam buoy',
 '3-meter foam buoy',
 'Moored Buoy',
 'Moored Buoy',
 'Moored Buoy',
 'Moored Buoy',
 'Moored Buoy',
 '3-meter foam buoy',
 '3-meter discus buoy',
 '3-meter discus buoy',
 '3-meter discus buoy',
 '3-meter foam buoy',
 '3-meter discus buoy',
 'Waverider Buoy',
 'Waverider Buoy',
 'Waverider Buoy',
 'Waverider Buoy',
 'Waverider Buoy',
 'Waverider Buoy',
 'Waverider Buoy',
 'Atlas Buoy',
 'Waverider Buoy',
 'Ocean Reference Station',
 '3-meter discus buoy',
 '3-meter discus buoy',
 '3-meter discus buoy',
 'Offshore Buoy',
 '2.1-meter ionomer foam buoy',
 'Offshore Buoy',
 'Offshore Buoy',
 'Offshore Buoy',
 '3-meter discus buoy',
 '3-meter foam 

In [40]:
[station.lower() in allstations.index.values for station in df.station].count(True)

738

In [25]:
allstations['# STATION_ID '].values

array(['#', '00922', '00923', ..., 'yktv2', 'yrsv2', 'zbqn7'],
      dtype=object)

In [32]:
df.station

0      13001
1      13002
2      13008
3      14040
4      14041
       ...  
733    WWEF1
734    WYCM6
735    YATA2
736    YKRV2
737    YKTV2
Name: station, Length: 738, dtype: object

### Axiom NDBC station

In [213]:
url2 = 'https://oikos.axds.co/rest/sensor/stations/by/agent/18'
ds2 = pd.read_json(url2)
ds2.set_index('id', inplace=True)
ds2.sourceLabel = [label.lower() for label in ds2.sourceLabel.values]
ds2

Unnamed: 0_level_0,label,platformTypeId,sourceLabel,sourceUrl,isactive,submitToNdbc,addToThredds,wmoId,source,owner,otherAffiliations,lat,lon,elevation,active,visible
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
49357,"46247 - San Francisco Offshore, CA (180)",10,46247,https://www.ndbc.noaa.gov/station_page.php?sta...,False,False,False,,"{'id': 51, 'label': 'NetCDF', 'type': 'source'}","{'id': 18, 'label': 'NOAA National Data Buoy C...",[],37.752617,-122.833130,0,False,False
41916,"SHPF1 - SHP - Shell Point, FL",6,shpf1,https://www.ndbc.noaa.gov/station_page.php?sta...,True,False,False,SHPF1,"{'id': 18, 'label': 'National Data Buoy Center...","{'id': 144, 'label': 'USF CMS - Coastal Ocean ...","[{'id': 18, 'label': 'NOAA National Data Buoy ...",30.058000,-84.290000,0,True,True
41922,SSBN7 - Sunset Beach Nearshore Waves,6,ssbn7,https://www.ndbc.noaa.gov/station_page.php?sta...,True,False,False,SSBN7,"{'id': 18, 'label': 'National Data Buoy Center...","{'id': 154, 'label': 'UNCW - Coastal Ocean Res...","[{'id': 234, 'label': 'Southeast Coastal Ocean...",33.830000,-78.480000,0,True,True
57366,KATP - Green Canyon 787 / Atlantis (BP),6,katp,https://www.ndbc.noaa.gov/station_page.php?sta...,True,False,False,KATP,"{'id': 18, 'label': 'National Data Buoy Center...","{'id': 18, 'label': 'NOAA National Data Buoy C...","[{'id': 2009, 'label': 'World Meteorological O...",27.195000,-90.027000,0,True,True
15683,"PILA2 - Pilot Rock, AK",10,pila2,https://www.ndbc.noaa.gov/station_page.php?sta...,True,False,False,PILA2,"{'id': 18, 'label': 'National Data Buoy Center...","{'id': 18, 'label': 'NOAA National Data Buoy C...","[{'id': 2009, 'label': 'World Meteorological O...",59.742000,-149.470000,24,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58090,T2N155W (51021),6,t2n155w,http://tao.ndbc.noaa.gov/refreshed/site.php?si...,True,False,False,51021,"{'id': 87, 'label': 'Ocean SITES', 'type': 'so...","{'id': 149, 'label': 'OceanSITES', 'type': 'Ow...","[{'id': 18, 'label': 'NOAA National Data Buoy ...",6.639700,-146.119995,0,True,True
57574,T0N110W (32323),6,t0n110w,http://tao.ndbc.noaa.gov/refreshed/site.php?si...,True,False,False,32323,"{'id': 87, 'label': 'Ocean SITES', 'type': 'so...","{'id': 149, 'label': 'OceanSITES', 'type': 'Ow...","[{'id': 2009, 'label': 'World Meteorological O...",0.046600,-109.917999,0,True,True
58106,T5N155W (51020),6,t5n155w,http://tao.ndbc.noaa.gov/refreshed/site.php?si...,True,False,False,51020,"{'id': 87, 'label': 'Ocean SITES', 'type': 'so...","{'id': 149, 'label': 'OceanSITES', 'type': 'Ow...","[{'id': 2009, 'label': 'World Meteorological O...",4.966200,-154.947006,0,True,True
75575,"OCSM2 - Ocean City, MD",6,ocsm2,https://www.ndbc.noaa.gov/station_page.php?sta...,True,False,False,,"{'id': 18, 'label': 'National Data Buoy Center...","{'id': 202, 'label': 'U.S. Army Corps of Engin...","[{'id': 18, 'label': 'NOAA National Data Buoy ...",38.338000,-75.070000,0,True,True


In [215]:
len([station for station in df.station if (station.lower() in ds2.sourceLabel.values)])# or (station in str(ds2.index.values)) or (station in ds2.label.values)])

429

In [197]:
[station for station in stationlist if (station.lower() in ds2.sourceLabel.values) or (station.lower() in str(ds2.index.values))]

['21346',
 '21347',
 '21348',
 '21413',
 '21414',
 '21415',
 '21416',
 '21418',
 '21419',
 '21595',
 '21597',
 '21598',
 '21637',
 '23218',
 '23219',
 '23220',
 '23223',
 '23226',
 '23227',
 '23228',
 '23401',
 '32012',
 '32066',
 '32067',
 '32401',
 '32402',
 '32403',
 '32411',
 '32412',
 '32413',
 '32489',
 '34420',
 '41001',
 '41002',
 '41004',
 '41008',
 '41009',
 '41010',
 '41013',
 '41024',
 '41025',
 '41029',
 '41033',
 '41036',
 '41037',
 '41038',
 '41040',
 '41041',
 '41043',
 '41044',
 '41046',
 '41047',
 '41048',
 '41049',
 '41051',
 '41052',
 '41053',
 '41056',
 '41057',
 '41060',
 '41062',
 '41063',
 '41064',
 '41108',
 '41110',
 '41112',
 '41113',
 '41114',
 '41115',
 '41116',
 '41117',
 '41118',
 '41159',
 '41420',
 '41421',
 '41424',
 '42001',
 '42002',
 '42003',
 '42012',
 '42013',
 '42019',
 '42020',
 '42022',
 '42023',
 '42035',
 '42036',
 '42039',
 '42040',
 '42055',
 '42056',
 '42057',
 '42058',
 '42059',
 '42060',
 '42067',
 '42085',
 '42088',
 '42089',
 '42090',


In [217]:
len(set(stationlist) & set(list(ds2.sourceLabel.values)))

783

In [218]:
len(stationlist)

1385

In [149]:
len(ds2)

1001

What is a buoy that is present in Axiom list but not NDBC list?

In [219]:
notndbc = list(set(ds2.sourceLabel.values) - set(stationlist))
len(notndbc)

216

In [228]:
list(set(stationlist) - set(ds2.sourceLabel.values))

['62091',
 '42094',
 '53009',
 '45178',
 '53040',
 '32302',
 '45019',
 '52842',
 'caro3',
 '22107',
 '45183',
 '41005',
 '42037',
 '42010',
 '46245',
 '48677',
 'erxa2',
 '31003',
 '44142',
 'cdxa2',
 '41098',
 'gptw1',
 'nwst2',
 '42006',
 '46264',
 '46304',
 'shxa2',
 '42078',
 '22103',
 '91356',
 '31052',
 'lprp4',
 '22102',
 '42008',
 'jnga2',
 '46111',
 'lixa2',
 '42041',
 'dmno3',
 'dsln7',
 '21417',
 'amps3',
 '42024',
 '22106',
 '91222',
 '22104',
 'huqn6',
 'ildl1',
 'spag1',
 'eb31',
 'farp2',
 '46416',
 'darts',
 '41193',
 'mxxa2',
 '91352',
 '46016',
 '53401',
 '46037',
 '62052',
 'kcmb',
 'acmn4',
 '41021',
 'msg10',
 '44086',
 'fpxc1',
 'eb43',
 '48680',
 '42014',
 'eb92',
 '45017',
 '44076',
 '44488',
 'ehsc1',
 'mbnn7',
 '31007',
 'hssf1',
 '41933',
 '42391',
 '46269',
 '51212',
 '44490',
 '41027',
 '46250',
 'tibc1',
 'msg14',
 '46008',
 '44012',
 'svls1',
 'tdpc1',
 '44038',
 '42009',
 '44174',
 'dryf1',
 '23001',
 '46266',
 '46780',
 'hplm2',
 'gbcl1',
 '44026',
 'mb

In [227]:
ds2[ds2['sourceLabel'] == notndbc[-50]][['owner','sourceUrl']].values#['sourceUrl'].values

array([[{'id': 205, 'label': 'Amerada Hess Corporation', 'type': 'Owner'},
        'https://www.ndbc.noaa.gov/station_page.php?station=42919']],
      dtype=object)

In [192]:
ds2[ds2['sourceLabel'] == notndbc]

Unnamed: 0_level_0,label,platformTypeId,sourceLabel,sourceUrl,isactive,submitToNdbc,addToThredds,wmoId,source,owner,otherAffiliations,lat,lon,elevation,active,visible
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
13782,"AUGA2 - Augustine Island, AK",10,AUGA2,https://www.ndbc.noaa.gov/station_page.php?sta...,True,False,False,AUGA2,"{'id': 18, 'label': 'National Data Buoy Center...","{'id': 18, 'label': 'NOAA National Data Buoy C...","[{'id': 18, 'label': 'NOAA National Data Buoy ...",59.378,-153.348,0,True,True


In [193]:
NDBC.buoy_data_types('AUGA2')

{}

## Axiom CO-OPS stations

In [72]:
url1 = 'https://oikos.axds.co/rest/sensors/stations/by/agent/9'
url2 = 'https://oikos.axds.co/rest/sensor/stations/by/agent/9'

In [95]:
ds1 = pd.read_json(url1)
ds1

Unnamed: 0,id,label,slug,lat,lon,z,platformType,visible,publicNotice,hasNarrative,stationAgents,datasetId
0,52539,El Capitan Passage,noaa_nos_co_ops_9450997,56.163300,-133.330000,0.00,fixed,True,,False,"[{'id': 252, 'associationType': 'publisher', '...",noaa_nos_co_ops_9450997
1,15253,"Red Bluff Bay, Baranof Island",noaa_nos_co_ops_9451467,56.856700,-134.723000,0.00,fixed,True,,False,"[{'id': 258, 'associationType': 'owner', 'fore...",noaa_nos_co_ops_9451467
2,15629,Perry Island (South Bay),noaa_nos_co_ops_9454721,60.671700,-147.932000,0.00,fixed,True,,False,"[{'id': 272, 'associationType': 'owner', 'fore...",noaa_nos_co_ops_9454721
3,45388,"Beck Island, Clarence Strait",noaa_nos_co_ops_9450906,56.046700,-132.862000,0.00,fixed,True,,False,"[{'id': 250, 'associationType': 'owner', 'fore...",noaa_nos_co_ops_9450906
4,13822,"KDAA2 - 9457292- Kodiak Island, AK",noaa_nos_co_ops_kdaa2,57.730000,-152.514000,0.00,fixed,True,,False,"[{'id': 942, 'associationType': 'affiliate', '...",noaa_nos_co_ops_kdaa2
...,...,...,...,...,...,...,...,...,...,...,...,...
335,105538,"Aguchik Island, AK, Tide Station (9456901)",aguchik-island-ak-tide-station-9,58.294639,-154.265611,0.00,fixed,True,,True,"[{'id': 13284, 'associationType': 'publisher',...",aguchik-island-ak-tide-station-9
336,100007,"Chinitna Bay, AK, Tide Station (9456357) [Prel...",chinitna-bay-ak-tide-station-,59.842095,-152.992627,-16.36,fixed,False,,True,"[{'id': 34, 'associationType': 'owner', 'forei...",chinitna-bay-ak-tide-station-
337,100008,"Coal Point, Homer, AK, Tide Station (9455558)",coal-point-homer-ak-tide-stat,59.602639,-151.410306,-17.00,fixed,True,,True,"[{'id': 13094, 'associationType': 'sponsor', '...",coal-point-homer-ak-tide-stat
338,103695,"Gadsden Cut, Tampa Bay",gadsden-cut-tampa-bay,27.773528,-82.516861,0.00,fixed,True,,False,"[{'id': 8217, 'associationType': 'owner', 'for...",gadsden-cut-tampa-bay


Which active NDBC stations are found in url2?

In [97]:
ds2 = pd.read_json(url2)
ds2

Unnamed: 0,id,label,platformTypeId,sourceLabel,sourceUrl,isactive,submitToNdbc,addToThredds,wmoId,source,owner,otherAffiliations,lat,lon,elevation,active,visible
0,46477,"Dahlgren, Upper Machodoc Creek, Va.",10,8635027,https://tidesandcurrents.noaa.gov/noaatidepred...,False,False,False,,"{'id': 9, 'label': 'Center for Operational Oce...","{'id': 9, 'label': 'NOAA Center for Operationa...","[{'id': 236, 'label': 'Mid-Atlantic Coastal Oc...",38.3200,-77.0367,0,False,True
1,15298,"Muir Inlet, Glacier Bay",10,9452584,https://tidesandcurrents.noaa.gov/noaatidepred...,False,False,False,,"{'id': 9, 'label': 'Center for Operational Oce...","{'id': 9, 'label': 'NOAA Center for Operationa...",[],58.9133,-136.1080,0,False,False
2,15656,"AKUTAN, ALASKA",10,9462694,https://tidesandcurrents.noaa.gov/noaatidepred...,False,False,False,,"{'id': 9, 'label': 'Center for Operational Oce...","{'id': 9, 'label': 'NOAA Center for Operationa...",[],54.1333,-165.7773,0,False,False
3,52545,"TEXAS POINT, SABINE PASS (TCOON)",10,8770822,https://tidesandcurrents.noaa.gov/noaatidepred...,False,False,False,,"{'id': 9, 'label': 'Center for Operational Oce...","{'id': 9, 'label': 'NOAA Center for Operationa...",[],29.6893,-93.8418,0,False,True
4,46191,BRANDYWINE SHOAL LIGHT,10,8555889,https://tidesandcurrents.noaa.gov/noaatidepred...,False,False,False,,"{'id': 9, 'label': 'Center for Operational Oce...","{'id': 9, 'label': 'NOAA Center for Operationa...","[{'id': 236, 'label': 'Mid-Atlantic Coastal Oc...",38.9867,-75.1133,0,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3339,47827,Queen Isabella Causeway,10,8779724,https://tidesandcurrents.noaa.gov/noaatidepred...,True,False,False,,"{'id': 9, 'label': 'Center for Operational Oce...","{'id': 9, 'label': 'NOAA Center for Operationa...",[],26.0783,-97.1700,0,True,True
3340,47584,"Hungry Harbor, Wash.",10,9440563,https://tidesandcurrents.noaa.gov/noaatidepred...,True,False,False,,"{'id': 9, 'label': 'Center for Operational Oce...","{'id': 9, 'label': 'NOAA Center for Operationa...",[],46.2583,-123.8480,0,True,True
3341,47835,Ifalik Atoll,10,TPT2647,https://tidesandcurrents.noaa.gov/noaatidepred...,True,False,False,,"{'id': 9, 'label': 'Center for Operational Oce...","{'id': 9, 'label': 'NOAA Center for Operationa...",[],7.2500,144.4500,0,True,True
3342,46181,"Stone Harbor, Great Channel",10,8535581,https://tidesandcurrents.noaa.gov/noaatidepred...,True,False,False,,"{'id': 9, 'label': 'Center for Operational Oce...","{'id': 9, 'label': 'NOAA Center for Operationa...",[],39.0567,-74.7650,0,True,True


In [115]:
len([station for station in df.station if (station in ds2.sourceLabel.values) or (station in str(ds2.id.values))])

50

In [113]:
ds2.id.values

array([46477, 15298, 15656, ..., 47835, 46181, 46928])

In [109]:
[label for label in ds2.sourceLabel if label in df.station.values]

['SKTA2',
 'OVIA2',
 'CECC1',
 'LJAC1',
 'OHBC1',
 'PRDA2',
 'MGZP4',
 'VDZA2',
 'NMTA2',
 'SDBC1',
 'MHRN6',
 'ITKA2',
 'SWLA2',
 'ALIA2',
 'KECA2',
 'FREL1',
 'PRUR1',
 'PORO3',
 'CHAO3',
 'PFXC1',
 'RDDA2',
 'SNDA2',
 'NKTA2',
 'KGCA2',
 'PRJC1',
 'YATA2',
 'ELFA2',
 'RTYC1',
 'NTBC1',
 'UPBC1',
 'PCOC1',
 'OMHC1',
 'PRYC1',
 'SBEO3',
 'ANTA2',
 'OLSA2',
 'FTPC1',
 'PSLC1',
 'RCMC1',
 'ANVC1',
 'ICAC1',
 'HBYC1',
 'AAMC1',
 'CPVM2',
 'UNLA2',
 'PLXA2',
 'ATKA2',
 'ADKA2',
 'KDAA2']

In [126]:
[station for station in stationlist if (station in ds2.sourceLabel.values) or (station in str(ds2.id.values))]

['46181']

In [134]:
list(ds2.sourceLabel.values)

['8635027',
 '9452584',
 '9462694',
 '8770822',
 '8555889',
 '8774230',
 'SKTA2',
 '9450997',
 '9458849',
 '8740166',
 'OVIA2',
 'CECC1',
 '9452611',
 '9457283',
 '9469439',
 '9455204',
 'LJAC1',
 '9451124',
 '9491873',
 '9451263',
 '9455869',
 '9459465',
 '9457724',
 '9452368',
 '9455711',
 '9491253',
 'OHBC1',
 'PRDA2',
 '9466477',
 '9454616',
 '9457376',
 '9456901',
 '9457634',
 '9466931',
 '9454751',
 '9451335',
 '9454721',
 '9457152',
 '9451467',
 '9454652',
 '9458819',
 '9462723',
 '9462782',
 '9454777',
 '9452022',
 '9454757',
 '9450906',
 '9455159',
 '9462719',
 '9455437',
 '9454662',
 '9454794',
 '9462662',
 '9456173',
 '9450913',
 '9450998',
 '9462787',
 '9466298',
 '9450296',
 '9462705',
 '9451936',
 '9451422',
 '9451434',
 '9451005',
 '9465261',
 '9453208',
 '9466153',
 '9459163',
 '9455145',
 '9458779',
 '9452336',
 '9454949',
 '9454825',
 '9469338',
 '9451853',
 '9462711',
 '9490424',
 '9451317',
 '9459758',
 '9454755',
 '9466057',
 '9469239',
 '9451906',
 '9450970',
 '94

In [133]:
set(stationlist) & set(list(ds2.sourceLabel.values))

set()

In [139]:
list(ds2.id.values)

[46477,
 15298,
 15656,
 52545,
 46191,
 52549,
 13804,
 52539,
 15451,
 47744,
 13768,
 18403,
 15744,
 15411,
 15589,
 15366,
 16977,
 15194,
 19977,
 15196,
 15638,
 15465,
 15422,
 15295,
 15729,
 15733,
 16978,
 13839,
 15574,
 15326,
 15432,
 15440,
 15416,
 52542,
 15331,
 15206,
 15629,
 15401,
 15253,
 15627,
 15446,
 15873,
 15486,
 15355,
 15229,
 15353,
 45388,
 15365,
 15653,
 15375,
 15335,
 15351,
 15657,
 15394,
 15183,
 52540,
 15484,
 20277,
 15594,
 20665,
 15275,
 15208,
 15200,
 15141,
 15568,
 15305,
 15662,
 15650,
 15360,
 15448,
 15241,
 15350,
 15349,
 15588,
 15273,
 15655,
 19976,
 15198,
 15470,
 15356,
 15661,
 15587,
 15222,
 15142,
 15619,
 20662,
 15444,
 15134,
 15271,
 15673,
 15389,
 20663,
 15382,
 15114,
 15306,
 15232,
 45389,
 15129,
 15453,
 15218,
 15263,
 15443,
 15586,
 15408,
 15307,
 20664,
 15221,
 15438,
 20703,
 15746,
 15171,
 15321,
 15381,
 15660,
 15199,
 15244,
 15135,
 15745,
 15550,
 15342,
 15441,
 15487,
 15144,
 15391,
 15362,


## HF Radar

## Compare amount of available data

### NDBC

In [144]:
url = 'https://www.ndbc.noaa.gov/data/stations/station_table.txt'
dfall = pd.read_table(url, sep='|', index_col=0)
dfall

Unnamed: 0_level_0,OWNER,TTYPE,HULL,NAME,PAYLOAD,LOCATION,TIMEZONE,FORECAST,NOTE
# STATION_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
#,,,,,,,,,
00922,DU,Slocum Glider,,OTN201 - 4800922,,"30.000 N 90.000 W (30&#176;0'0"" N 90&#176;0'0"" W)",E,,
00923,DU,Slocum Glider,,OTN200 - 4800923,,"30.000 N 90.000 W (30&#176;0'0"" N 90&#176;0'0"" W)",E,,
01500,R,Spray Glider,,SP031 - 3801500,,"30.000 N 90.000 W (30&#176;0'0"" N 90&#176;0'0"" W)",?,,
01502,UA,Slocum Glider,,Penobscot - 4801502,,"30.000 N 90.000 W (30&#176;0'0"" N 90&#176;0'0"" W)",E,,
...,...,...,...,...,...,...,...,...,...
ygnn6,EA,GLOS Weather Station,,"Niagara Coast Guard Station, NY",,"43.262 N 79.064 W (43&#176;15'42"" N 79&#176;3'...",E,,"For Great Lakes marine forecasts, select: <a h..."
ykrv2,PT,Water Level Observation Network,,"8637611 - York River East Rear Range Light, VA",,"37.251 N 76.342 W (37&#176;15'5"" N 76&#176;20'...",E,FZUS51.KAKQ,
yktv2,O,Water Level Observation Network,,"8637689 - Yorktown USCG Training Center, VA",,"37.227 N 76.479 W (37&#176;13'36"" N 76&#176;28...",E,FZUS51.KAKQ,
yrsv2,NR,NERRS Weather Station,,"Taskinas Creek, Chesapeake Bay Reserve, VA",,"37.414 N 76.712 W (37&#176;24'51"" N 76&#176;42...",E,,Water Quality data for this Reserve are availa...


Filter full list of NDBC stations to get those that would match with the descriptions of the active stations, and also limit to the US.

In [294]:
# descriptions used in active buoy list
descs = ['buoy', 'platform', 'tower', 'station', 'Water Level Observation Network']

# lon lat box to determine if in US
# [just west of AK stations, just east of Maine stations, just south of Hawaii, north of AK historial stations]
llbox = [-192, -65, 15.5, 76.5]

# loop over full list of NDBC stations and limit to those that seem to match
# those in the active list (avoiding listings like gliders)
# for id, ttype in dfall[['# STATION_ID ',' TTYPE ']].values:
stations_ndbc_all = []
for id, (loc, ttype) in zip(dfall.index.values[1:], dfall[[' LOCATION ', ' TTYPE ']].values[1:]):
    inList, toKeep = False, False
    # search for description in NDBC 
    for desc in descs:
        if desc in str(ttype):
            inList = True
    # filter by lon/lat
    if inList and ('N' in loc) and ('W' in loc):  # don't need any in southern hemisphere
#         print(loc)
        lat = float(loc.split('N')[0][:-1])
        lon = -float(loc.split('N')[1].split(' ')[1])
#         print(lon,lat)
        if (llbox[0]<lon<llbox[1]) and (llbox[2] < lat < llbox[3]):
            toKeep = True
#             print(id)
            stations_ndbc_all.append(id)
    

In [295]:
len(stations_ndbc_all)

606

Currently available in Axiom erddap:

In [305]:
url = 'http://erddap.sensors.axds.co/erddap/search/advanced.csv?page=1&itemsPerPage=10000&searchFor=ndbc'
df = pd.read_csv(url)
print(len(df))

884


In [None]:
DO A MORE CAREFUL MATCHING OF STATIONS TYPES?

### CO-OPS

CUrrently available in axiom erddap

In [306]:
url = 'http://erddap.sensors.axds.co/erddap/search/advanced.csv?page=1&itemsPerPage=10000&searchFor=co-ops'
df = pd.read_csv(url)
print(len(df))

3309


## Access data

In [None]:
startDate = '2020-01-01'
endDate = '2021-01-01'
minLon, maxLon = -99, -88
minLat, maxLat = 20, 30

In [267]:
# function to read in only the columns that overlap with variable names
def readfunc(col):
    for Var in Vars+indices+checks:
        if Var in col:
            return True

In [266]:
indices = ['time (UTC)','latitude (degrees_north)','longitude (degrees_east)','station']
Vars = ['sea_water_temperature','sea_water_practical_salinity','sea_water_speed','sea_surface_height']
checks = ['depth_reading_ocean (m)']

Cols = ['time (UTC)','latitude (degrees_north)','longitude (degrees_east)','station','sea_water_temperature (degree_Celsius)', 'sea_water_temperature_qc_agg', 'sea_water_temperature_ocean (degree_Celsius)', 'sea_water_practical_salinity (1e-3)', 'sea_water_speed (m.s-1)', 'sea_water_velocity_to_direction (degrees)','sea_surface_height_above_sea_level_geoid_mllw (m)']

# Var = Vars[0]
dfs = []
for Var in Vars:
    url_base = 'http://erddap.sensors.axds.co/erddap/search/advanced.csv?page=1&itemsPerPage=10000'
    url = '%s&searchFor=%s&maxLat=%f&minLon=%f&maxLon=%f&minLat=%f&minTime=%s&maxTime=%s' % (url_base,Var,maxLat,minLon,maxLon,minLat,pd.Timestamp(startDate).isoformat(), pd.Timestamp(endDate).isoformat())
    dfs.append(pd.read_csv(url))
# &variableName=
# Gather together all data urls, but drop duplicates
# will have to check for all Vars in each dataset
df = pd.concat(dfs, axis=0, ignore_index=True).drop_duplicates()

In [206]:
import numpy as np

LIST OF NAMES OF variables by source

TABS
z (Altitude, m) 
sea_water_velocity_to_direction (degrees)
sea_water_speed (Current Speed, m.s-1)
sea_water_practical_salinity (1e-3)


NDBC
z (Altitude, m)
sea_surface_height_above_sea_level_geoid_mllw (m)
sea_water_temperature (degree_Celsius)


CO-OPS
z (Altitude, m)
sea_surface_height_amplitude_due_to_geocentric_ocean_tide_geoid_mllw (cm)
sea_water_practical_salinity (1e-3)
sea_water_temperature (degree_Celsius)
sea_surface_height_above_sea_level_geoid_mllw (m) (not always available)

Another COOPS
sea_surface_height_amplitude_due_to_geocentric_ocean_tide_geoid_mllw (cm)
 sea_surface_height_amplitude_due_to_geocentric_ocean_tide_geoid_mllw_qc_agg
 sea_surface_height_amplitude_due_to_geocentric_ocean_tide_geoid_mllw_qc_tests
 sea_surface_height_above_sea_level_geoid_mllw (m)
 sea_surface_height_above_sea_level_geoid_mllw_qc_agg
 
 
NWIS
height_geoid_local_station_datum (m)
 sea_water_temperature (degree_Celsius)
 water_surface_height_above_reference_datum_geoid_localstationdatum (m)
 


ECOHAB_II - 621   RSS
Institution: 	USGS Coastal and Marine Geology Program (USGS-CMGP)   (Dataset ID: ecohab_ii-621)
sea_water_temperature_6211mc_a (degree_Celsius)




SEE WHAT STATIONS I AM NOT CATCHING WITH THESE INSTITUTIONS AND THEN FILL IN
NEED TO BE ABLE TO CALL VARIABLE COLUMNS BY NAME and don't want to catch weird bad data

In [303]:
%%time
print(len(df['tabledap']))
dfs = []
for url in df['tabledap']:
    dfnew = pd.read_csv(url + '.csvp', index_col=indices, parse_dates=['time (UTC)'],
            usecols = readfunc).dropna(axis='index', how='all').dropna(axis='columns', how='all')
    
    checkkey = 'depth_reading_ocean (m)'
    if checkkey in dfnew.columns:
        # check if depth changes and if it does, don't use dataset
        if not np.allclose(dfnew[checkkey].min(), dfnew[checkkey].max()):
            # DROP DATAFRAME
            dfnew = None
        # CHECK QA TOO
    
    dfs.append(dfnew)

252


  mask |= (ar1 == a)


CPU times: user 5min 25s, sys: 33.6 s, total: 5min 58s
Wall time: 51min 46s


In [304]:
%%time
dfall = pd.concat(dfs)

CPU times: user 10.7 s, sys: 5.43 s, total: 16.1 s
Wall time: 18.2 s


In [312]:
dfall.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,sea_water_temperature (degree_Celsius),sea_water_temperature_qc_agg,depth_reading_ocean (m),sea_water_temperature_ocean (degree_Celsius),height_geoid_local_station_datum (m),water_surface_height_above_reference_datum_geoid_localstationdatum (m),sea_surface_height_amplitude_due_to_geocentric_ocean_tide_geoid_mllw (cm),sea_surface_height_above_sea_level_geoid_mllw (m),sea_water_practical_salinity (1e-3),sea_water_speed (m.s-1),sea_surface_height_above_sea_level_geoid_navd88 (m)
time (UTC),latitude (degrees_north),longitude (degrees_east),station,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2020-01-14 20:32:45+00:00,29.01665,-89.832433,,19.5,1.0,,,,,,,,,
2020-01-14 21:02:45+00:00,29.01665,-89.832433,,19.35,1.0,,,,,,,,,
2020-01-14 21:32:45+00:00,29.01665,-89.832433,,19.15,1.0,,,,,,,,,
2020-01-14 22:02:45+00:00,29.01665,-89.832433,,19.799999,1.0,,,,,,,,,
2020-01-14 22:32:45+00:00,29.01665,-89.832433,,20.049999,1.0,,,,,,,,,


In [319]:
dfall.reset_index(level=[0,1,2,3])

Unnamed: 0,time (UTC),latitude (degrees_north),longitude (degrees_east),station,sea_water_temperature (degree_Celsius),sea_water_temperature_qc_agg,depth_reading_ocean (m),sea_water_temperature_ocean (degree_Celsius),height_geoid_local_station_datum (m),water_surface_height_above_reference_datum_geoid_localstationdatum (m),sea_surface_height_amplitude_due_to_geocentric_ocean_tide_geoid_mllw (cm),sea_surface_height_above_sea_level_geoid_mllw (m),sea_water_practical_salinity (1e-3),sea_water_speed (m.s-1),sea_surface_height_above_sea_level_geoid_navd88 (m)
0,2020-01-14 20:32:45+00:00,29.01665,-89.832433,,19.500000,1.0,,,,,,,,,
1,2020-01-14 21:02:45+00:00,29.01665,-89.832433,,19.350000,1.0,,,,,,,,,
2,2020-01-14 21:32:45+00:00,29.01665,-89.832433,,19.150000,1.0,,,,,,,,,
3,2020-01-14 22:02:45+00:00,29.01665,-89.832433,,19.799999,1.0,,,,,,,,,
4,2020-01-14 22:32:45+00:00,29.01665,-89.832433,,20.049999,1.0,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42247296,2015-05-07 06:56:00+00:00,26.06830,-97.156700,"SOUTH PADRE ISLAND, BRAZOS SANTIAGO PASS",,,,,,,-9.0,,,,
42247297,2015-05-06 15:25:00+00:00,26.06830,-97.156700,"SOUTH PADRE ISLAND, BRAZOS SANTIAGO PASS",,,,,,,49.0,,,,
42247298,2015-05-06 06:16:00+00:00,26.06830,-97.156700,"SOUTH PADRE ISLAND, BRAZOS SANTIAGO PASS",,,,,,,-9.0,,,,
42247299,2015-05-05 14:32:00+00:00,26.06830,-97.156700,"SOUTH PADRE ISLAND, BRAZOS SANTIAGO PASS",,,,,,,46.0,,,,


In [281]:
# for url in df['tabledap']:
url = df['tabledap'][0]

# open a dataset
# pd.read_csv(url + '.csvp', index_col=0, parse_dates=[0],
#             usecols = lambda col:(col in Cols))
# df1 = pd.read_csv(url + '.csvp', index_col=[0,1,2,-1], parse_dates=[0],
#             usecols = lambda col: col in Cols).dropna()
df1 = pd.read_csv(url + '.csvp', index_col=indices, parse_dates=['time (UTC)'],
            usecols = readfunc).dropna(axis='index', how='all').dropna(axis='columns', how='all')


url = df['tabledap'][100]
df2 = pd.read_csv(url + '.csvp', index_col=indices, parse_dates=['time (UTC)'],
            usecols = readfunc).dropna(axis='index', how='all').dropna(axis='columns', how='all')
checkkey = 'depth_reading_ocean (m)'
if checkkey in df2.columns:
    # check if depth changes and if it does, don't use dataset
    if not np.allclose(df2[checkkey].min(), df2[checkkey].max()):
        # DROP DATAFRAME
        df2 = None
    # CHECK QA TOO

Use the following to find the relevant datasets for a time range and region

HERE I HAVE SKETCHED OUT HOW TO BE ABLE TO QUERY AND AGGREGATE DATA. Need to:

* see what data I am missing that isn't in the sensors database
* see about adding the missing stations from the data sources
* do I need to include something about QA/QC at this stage? How to do that?

THIS IS THE AGGREGATED CELL OF WORK CURRENTLY:

In [125]:
import requests

# user input
startDate = '2020-03-23 00:00'
endDate = '2021-03-23'
minLon, maxLon = -99, -88
minLat, maxLat = 20, 30

# convert input datetime to seconds since 1970 which is almost correct but isn't quite what the 
# system does apparently
startDateTime = (pd.Timestamp(startDate) - pd.Timestamp("1970-01-01 00:00")) // pd.Timedelta('1s')
endDateTime = (pd.Timestamp(endDate) - pd.Timestamp("1970-01-01 00:00")) // pd.Timedelta('1s')

search_headers = {'Accept': 'application/json'}

Vars = {'temp': {'metaName': 'Temperature: Water Temperature', 'downloadName': 'sea_water_temperature'},
        'salt': {'metaName': 'Salinity', 'downloadName': 'sea_water_practical_salinity'},
        'currents': {'metaName': 'Currents', 'downloadName': 'sea_water_velocity_to_direction,sea_water_speed'},
        'ssh': {'metaName': 'Sea Surface Height', 'downloadName': 'sea_surface_height_geoid_navd88_9751hwl_a'},
        'waterLevel': {'metaName': 'Water Level', 'downloadName': 'sea_surface_height_above_sea_level_geoid_mllw'},
        'u_seaice': {'metaName': 'Sea Ice: Northward Velocity', 'downloadName': 'eastward_sea_ice_velocity'},
        'v_seaice': {'metaName': 'Sea Ice: Eastward Velocity', 'downloadName': 'northward_sea_ice_velocity'}}

# initialize downloadUrls as list for each data type
for key in Vars.keys():
    Vars[key]['downloadUrls'] = []

url_meta_base = 'https://search.axds.co/v2/search?portalId=45&page=1&pageSize=10000&type=sensor_station&geom={"type":"Polygon","coordinates":'
url = '%s[[[%f,%f],[%f,%f],[%f,%f],[%f,%f],[%f,%f]]]}&startDateTime=%i&endDateTime=%i' % (url_meta_base,minLon,minLat,maxLon,minLat,maxLon,maxLat,minLon,maxLat,minLon,minLat,startDateTime,endDateTime)
url_download_base = 'https://erddap.sensors.ioos.us/erddap/tabledap'

# Find url for dataset search for each data type
for key in Vars.keys():
    Vars[key]['url_meta'] = url + '&tag=Parameter Group:' + Vars[key]['metaName']

    search_results = requests.get(Vars[key]['url_meta'], headers = search_headers).json()

    # Find download link for each data type
    for result in search_results['results']:
        # https://erddap.sensors.ioos.us/erddap/tabledap/mares-mooring-m1-2017-2019.csv?time%2Ceastward_sea_ice_velocity%2Cz&time%3E%3D2017-11-07T11%3A30%3A33Z&time%3C%3D2019-02-11T16%3A13%3A47Z
#         url_csv = '%s/%s.csvp?time,%s&time>=2017-11-07T11:30:33Z&time<=2019-02-11T16:13:47Z' % (url_download_base,result['data']['datasetId'], Vars[key]['downloadName'])
        url_csv = '%s/%s.csvp?time,%s&time>=%sZ&time<=%sZ' % (url_download_base,result['data']['datasetId'], Vars[key]['downloadName'],pd.Timestamp(startDate).isoformat(),pd.Timestamp(endDate).isoformat())
        Vars[key]['downloadUrls'].append(url_csv)


# Vars
# then can download with `pd.read_csv`

In [127]:
len(Vars['waterLevel']['downloadUrls'])

50

In [72]:
url_csv

'https://erddap.sensors.ioos.us/erddap/tabledap/com_chevron_42934.csvp?time,sea_water_temperature&time>=2020-03-23T00:00:00Z&time<=2021-03-23T00:00:00Z'

In [71]:
Vars['temp']['url_meta']

'https://search.axds.co/v2/search?portalId=45&page=1&pageSize=10000&type=sensor_station&geom={"type":"Polygon","coordinates":[[[-99.000000,20.000000],[-88.000000,20.000000],[-88.000000,30.000000],[-99.000000,30.000000],[-99.000000,20.000000]]]}&startDateTime=1584921600&endDateTime=1616457600&tag=Parameter Group:Temperature: Water Temperature'

In [78]:
search_results['results'][0]#['data']['figures'][0]['plots'][0]['subPlots'][0]['datasetVariableId']

{'data': {'hasNarrative': False,
  'figures': [{'annotations': [],
    'label': 'Conductivity',
    'parameterGroupId': 23,
    'plots': [{'subPlots': [{'plotLabel': '[default]',
        'parameterId': 49,
        'endDate': '2021-03-17T05:00:00Z',
        'maxZ': 0.0,
        'medianTimeIntervalSecs': None,
        'numObservations': None,
        'qcConfigId': None,
        'instrument': {},
        'label': 'Conductivity',
        'units': 'mS.cm-1',
        'availableZBins': [],
        'hasQc': False,
        'deviceId': 482550,
        'parameterGroupId': 23,
        'sensorParameterId': 49,
        'maxVal': None,
        'minVal': None,
        'discriminant': None,
        'unitId': 28,
        'feeds': [482550],
        'datasetVariableId': 'sea_water_electrical_conductivity',
        'minZ': 0.0,
        'startDate': '2017-01-15T04:00:00Z',
        'availableZ': []}],
      'label': '[default]'}]},
   {'annotations': [],
    'label': 'Salinity',
    'parameterGroupId': 14,
 

In [66]:
!pip install nested-lookup

Collecting nested-lookup
  Downloading nested-lookup-0.2.22.tar.gz (10 kB)
Building wheels for collected packages: nested-lookup
  Building wheel for nested-lookup (setup.py) ... [?25ldone
[?25h  Created wheel for nested-lookup: filename=nested_lookup-0.2.22-py3-none-any.whl size=7684 sha256=216337c7177a7f015982050c6c7c19ea0b598420e0e7507f73ad890558046021
  Stored in directory: /Users/kthyng/Library/Caches/pip/wheels/51/ce/8c/92df23b3564f2d3e84de93b535ef062d3159d68a10fead7742
Successfully built nested-lookup
Installing collected packages: nested-lookup
Successfully installed nested-lookup-0.2.22


In [70]:
from nested_lookup import nested_lookup
print(nested_lookup('datasetVariableId', search_results['results'][0])) # THEN NEED TO SELECT OUT THE ONE WITH TEMP IN IT FOR EXAMPLE

['sea_water_electrical_conductivity', 'sea_water_practical_salinity', 'height_geoid_local_station_datum', 'sea_water_temperature', 'water_surface_height_above_reference_datum_geoid_localstationdatum']


In [61]:
from functools import reduce
from operator import getitem
def get_nested_item(data, keys):
    return reduce(getitem, keys, data)

In [64]:
def get_nested_item(data, keys):
    return reduce(lambda seq, key: seq[key], keys, data)

In [65]:
get_nested_item(search_results, ['datasetVariableId'])

KeyError: 'datasetVariableId'

In [60]:
any(key.startswith('datasetVariableId') for key in search_results)

False

In [20]:
%%time

df = pd.DataFrame()

for url in Vars['salt']['downloadUrls']:
    dfnew = pd.read_csv(url, index_col=0, parse_dates=True).sort_index()
    datasetId = url.split('/')[-1].split('.')[0]
    dfnew.rename(columns={dfnew.columns[0]: '%s: %s' % (dfnew.columns[0], datasetId)}, inplace=True)

    df = df.join(dfnew, how='outer')

CPU times: user 12.5 s, sys: 1.19 s, total: 13.7 s
Wall time: 4min 11s


In [21]:
print(len(Vars['salt']['downloadUrls']))

63


In [24]:
Vars['salt']['df'] = df

In [None]:
https://erddap.sensors.ioos.us/erddap/tabledap/wmo_42876.csv?time%2Csea_water_temperature_ocean%2Cz&time%3E%3D2017-04-25T00%3A37%3A00Z&time%3C%3D2021-03-09T18%3A37%3A00Z

In [28]:
# pd.read_csv(url)
url

'https://erddap.sensors.ioos.us/erddap/tabledap/wmo_42876.csvp?time,sea_water_temperature&time>=2020-03-23T00:00:00Z&time<=2021-03-23T00:00:00Z'

In [25]:
%%time

# loop over all variables
for key in Vars.keys():
    df = pd.DataFrame()
    for url in Vars[key]['downloadUrls']:
        dfnew = pd.read_csv(url, index_col=0, parse_dates=True).sort_index()
        datasetId = url.split('/')[-1].split('.')[0]
        dfnew.rename(columns={dfnew.columns[0]: '%s: %s' % (dfnew.columns[0], datasetId)}, inplace=True)

        df = df.join(dfnew, how='outer')
    
    # add resultant dataframe to Vars dict
    Vars[key]['df'] = df

HTTPError: HTTP Error 400: 

In [607]:
tz = 'US/Central'
date = pd.Timestamp('2020-03-23 00:00').tz_localize(tz)
pd.Timedelta(str(1584939600 - (date - pd.Timestamp("1970-01-01 00:00").tz_localize(tz)) // pd.Timedelta('1s')) + 's')
# (date - pd.Timestamp("1970-01-01 00:00").tz_localize(tz)) // pd.Timedelta('1s')


Timedelta('0 days 06:00:00')

In [727]:
import urllib
import json
# url = 'https://search.axds.co/v2/search?portalId=45&page=1&pageSize=1000&type=sensor_station&geom=%7B%22type%22%3A%22Polygon%22,%22coordinates%22%3A[[[-99,20],[-88,20],[-88,30],[-99,30],[-99,20]]]%7D&searchStartDateTime=1262304000&endDateTime=1616486399'
# url = 'https://search.axds.co/v2/search?portalId=45&page=1&geom=%7B%22type%22%3A%22Polygon%22%2C%22coordinates%22%3A%5B%5B%5B-134.40150203577394%2C50.51342652633956%5D%2C%5B-126.91416037374725%2C50.51342652633956%5D%2C%5B-126.91416037374725%2C55.3541353102106%5D%2C%5B-134.40150203577394%2C55.3541353102106%5D%2C%5B-134.40150203577394%2C50.51342652633956%5D%5D%5D%7D&startDateTime=1584939600&endDateTime=1616561999&type=sensor_station&type=parameter_group&type=affiliate&pageSize=1'
# with specific variable
# url = 'https://search.axds.co/v2/search?portalId=45&page=1&pageSize=10&type=sensor_station&geom=%7B%22type%22%3A%22Polygon%22%2C%22coordinates%22%3A%5B%5B%5B-134.40150203577394%2C50.51342652633956%5D%2C%5B-126.91416037374725%2C50.51342652633956%5D%2C%5B-126.91416037374725%2C55.3541353102106%5D%2C%5B-134.40150203577394%2C55.3541353102106%5D%2C%5B-134.40150203577394%2C50.51342652633956%5D%5D%5D%7D&startDateTime=1584939600&endDateTime=1616561999&tag=Parameter%20Group%3ATemperature%3A%20Water%20Temperature'
# with multiple variables


# user input
startDate = '2020-03-23 00:00'
endDate = '2021-03-23'
minLon, maxLon = -99, -88
minLat, maxLat = 20, 30

# convert input datetime to seconds since 1970 which is almost correct but isn't quite what the 
# system does apparently
startDateTime = (pd.Timestamp(startDate) - pd.Timestamp("1970-01-01 00:00")) // pd.Timedelta('1s')
endDateTime = (pd.Timestamp(endDate) - pd.Timestamp("1970-01-01 00:00")) // pd.Timedelta('1s')


# Also need sea ice coverage. Do I need to search another database?
# find these by looking at the searches that come up under developer tools and network
# e.g. https://sensors.ioos.us/#search/advanced?type_group=sensor_stations&mapBounds=-193.33040437082403%2C61.93895042666063%2C-116.871433045657%2C82.85338229176081&page=1
# do we also need to download QA/QC information?
vars_meta = ['Temperature: Water Temperature','Currents','Salinity',
             'Sea Surface Height', 'Water Level', 
             'Sea Ice: Northward Velocity','Sea Ice: Eastward Velocity']

# Vars = {'temp': {'meta': 'Temperature: Water Temperature', 'download': 'sea_water_temperature'},
#         'salt': {'meta'}}
                

# url = 'https://search.axds.co/v2/search?portalId=45&page=1&pageSize=1000&type=sensor_station&geom=%7B%22type%22%3A%22Polygon%22%2C%22coordinates%22%3A%5B%5B%5B%f%2CminLat%5D%2C%5BmaxLon%2CminLat%5D%2C%5BmaxLon%2CmaxLat%5D%2C%5BminLon%2CmaxLat%5D%2C%5BminLon%2CminLat%5D%5D%5D%7D&startDateTime=1584939600&endDateTime=1616561999&tag=Parameter%20Group%3ATemperature%3A%20Water%20Temperature&tag=Parameter%20Group%3ACurrents' % (minLon, )
# url = 'https://search.axds.co/v2/search?portalId=45&page=1&pageSize=1000&type=sensor_station&geom={"type":"Polygon","coordinates":[[[%f,%f],[%f,%f],[%f,%f],[%f,%f],[%f,%f]]]}&startDateTime=1584939600&endDateTime=1616561999&tag=Parameter Group:Temperature: Water Temperature&tag=Parameter Group:Currents' % (minLon,minLat,maxLon,minLat,maxLon,maxLat,minLon,maxLat,minLon,minLat)#,startDateTime,endDateTime)
urlbase = 'https://search.axds.co/v2/search?portalId=45&page=1&pageSize=1000&type=sensor_station&geom={"type":"Polygon","coordinates":'
url = '%s[[[%f,%f],[%f,%f],[%f,%f],[%f,%f],[%f,%f]]]}&startDateTime=%i&endDateTime=%i' % (urlbase,minLon,minLat,maxLon,minLat,maxLon,maxLat,minLon,maxLat,minLon,minLat,startDateTime,endDateTime)




# for var_meta in vars_meta:
var_meta = vars_meta[0]
url += '&tag=Parameter Group:' + var_meta
print(url)
search_headers = {'Accept': 'application/json'}
import requests
search_results = requests.get(url, headers = search_headers).json()

https://search.axds.co/v2/search?portalId=45&page=1&pageSize=1000&type=sensor_station&geom={"type":"Polygon","coordinates":[[[-99.000000,20.000000],[-88.000000,20.000000],[-88.000000,30.000000],[-99.000000,30.000000],[-99.000000,20.000000]]]}&startDateTime=1584921600&endDateTime=1616457600&tag=Parameter Group:Temperature: Water Temperature


In [728]:
len(search_results['results'])

190

In [822]:
df1 = pd.read_csv(Vars['salt']['downloadUrls'][0], index_col=0, parse_dates=True).sort_index()
datasetId = Vars['salt']['downloadUrls'][0].split('/')[-1].split('.')[0]
df1.rename(columns={df1.columns[0]: '%s: %s' % (df1.columns[0], datasetId)}, inplace=True)

df2 = pd.read_csv(Vars['salt']['downloadUrls'][1], index_col=0, parse_dates=True).sort_index()
datasetId = Vars['salt']['downloadUrls'][1].split('/')[-1].split('.')[0]
df2.rename(columns={df2.columns[0]: '%s: %s' % (df2.columns[0], datasetId)}, inplace=True)

df3 = df1.join(df2, how='outer')

In [749]:
for result in search_results['results']:
    
#     print(result['data']['id'])
#     if 'edu_ucsd_cdip_132' in result:
    print(result['data']['datasetId'])  # works for TABS
    print(result['data']['~uuid'])
    
    url_base = 'https://erddap.sensors.ioos.us/erddap/tabledap'
    url_csv = '%s/%s.csvp?time,sea_water_temperature' % (url_base,result['data']['datasetId'])
#     url_csv = 'https://erddap.sensors.ioos.us/erddap/tabledap/tabs_w.csv?time%2Csea_water_practical_salinity%2Cz&time%3E%3D2016-01-14T12%3A00%3A00Z&time%3C%3D2020-11-19T14%3A00%3A00Z'
    pd.read_csv(url_csv)
    
#

gov_usgs_waterdata_294045092492300
urn:ioos:station:gov.usgs.waterdata:294045092492300
nerrs_marcwwq
urn:ioos:station:nerrs:marcwwq
wmo_42876
urn:ioos:station:wmo:42876


HTTPError: HTTP Error 400: 

In [None]:
https://erddap.sensors.ioos.us/erddap/tabledap/tabs_w.html?time%2Csea_water_practical_salinity%2Cz&time%3E%3D2016-01-14T12%3A00%3A00Z&time%3C%3D2020-11-19T14%3A00%3A00Z
https://erddap.sensors.ioos.us/erddap/tabledap/tabs_w.csv?time%2Csea_water_practical_salinity%2Cz&time%3E%3D2016-01-14T12%3A00%3A00Z&time%3C%3D2020-11-19T14%3A00%3A00Z
https://erddap.sensors.ioos.us/erddap/tabledap/tabs_w.nc?time%2Csea_water_practical_salinity%2Cz&time%3E%3D2016-01-14T12%3A00%3A00Z&time%3C%3D2020-11-19T14%3A00%3A00Z
https://sensors.axds.co/stationsensorservice/getExcelSheet?sensorids=14&sensorid=14&stationid=57544&stationids=57544&appregion=all&realtimeonly=false&next_gen_api=false

In [735]:
# url_csv = 'https://erddap.sensors.ioos.us/erddap/tabledap/urn:ioos:station:com.chevron:42934.csvp?time%2Csea_water_temperature%2Csea_water_temperature_qc_agg'
# url_csv = 'https://sensors.axds.co/stationsensorservice/getExcelSheet?sensorsids=7&sensorid=7&stationids=57541&stationid=57541&appregion=all&realtimeonly=false&next_gen_api=false'
url_csv = 'https://erddap.sensors.ioos.us/erddap/tabledap/tabs_w.csv?time%2Csea_water_practical_salinity%2Cz&time%3E%3D2016-01-14T12%3A00%3A00Z&time%3C%3D2020-11-19T14%3A00%3A00Z'
pd.read_csv(url_csv)

# url_netcdf = 'https://sensors.axds.co/stationsensorservice/getSensorNetcdf?sensorsids=7&sensorid=7&stationids=57541&stationid=57541&appregion=all&realtimeonly=false&next_gen_api=false'
# xr.open_dataset(url_netcdf)

Unnamed: 0,time,sea_water_practical_salinity,z
0,UTC,0.001000,m
1,2020-11-19T13:00:00Z,0.032716,0.0
2,2020-11-19T12:30:00Z,0.032724,0.0
3,2020-11-19T12:00:00Z,0.032734,0.0
4,2020-11-19T11:30:00Z,0.032741,0.0
...,...,...,...
51016,2016-01-14T14:30:00Z,36.450000,0.0
51017,2016-01-14T14:00:00Z,36.450000,0.0
51018,2016-01-14T13:30:00Z,36.450000,0.0
51019,2016-01-14T13:00:00Z,36.450000,0.0


In [748]:
url_base = 'https://erddap.sensors.ioos.us/erddap/tabledap'
url_csv = '%s/%s.csvp?time,sea_water_temperature' % (url_base,'tabs_w')
# url_csv = 'https://erddap.sensors.ioos.us/erddap/tabledap/tabs_w.csv?time%2Csea_water_practical_salinity%2Cz&time%3E%3D2016-01-14T12%3A00%3A00Z&time%3C%3D2020-11-19T14%3A00%3A00Z'
# https://erddap.sensors.ioos.us/erddap/tabledap/tabs_w.csv?time%2Csea_water_temperature%2Cz&time%3E%3D2016-01-14T12%3A00%3A00Z&time%3C%3D2020-11-19T14%3A00%3A00Z
print(url_csv)
pd.read_csv(url_csv)


https://erddap.sensors.ioos.us/erddap/tabledap/tabs_w.csvp?sea_water_temperature


Unnamed: 0,sea_water_temperature (degree_Celsius)
0,22.7
1,22.6
2,22.6
3,22.7
4,22.7
...,...
51015,23.1
51016,23.1
51017,23.1
51018,23.1


In [560]:
url_csv = 'https://erddap.sensors.ioos.us/erddap/tabledap/edu_ucsd_cdip_132.csvp?time%2Csea_water_temperature%2Csea_water_temperature_qc_agg'
url_csv = 'https://erddap.sensors.ioos.us/erddap/tabledap/edu_ucsd_cdip_132.csvp?time%2Csea_water_velocity_to_direction%2Csea_water_speed%2Csea_water_velocity_to_direction_qc_agg%2Csea_water_speed_qc_agg'
url_csv = 'https://erddap.sensors.ioos.us/erddap/tabledap/indian-river-lagoon-fort-pierce-.csv?time%2Csea_water_temperature%2Csea_water_temperature_qc_agg'
pd.read_csv(url_csv)

Unnamed: 0,time (UTC),sea_water_velocity_to_direction (degrees),sea_water_speed (m.s-1),sea_water_velocity_to_direction_qc_agg,sea_water_speed_qc_agg
0,2006-02-09T15:25:45Z,,,,
1,2006-02-09T15:54:45Z,,,,
2,2006-02-09T15:55:45Z,,,,
3,2006-02-09T16:24:45Z,,,,
4,2006-02-09T16:25:45Z,,,,
...,...,...,...,...,...
532802,2021-03-23T19:25:00Z,,,,
532803,2021-03-23T19:30:00Z,227.461533,0.139,1.0,1.0
532804,2021-03-23T19:35:00Z,,,,
532805,2021-03-23T19:40:00Z,220.516479,0.101,1.0,1.0
