# IOOS Access 02: Accessing model data via the IOOS Catalog OGC CSW endpoint

Taken from Filipe's notebook http://nbviewer.jupyter.org/gist/ocefpaf/36ab048b94859e8bf9a070db95fed174

In [1]:
from owslib import fes

In [2]:
# Corners of the search area, longitude first.
min_lon, min_lat = -127, 43
max_lon, max_lat = -123.75, 48

# The coordinate reference system is passed along with the box so the
# filter carries an explicit CRS.
crs = 'urn:ogc:def:crs:OGC:1.3:CRS84'
bbox = [min_lon, min_lat, max_lon, max_lat]

# Spatial constraint used later when building the catalog query.
bbox_crs = fes.BBox(bbox, crs=crs)

In [3]:
from datetime import datetime, timedelta, timezone
from ioos_tools.ioos import fes_date_filter

# Half-width, in days, of the search window centred on the current time.
dt = 12

# `datetime.utcnow()` is deprecated since Python 3.12. Take an aware UTC "now"
# and drop the tzinfo so the value stays a naive UTC datetime, exactly what
# `utcnow()` used to return.
date = datetime.now(timezone.utc).replace(tzinfo=None)
start = date - timedelta(days=dt)
stop = date + timedelta(days=dt)

# Pair of temporal constraints built from the window limits; both are used
# later when assembling the catalog query.
begin, end = fes_date_filter(start, stop)

  return f(*args, **kwds)
  return f(*args, **kwds)
  return f(*args, **kwds)


In [4]:
# CF standard names that identify the temperature variables of interest.
cf_names = [
    'sea_water_temperature',
    'sea_surface_temperature',
    'sea_water_potential_temperature',
    'equivalent_potential_temperature',
    'sea_water_conservative_temperature',
    'pseudo_equivalent_potential_temperature',
]

# Options shared by every PropertyIsLike filter: the wildcard, escape and
# single-character symbols, and the property the pattern is applied to.
kw = {
    'wildCard': '*',
    'escapeChar': '\\',
    'singleChar': '?',
    'propertyname': 'apiso:AnyText',
}

# One "*name*" pattern per CF name, combined with a logical OR.
or_filt = fes.Or([
    fes.PropertyIsLike(literal=('*%s*' % cf_name), **kw)
    for cf_name in cf_names
])

In [5]:
# Exclusion filters: reject records whose text matches "*cdip*" or "*grib*".
not_cdip = fes.Not([fes.PropertyIsLike(literal='*cdip*', **kw)])
not_grib = fes.Not([fes.PropertyIsLike(literal='*grib*', **kw)])

# Every constraint must hold at once: bounding box, both time constraints,
# at least one CF name, and neither of the two exclusions.
filter_list = [
    fes.And([bbox_crs, begin, end, or_filt, not_cdip, not_grib])
]

In [6]:
from owslib.csw import CatalogueServiceWeb
from ioos_tools.ioos import get_csw_records

# Catalog endpoint queried by this notebook.
endpoint = 'https://data.ioos.us/csw'
csw = CatalogueServiceWeb(endpoint, timeout=60)

# Run the filtered query; the results are read back from `csw.records`.
get_csw_records(csw, filter_list, pagesize=10, maxrecords=1000, esn='full')

# Record identifiers, kept both as a newline-joined string and as a count.
record_ids = csw.records.keys()
records = '\n'.join(record_ids)
print('Found {} records.\n'.format(len(record_ids)))

Found 26 records.



In [7]:
# Print each record's title in brackets, followed by its identifier
# indented on the next line.
for record_id, record in csw.records.items():
    print('[{}]\n  {}'.format(record.title, record_id))

[urn:ioos:station:NOAA.NOS.CO-OPS:9437540 station, Garibaldi, OR]
  opendap.co-ops.nos.noaa.gov-urn_ioos_station_NOAA.NOS.CO-OPS_9437540
[urn:ioos:station:NOAA.NOS.CO-OPS:9439040 station, Astoria, OR]
  opendap.co-ops.nos.noaa.gov-urn_ioos_station_NOAA.NOS.CO-OPS_9439040
[urn:ioos:station:NOAA.NOS.CO-OPS:9440581 station, Cape Disappointment]
  opendap.co-ops.nos.noaa.gov-urn_ioos_station_NOAA.NOS.CO-OPS_9440581
[urn:ioos:station:NOAA.NOS.CO-OPS:9440910 station, Toke Point, WA]
  opendap.co-ops.nos.noaa.gov-urn_ioos_station_NOAA.NOS.CO-OPS_9440910
[urn:ioos:station:NOAA.NOS.CO-OPS:9441102 station, Westport, WA]
  opendap.co-ops.nos.noaa.gov-urn_ioos_station_NOAA.NOS.CO-OPS_9441102
[urn:ioos:station:NOAA.NOS.CO-OPS:9442396 station, La Push, WA]
  opendap.co-ops.nos.noaa.gov-urn_ioos_station_NOAA.NOS.CO-OPS_9442396
[(CMOP) SATURN-02]
  data.nanoos.org-urn_ioos_station_nanoos_cmop_ogi02
[(CMOP) SATURN-03]
  data.nanoos.org-urn_ioos_station_nanoos_cmop_saturn03
[(CMOP) SATURN-04]
  data.nan

In [26]:
# Reference links stored for the '2d_ST' record, i.e. the dataset titled
# 'CMOP Virtual Columbia River (SELFE); f33'
csw.records['2d_ST'].references

[{'scheme': 'WWW:LINK',
  'url': 'http://amb6400b.stccmop.org:8080/thredds/dodsC/model_data/forecast.html'},
 {'scheme': '',
  'url': 'http://amb6400b.stccmop.org:8080/thredds/dodsC/model_data/forecast'},
 {'scheme': '',
  'url': 'http://amb6400b.stccmop.org:8080/ncWMS2/wms?service=WMS&version=1.3.0&request=GetCapabilities'}]

In [27]:
# Reference links stored for the 'OCOS' record.
csw.records['OCOS'].references

[{'scheme': 'WWW:LINK',
  'url': 'http://ona.coas.oregonstate.edu:8080/thredds/dodsC/NANOOS/OCOS.html'},
 {'scheme': '',
  'url': 'http://ona.coas.oregonstate.edu:8080/thredds/dodsC/NANOOS/OCOS'},
 {'scheme': '',
  'url': 'http://ona.coas.oregonstate.edu:8080/thredds/wms/NANOOS/OCOS?service=WMS&version=1.3.0&request=GetCapabilities'}]

In [41]:
# Reference links stored for the record whose identifier is the
# aggregated NcML path below.
csw.records['/opendap/hyrax/aggregated/ocean_time_aggregation.ncml'].references

[{'scheme': 'OPeNDAP:OPeNDAP',
  'url': 'http://ingria.coas.oregonstate.edu/opendap/hyrax/aggregated/ocean_time_aggregation.ncml.html'},
 {'scheme': '',
  'url': 'http://ingria.coas.oregonstate.edu/opendap/hyrax/aggregated/ocean_time_aggregation.ncml'}]

In [8]:
from geolinks import sniff_link
import pandas as pd


# One small frame of reference links per catalog record. Using a separate
# name keeps `df` from being a list first and a DataFrame afterwards.
frames = [pd.DataFrame(rec.references) for rec in csw.records.values()]

# Stack all links into a single table with a fresh running index.
df = pd.concat(frames, ignore_index=True)

# Add the service type that `sniff_link` reports for each URL.
df['geolink'] = [sniff_link(url) for url in df['url']]

# Filtering via scheme or geolinks is completely broken at the model. The cell below performs some heuristics to figure out what is OPeNDAP and what is not.

In [50]:
# Rows flagged as OPeNDAP either by the catalog `scheme` or by the sniffed
# `geolink` value.
df.loc[df['scheme'].eq('OPeNDAP:OPeNDAP') | df['geolink'].eq('OPeNDAP:OPeNDAP')]

Unnamed: 0,scheme,url,geolink
5,WWW:LINK,https://opendap.co-ops.nos.noaa.gov/ioos-dif-s...,OPeNDAP:OPeNDAP
18,WWW:LINK,https://opendap.co-ops.nos.noaa.gov/ioos-dif-s...,OPeNDAP:OPeNDAP
51,WWW:LINK,https://opendap.co-ops.nos.noaa.gov/ioos-dif-s...,OPeNDAP:OPeNDAP
64,WWW:LINK,https://opendap.co-ops.nos.noaa.gov/ioos-dif-s...,OPeNDAP:OPeNDAP
183,OPeNDAP:OPeNDAP,http://thredds.cencoos.org/thredds/dodsC/AOOS_...,
193,OPeNDAP:OPeNDAP,http://oos.soest.hawaii.edu/thredds/dodsC/paci...,
287,OPeNDAP:OPeNDAP,http://oos.soest.hawaii.edu/thredds/dodsC/hioo...,
344,OPeNDAP:OPeNDAP,http://ingria.coas.oregonstate.edu/opendap/hyr...,OPeNDAP:OPeNDAP
345,,http://ingria.coas.oregonstate.edu/opendap/hyr...,OPeNDAP:OPeNDAP


# With `geolinks` we got two that are the same and with `scheme` we got two but there are 4 as we can see below.

In [34]:
# Handle the presence of "opendap" in the COOPS obs domain: a link is kept
# only when the catalog `scheme` says OPeNDAP or the URL contains 'dodsC'.
# URLs containing '.html' are dropped.
dap_urls = [
    url
    for scheme, url in zip(df['scheme'], df['url'])
    if (scheme == 'OPeNDAP:OPeNDAP' or 'dodsC' in url) and '.html' not in url
]

In [35]:
# Number of URLs kept by the selection above.
len(dap_urls)

6

In [36]:
# Display the selected URLs.
dap_urls

['http://amb6400b.stccmop.org:8080/thredds/dodsC/model_data/forecast',
 'http://www.ncdc.noaa.gov/oa/wct/wct-jnlp-beta.php?singlefile=http://thredds.cencoos.org/thredds/dodsC/AOOS_OSTIA.nc',
 'http://thredds.cencoos.org/thredds/dodsC/AOOS_OSTIA.nc',
 'http://oos.soest.hawaii.edu/thredds/dodsC/pacioos/hycom/global',
 'http://oos.soest.hawaii.edu/thredds/dodsC/hioos/satellite/dhw_5km',
 'http://ona.coas.oregonstate.edu:8080/thredds/dodsC/NANOOS/OCOS']

**WHY IS ingria BEING EXCLUDED FROM `dap_urls`?** Because it doesn't meet the criteria in
```python
(row['scheme'] == 'OPeNDAP:OPeNDAP' or 'dodsC' in url) and (not '.html' in url)
```

In [37]:
from netCDF4 import Dataset
import gridgeo


# Map each dataset title to the grid object built from its endpoint.
grids = {}
for url in dap_urls:
    try:
        nc = Dataset(url)
    except Exception:
        # `except Exception` instead of a bare `except:` so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        print(f'  - Could not read {url} as OPeNDAP endpoint.')
        continue

    # Variables whose `standard_name` is one of the CF names of interest.
    matches = nc.get_variables_by_attributes(standard_name=lambda x: x in cf_names)
    if not matches:
        # Indexing an empty result would raise IndexError and abort the loop.
        print(f'  - No variable with a matching standard_name in {url}')
        continue
    standard_name = matches[0].standard_name

    try:
        grid = gridgeo.GridGeo(nc, standard_name=standard_name)
        title = getattr(nc, 'title', url)
        print(f'{url}: {title}')
    except Exception:
        print(f'  - Could not get grid for {url}')
        continue

    # Titles are the dictionary keys, so a later dataset with the same title
    # replaces an earlier one.
    grids[title] = grid

http://amb6400b.stccmop.org:8080/thredds/dodsC/model_data/forecast: CMOP Virtual Columbia River (SELFE); f33
  - Could not read http://www.ncdc.noaa.gov/oa/wct/wct-jnlp-beta.php?singlefile=http://thredds.cencoos.org/thredds/dodsC/AOOS_OSTIA.nc as OPeNDAP endpoint.
  - Could not get grid for http://thredds.cencoos.org/thredds/dodsC/AOOS_OSTIA.nc
  - Could not get grid for http://oos.soest.hawaii.edu/thredds/dodsC/pacioos/hycom/global
  - Could not get grid for http://oos.soest.hawaii.edu/thredds/dodsC/hioos/satellite/dhw_5km
http://ona.coas.oregonstate.edu:8080/thredds/dodsC/NANOOS/OCOS: Regional Ocean Modeling System (ROMS): Oregon Coast


We could not get the global hycom grid but we don't really want it.
Note also that `Regional Ocean Modeling System (ROMS): Oregon Coast` was found in two different endpoints and,
because we are using the title as a key to our dictionary, only one will show up in our map.

I'll leave it to you to check if they are indeed the same.

In [38]:
# Display the title -> grid mapping built above.
grids

{'CMOP Virtual Columbia River (SELFE); f33': <GridGeo: ugrid>,
 'Regional Ocean Modeling System (ROMS): Oregon Coast': <GridGeo: unknown_2d>}

In [39]:
import folium


# Tile URL template used as the base layer of the map.
tiles = 'http://services.arcgisonline.com/arcgis/rest/services/Ocean/World_Ocean_Base/MapServer/tile/{z}/{y}/{x}'

m = folium.Map(tiles=tiles, attr='ESRI')

# Add one GeoJSON layer per grid, named and labelled with the dataset title.
# A grid whose layer cannot be built is reported and skipped.
for title, grid in grids.items():
    try:
        outline_layer = folium.GeoJson(
            grid.outline.__geo_interface__,
            name=title,
        )
        folium.Popup(title).add_to(outline_layer)
        outline_layer.add_to(m)
    except Exception:
        print(f'Could not compute grid outline for {title}')

In [15]:
# Unpack the search box, then list its corners as [second, first]-swapped
# pairs, repeating the first corner at the end to close the outline.
west, south, east, north = bbox
box = [
    [south, west],
    [south, east],
    [north, east],
    [north, west],
    [south, west],
]

# Draw the search box in red on the map.
folium.PolyLine(box, color='red').add_to(m)

# Trailing semicolon keeps the returned object out of the cell output.
folium.LayerControl().add_to(m);

In [16]:
# Fit the view to the bounds that the map itself reports.
m.fit_bounds(m.get_bounds())

# Last expression of the cell: display the map.
m