In [1]:
import os
import sys

# Put the directory two levels above the notebook on the import path;
# presumably that is where the local `ioos_tools` package lives.
ioos_tools = os.path.join(os.path.pardir, os.path.pardir)
sys.path.append(ioos_tools)

In [2]:
from datetime import datetime, timedelta

# Region: West coast.
# NOTE(review): presumably [lon_min, lat_min, lon_max, lat_max] -- confirm.
crs = 'urn:ogc:def:crs:OGC:1.3:CRS84'
bbox = [-123, 36, -121, 40]

# Temporal range: the seven days leading up to the current (naive) UTC time.
now = datetime.utcnow()
stop = now
start = stop - timedelta(weeks=1)

# CF names for sea water temperature.
cf_names = [
    'sea_water_temperature',
    'sea_surface_temperature',
    'sea_water_potential_temperature',
    'equivalent_potential_temperature',
    'sea_water_conservative_temperature',
    'pseudo_equivalent_potential_temperature',
]

# Catalog (CSW endpoint) to search.
catalog = 'https://dev-catalog.ioos.us/csw'

In [3]:
from owslib import fes
from ioos_tools.ioos import fes_date_filter

# Options shared by every text match: the wildcard, escape and
# single-character symbols, applied to the `apiso:AnyText` property.
kw = {
    'wildCard': '*',
    'escapeChar': '\\',
    'singleChar': '?',
    'propertyname': 'apiso:AnyText',
}

# One "*name*" pattern per CF name; a record matching any of them qualifies.
or_filt = fes.Or([
    fes.PropertyIsLike(literal='*{}*'.format(name), **kw)
    for name in cf_names
])

# Spatial and temporal constraints.
bbox_crs = fes.BBox(bbox, crs=crs)
begin, end = fes_date_filter(start, stop)

# All constraints must hold at once: region, time window and variable name.
filter_list = [fes.And([bbox_crs, begin, end, or_filt])]

In [4]:
from owslib.csw import CatalogueServiceWeb


# Client for the CSW endpoint stored in `catalog`.
# NOTE(review): `timeout=60` is presumably in seconds -- confirm with OWSLib.
csw = CatalogueServiceWeb(catalog, timeout=60)

In [5]:
from owslib.fes import SortBy, SortProperty


def get_csw_records(csw, pagesize=10, maxrecords=1000, constraints=None):
    """
    Page through a CSW search and gather the records of every page.

    The catalog is queried `pagesize` records at a time, sorted by
    `dc:title`, until the server reports that there is no next record or
    `maxrecords` records have been requested. The merged pages are written
    back to `csw.records`.

    Parameters
    ----------
    csw : object
        Catalog client exposing `getrecords2`, `records` and `results`
        (here an `owslib.csw.CatalogueServiceWeb`).
    pagesize : int
        Number of records requested per call.
    maxrecords : int
        Upper bound on the number of records requested overall. When it is
        not positive no request is made.
    constraints : list, optional
        Filter constraints forwarded to `getrecords2`. Defaults to the
        module-level `filter_list`.

    Returns
    -------
    None
        The result is stored in `csw.records`.

    Notes
    -----
    Pages are merged in a dict keyed like `csw.records`, so records that
    share a key collapse into a single entry.

    """
    if constraints is None:
        constraints = filter_list

    # Iterate over sorted results so that paging is stable between requests.
    sortby = SortBy([SortProperty('dc:title', 'ASC')])
    csw_records = {}
    startposition = 0
    fetched = 0

    while fetched < maxrecords:
        # Never ask for more than what is left of the `maxrecords` budget.
        page = min(pagesize, maxrecords - fetched)
        csw.getrecords2(constraints=constraints, startposition=startposition,
                        maxrecords=page, sortby=sortby)
        csw_records.update(csw.records)

        nextrecord = csw.results['nextrecord']
        # 0 means "no more records"; a position that does not advance would
        # loop forever, so treat it as the end as well.
        if nextrecord == 0 or nextrecord <= startposition:
            break
        # Resume exactly where the server says the next record is, instead
        # of guessing the offset (which skipped one record per page).
        startposition = nextrecord
        fetched = nextrecord - 1

    if csw_records:
        csw.records.update(csw_records)

In [6]:
# Collect the matching records page by page into `csw.records`,
# then show how many were found.
get_csw_records(csw, pagesize=10, maxrecords=1000)

len(csw.records)

16

In [7]:
from geolinks import sniff_link


def _parse_reference(ref, identifier):
    """
    First try to sniff the scheme from the URL in the `ref` dict using geolinks,
    if that fails get the `scheme` field directly.
    
    For all possible identifiers see:
    https://github.com/OSGeo/Cat-Interop/blob/master/LinkPropertyLookupTable.csv
    
    """
    url = None
    scheme = sniff_link(ref['url'])
    if not scheme:
        scheme = ref['scheme']
    if identifier.endswith(':'):
        cond = identifier in scheme
    else:
        cond = identifier == scheme
    if cond:
        url = ref['url']
    return url


def service_urls(records, identifier='OGC:SOS'):
    """
    Extract service URLs from CSW records using geolink identifiers
    (OPeNDAP:OPeNDAP, ERDDAP:griddap, ERDDAP:tabledap, OGC:SOS, etc).

    For all possible identifiers see:
    https://github.com/OSGeo/Cat-Interop/blob/master/LinkPropertyLookupTable.csv

    It is possible to truncate an ambiguous identifier at the `:`
    (e.g. `ERDDAP:`); the matching itself is done by `_parse_reference`.

    Parameters
    ----------
    records : mapping
        Records whose values expose a `references` iterable.
    identifier : str
        Scheme identifier to look for.

    Returns
    -------
    list
        The matching URLs, sorted and without duplicates.

    """
    candidates = (
        _parse_reference(ref, identifier)
        for rec in records.values()
        for ref in rec.references
    )
    return sorted({url for url in candidates if url})

In [8]:
# Endpoints whose scheme is exactly `OGC:SOS`.
service_urls(csw.records, identifier='OGC:SOS')

['http://data.ioos.us/thredds/sos/deployments/drudnick/sp025-20160913T1826/sp025-20160913T1826.nc3.nc?service=SOS&version=1.0.0&request=GetCapabilities']

In [9]:
# Endpoints whose scheme is exactly `OPeNDAP:OPeNDAP`.
service_urls(csw.records, identifier='OPeNDAP:OPeNDAP')

['http://data.ioos.us/thredds/dodsC/deployments/drudnick/sp025-20160913T1826/sp025-20160913T1826.nc3.nc',
 'http://oos.soest.hawaii.edu/thredds/dodsC/hioos/model/atm/ncep_global/NCEP_Global_Atmospheric_Model_best.ncd',
 'http://oos.soest.hawaii.edu/thredds/dodsC/pacioos/hycom/global']

In [10]:
# Endpoints whose scheme is exactly `ERDDAP:griddap`.
service_urls(csw.records, identifier='ERDDAP:griddap')

['http://oos.soest.hawaii.edu/erddap/griddap/HYCOM_Global_2D.graph',
 'http://oos.soest.hawaii.edu/erddap/griddap/HYCOM_Global_3D.graph',
 'http://oos.soest.hawaii.edu/erddap/griddap/NCEP_Global_Best.graph']

In [11]:
# Endpoints whose scheme is exactly `ERDDAP:tabledap`.
service_urls(csw.records, identifier='ERDDAP:tabledap')

[]

In [12]:
# The trailing `:` makes this match every scheme containing `ERDDAP:`.
service_urls(csw.records, identifier='ERDDAP:')

['http://oos.soest.hawaii.edu/erddap/griddap/HYCOM_Global_2D.graph',
 'http://oos.soest.hawaii.edu/erddap/griddap/HYCOM_Global_3D.graph',
 'http://oos.soest.hawaii.edu/erddap/griddap/NCEP_Global_Best.graph']

Check all available schemes.

In [13]:
# For every reference of every record, print the scheme sniffed by geolinks
# next to the scheme and URL stored in the reference itself.
msg = 'geolink: {geolink}\nscheme: {scheme}\nURL: {url}\n'.format
for key, value in csw.records.items():
    print('[{}]:\n'.format(value.title))

    for ref in value.references:
        geolink = sniff_link(ref['url'])
        print(msg(geolink=geolink, **ref))

[Directional wave and sea surface temperature measurements collected in situ by Datawell Mark 3 directional buoy located near SAN FRANCISCO BAR, CA from 2016/04/20 17:00:00 to 2016/11/21 23:53:57.]:

[Directional wave and sea surface temperature measurements collected in situ by Datawell Mark 3 directional buoy located near SCRIPPS NEARSHORE, CA from 2015/01/07 23:00:00 to 2016/11/22 00:01:07.]:

[HYbrid Coordinate Ocean Model (HYCOM): Global]:

geolink: None
scheme: http
URL: http://oos.soest.hawaii.edu/thredds/idd/ocn_mod.html?dataset=hycom_global

geolink: WWW:LINK
scheme: http
URL: http://oos.soest.hawaii.edu/thredds/dodsC/pacioos/hycom/global.html

geolink: None
scheme: http
URL: http://pacioos.org/voyager/index.html?b=-85.200475%2C-180%2C85.200475%2C180&o=ofore:5:f:d1

geolink: None
scheme: NOAA:LAS
URL: http://oos.soest.hawaii.edu/las/UI.vm?catid=hycom_global&dsid=hycom_global&varid=temperature-hycom_global

geolink: None
scheme: ERDDAP:griddap
URL: http://oos.soest.hawaii.edu/e