# Use CSW to find model data at NODC, NGDC, DATA.GOV, and PACIOOS

In [1]:
from owslib.csw import CatalogueServiceWeb
from owslib import fes
import netCDF4
import numpy as np

## Find model results at NODC (geoportal)

In [2]:
# Connect to the NODC/UAF Geoportal CSW endpoint and show the CSW version it reports.
endpoint = 'http://www.nodc.noaa.gov/geoportal/csw'   # NODC/UAF Geoportal: granule level
csw = CatalogueServiceWeb(endpoint, timeout=60)
# print() with a single argument behaves the same on Python 2 and Python 3.
print(csw.version)

2.0.2


In [3]:
# Display the constraints (lists of supported queryables) attached to the GetRecords operation.
csw.get_operation_by_name('GetRecords').constraints

[Constraint: SupportedCommonQueryables - ['Subject', 'Title', 'Abstract', 'AnyText', 'Format', 'Identifier', 'Modified', 'Type', 'BoundingBox'],
 Constraint: SupportedISOQueryables - ['apiso:Subject', 'apiso:Title', 'apiso:Abstract', 'apiso:AnyText', 'apiso:Format', 'apiso:Identifier', 'apiso:Modified', 'apiso:Type', 'apiso:BoundingBox', 'apiso:CRS.Authority', 'apiso:CRS.ID', 'apiso:CRS.Version', 'apiso:RevisionDate', 'apiso:AlternateTitle', 'apiso:CreationDate', 'apiso:PublicationDate', 'apiso:OrganizationName', 'apiso:HasSecurityConstraints', 'apiso:Language', 'apiso:ResourceIdentifier', 'apiso:ParentIdentifier', 'apiso:KeywordType', 'apiso:TopicCategory', 'apiso:ResourceLanguage', 'apiso:GeographicDescriptionCode', 'apiso:Denominator', 'apiso:DistanceValue', 'apiso:DistanceUOM', 'apiso:TempExtent_begin', 'apiso:TempExtent_end', 'apiso:ServiceType', 'apiso:ServiceTypeVersion', 'apiso:Operation', 'apiso:OperatesOn', 'apiso:OperatesOnIdentifier', 'apiso:OperatesOnName', 'apiso:Coupling

In [4]:
# Build a wildcard text filter: any record whose apiso:AnyText contains `val`.
val = 'level'
filter1 = fes.PropertyIsLike(
    propertyname='apiso:AnyText',
    literal=('*%s*' % val),
    escapeChar='\\',
    wildCard='*',
    singleChar='?',
)
filter_list = [filter1]

In [5]:
# Run the filtered GetRecords request (at most 100 full records) and count what came back.
csw.getrecords2(constraints=filter_list, maxrecords=100, esn='full')
len(csw.records)

100

In [6]:
# Pick one returned record at random (unseeded, so the pick differs between runs),
# print its title and display its reference links.
choice = np.random.choice(list(csw.records))
print(csw.records[choice].title)
csw.records[choice].references

SAMOS project underway oceanographic and quality-controlled meteorological data collected aboard the MILLER FREEMAN from 2008-02-01 to 2008-02-29 (NODC Accession 0044755)


[{'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Onlink',
  'url': 'http://www.ncei.noaa.gov/'},
 {'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Onlink',
  'url': 'http://www.nodc.noaa.gov/'},
 {'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Onlink',
  'url': 'http://www.coaps.fsu.edu/'},
 {'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Onlink',
  'url': 'http://www.coaps.fsu.edu/'},
 {'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Thumbnail',
  'url': 'http://data.nodc.noaa.gov/cgi-bin/gfx?id=gov.noaa.nodc:0044755'},
 {'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Document',
  'url': 'http://www.nodc.noaa.gov/geoportal/csw?getxml=%7B3F0C3B7C-CCD5-48CF-A6EB-E68A21E3228B%7D'},
 {'scheme': 'urn:x-esri:specification:ServiceType:TDS',
  'url': 'http://data.nodc.noaa.gov/thredds/catalog/coaps/samos/WTDM/2008/02/'},
 {'scheme': 'urn:x-esri:specification:ServiceType:FTP',
  'url': 'ftp://ft

## Find model results at NGDC  (geoportal)

In [161]:
# Point the client at the NGDC/IOOS Geoportal and display the CSW version it reports.
endpoint = 'http://www.ngdc.noaa.gov/geoportal/csw'  # NGDC/IOOS Geoportal
csw = CatalogueServiceWeb(endpoint, timeout=60)
csw.version

'2.0.2'

In [162]:
# Display the constraints (lists of supported queryables) attached to the GetRecords operation.
csw.get_operation_by_name('GetRecords').constraints

[Constraint: SupportedCommonQueryables - ['Subject', 'Title', 'Abstract', 'AnyText', 'Format', 'Identifier', 'Modified', 'Type', 'BoundingBox'],
 Constraint: SupportedISOQueryables - ['apiso:Subject', 'apiso:Title', 'apiso:Abstract', 'apiso:AnyText', 'apiso:Format', 'apiso:Identifier', 'apiso:Modified', 'apiso:Type', 'apiso:BoundingBox', 'apiso:CRS.Authority', 'apiso:CRS.ID', 'apiso:CRS.Version', 'apiso:RevisionDate', 'apiso:AlternateTitle', 'apiso:CreationDate', 'apiso:PublicationDate', 'apiso:OrganizationName', 'apiso:HasSecurityConstraints', 'apiso:Language', 'apiso:ResourceIdentifier', 'apiso:ParentIdentifier', 'apiso:KeywordType', 'apiso:TopicCategory', 'apiso:ResourceLanguage', 'apiso:GeographicDescriptionCode', 'apiso:Denominator', 'apiso:DistanceValue', 'apiso:DistanceUOM', 'apiso:TempExtent_begin', 'apiso:TempExtent_end', 'apiso:ServiceType', 'apiso:ServiceTypeVersion', 'apiso:Operation', 'apiso:OperatesOn', 'apiso:OperatesOnIdentifier', 'apiso:OperatesOnName', 'apiso:Coupling

In [163]:
# Probe GetDomain: look up the operation by name, then ask for the domain of
# values of the apiso:ServiceType property and print them.
try:
    csw.get_operation_by_name('GetDomain')
    csw.getdomain('apiso:ServiceType', 'property')
    print(csw.results['values'])
except Exception:
    # `except Exception` instead of a bare `except:` so KeyboardInterrupt and
    # SystemExit are no longer swallowed; any failure of the probe itself is
    # still reported with the same message.
    print('GetDomain not supported')

GetDomain not supported


In [164]:
# Free-text filter on apiso:AnyText containing `val`.
# NOTE(review): the next cell rebinds val, filter1 and filter_list before any query
# is issued, so this filter is never used when the notebook is run top to bottom.
val = 'level'
filter1 = fes.PropertyIsLike(
    propertyname='apiso:AnyText',
    literal=('*%s*' % val),
    escapeChar='\\',
    wildCard='*',
    singleChar='?',
)
filter_list = [filter1]

In [165]:
# Filter on the apiso:ServiceType queryable: keep records whose service type contains `val`.
val = 'erddap'
filter1 = fes.PropertyIsLike(
    propertyname='apiso:ServiceType',
    literal=('*%s*' % val),
    escapeChar='\\',
    wildCard='*',
    singleChar='?',
)
filter_list = [filter1]

In [166]:
# Run the filtered GetRecords request (at most 1000 full records) and count what came back.
csw.getrecords2(constraints=filter_list, maxrecords=1000, esn='full')
len(csw.records)

104

In [167]:
# Pick one returned record identifier at random (unseeded), print it and
# display that record's reference links.
choice = np.random.choice(list(csw.records.keys()))
# print() with a single argument behaves the same on Python 2 and Python 3.
print(choice)
csw.records[choice].references

I01_met_all


[{'scheme': 'urn:x-esri:specification:ServiceType:distribution:url',
  'url': 'http://www.neracoos.org/erddap/tabledap/I01_met_all.html'},
 {'scheme': 'urn:x-esri:specification:ServiceType:distribution:url',
  'url': 'http://www.neracoos.org/erddap/tabledap/I01_met_all.graph'},
 {'scheme': 'urn:x-esri:specification:ServiceType:download:url',
  'url': 'http://www.neracoos.org/erddap/tabledap/I01_met_all.html'}]

## Find model data at CATALOG.DATA.GOV (pycsw)

In [56]:
# Point the client at the catalog.data.gov CSW and display the CSW version it reports.
# The commented-out lines are alternative endpoints kept for reference.
endpoint = 'http://catalog.data.gov/csw-all'  # catalog.data.gov CSW
#endpoint = 'http://csw.data.gov.uk/geonetwork/srv/en/csw' # data.gov.uk
#endpoint = 'http://www.nationaalgeoregister.nl/geonetwork/srv/eng/csw'
#endpoint = 'http://www.rndt.gov.it/RNDT/CSW'
csw = CatalogueServiceWeb(endpoint, timeout=60)
csw.version

'2.0.2'

In [57]:
# Print the name of every operation held in csw.operations, one per line.
for operation in csw.operations:
    print(operation.name)

GetCapabilities
GetRepositoryItem
DescribeRecord
GetDomain
GetRecordById
GetRecords


In [58]:
# Display the constraints (lists of supported queryables) attached to the GetRecords operation.
csw.get_operation_by_name('GetRecords').constraints

[Constraint: SupportedISOQueryables - ['apiso:DistanceValue', 'apiso:Abstract', 'apiso:RevisionDate', 'apiso:Subject', 'apiso:KeywordType', 'apiso:Title', 'apiso:CRS', 'apiso:PublicationDate', 'apiso:Type', 'apiso:AlternateTitle', 'apiso:BoundingBox', 'apiso:AnyText', 'apiso:ParentIdentifier', 'apiso:Modified', 'apiso:Operation', 'apiso:Format', 'apiso:TempExtent_end', 'apiso:DistanceUOM', 'apiso:OrganisationName', 'apiso:ServiceType', 'apiso:TempExtent_begin', 'apiso:ResourceLanguage', 'apiso:ServiceTypeVersion', 'apiso:OperatesOn', 'apiso:Denominator', 'apiso:HasSecurityConstraints', 'apiso:OperatesOnIdentifier', 'apiso:GeographicDescriptionCode', 'apiso:Language', 'apiso:Identifier', 'apiso:OperatesOnName', 'apiso:TopicCategory', 'apiso:CreationDate', 'apiso:CouplingType'],
 Constraint: AdditionalQueryables - ['apiso:Lineage', 'apiso:Classification', 'apiso:Creator', 'apiso:Relation', 'apiso:OtherConstraints', 'apiso:SpecificationTitle', 'apiso:ResponsiblePartyRole', 'apiso:Specific

In [59]:
# Probe GetDomain: look up the operation by name, then ask for the domain of
# values of the apiso:ServiceType property and print them.
try:
    csw.get_operation_by_name('GetDomain')
    csw.getdomain('apiso:ServiceType', 'property')
    print(csw.results['values'])
except Exception:
    # `except Exception` instead of a bare `except:` so KeyboardInterrupt and
    # SystemExit are no longer swallowed; any failure of the probe itself is
    # still reported with the same message.
    print('GetDomain not supported')

['ArcGIS REST API for 10', 'urn:ogc:serviceType:WebMapService']


In [60]:
# Build a wildcard text filter: any record whose apiso:AnyText contains `val`.
val = 'salinity'
filter1 = fes.PropertyIsLike(
    propertyname='apiso:AnyText',
    literal=('*%s*' % val),
    escapeChar='\\',
    wildCard='*',
    singleChar='?',
)
filter_list = [filter1]

In [61]:
# Run the filtered GetRecords request (at most 1000 full records) and count what came back.
csw.getrecords2(constraints=filter_list, maxrecords=1000, esn='full')
len(csw.records)

10

In [None]:
# NOTE(review): called without constraints, so this is not the 'salinity' query built
# above; presumably a leftover experiment — confirm whether this cell should stay.
csw.getrecords2()

In [51]:
# Pick one returned record at random (unseeded), print its title and display
# its reference links.
choice = np.random.choice(list(csw.records.keys()))
# print() with a single argument behaves the same on Python 2 and Python 3.
print(csw.records[choice].title)
csw.records[choice].references

USGS US Topo 7.5-minute map for Lindstrom Peak, ID 2011


[{'scheme': 'None',
  'url': 'http://ims.er.usgs.gov/gda_services/download?item_id=5243915'}]

## Search at geoport.whoi.edu (geoportal)

In [None]:
# Connect to the geoport.whoi.edu geoportal CSW and print the CSW version it reports.
# (The previous trailing comment labelled this endpoint as catalog.data.gov, a copy-paste slip.)
endpoint = 'http://geoport.whoi.edu/geoportal/csw'  # geoport.whoi.edu geoportal CSW
csw = CatalogueServiceWeb(endpoint, timeout=60)
# print() with a single argument behaves the same on Python 2 and Python 3.
print(csw.version)

In [None]:
# Print the ISO queryables advertised for the GetRecords operation.
# The constraints outputs saved elsewhere in this notebook show `constraints` as a
# list of Constraint objects (name + values), so select the entry by name rather
# than indexing with a string key.
for oper in csw.operations:
    if oper.name == 'GetRecords':
        for constraint in oper.constraints:
            if constraint.name == 'SupportedISOQueryables':
                # Single formatted string so the output is the same on Python 2 and 3.
                print('\nISO Queryables:\n{}'.format(constraint.values))

In [None]:
# Run a filtered GetRecords request (at most 1000 full records) and count what came back.
# NOTE(review): no filter_list is built in this section; the query uses whatever
# filter_list an earlier cell left in the kernel.
csw.getrecords2(constraints=filter_list, maxrecords=1000, esn='full')
len(csw.records)

In [None]:
# Pick one returned record identifier at random (unseeded), print it and
# display that record's reference links.
# Uses np.random.choice like the other sections: the stdlib `random` module is
# never imported in this notebook, so `random.choice` fails on a fresh kernel.
choice = np.random.choice(list(csw.records.keys()))
print(choice)
csw.records[choice].references

## Search at PACIOOS (pycsw)

In [175]:
# CSW endpoint used for the PACIOOS section.
endpoint = 'http://oos.soest.hawaii.edu/pacioos/ogc/csw.py'

In [176]:
# Open the CSW client on the current endpoint and display the CSW version it reports.
csw = CatalogueServiceWeb(endpoint, timeout=60)
csw.version

'2.0.2'

In [177]:
# Display the constraints (lists of supported queryables) attached to the GetRecords operation.
csw.get_operation_by_name('GetRecords').constraints

[Constraint: SupportedISOQueryables - ['apiso:DistanceValue', 'apiso:Abstract', 'apiso:RevisionDate', 'apiso:Subject', 'apiso:KeywordType', 'apiso:Title', 'apiso:CRS', 'apiso:PublicationDate', 'apiso:Type', 'apiso:AlternateTitle', 'apiso:BoundingBox', 'apiso:AnyText', 'apiso:ParentIdentifier', 'apiso:Modified', 'apiso:Operation', 'apiso:Format', 'apiso:TempExtent_end', 'apiso:DistanceUOM', 'apiso:OrganisationName', 'apiso:ServiceType', 'apiso:TempExtent_begin', 'apiso:ResourceLanguage', 'apiso:ServiceTypeVersion', 'apiso:OperatesOn', 'apiso:Denominator', 'apiso:HasSecurityConstraints', 'apiso:OperatesOnIdentifier', 'apiso:GeographicDescriptionCode', 'apiso:Language', 'apiso:Identifier', 'apiso:OperatesOnName', 'apiso:TopicCategory', 'apiso:CreationDate', 'apiso:CouplingType'],
 Constraint: AdditionalQueryables - ['apiso:Lineage', 'apiso:Classification', 'apiso:Creator', 'apiso:Relation', 'apiso:OtherConstraints', 'apiso:SpecificationTitle', 'apiso:ResponsiblePartyRole', 'apiso:Specific

In [178]:
# Probe GetDomain: look up the operation by name, then ask for the domain of
# values of the apiso:ServiceType property and print them.
try:
    csw.get_operation_by_name('GetDomain')
    csw.getdomain('apiso:ServiceType', 'property')
    print(csw.results['values'])
except Exception:
    # `except Exception` instead of a bare `except:` so KeyboardInterrupt and
    # SystemExit are no longer swallowed; any failure of the probe itself is
    # still reported with the same message.
    print('GetDomain not supported')

GetDomain not supported


In [179]:
# Build a wildcard text filter: any record whose apiso:AnyText contains `val`.
val = 'ROMS'
filter1 = fes.PropertyIsLike(
    propertyname='apiso:AnyText',
    literal=('*%s*' % val),
    escapeChar='\\',
    wildCard='*',
    singleChar='?',
)
filter_list = [filter1]

In [180]:
# Run the filtered GetRecords request (at most 1000 full records) and count what came back.
csw.getrecords2(constraints=filter_list, maxrecords=1000, esn='full')
len(csw.records)

HTTPError: HTTP Error 404: Not Found

In [181]:
# Pick one returned record identifier at random (unseeded), print it and
# display that record's reference links. Requires the preceding getrecords2
# call to have succeeded, since that is what populates csw.records.
choice = np.random.choice(list(csw.records.keys()))
# print() with a single argument behaves the same on Python 2 and Python 3.
print(choice)
csw.records[choice].references

AttributeError: CatalogueServiceWeb instance has no attribute 'records'

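Working!  Woo hoo!!! (Note: in the outputs saved above, the PACIOOS `getrecords2` request failed with HTTP 404, so the last two cells of this section show errors.)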

## EPA

In [None]:
# Point the client at the EPA metadata CSW and display the CSW version it reports.
endpoint = 'https://edg.epa.gov/metadata/csw'
csw = CatalogueServiceWeb(endpoint, timeout=60)
csw.version

In [None]:
# trying to do this search:
# ('roms' OR 'selfe' OR 'adcirc' OR 'ncom' OR 'hycom' OR 'fvcom') AND 'ocean' NOT 'regridded' NOT 'espresso'
# should return 11 records from NODC geoportal
# NOTE(review): search_text below starts with 'waves' where the search described
# above says 'roms' — confirm which term is intended.


def any_text_like(term):
    """Return a PropertyIsLike filter matching ``*term*`` against apiso:AnyText."""
    return fes.PropertyIsLike(propertyname='apiso:AnyText', literal=('*%s*' % term),
                              escapeChar='\\', wildCard='*', singleChar='?')


search_text = ['waves','selfe','adcirc','ncom','hycom','fvcom']

# filter1: any one of the search terms appears in the record text.
filter1 = fes.Or([any_text_like(term) for term in search_text])

# filter2: the record text must also mention 'ocean'.
filter2 = any_text_like('ocean')

# filter3 / filter4: exclude records mentioning 'regridded' or 'espresso'.
filter3 = fes.Not([any_text_like('regridded')])
filter4 = fes.Not([any_text_like('espresso')])

# All four conditions must hold at once.
filter_list = [fes.And([filter1, filter2, filter3, filter4])]

In [None]:
# Run the filtered GetRecords request (at most 1000 full records) and count what came back.
csw.getrecords2(constraints=filter_list, maxrecords=1000, esn='full')
len(csw.records)

In [None]:
# Pick one returned record identifier at random (unseeded), print it and
# display that record's reference links.
# Uses np.random.choice like the other sections: the stdlib `random` module is
# never imported in this notebook, so `random.choice` fails on a fresh kernel.
choice = np.random.choice(list(csw.records.keys()))
print(choice)
csw.records[choice].references

In [None]:
# Probe GetDomain: look up the operation by name, then ask for the domain of
# values of the apiso:ServiceType property and print them.
try:
    csw.get_operation_by_name('GetDomain')
    # getdomain takes (name, kind): the queryable to inspect and whether it is a
    # 'property' or a 'parameter'. The earlier call passed a constraint-group
    # name and the queryable in those two slots; this matches the other sections.
    csw.getdomain('apiso:ServiceType', 'property')
    print(csw.results['values'])
except Exception:
    # `except Exception` instead of a bare `except:` so KeyboardInterrupt and
    # SystemExit are no longer swallowed; any failure of the probe itself is
    # still reported with the same message.
    print('GetDomain not supported')

## USGS CMGDS server (geonetwork)

In [None]:
# Point the client at the USGS cmgds.marine.usgs.gov geonetwork CSW and display the CSW version.
endpoint = 'http://cmgds.marine.usgs.gov/geonetwork/srv/en/csw'
csw = CatalogueServiceWeb(endpoint, timeout=60)
csw.version

## USGS CIDA Server (geonetwork)

In [None]:
# Point the client at the USGS CIDA geonetwork CSW and display the CSW version it reports.
endpoint = 'http://cida.usgs.gov/gdp/geonetwork/srv/en/csw'
csw = CatalogueServiceWeb(endpoint, timeout=60)
csw.version

In [None]:
# Fetch the GetRecords operation object, then display its constraints.
foo = csw.get_operation_by_name('GetRecords')
foo.constraints

In [None]:
# Probe GetDomain: look up the operation by name, then ask for the domain of
# values of the ServiceType property and print them.
try:
    csw.get_operation_by_name('GetDomain')
    # getdomain takes (name, kind): the queryable to inspect and whether it is a
    # 'property' or a 'parameter'. The earlier call passed a constraint-group
    # name and the queryable in those two slots. The unprefixed 'ServiceType'
    # matches the property name used by the filters in this section.
    csw.getdomain('ServiceType', 'property')
    print(csw.results['values'])
except Exception:
    # `except Exception` instead of a bare `except:` so KeyboardInterrupt and
    # SystemExit are no longer swallowed; any failure of the probe itself is
    # still reported with the same message.
    print('GetDomain not supported')

In [None]:
# Filter on the ServiceType queryable: keep records whose service type contains `val`.
val = 'dap'
service_type = fes.PropertyIsLike(
    propertyname='ServiceType',
    literal=('*%s*' % val),
    escapeChar='\\',
    wildCard='*',
    singleChar='?',
)
filter_list = [service_type]

In [None]:
# Run the filtered GetRecords request (at most 1000 full records) and count what came back.
csw.getrecords2(constraints=filter_list, maxrecords=1000, esn='full')
len(csw.records)

In [None]:
# Build a wildcard text filter: any record whose AnyText contains `val`.
val = 'climate'
filter1 = fes.PropertyIsLike(
    propertyname='AnyText',
    literal=('*%s*' % val),
    escapeChar='\\',
    wildCard='*',
    singleChar='?',
)
filter_list = [filter1]

In [None]:
# Run the filtered GetRecords request (at most 1000 full records) and count what came back.
csw.getrecords2(constraints=filter_list, maxrecords=1000, esn='full')
len(csw.records)

In [None]:
# Pick one returned record identifier at random (unseeded), print it and
# display that record's reference links.
# Uses np.random.choice like the other sections: the stdlib `random` module is
# never imported in this notebook, so `random.choice` fails on a fresh kernel.
choice = np.random.choice(list(csw.records.keys()))
print(choice)
csw.records[choice].references

## INSPIRE

In [63]:
# Point the client at the INSPIRE geoportal CSW proxy and display the CSW version it reports.
endpoint = 'http://inspire-geoportal.ec.europa.eu/GeoportalProxyWebServices/resources/OGCCSW202/AT'
csw = CatalogueServiceWeb(endpoint, timeout=60)
csw.version

timeout: timed out

In [None]:
# Display the constraints attached to the GetRecords operation.
csw.get_operation_by_name('GetRecords').constraints

In [14]:
# Probe GetDomain: look up the operation by name, then ask for the domain of
# values of the apiso:ServiceType property and print them.
try:
    csw.get_operation_by_name('GetDomain')
    csw.getdomain('apiso:ServiceType', 'property')
    print(csw.results['values'])
except Exception:
    # `except Exception` instead of a bare `except:` so KeyboardInterrupt and
    # SystemExit are no longer swallowed; any failure of the probe itself is
    # still reported with the same message.
    print('GetDomain not supported')

GetDomain not supported


In [15]:
# Build a wildcard text filter: any record whose AnyText contains `val`.
val = 'data'
filter1 = fes.PropertyIsLike(
    propertyname='AnyText',
    literal=('*%s*' % val),
    escapeChar='\\',
    wildCard='*',
    singleChar='?',
)
filter_list = [filter1]

In [16]:
# Run the filtered GetRecords request (at most 1000 full records) and count what came back.
csw.getrecords2(constraints=filter_list, maxrecords=1000, esn='full')
len(csw.records)

1000

In [62]:
# Pick one returned record at random (unseeded), print its title and display
# its reference links.
# Uses np.random.choice like the other sections: the stdlib `random` module is
# never imported in this notebook, so `random.choice` fails on a fresh kernel.
choice = np.random.choice(list(csw.records.keys()))
print(csw.records[choice].title)
csw.records[choice].references

20130208-MODIS_A-JPL-L2P-A2013039075500.L2_LAC_GHRSST_N-v01.nc.bz2


[{'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Onlink',
  'url': 'http://www.nodc.noaa.gov/SatelliteData/ghrsst/'},
 {'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Document',
  'url': 'http://www.nodc.noaa.gov/geoportal/csw?getxml=%7BED84C0FD-766F-4E20-812E-5C358AB0D82C%7D'},
 {'scheme': 'urn:x-esri:specification:ServiceType:TDS',
  'url': 'http://data.nodc.noaa.gov/thredds/catalog/ghrsst/L2P/MODIS_A/JPL/2013/039/catalog.html?dataset=ghrsst/L2P/MODIS_A/JPL/2013/039/20130208-MODIS_A-JPL-L2P-A2013039075500.L2_LAC_GHRSST_N-v01.nc.bz2'},
 {'scheme': 'urn:x-esri:specification:ServiceType:FTP',
  'url': 'ftp://ftp.nodc.noaa.gov/pub/data.nodc/ghrsst/L2P/MODIS_A/JPL/2013/039/20130208-MODIS_A-JPL-L2P-A2013039075500.L2_LAC_GHRSST_N-v01.nc.bz2'},
 {'scheme': 'urn:x-esri:specification:ServiceType:OPeNDAP',
  'url': 'http://data.nodc.noaa.gov/opendap/ghrsst/L2P/MODIS_A/JPL/2013/039/20130208-MODIS_A-JPL-L2P-A2013039075500.L2_LAC_GHRSST_N-v01.nc.bz2.html'},
 {'sch

## Italy

In [18]:
# Point the client at the Italian RNDT CSW and display the CSW version it reports.
endpoint = 'http://www.rndt.gov.it/RNDT/CSW'
csw = CatalogueServiceWeb(endpoint, timeout=60)
csw.version

'2.0.2'

In [19]:
# Display the constraints (lists of supported queryables) attached to the GetRecords operation.
csw.get_operation_by_name('GetRecords').constraints

[Constraint: SupportedISOQueryables - ['OrganisationName', 'Type', 'ServiceType', 'RevisionDate', 'AnyText', 'PublicationDate', 'ResourceIdentifier', 'ParentIdentifier', 'Identifier', 'TopicCategory', 'DistanceValue', 'DistanceUOM', 'TempExtent_end', 'Subject', 'CreationDate', 'Title', 'Denominator', 'Language', 'TempExtent_begin', 'Abstract'],
 Constraint: AdditionalQueryables - ['SpecificationDate', 'SpecificationDateType', 'ConditionApplyingToAccessAndUse', 'AccessConstraints', 'Classification', 'OtherConstraints', 'Degree', 'Lineage', 'SpecificationTitle', 'ResponsiblePartyRole']]

In [27]:
# Build a wildcard text filter: any record whose AnyText contains `val`.
val = 'water'
filter1 = fes.PropertyIsLike(
    propertyname='AnyText',
    literal=('*%s*' % val),
    escapeChar='\\',
    wildCard='*',
    singleChar='?',
)
filter_list = [filter1]

In [28]:
# Run the filtered GetRecords request (at most 100 full records) and count what came back.
csw.getrecords2(constraints=filter_list, maxrecords=100, esn='full')
len(csw.records)

28

In [53]:
# Pick one returned record at random (unseeded) and print its title and its
# reference links.
# Uses np.random.choice like the other sections: the stdlib `random` module is
# never imported in this notebook, so `random.choice` fails on a fresh kernel.
choice = np.random.choice(list(csw.records.keys()))
print(csw.records[choice].title)
print(csw.records[choice].references)

MOLISEDB.GIS.MO_PTA24G_staz_monit
[]


In [152]:
# Connect to a geonetwork CSW addressed by a hardcoded IP and display the CSW version.
# NOTE(review): 172.21.173.15 looks like a private-network address, so this cell
# presumably only works from inside that network — confirm or remove.
endpoint = 'http://172.21.173.15/geonetwork/srv/eng/csw'
csw = CatalogueServiceWeb(endpoint, timeout=60)
csw.version

URLError: <urlopen error timed out>