diff --git a/CHANGELOG.txt b/CHANGELOG.txt index b4da50b12bb..f90ebacb89a 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -72,6 +72,7 @@ master: (doi: 10.5281/zenodo.165135) #1685). * Adding mappings for the TEXNET (see #1852) and the ICGC (see #1902) services. + * Support for the non-standard EIDA token authentication (see #1928). - obspy.imaging: * The functionality behind the `obspy-scan` command line script has been refactored into a `Scanner` class so that it can be reused in custom diff --git a/obspy/clients/fdsn/client.py b/obspy/clients/fdsn/client.py index 8b5a29458a1..667dc8d830a 100644 --- a/obspy/clients/fdsn/client.py +++ b/obspy/clients/fdsn/client.py @@ -30,6 +30,7 @@ import obspy from obspy import UTCDateTime, read_inventory +from obspy.core.compatibility import urlparse from .header import (DEFAULT_PARAMETERS, DEFAULT_USER_AGENT, FDSNWS, OPTIONAL_PARAMETERS, PARAMETER_ALIASES, URL_MAPPINGS, WADL_PARAMETERS_NOT_TO_BE_PARSED, FDSNException, @@ -138,7 +139,8 @@ def _validate_base_url(cls, base_url): def __init__(self, base_url="IRIS", major_versions=None, user=None, password=None, user_agent=DEFAULT_USER_AGENT, debug=False, - timeout=120, service_mappings=None, force_redirect=False): + timeout=120, service_mappings=None, force_redirect=False, + eida_token=None): """ Initializes an FDSN Web Service client. @@ -193,10 +195,18 @@ def __init__(self, base_url="IRIS", major_versions=None, user=None, when a redirect is discovered. This is done to improve security. Settings this flag to ``True`` will force all redirects to be followed even if credentials are given. + :type eida_token: str + :param eida_token: Token for EIDA authentication mechanism, see + http://geofon.gfz-potsdam.de/waveform/archive/auth/index.php. If a + token is provided, options ``user`` and ``password`` must not be + used. This mechanism is only available on select EIDA nodes. 
The + token can be provided in the form of the PGP message as a string, or + the filename of a local file with the PGP message in it. """ self.debug = debug self.user = user self.timeout = timeout + self._force_redirect = force_redirect # Cache for the webservice versions. This makes interactive use of # the client more convenient. @@ -220,23 +230,7 @@ def __init__(self, base_url="IRIS", major_versions=None, user=None, self.base_url = base_url - # Only add the authentication handler if required. - handlers = [] - if user is not None and password is not None: - # Create an OpenerDirector for HTTP Digest Authentication - password_mgr = urllib_request.HTTPPasswordMgrWithDefaultRealm() - password_mgr.add_password(None, base_url, user, password) - handlers.append(urllib_request.HTTPDigestAuthHandler(password_mgr)) - - if (user is None and password is None) or force_redirect is True: - # Redirect if no credentials are given or the force_redirect - # flag is True. - handlers.append(CustomRedirectHandler()) - else: - handlers.append(NoRedirectionHandler()) - - # Don't install globally to not mess with other codes. - self._url_opener = urllib_request.build_opener(*handlers) + self._set_opener(user, password) self.request_headers = {"User-Agent": user_agent} # Avoid mutable kwarg. @@ -261,6 +255,129 @@ def __init__(self, base_url="IRIS", major_versions=None, user=None, self._discover_services() + # Use EIDA token if provided - this requires setting new url openers. + # + # This can only happen after the services have been discovered as + # the client needs to know if the fdsnws implementation has support + # for the EIDA token system. + # + # This is a non-standard feature but we support it, given the number + # of EIDA nodes out there. + if eida_token is not None: + # Make sure user/pw are not also given.
+ if user is not None or password is not None: + msg = ("EIDA authentication token provided, but " + "user and password are also given.") + raise FDSNException(msg) + self.set_eida_token(eida_token) + + @property + def _has_eida_auth(self): + return self.services.get('eida-auth', False) + + def set_credentials(self, user, password): + """ + Set user and password resulting in subsequent web service + requests for waveforms being authenticated for potential access to + restricted data. + + This will overwrite any previously set-up credentials/authentication. + + :type user: str + :param user: User name of credentials. + :type password: str + :param password: Password for given user name. + """ + self._set_opener(user, password) + + def set_eida_token(self, token): + """ + Fetch user and password from the server using the provided token, + resulting in subsequent web service requests for waveforms being + authenticated for potential access to restricted data. + This only works for select EIDA nodes and relies on the auth mechanism + described here: + http://geofon.gfz-potsdam.de/waveform/archive/auth/index.php + + This will overwrite any previously set-up credentials/authentication. + + :type token: str + :param token: Token for EIDA authentication mechanism, see + http://geofon.gfz-potsdam.de/waveform/archive/auth/index.php. + This mechanism is only available on select EIDA nodes. The token + can be provided in form of the PGP message as a string, or the + filename of a local file with the PGP message in it. + """ + user, password = self._resolve_eida_token(token) + self.set_credentials(user, password) + + def _set_opener(self, user, password): + # Only add the authentication handler if required. 
+ handlers = [] + if user is not None and password is not None: + # Create an OpenerDirector for HTTP Digest Authentication + password_mgr = urllib_request.HTTPPasswordMgrWithDefaultRealm() + password_mgr.add_password(None, self.base_url, user, password) + handlers.append(urllib_request.HTTPDigestAuthHandler(password_mgr)) + + if (user is None and password is None) or self._force_redirect is True: + # Redirect if no credentials are given or the force_redirect + # flag is True. + handlers.append(CustomRedirectHandler()) + else: + handlers.append(NoRedirectionHandler()) + + # Don't install globally to not mess with other codes. + self._url_opener = urllib_request.build_opener(*handlers) + if self.debug: + print('Installed new opener with handlers: {!s}'.format(handlers)) + + def _resolve_eida_token(self, token): + """ + Use the token to get credentials. + """ + if not self._has_eida_auth: + msg = ("EIDA token authentication requested but service at '{}' " + "does not specify /dataselect/auth in the " + "dataselect/application.wadl.").format(self.base_url) + raise FDSNException(msg) + + token_file = None + # check if there's a local file that matches the provided string + if os.path.isfile(token): + token_file = token + with open(token_file, 'rb') as fh: + token = fh.read().decode() + # sanity check on the token + if not _validate_eida_token(token): + if token_file: + msg = ("Read EIDA token from file '{}' but it does not " + "seem to contain a valid PGP message.").format( + token_file) + else: + msg = ("EIDA token does not seem to be a valid PGP message. 
" + "If you passed a filename, make sure the file " + "actually exists.") + raise ValueError(msg) + + # force https so that we don't send around tokens unsecurely + url = 'https://{}/fdsnws/dataselect/1/auth'.format( + urlparse(self.base_url).netloc + urlparse(self.base_url).path) + # paranoid: check again that we only send the token to https + if urlparse(url).scheme != "https": + msg = 'This should not happen, please file a bug report.' + raise Exception(msg) + + # Already does the error checking with fdsnws semantics. + response = self._download(url=url, data=token.encode(), + use_gzip=True, return_string=True) + + user, password = response.decode().split(':') + if self.debug: + print('Got temporary user/pw: {}/{}'.format(user, password)) + + return user, password + def get_events(self, starttime=None, endtime=None, minlatitude=None, maxlatitude=None, minlongitude=None, maxlongitude=None, latitude=None, longitude=None, minradius=None, @@ -1372,7 +1489,15 @@ def run(self): raise FDSNException("Timeout while requesting '%s'." % url) if "dataselect" in url: - self.services["dataselect"] = WADLParser(wadl).parameters + wadl_parser = WADLParser(wadl) + self.services["dataselect"] = wadl_parser.parameters + # check if EIDA auth endpoint is in wadl + # we need to attach it to the discovered services, as these are + # later loaded from cache and just attaching an attribute to + # this client won't help knowing later if EIDA auth is + # supported at the server. a bit ugly but can't be helped. + if wadl_parser._has_eida_auth: + self.services["eida-auth"] = True if self.debug is True: print("Discovered dataselect service") elif "event" in url and "application.wadl" in url: @@ -1390,7 +1515,6 @@ def run(self): except ValueError: msg = "Could not parse the catalogs at '%s'." 
% url warnings.warn(msg) - elif "event" in url and "contributors" in url: try: self.services["available_event_contributors"] = \ @@ -1776,6 +1900,17 @@ def get_bulk_string(bulk, arguments): return bulk +def _validate_eida_token(token): + """ + Just a basic check if the string contains something that looks like a PGP + message + """ + if re.search(pattern='BEGIN PGP MESSAGE', string=token, + flags=re.IGNORECASE): + return True + return False + + if __name__ == '__main__': import doctest doctest.testmod(exclude_empty=True) diff --git a/obspy/clients/fdsn/routing/eidaws_routing_client.py b/obspy/clients/fdsn/routing/eidaws_routing_client.py index 15cdaa73ad2..20f679aa5d5 100644 --- a/obspy/clients/fdsn/routing/eidaws_routing_client.py +++ b/obspy/clients/fdsn/routing/eidaws_routing_client.py @@ -35,7 +35,7 @@ class EIDAWSRoutingClient(BaseRoutingClient): """ def __init__(self, url="http://www.orfeus-eu.org/eidaws/routing/1", include_providers=None, exclude_providers=None, - debug=False, timeout=120): + debug=False, timeout=120, **kwargs): """ Initialize an EIDAWS router client. 
@@ -48,7 +48,8 @@ def __init__(self, url="http://www.orfeus-eu.org/eidaws/routing/1", """ BaseRoutingClient.__init__(self, debug=debug, timeout=timeout, include_providers=include_providers, - exclude_providers=exclude_providers) + exclude_providers=exclude_providers, + **kwargs) self._url = url @_assert_filename_not_in_kwargs diff --git a/obspy/clients/fdsn/routing/federator_routing_client.py b/obspy/clients/fdsn/routing/federator_routing_client.py index fce664461fe..08aaad70786 100644 --- a/obspy/clients/fdsn/routing/federator_routing_client.py +++ b/obspy/clients/fdsn/routing/federator_routing_client.py @@ -26,7 +26,7 @@ class FederatorRoutingClient(BaseRoutingClient): def __init__(self, url="http://service.iris.edu/irisws/fedcatalog/1", include_providers=None, exclude_providers=None, - debug=False, timeout=120): + debug=False, timeout=120, **kwargs): """ Initialize a federated routing client. @@ -39,7 +39,8 @@ def __init__(self, url="http://service.iris.edu/irisws/fedcatalog/1", """ BaseRoutingClient.__init__(self, debug=debug, timeout=timeout, include_providers=include_providers, - exclude_providers=exclude_providers) + exclude_providers=exclude_providers, + **kwargs) self._url = url # Parameters the routing service can work with. If this becomes a diff --git a/obspy/clients/fdsn/routing/routing_client.py b/obspy/clients/fdsn/routing/routing_client.py index d165894d361..da6a8e1ede2 100644 --- a/obspy/clients/fdsn/routing/routing_client.py +++ b/obspy/clients/fdsn/routing/routing_client.py @@ -41,6 +41,8 @@ def RoutingClient(routing_type, *args, **kwargs): # NOQA respectively. Remaining ``args`` and ``kwargs`` will be passed to the underlying classes. + For example, credentials can be supported for all underlying data centers. + See :meth:`BaseRoutingClient ` for details. 
>>> from obspy.clients.fdsn import RoutingClient @@ -84,13 +86,25 @@ def _assert_attach_response_not_in_kwargs(f, *args, **kwargs): def _download_bulk(r): - c = client.Client(r["endpoint"], debug=r["debug"], timeout=r["timeout"]) + # Figure out the passed credentials, if any. Two possibilities: + # (1) User and password, given explicitly for the base URLs (or an + # explicity given `eida_token` key per URL). + # (2) A global EIDA_TOKEN key. It will be used for all services that + # don't have explicit credentials and also support the `/auth` route. + credentials = r["credentials"].get(urlparse(r["endpoint"]).netloc, {}) + c = client.Client(r["endpoint"], debug=r["debug"], timeout=r["timeout"], + **credentials) + if not credentials and "EIDA_TOKEN" in r["credentials"] and \ + c._has_eida_auth: + c.set_eida_token(r["credentials"]["EIDA_TOKEN"]) + if r["data_type"] == "waveform": fct = c.get_waveforms_bulk service = c.services["dataselect"] elif r["data_type"] == "station": fct = c.get_stations_bulk service = c.services["station"] + # Keep only kwargs that are supported by this particular service. kwargs = {k: v for k, v in r["kwargs"].items() if k in service} bulk_str = "" @@ -112,7 +126,7 @@ def _strip_protocol(url): # get_events() but also others). class BaseRoutingClient(HTTPClient): def __init__(self, debug=False, timeout=120, include_providers=None, - exclude_providers=None): + exclude_providers=None, credentials=None): """ :type routing_type: str :param routing_type: The type of @@ -123,11 +137,39 @@ def __init__(self, debug=False, timeout=120, include_providers=None, :type include_providers: str or list of str :param include_providers: Get data only from these providers. Can be the full HTTP address of one of the shortcuts ObsPy knows about. + :type credentials: dict + :param credentials: Credentials for the individual data centers as a + dictionary that maps base url of FDSN web service to either + username/password or EIDA token, e.g. 
+ ``credentials={ + 'geofon.gfz-potsdam.de': {'eida_token': 'my_token_file.txt'}, + 'service.iris.edu': {'user': 'me', 'password': 'my_pass'} + 'EIDA_TOKEN': '/path/to/token.txt' + }`` + The root level ``'EIDA_TOKEN'`` will be applied to all data centers + that claim to support the ``/auth`` route and don't have data + center specific credentials. + You can also use a URL mapping as for the normal FDSN client + instead of the URL. """ HTTPClient.__init__(self, debug=debug, timeout=timeout) self.include_providers = include_providers self.exclude_providers = exclude_providers + # Parse credentials. + self.credentials = {} + for key, value in (credentials or {}).items(): + if key == "EIDA_TOKEN": + self.credentials[key] = value + # Map, if necessary. + if key in URL_MAPPINGS: + key = URL_MAPPINGS[key] + # Make sure urlparse works correctly. + if not key.startswith("http"): + key = "http://" + key + # Only use the location. + self.credentials[urlparse(key).netloc] = value + @property def include_providers(self): return self.__include_providers @@ -197,7 +239,8 @@ def _download_parallel(self, split, data_type, **kwargs): "endpoint": k, "bulk_str": v, "data_type": data_type, - "kwargs": kwargs}) + "kwargs": kwargs, + "credentials": self.credentials}) pool = ThreadPool(processes=len(dl_requests)) results = pool.map(_download_bulk, dl_requests) diff --git a/obspy/clients/fdsn/tests/data/eida_token.txt b/obspy/clients/fdsn/tests/data/eida_token.txt new file mode 100644 index 00000000000..c450a21a931 --- /dev/null +++ b/obspy/clients/fdsn/tests/data/eida_token.txt @@ -0,0 +1,13 @@ +-----BEGIN PGP MESSAGE----- +Version: GnuPG v2.0.9 (GNU/Linux) + +owGbwMvMwMR4USg04vQMEQbG0+5JDOEWH+OrlXITM3OUrBSUSlKLSxzyk4oLKvXy +i9KVdBSUyhJzMlPiS/NKIAqMDIwMdA0MgSjEwMAKjKKUajsZZVgYGJkY2FiZQOYx +cHEKwCx5Lcv+T0W79KhOxauJLEnP7t0768ZTvzk1Z7XbnSSmqvB7/rfsRfOvSrMt +39GjosTgveqB7gHRYz0+jPP36dj073r2s8h6Xvu0+sAtLbfO6C3xNolRDPgQ4tud +m6J8tm573KGFFTG/+uI/hSir56Uav33xN1DBwKPs1pt8H6vFirf81J5Y3eR8wbmm 
+c9eXV5VeBy/9bYrd+TXsnFkiayjrxViWdm8uUXnj4hzDE5vvTeVZGH/nclVE1v6z +DO3n3STZvjMKsRUxVPKcnTmHcU64qiTX10bRqi1yHzuMs35Gzz0sMpXbTencLduG +hMrp91PMe04c0otcNytOKDFe/uDva3sd9jZKcxSXb3Gs2Rxf2QgA +=VVHX +-----END PGP MESSAGE----- diff --git a/obspy/clients/fdsn/tests/test_client.py b/obspy/clients/fdsn/tests/test_client.py index 39692d7f263..85cbd9af16a 100644 --- a/obspy/clients/fdsn/tests/test_client.py +++ b/obspy/clients/fdsn/tests/test_client.py @@ -12,6 +12,7 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) from future.builtins import * # NOQA +from future.utils import PY2 import io import os @@ -21,13 +22,19 @@ import warnings from difflib import Differ +if PY2: + import urllib2 as urllib_request +else: + import urllib.request as urllib_request + import lxml +import numpy as np import requests -from obspy import UTCDateTime, read, read_inventory +from obspy import UTCDateTime, read, read_inventory, Stream, Trace from obspy.core.compatibility import mock from obspy.core.util.base import NamedTemporaryFile -from obspy.clients.fdsn import Client +from obspy.clients.fdsn import Client, RoutingClient from obspy.clients.fdsn.client import build_url, parse_simple_xml from obspy.clients.fdsn.header import (DEFAULT_USER_AGENT, URL_MAPPINGS, FDSNException, FDSNRedirectException, @@ -1217,6 +1224,135 @@ def test_no_data(self): endtime=UTCDateTime("2001-01-07T01:01:00"), minmagnitude=8) + def test_eida_token_resolution(self): + """ + Tests that EIDA tokens are resolved correctly and new credentials get + installed with the opener of the Client. 
+ """ + token = os.path.join(self.datapath, 'eida_token.txt') + with open(token, 'rb') as fh: + token_data = fh.read().decode() + + def _assert_eida_user_and_password(user, password): + # user/pass is not static for the static test token + for value in user, password: + # seems safe to assume both user and password are at least 10 + # chars long + # example user/password: + # wWGgJnH4GvdVY7gDMH21xEpb wDnzlpljqdaCXlP2 + re.match('^[a-zA-Z0-9]{10,}$', value) + + def _get_http_digest_auth_handler(client): + handlers = [h for h in client._url_opener.handlers + if isinstance(h, urllib_request.HTTPDigestAuthHandler)] + self.assertLessEqual(len(handlers), 1) + return handlers and handlers[0] or None + + def _assert_credentials(client, user, password): + handler = _get_http_digest_auth_handler(client) + self.assertIsInstance(handler, + urllib_request.HTTPDigestAuthHandler) + for user_, password_ in handler.passwd.passwd[None].values(): + self.assertEqual(user, user_) + self.assertEqual(password, password_) + + client = Client('GFZ') + # this is a plain client, so it should not have http digest auth + self.assertEqual(_get_http_digest_auth_handler(client), None) + # now, if we set new user/password, we should get a http digest auth + # handler + user, password = ("spam", "eggs") + client._set_opener(user=user, password=password) + _assert_credentials(client, user, password) + # now, if we resolve the EIDA token, the http digest auth handler + # should change + user, password = client._resolve_eida_token(token=token) + _assert_eida_user_and_password(user, password) + client._set_opener(user=user, password=password) + _assert_credentials(client, user, password) + # do it again, now providing the token data directly as a string (first + # change the authentication again to dummy user/password + client._set_opener(user="foo", password="bar") + _assert_credentials(client, "foo", "bar") + user, password = client._resolve_eida_token(token=token_data) + 
_assert_eida_user_and_password(user, password) + client.set_eida_token(token_data) + _assert_credentials(client, user, password) + + # Raise if token and user/pw are given. + with self.assertRaises(FDSNException) as err: + Client('GFZ', eida_token=token, user="foo", password="bar") + self.assertEqual( + err.exception.args[0], + "EIDA authentication token provided, but user and password are " + "also given.") + + # now let's test the RoutingClient with credentials + credentials_ = {'geofon.gfz-potsdam.de': {'eida_token': token}} + credentials_mapping_ = {'GFZ': {'eida_token': token}} + global_eida_credentials_ = {'EIDA_TOKEN': token} + for credentials, should_have_credentials in zip( + (None, credentials_, credentials_mapping_, + global_eida_credentials_), (False, True, True, True)): + def side_effect(self_, *args, **kwargs): + """ + This mocks out Client.get_waveforms_bulk which gets called by + the routing client, checks authentication handlers and returns + a dummy stream. + """ + # check that we're at the expected FDSN WS server + self.assertEqual('http://geofon.gfz-potsdam.de', + self_.base_url) + # check if credentials were used + # eida auth availability should be positive in all cases + self.assertTrue(self_._has_eida_auth) + # depending on whether we specified credentials, the + # underlying FDSN client should have EIDA authentication + # flag and should also have an HTTP digest handler with + # appropriate user/password + handler = _get_http_digest_auth_handler(self_) + if should_have_credentials: + for user, password in handler.passwd.passwd[None].values(): + _assert_eida_user_and_password(user, password) + else: + self.assertEqual(handler, None) + # just always return some dummy stream, we're not + # interested in checking the data downloading which + # succeeds regardless if auth is used or not as it's public + # data + return Stream([Trace(data=np.ones(2))]) + + with mock.patch( + 'obspy.clients.fdsn.client.Client.get_waveforms_bulk', + autospec=True)
as p: + + p.side_effect = side_effect + + routing_client = RoutingClient('eida-routing', + credentials=credentials) + # do a waveform request on the routing client which internally + # connects to the GFZ FDSNWS. this should be done using the + # above supplied credentials, i.e. should use the given EIDA + # token to resolve user/password for the normal FDSN queryauth + # mechanism + routing_client.get_waveforms( + network="GE", station="KMBO", location="00", channel="BHZ", + starttime=UTCDateTime("2010-02-27T06:30:00.000"), + endtime=UTCDateTime("2010-02-27T06:40:00.000")) + + # test invalid token/token file + with self.assertRaisesRegexp( + ValueError, + 'EIDA token does not seem to be a valid PGP message'): + client = Client('GFZ', eida_token="spam") + with self.assertRaisesRegexp( + ValueError, + "Read EIDA token from file '[^']*event_helpstring.txt' but it " + "does not seem to contain a valid PGP message."): + client = Client( + 'GFZ', eida_token=os.path.join(self.datapath, + 'event_helpstring.txt')) + def suite(): return unittest.makeSuite(ClientTestCase, 'test') diff --git a/obspy/clients/fdsn/wadl_parser.py b/obspy/clients/fdsn/wadl_parser.py index d8c05f010ce..d336f89cf7c 100644 --- a/obspy/clients/fdsn/wadl_parser.py +++ b/obspy/clients/fdsn/wadl_parser.py @@ -33,11 +33,13 @@ def __init__(self, wadl_string): self.nsmap = doc.nsmap self._ns = self.nsmap.get(None, None) self.parameters = {} + self._has_eida_auth = False # Get the url. 
url = self._xpath(doc, "/application/resources")[0].get("base") if "dataselect" in url: self._wadl_type = "dataselect" + self._has_eida_auth = self._check_for_eida_auth(doc) elif "station" in url: self._wadl_type = "station" elif "event" in url: @@ -237,6 +239,10 @@ def _xpath(self, doc, expr): nsmap[default_abbreviation] = self._ns return doc.xpath(expr, namespaces=nsmap) + def _check_for_eida_auth(self, doc): + auth_endpoint = self._xpath(doc, '//resources/resource[@path="auth"]') + return auth_endpoint and True or False + if __name__ == '__main__': import doctest