diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 5301be070c..8f830a8919 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -18,7 +18,9 @@ Changes: from scipy with version 1.6, add new window 'dpss' that scipy promotes as replacement but might need other parameters passed in (see #3331) - obspy.clients.filesystem: - * update syntax for SQLAlchemy 2.0 compatibility (see #3269) + * tsindex: update syntax for SQLAlchemy 2.0 compatibility (see #3269) + * tsindex: leap second handling was deactivated as it is not needed with + current msindex (see #3403) - obspy.clients.fdsn * Natural Resources Canada (NRCAN) added to list of known clients - obspy.clients.seedlink: diff --git a/obspy/clients/filesystem/tests/test_tsindex.py b/obspy/clients/filesystem/tests/test_tsindex.py index 48ddf89b0d..8623be6082 100644 --- a/obspy/clients/filesystem/tests/test_tsindex.py +++ b/obspy/clients/filesystem/tests/test_tsindex.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- import os import re -import requests import tempfile import uuid from collections import namedtuple @@ -13,7 +12,6 @@ from sqlalchemy.orm import sessionmaker from obspy import read, UTCDateTime -from obspy.core.util.misc import TemporaryWorkingDirectory from obspy.clients.filesystem.tsindex import Client, Indexer, \ TSIndexDatabaseHandler @@ -568,7 +566,7 @@ def test_bad_rootpath(self, filepath): database = os.path.join(filepath, 'timeseries.sqlite') - # test that a bad leap second file path raises an error + # test that a bad root path raises an error with pytest.raises(OSError, match="^Root path.*does not exists.$"): Indexer("/some/bad/path", database=database, filename_pattern="*.mseed", parallel=2) @@ -592,70 +590,6 @@ def test_bad_database(self, filepath): with pytest.raises(ValueError, match=match): Indexer(filepath, database=None, filename_pattern="*.mseed") - def test_download_leap_seconds_file(self): - with TemporaryWorkingDirectory() as tempdir: - database = os.path.join(tempdir, 'timeseries.sqlite') - indexer = Indexer(tempdir, - database=database) - # mock actually downloading the file since this requires a internet - # connection - indexer._download = mock.MagicMock( - return_value=requests.Response()) - # create a empty leap-seconds.list file - test_file = os.path.join( - os.path.dirname(database), "leap-seconds.list") - file_path = indexer.download_leap_seconds_file(test_file) - # assert that the file was put in the same location as the - # sqlite db - assert os.path.isfile(file_path) - assert file_path == test_file - - def test_download_leap_seconds_file_no_path_given(self): - with TemporaryWorkingDirectory() as tempdir: - database = os.path.join(tempdir, 'timeseries.sqlite') - indexer = Indexer(tempdir, - database=database) - # mock actually downloading the file since this requires a internet - # connection - indexer._download = mock.MagicMock( - return_value=requests.Response()) - file_path = indexer.download_leap_seconds_file() - - assert os.path.normpath(file_path) == \ - os.path.normpath(os.path.join(os.path.dirname(database), - "leap-seconds.list")) - - # assert that the file was put in the same location as the - # sqlite db - assert os.path.isfile(file_path) - - def test__get_leap_seconds_file(self, filepath): - database = os.path.join(filepath, 'timeseries.sqlite') - indexer = Indexer(filepath, - database=database) - - # test that a bad leap second file path raises an error - with pytest.raises(OSError, - match="^No leap seconds file exists at.*$"): - Indexer(filepath, database=database, - leap_seconds_file="/some/bad/path/") - with pytest.raises(OSError, - match="^No leap seconds file exists at.*$"): - indexer._get_leap_seconds_file("/some/bad/path/") - - # test search - # create a empty leap-seconds.list file - with TemporaryWorkingDirectory() as tempdir: - database = os.path.join(tempdir, 'timeseries.sqlite') - indexer = Indexer(tempdir, - database=database) - test_file = os.path.normpath(os.path.join( - os.path.dirname(database), "leap-seconds.list")) - open(test_file, 'a').close() - file_path = os.path.normpath( - indexer._get_leap_seconds_file("SEARCH")) - assert file_path == test_file - def test_build_file_list(self, filepath): database = os.path.join(filepath, 'timeseries.sqlite') indexer = Indexer(filepath, @@ -689,8 +623,7 @@ def test_build_file_list(self, filepath): # this time pass a TSIndexDatabaseHandler instance as the database indexer = Indexer(filepath, database=TSIndexDatabaseHandler(database=database), - filename_pattern="*.mseed", - leap_seconds_file=None) + filename_pattern="*.mseed") file_list = indexer.build_file_list(reindex=True) file_list.sort() assert len(file_list) == 3 @@ -769,16 +702,16 @@ def test_run(self, filepath): expected_tsindex_data = \ [ NamedRow( - "CU", "TGUH", "00", "BHZ", "M", - "2018-01-01T00:00:00.000000", - "2018-01-01T00:01:00.000000", 40.0, + "CU", "TGUH", "00", "BHZ", None, + "2018-01-01T00:00:00", + "2018-01-01T00:01:00", 40.0, "CU/2018/001/" "CU.TGUH.00.BHZ.2018.001_first_minute.mseed", 0, 4096, "aaaac5315f84cdd174fd8360002a1e3a", "1514764800.000000=>0,latest=>1", "[1514764800.000000:1514764860.000000]", None, None), NamedRow( - "IU", "ANMO", "10", "BHZ", "M", + "IU", "ANMO", "10", "BHZ", None, "2018-01-01T00:00:00.019500", "2018-01-01T00:00:59.994536", 40.0, "IU/2018/001/" @@ -787,7 +720,7 @@ def test_run(self, filepath): "1514764800.019500=>0,latest=>1", "[1514764800.019500:1514764859.994536]", None, None), NamedRow( - "IU", "COLA", "10", "BHZ", "M", + "IU", "COLA", "10", "BHZ", None, "2018-01-01T00:00:00.019500", "2018-01-01T00:00:59.994538", 40.0, "IU/2018/001/" diff --git a/obspy/clients/filesystem/tsindex.py b/obspy/clients/filesystem/tsindex.py index e867719262..084c6f7dfa 100644 --- a/obspy/clients/filesystem/tsindex.py +++ b/obspy/clients/filesystem/tsindex.py @@ -6,7 +6,7 @@ The obspy.clients.filesystem.tsindex module includes a timeseries extraction :class:`Client` class for a database created by the EarthScope `mseedindex `_ program, as well as, -a :class:`Indexer` class for creating a SQLite3 database that follows the +a :class:`Indexer` class to create a SQLite3 database, following the EarthScope `tsindex database schema `_\. @@ -151,10 +151,8 @@ import copyreg import datetime -import time import logging import os -import requests import sqlalchemy as sa import subprocess import types @@ -208,6 +206,7 @@ def __init__(self, database, datapath_replace=None, loglevel=None): :param datapath_replace: A ``tuple(str, str)``, where any occurrence of the first value will be replaced with the second value in filename paths from the index. + :param loglevel: DEPRECATED and without effect """ # setup handler for database if isinstance(database, str): @@ -896,11 +895,12 @@ class Indexer(object): is not already in the index. After all new files are indexed a summary table is generated with the extents of each timeseries. """ + @deprecated_keywords({"leap_seconds_file": None}) @deprecated_keywords({"loglevel": None}) def __init__(self, root_path, database="timeseries.sqlite", - leap_seconds_file="SEARCH", index_cmd='mseedindex', + index_cmd='mseedindex', bulk_params=None, filename_pattern='*', parallel=5, - loglevel=None): + leap_seconds_file=None, loglevel=None): """ Initializes the Indexer. @@ -911,21 +911,6 @@ def __init__(self, root_path, database="timeseries.sqlite", :param database: Path to SQLite tsindex database or a TSIndexDatabaseHandler object. A database will be created if one does not already exists at the specified path. - :type leap_seconds_file: str - :param leap_seconds_file: Path to leap seconds file. If set to - "SEARCH" (default), then the program looks for a leap seconds file - in the same directory as the SQLite database. If set to "DOWNLOAD", - a leap seconds file will be downloaded from the IETF (if expired). - If set to `None` then no leap seconds file will be used. - - In :meth:`~Indexer.run` the leap - seconds listed in this file will be used to adjust the time - coverage for records that contain a leap second. Also, leap second - indicators in the miniSEED headers will be ignored. See the - `mseedindex wiki leap second - `_ for more" - "for more information regarding this file. :type index_cmd: str :param index_cmd: Command to be run for each target file found that is not already in the index @@ -936,6 +921,8 @@ def __init__(self, root_path, database="timeseries.sqlite", :type parallel: int :param parallel: Max number of ``index_cmd`` instances to run in parallel. By default a max of 5 parallel process are run. + :param loglevel: DEPRECATED and without effect + :param leap_seconds_file: DEPRECATED and without effect """ self.index_cmd = index_cmd if bulk_params is None: @@ -953,8 +940,6 @@ def __init__(self, root_path, database="timeseries.sqlite", raise ValueError("Database must be a string or " "TSIndexDatabaseHandler object.") - self.leap_seconds_file = self._get_leap_seconds_file(leap_seconds_file) - self.root_path = os.path.abspath(root_path) if not os.path.isdir(self.root_path): raise OSError("Root path `{}` does not exists." @@ -1085,56 +1070,6 @@ def build_file_list(self, relative_paths=False, reindex=False): self.root_path)) return result - def download_leap_seconds_file(self, file_path=None, url=None): - """ - Attempt to download leap-seconds.list from Internet Engineering Task - Force (IETF) and save to a file. - - :type file_path: str - :param file_path: Optional path to where the leap seconds file should - be downloaded. By default the file is downloaded to the same - directory as the - :class:`~Indexer` instances - sqlite3 timeseries index database path. - :type url: str - :param url: Optional URL to download from, default is from the IETF: - https://www.ietf.org/timezones/data/leap-seconds.list - - :rtype: str - :returns: Path to downloaded leap seconds file. - """ - if url is None: - url = "https://www.ietf.org/timezones/data/leap-seconds.list" - - try: - if file_path is None: - if self.request_handler.database is not None: - file_path = os.path.join( - os.path.dirname(self.request_handler.database), - "leap-seconds.list") - logger.debug("No leap seconds file path specified. " - "Attempting to create a leap seconds file " - "at {}." - .format(file_path)) - else: - raise OSError("No leap seconds file specified and no " - "database path to generate one from") - - logger.info("Downloading leap seconds file from {}.".format(url)) - r = self._download(url) - except Exception as e: # pragma: no cover - raise OSError( - ("Failed to download leap seconds file due to: {}. " - "No leap seconds file will be used.").format(str(e))) - try: - logger.debug("Writing leap seconds file to {}.".format(file_path)) - with open(file_path, "w") as fh: - fh.write(r.text) - except Exception as e: # pragma: no cover - raise OSError("Failed to create leap seconds file at {} due to {}." - .format(file_path, str(e))) - return os.path.abspath(file_path) - def _get_rootpath_files(self, relative_paths=False): """ Return a list of absolute paths to files under the rootpath that @@ -1152,131 +1087,6 @@ def _get_rootpath_files(self, relative_paths=False): else: return file_list - def _download(self, url): - return requests.get(url) - - def _get_leap_seconds_file(self, leap_seconds_file): - """ - Return path to leap second file and set appropriate environment - variable for mseedindex. - - :type leap_seconds_file: str or None - :param leap_seconds_file: Leap second file options defined in the - :class:`~Indexer` constructor. - """ - file_path = None - - if leap_seconds_file is not None: - if leap_seconds_file == "SEARCH": - file_path = self._find_leap_seconds_file(None, download=False) - elif leap_seconds_file == "DOWNLOAD": - file_path = self._find_leap_seconds_file(None, download=True) - elif not os.path.isfile(leap_seconds_file): - raise OSError("No leap seconds file exists at `{}`. " - .format(leap_seconds_file)) - else: - file_path = self._find_leap_seconds_file(leap_seconds_file) - - if file_path is None: - logger.warning("No leap second file found, none will be used") - file_path = "NONE" - else: - logger.debug("Using leap second file: {}".format(file_path)) - os.environ["LIBMSEED_LEAPSECOND_FILE"] = file_path - else: - # warn user and don't use a leap seconds file - logger.warning("No leap second file specified. " - "Use is recommended.") - os.environ["LIBMSEED_LEAPSECOND_FILE"] = "NONE" - return file_path - - def _find_leap_seconds_file(self, leap_seconds_file=None, download=False, - download_url=None): - """Search for leap seconds file and return path. - - :type leap_seconds_file: str or None - :param leap_seconds_file: Leap seconds file location. If ``None`` - a file location will be generated in same directory as a - SQLite database location if present. - :type download: bool - :param download: If ``download`` is ``True`` and any existing file - has expired, a new file will be downloaded. - :type download_url: str or None - :param download_url: URL to download from, passed to - :meth:`~Indexer.download_leap_seconds_file`. - - """ - - if leap_seconds_file is None: - # Determine file location from database location if not set - if self.request_handler.database is not None: - dbpath = os.path.dirname(self.request_handler.database) - leap_seconds_file = os.path.join(dbpath, "leap-seconds.list") - # Otherwise there is nothing to find - else: - return None - - expired = True - exists = os.path.isfile(leap_seconds_file) - - if exists: - expired = self._leap_seconds_file_expired(leap_seconds_file) - if expired: - logger.warning("Leap seconds file `{}` expired". - format(leap_seconds_file)) - - if download and (not exists or expired): - leap_seconds_file = self.download_leap_seconds_file( - file_path=leap_seconds_file, url=download_url) - - if os.path.isfile(leap_seconds_file): - return os.path.abspath(leap_seconds_file) - else: - logger.warning("Leap seconds file `{}` not found.". - format(leap_seconds_file)) - return None - - def _leap_seconds_file_expired(self, file_path): - """ - Test expiration status of specified leap seconds file. - - The file is expected to be a leap-seconds.list as published by the - Internet Engineering Task Force (IETF), which contains a line starting - with `#@` followed by the expiration time stamp in NTP format. - - :type file_path: str - :param file_path: Path to leap seconds file to test - - :rtype: bool - :returns: Expiration status of leap seconds file. - """ - - # The expiration is expected as a line like the following, - # with time in NTP format: - # "#@ 3833827200" - # The NTP time scale is offset from the POSIX epoch by 2208988800 - # NTP 3833827200 == POSIX 1624838400 == 2021-06-28T00:00:00Z - - expired = None - - logger.info("Testing expiration of leap seconds file: {}". - format(file_path)) - with open(file_path) as fp: - for line in fp: - if line.startswith('#@'): - expiration = int(line.split()[1]) - 2208988800 - expired = expiration < int(time.time()) - break - - if expired is not None: - isostring = ( - datetime.datetime.fromtimestamp( - expiration, tz=datetime.timezone.utc).isoformat()) - logger.debug("Leap seconds file `{}` expires: {}, expired: {}". - format(file_path, isostring, expired)) - - return expired - def _is_index_cmd_installed(self): """ Checks if the index command (e.g. mseedindex) is installed. @@ -1550,16 +1360,14 @@ def _fetch_index_rows(self, query_rows=None, bulk_params=None): sa.literal(b).label("station"), sa.literal(c).label("location"), sa.literal(d).label("channel"), - sa.case( - (sa.literal(e) == '*', - sa.literal('0000-00-00T00:00:00')), - (sa.literal(e) != '*', sa.literal(e)) - ).label("starttime"), - sa.case( - (sa.literal(f) == '*', - sa.literal('5000-00-00T00:00:00')), - (sa.literal(f) != '*', sa.literal(f)) - ).label("endtime") + sa.case((sa.literal(e) == '*', + sa.literal('0000-00-00T00:00:00')), + else_=sa.literal(e) + ).label("starttime"), + sa.case((sa.literal(f) == '*', + sa.literal('5000-00-00T00:00:00')), + else_=sa.literal(f) + ).label("endtime") ) for idx, (a, b, c, d, e, f) in enumerate(query_rows) ] @@ -1586,21 +1394,14 @@ def _fetch_index_rows(self, query_rows=None, bulk_params=None): self.TSIndexSummaryTable.station, self.TSIndexSummaryTable.location, self.TSIndexSummaryTable.channel, - self.TSIndexSummaryTable.network, - sa.case([ - (requests_cte.c.starttime == '*', - self.TSIndexSummaryTable.earliest), - (requests_cte.c.starttime != '*', - requests_cte.c.starttime) - ]) - .label('starttime'), - sa.case([ - (requests_cte.c.endtime == '*', - self.TSIndexSummaryTable.latest), - (requests_cte.c.endtime != '*', - requests_cte.c.endtime) - ]) - .label('endtime')) + sa.case((requests_cte.c.starttime == '*', + self.TSIndexSummaryTable.earliest), + else_=requests_cte.c.starttime + ).label('starttime'), + sa.case((requests_cte.c.endtime == '*', + self.TSIndexSummaryTable.latest), + else_=requests_cte.c.endtime + ).label('endtime')) .filter(self.TSIndexSummaryTable.network.op('GLOB') (requests_cte.c.network)) .filter(self.TSIndexSummaryTable.station.op('GLOB') @@ -1734,16 +1535,14 @@ def _fetch_summary_rows(self, query_rows): sa.literal(b).label("station"), sa.literal(c).label("location"), sa.literal(d).label("channel"), - sa.case( - (sa.literal(e) == '*', - sa.literal('0000-00-00T00:00:00')), - (sa.literal(e) != '*', sa.literal(e)) - ).label("starttime"), - sa.case( - (sa.literal(f) == '*', - sa.literal('5000-00-00T00:00:00')), - (sa.literal(f) != '*', sa.literal(f)) - ).label("endtime") + sa.case((sa.literal(e) == '*', + sa.literal('0000-00-00T00:00:00')), + else_=sa.literal(e) + ).label("starttime"), + sa.case((sa.literal(f) == '*', + sa.literal('5000-00-00T00:00:00')), + else_=sa.literal(f) + ).label("endtime") ) for idx, (a, b, c, d, e, f) in enumerate(query_rows) ] @@ -1896,11 +1695,11 @@ def _set_sqlite_pragma(self): # setup the sqlite database session = self.session() # https://www.sqlite.org/foreignkeys.html - session.execute('PRAGMA foreign_keys = ON') + session.execute(sa.text('PRAGMA foreign_keys = ON')) # as used by mseedindex - session.execute('PRAGMA case_sensitive_like = ON') + session.execute(sa.text('PRAGMA case_sensitive_like = ON')) # enable Write-Ahead Log for better concurrency support - session.execute('PRAGMA journal_mode=WAL') + session.execute(sa.text('PRAGMA journal_mode=WAL')) except Exception: raise OSError("Failed to setup SQLite database for indexing.")