diff --git a/CHANGELOG.txt b/CHANGELOG.txt
index 5301be070c..8f830a8919 100644
--- a/CHANGELOG.txt
+++ b/CHANGELOG.txt
@@ -18,7 +18,9 @@ Changes:
from scipy with version 1.6, add new window 'dpss' that scipy promotes as
replacement but might need other parameters passed in (see #3331)
- obspy.clients.filesystem:
- * update syntax for SQLAlchemy 2.0 compatibility (see #3269)
+ * tsindex: update syntax for SQLAlchemy 2.0 compatibility (see #3269)
+ * tsindex: leap second handling was deactivated as it is not needed with
+ current mseedindex (see #3403)
- obspy.clients.fdsn
* Natural Resources Canada (NRCAN) added to list of known clients
- obspy.clients.seedlink:
diff --git a/obspy/clients/filesystem/tests/test_tsindex.py b/obspy/clients/filesystem/tests/test_tsindex.py
index 48ddf89b0d..8623be6082 100644
--- a/obspy/clients/filesystem/tests/test_tsindex.py
+++ b/obspy/clients/filesystem/tests/test_tsindex.py
@@ -1,7 +1,6 @@
# -*- coding: utf-8 -*-
import os
import re
-import requests
import tempfile
import uuid
from collections import namedtuple
@@ -13,7 +12,6 @@
from sqlalchemy.orm import sessionmaker
from obspy import read, UTCDateTime
-from obspy.core.util.misc import TemporaryWorkingDirectory
from obspy.clients.filesystem.tsindex import Client, Indexer, \
TSIndexDatabaseHandler
@@ -568,7 +566,7 @@ def test_bad_rootpath(self, filepath):
database = os.path.join(filepath,
'timeseries.sqlite')
- # test that a bad leap second file path raises an error
+ # test that a bad root path raises an error
with pytest.raises(OSError, match="^Root path.*does not exists.$"):
Indexer("/some/bad/path", database=database,
filename_pattern="*.mseed", parallel=2)
@@ -592,70 +590,6 @@ def test_bad_database(self, filepath):
with pytest.raises(ValueError, match=match):
Indexer(filepath, database=None, filename_pattern="*.mseed")
- def test_download_leap_seconds_file(self):
- with TemporaryWorkingDirectory() as tempdir:
- database = os.path.join(tempdir, 'timeseries.sqlite')
- indexer = Indexer(tempdir,
- database=database)
- # mock actually downloading the file since this requires a internet
- # connection
- indexer._download = mock.MagicMock(
- return_value=requests.Response())
- # create a empty leap-seconds.list file
- test_file = os.path.join(
- os.path.dirname(database), "leap-seconds.list")
- file_path = indexer.download_leap_seconds_file(test_file)
- # assert that the file was put in the same location as the
- # sqlite db
- assert os.path.isfile(file_path)
- assert file_path == test_file
-
- def test_download_leap_seconds_file_no_path_given(self):
- with TemporaryWorkingDirectory() as tempdir:
- database = os.path.join(tempdir, 'timeseries.sqlite')
- indexer = Indexer(tempdir,
- database=database)
- # mock actually downloading the file since this requires a internet
- # connection
- indexer._download = mock.MagicMock(
- return_value=requests.Response())
- file_path = indexer.download_leap_seconds_file()
-
- assert os.path.normpath(file_path) == \
- os.path.normpath(os.path.join(os.path.dirname(database),
- "leap-seconds.list"))
-
- # assert that the file was put in the same location as the
- # sqlite db
- assert os.path.isfile(file_path)
-
- def test__get_leap_seconds_file(self, filepath):
- database = os.path.join(filepath, 'timeseries.sqlite')
- indexer = Indexer(filepath,
- database=database)
-
- # test that a bad leap second file path raises an error
- with pytest.raises(OSError,
- match="^No leap seconds file exists at.*$"):
- Indexer(filepath, database=database,
- leap_seconds_file="/some/bad/path/")
- with pytest.raises(OSError,
- match="^No leap seconds file exists at.*$"):
- indexer._get_leap_seconds_file("/some/bad/path/")
-
- # test search
- # create a empty leap-seconds.list file
- with TemporaryWorkingDirectory() as tempdir:
- database = os.path.join(tempdir, 'timeseries.sqlite')
- indexer = Indexer(tempdir,
- database=database)
- test_file = os.path.normpath(os.path.join(
- os.path.dirname(database), "leap-seconds.list"))
- open(test_file, 'a').close()
- file_path = os.path.normpath(
- indexer._get_leap_seconds_file("SEARCH"))
- assert file_path == test_file
-
def test_build_file_list(self, filepath):
database = os.path.join(filepath, 'timeseries.sqlite')
indexer = Indexer(filepath,
@@ -689,8 +623,7 @@ def test_build_file_list(self, filepath):
# this time pass a TSIndexDatabaseHandler instance as the database
indexer = Indexer(filepath,
database=TSIndexDatabaseHandler(database=database),
- filename_pattern="*.mseed",
- leap_seconds_file=None)
+ filename_pattern="*.mseed")
file_list = indexer.build_file_list(reindex=True)
file_list.sort()
assert len(file_list) == 3
@@ -769,16 +702,16 @@ def test_run(self, filepath):
expected_tsindex_data = \
[
NamedRow(
- "CU", "TGUH", "00", "BHZ", "M",
- "2018-01-01T00:00:00.000000",
- "2018-01-01T00:01:00.000000", 40.0,
+ "CU", "TGUH", "00", "BHZ", None,
+ "2018-01-01T00:00:00",
+ "2018-01-01T00:01:00", 40.0,
"CU/2018/001/"
"CU.TGUH.00.BHZ.2018.001_first_minute.mseed",
0, 4096, "aaaac5315f84cdd174fd8360002a1e3a",
"1514764800.000000=>0,latest=>1",
"[1514764800.000000:1514764860.000000]", None, None),
NamedRow(
- "IU", "ANMO", "10", "BHZ", "M",
+ "IU", "ANMO", "10", "BHZ", None,
"2018-01-01T00:00:00.019500",
"2018-01-01T00:00:59.994536", 40.0,
"IU/2018/001/"
@@ -787,7 +720,7 @@ def test_run(self, filepath):
"1514764800.019500=>0,latest=>1",
"[1514764800.019500:1514764859.994536]", None, None),
NamedRow(
- "IU", "COLA", "10", "BHZ", "M",
+ "IU", "COLA", "10", "BHZ", None,
"2018-01-01T00:00:00.019500",
"2018-01-01T00:00:59.994538", 40.0,
"IU/2018/001/"
diff --git a/obspy/clients/filesystem/tsindex.py b/obspy/clients/filesystem/tsindex.py
index e867719262..084c6f7dfa 100644
--- a/obspy/clients/filesystem/tsindex.py
+++ b/obspy/clients/filesystem/tsindex.py
@@ -6,7 +6,7 @@
The obspy.clients.filesystem.tsindex module includes a timeseries extraction
:class:`Client` class for a database created by the EarthScope
`mseedindex `_ program, as well as,
-a :class:`Indexer` class for creating a SQLite3 database that follows the
+a :class:`Indexer` class to create a SQLite3 database, following the
EarthScope `tsindex database schema
`_\.
@@ -151,10 +151,8 @@
import copyreg
import datetime
-import time
import logging
import os
-import requests
import sqlalchemy as sa
import subprocess
import types
@@ -208,6 +206,7 @@ def __init__(self, database, datapath_replace=None, loglevel=None):
:param datapath_replace: A ``tuple(str, str)``, where any
occurrence of the first value will be replaced with the second
value in filename paths from the index.
+ :param loglevel: DEPRECATED and without effect
"""
# setup handler for database
if isinstance(database, str):
@@ -896,11 +895,12 @@ class Indexer(object):
is not already in the index. After all new files are indexed a summary
table is generated with the extents of each timeseries.
"""
+ @deprecated_keywords({"leap_seconds_file": None})
@deprecated_keywords({"loglevel": None})
def __init__(self, root_path, database="timeseries.sqlite",
- leap_seconds_file="SEARCH", index_cmd='mseedindex',
+ index_cmd='mseedindex',
bulk_params=None, filename_pattern='*', parallel=5,
- loglevel=None):
+ leap_seconds_file=None, loglevel=None):
"""
Initializes the Indexer.
@@ -911,21 +911,6 @@ def __init__(self, root_path, database="timeseries.sqlite",
:param database: Path to SQLite tsindex database or a
TSIndexDatabaseHandler object. A database will be created
if one does not already exists at the specified path.
- :type leap_seconds_file: str
- :param leap_seconds_file: Path to leap seconds file. If set to
- "SEARCH" (default), then the program looks for a leap seconds file
- in the same directory as the SQLite database. If set to "DOWNLOAD",
- a leap seconds file will be downloaded from the IETF (if expired).
- If set to `None` then no leap seconds file will be used.
-
- In :meth:`~Indexer.run` the leap
- seconds listed in this file will be used to adjust the time
- coverage for records that contain a leap second. Also, leap second
- indicators in the miniSEED headers will be ignored. See the
- `mseedindex wiki leap second
- `_ for more"
- "for more information regarding this file.
:type index_cmd: str
:param index_cmd: Command to be run for each target file found that
is not already in the index
@@ -936,6 +921,8 @@ def __init__(self, root_path, database="timeseries.sqlite",
:type parallel: int
:param parallel: Max number of ``index_cmd`` instances to run in
parallel. By default a max of 5 parallel process are run.
+ :param loglevel: DEPRECATED and without effect
+ :param leap_seconds_file: DEPRECATED and without effect
"""
self.index_cmd = index_cmd
if bulk_params is None:
@@ -953,8 +940,6 @@ def __init__(self, root_path, database="timeseries.sqlite",
raise ValueError("Database must be a string or "
"TSIndexDatabaseHandler object.")
- self.leap_seconds_file = self._get_leap_seconds_file(leap_seconds_file)
-
self.root_path = os.path.abspath(root_path)
if not os.path.isdir(self.root_path):
raise OSError("Root path `{}` does not exists."
@@ -1085,56 +1070,6 @@ def build_file_list(self, relative_paths=False, reindex=False):
self.root_path))
return result
- def download_leap_seconds_file(self, file_path=None, url=None):
- """
- Attempt to download leap-seconds.list from Internet Engineering Task
- Force (IETF) and save to a file.
-
- :type file_path: str
- :param file_path: Optional path to where the leap seconds file should
- be downloaded. By default the file is downloaded to the same
- directory as the
- :class:`~Indexer` instances
- sqlite3 timeseries index database path.
- :type url: str
- :param url: Optional URL to download from, default is from the IETF:
- https://www.ietf.org/timezones/data/leap-seconds.list
-
- :rtype: str
- :returns: Path to downloaded leap seconds file.
- """
- if url is None:
- url = "https://www.ietf.org/timezones/data/leap-seconds.list"
-
- try:
- if file_path is None:
- if self.request_handler.database is not None:
- file_path = os.path.join(
- os.path.dirname(self.request_handler.database),
- "leap-seconds.list")
- logger.debug("No leap seconds file path specified. "
- "Attempting to create a leap seconds file "
- "at {}."
- .format(file_path))
- else:
- raise OSError("No leap seconds file specified and no "
- "database path to generate one from")
-
- logger.info("Downloading leap seconds file from {}.".format(url))
- r = self._download(url)
- except Exception as e: # pragma: no cover
- raise OSError(
- ("Failed to download leap seconds file due to: {}. "
- "No leap seconds file will be used.").format(str(e)))
- try:
- logger.debug("Writing leap seconds file to {}.".format(file_path))
- with open(file_path, "w") as fh:
- fh.write(r.text)
- except Exception as e: # pragma: no cover
- raise OSError("Failed to create leap seconds file at {} due to {}."
- .format(file_path, str(e)))
- return os.path.abspath(file_path)
-
def _get_rootpath_files(self, relative_paths=False):
"""
Return a list of absolute paths to files under the rootpath that
@@ -1152,131 +1087,6 @@ def _get_rootpath_files(self, relative_paths=False):
else:
return file_list
- def _download(self, url):
- return requests.get(url)
-
- def _get_leap_seconds_file(self, leap_seconds_file):
- """
- Return path to leap second file and set appropriate environment
- variable for mseedindex.
-
- :type leap_seconds_file: str or None
- :param leap_seconds_file: Leap second file options defined in the
- :class:`~Indexer` constructor.
- """
- file_path = None
-
- if leap_seconds_file is not None:
- if leap_seconds_file == "SEARCH":
- file_path = self._find_leap_seconds_file(None, download=False)
- elif leap_seconds_file == "DOWNLOAD":
- file_path = self._find_leap_seconds_file(None, download=True)
- elif not os.path.isfile(leap_seconds_file):
- raise OSError("No leap seconds file exists at `{}`. "
- .format(leap_seconds_file))
- else:
- file_path = self._find_leap_seconds_file(leap_seconds_file)
-
- if file_path is None:
- logger.warning("No leap second file found, none will be used")
- file_path = "NONE"
- else:
- logger.debug("Using leap second file: {}".format(file_path))
- os.environ["LIBMSEED_LEAPSECOND_FILE"] = file_path
- else:
- # warn user and don't use a leap seconds file
- logger.warning("No leap second file specified. "
- "Use is recommended.")
- os.environ["LIBMSEED_LEAPSECOND_FILE"] = "NONE"
- return file_path
-
- def _find_leap_seconds_file(self, leap_seconds_file=None, download=False,
- download_url=None):
- """Search for leap seconds file and return path.
-
- :type leap_seconds_file: str or None
- :param leap_seconds_file: Leap seconds file location. If ``None``
- a file location will be generated in same directory as a
- SQLite database location if present.
- :type download: bool
- :param download: If ``download`` is ``True`` and any existing file
- has expired, a new file will be downloaded.
- :type download_url: str or None
- :param download_url: URL to download from, passed to
- :meth:`~Indexer.download_leap_seconds_file`.
-
- """
-
- if leap_seconds_file is None:
- # Determine file location from database location if not set
- if self.request_handler.database is not None:
- dbpath = os.path.dirname(self.request_handler.database)
- leap_seconds_file = os.path.join(dbpath, "leap-seconds.list")
- # Otherwise there is nothing to find
- else:
- return None
-
- expired = True
- exists = os.path.isfile(leap_seconds_file)
-
- if exists:
- expired = self._leap_seconds_file_expired(leap_seconds_file)
- if expired:
- logger.warning("Leap seconds file `{}` expired".
- format(leap_seconds_file))
-
- if download and (not exists or expired):
- leap_seconds_file = self.download_leap_seconds_file(
- file_path=leap_seconds_file, url=download_url)
-
- if os.path.isfile(leap_seconds_file):
- return os.path.abspath(leap_seconds_file)
- else:
- logger.warning("Leap seconds file `{}` not found.".
- format(leap_seconds_file))
- return None
-
- def _leap_seconds_file_expired(self, file_path):
- """
- Test expiration status of specified leap seconds file.
-
- The file is expected to be a leap-seconds.list as published by the
- Internet Engineering Task Force (IETF), which contains a line starting
- with `#@` followed by the expiration time stamp in NTP format.
-
- :type file_path: str
- :param file_path: Path to leap seconds file to test
-
- :rtype: bool
- :returns: Expiration status of leap seconds file.
- """
-
- # The expiration is expected as a line like the following,
- # with time in NTP format:
- # "#@ 3833827200"
- # The NTP time scale is offset from the POSIX epoch by 2208988800
- # NTP 3833827200 == POSIX 1624838400 == 2021-06-28T00:00:00Z
-
- expired = None
-
- logger.info("Testing expiration of leap seconds file: {}".
- format(file_path))
- with open(file_path) as fp:
- for line in fp:
- if line.startswith('#@'):
- expiration = int(line.split()[1]) - 2208988800
- expired = expiration < int(time.time())
- break
-
- if expired is not None:
- isostring = (
- datetime.datetime.fromtimestamp(
- expiration, tz=datetime.timezone.utc).isoformat())
- logger.debug("Leap seconds file `{}` expires: {}, expired: {}".
- format(file_path, isostring, expired))
-
- return expired
-
def _is_index_cmd_installed(self):
"""
Checks if the index command (e.g. mseedindex) is installed.
@@ -1550,16 +1360,14 @@ def _fetch_index_rows(self, query_rows=None, bulk_params=None):
sa.literal(b).label("station"),
sa.literal(c).label("location"),
sa.literal(d).label("channel"),
- sa.case(
- (sa.literal(e) == '*',
- sa.literal('0000-00-00T00:00:00')),
- (sa.literal(e) != '*', sa.literal(e))
- ).label("starttime"),
- sa.case(
- (sa.literal(f) == '*',
- sa.literal('5000-00-00T00:00:00')),
- (sa.literal(f) != '*', sa.literal(f))
- ).label("endtime")
+ sa.case((sa.literal(e) == '*',
+ sa.literal('0000-00-00T00:00:00')),
+ else_=sa.literal(e)
+ ).label("starttime"),
+ sa.case((sa.literal(f) == '*',
+ sa.literal('5000-00-00T00:00:00')),
+ else_=sa.literal(f)
+ ).label("endtime")
)
for idx, (a, b, c, d, e, f) in enumerate(query_rows)
]
@@ -1586,21 +1394,14 @@ def _fetch_index_rows(self, query_rows=None, bulk_params=None):
self.TSIndexSummaryTable.station,
self.TSIndexSummaryTable.location,
self.TSIndexSummaryTable.channel,
- self.TSIndexSummaryTable.network,
- sa.case([
- (requests_cte.c.starttime == '*',
- self.TSIndexSummaryTable.earliest),
- (requests_cte.c.starttime != '*',
- requests_cte.c.starttime)
- ])
- .label('starttime'),
- sa.case([
- (requests_cte.c.endtime == '*',
- self.TSIndexSummaryTable.latest),
- (requests_cte.c.endtime != '*',
- requests_cte.c.endtime)
- ])
- .label('endtime'))
+ sa.case((requests_cte.c.starttime == '*',
+ self.TSIndexSummaryTable.earliest),
+ else_=requests_cte.c.starttime
+ ).label('starttime'),
+ sa.case((requests_cte.c.endtime == '*',
+ self.TSIndexSummaryTable.latest),
+ else_=requests_cte.c.endtime
+ ).label('endtime'))
.filter(self.TSIndexSummaryTable.network.op('GLOB')
(requests_cte.c.network))
.filter(self.TSIndexSummaryTable.station.op('GLOB')
@@ -1734,16 +1535,14 @@ def _fetch_summary_rows(self, query_rows):
sa.literal(b).label("station"),
sa.literal(c).label("location"),
sa.literal(d).label("channel"),
- sa.case(
- (sa.literal(e) == '*',
- sa.literal('0000-00-00T00:00:00')),
- (sa.literal(e) != '*', sa.literal(e))
- ).label("starttime"),
- sa.case(
- (sa.literal(f) == '*',
- sa.literal('5000-00-00T00:00:00')),
- (sa.literal(f) != '*', sa.literal(f))
- ).label("endtime")
+ sa.case((sa.literal(e) == '*',
+ sa.literal('0000-00-00T00:00:00')),
+ else_=sa.literal(e)
+ ).label("starttime"),
+ sa.case((sa.literal(f) == '*',
+ sa.literal('5000-00-00T00:00:00')),
+ else_=sa.literal(f)
+ ).label("endtime")
)
for idx, (a, b, c, d, e, f) in enumerate(query_rows)
]
@@ -1896,11 +1695,11 @@ def _set_sqlite_pragma(self):
# setup the sqlite database
session = self.session()
# https://www.sqlite.org/foreignkeys.html
- session.execute('PRAGMA foreign_keys = ON')
+ session.execute(sa.text('PRAGMA foreign_keys = ON'))
# as used by mseedindex
- session.execute('PRAGMA case_sensitive_like = ON')
+ session.execute(sa.text('PRAGMA case_sensitive_like = ON'))
# enable Write-Ahead Log for better concurrency support
- session.execute('PRAGMA journal_mode=WAL')
+ session.execute(sa.text('PRAGMA journal_mode=WAL'))
except Exception:
raise OSError("Failed to setup SQLite database for indexing.")