diff --git a/docs/environment.yml b/docs/environment.yml index 90900378..5d0f69ff 100644 --- a/docs/environment.yml +++ b/docs/environment.yml @@ -195,6 +195,7 @@ dependencies: - zlib=1.2.11=h166bdaf_1014 - zstd=1.5.2=ha95c52a_0 - pip: + - acres - bids-validator==1.9.3 - docopt==0.6.2 - formulaic==0.3.4 diff --git a/pyproject.toml b/pyproject.toml index 59f87c39..6d06522f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,8 +21,8 @@ classifiers = [ license = {file = "LICENSE"} requires-python = ">=3.9" dependencies = [ + "acres >= 0.5.0", "pybids >= 0.15.2", - "importlib_resources >= 5.7; python_version < '3.11'", "requests", "tqdm", ] diff --git a/templateflow/_loader.py b/templateflow/_loader.py deleted file mode 100644 index 37d33f9e..00000000 --- a/templateflow/_loader.py +++ /dev/null @@ -1,193 +0,0 @@ -# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- -# vi: set ft=python sts=4 ts=4 sw=4 et: -# -# Copyright 2024 The NiPreps Developers -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# We support and encourage derived works from this project, please read -# about our expectations at -# -# https://www.nipreps.org/community/licensing/ -# -"""Resource loader module - -.. autoclass:: Loader -""" - -from __future__ import annotations - -import atexit -import os -from contextlib import AbstractContextManager, ExitStack -from functools import cached_property -from pathlib import Path -from types import ModuleType - -try: - from functools import cache -except ImportError: # PY38 # pragma: no cover - from functools import lru_cache as cache - -try: # Prefer backport to leave consistency to dependency spec - from importlib_resources import as_file, files -except ImportError: # pragma: no cover - from importlib.resources import as_file, files # type: ignore - -try: # Prefer stdlib so Sphinx can link to authoritative documentation - from importlib.resources.abc import Traversable -except ImportError: # pragma: no cover - from importlib_resources.abc import Traversable - -__all__ = ['Loader'] - - -class Loader: - """A loader for package files relative to a module - - This class wraps :mod:`importlib.resources` to provide a getter - function with an interpreter-lifetime scope. For typical packages - it simply passes through filesystem paths as :class:`~pathlib.Path` - objects. For zipped distributions, it will unpack the files into - a temporary directory that is cleaned up on interpreter exit. - - This loader accepts a fully-qualified module name or a module - object. - - Expected usage:: - - '''Data package - - .. autofunction:: load_data - - .. automethod:: load_data.readable - - .. automethod:: load_data.as_path - - .. automethod:: load_data.cached - ''' - - from templateflow._loader import Loader - - load_data = Loader(__package__) - - :class:`~Loader` objects implement the :func:`callable` interface - and generate a docstring, and are intended to be treated and documented - as functions. - - For greater flexibility and improved readability over the ``importlib.resources`` - interface, explicit methods are provided to access resources. - - +---------------+----------------+------------------+ - | On-filesystem | Lifetime | Method | - +---------------+----------------+------------------+ - | `True` | Interpreter | :meth:`cached` | - +---------------+----------------+------------------+ - | `True` | `with` context | :meth:`as_path` | - +---------------+----------------+------------------+ - | `False` | n/a | :meth:`readable` | - +---------------+----------------+------------------+ - - It is also possible to use ``Loader`` directly:: - - from templateflow._loader import Loader - - Loader(other_package).readable('data/resource.ext').read_text() - - with Loader(other_package).as_path('data') as pkgdata: - # Call function that requires full Path implementation - func(pkgdata) - - # contrast to - - from importlib_resources import files, as_file - - files(other_package).joinpath('data/resource.ext').read_text() - - with as_file(files(other_package) / 'data') as pkgdata: - func(pkgdata) - - .. automethod:: readable - - .. automethod:: as_path - - .. automethod:: cached - """ - - def __init__(self, anchor: str | ModuleType): - self._anchor = anchor - self.files = files(anchor) - self.exit_stack = ExitStack() - atexit.register(self.exit_stack.close) - # Allow class to have a different docstring from instances - self.__doc__ = self._doc - - @cached_property - def _doc(self): - """Construct docstring for instances - - Lists the public top-level paths inside the location, where - non-public means has a `.` or `_` prefix or is a 'tests' - directory. - """ - top_level = sorted( - os.path.relpath(p, self.files) + '/'[: p.is_dir()] - for p in self.files.iterdir() - if p.name[0] not in ('.', '_') and p.name != 'tests' - ) - doclines = [ - f'Load package files relative to ``{self._anchor}``.', - '', - 'This package contains the following (top-level) files/directories:', - '', - *(f'* ``{path}``' for path in top_level), - ] - - return '\n'.join(doclines) - - def readable(self, *segments) -> Traversable: - """Provide read access to a resource through a Path-like interface. - - This file may or may not exist on the filesystem, and may be - efficiently used for read operations, including directory traversal. - - This result is not cached or copied to the filesystem in cases where - that would be necessary. - """ - return self.files.joinpath(*segments) - - def as_path(self, *segments) -> AbstractContextManager[Path]: - """Ensure data is available as a :class:`~pathlib.Path`. - - This method generates a context manager that yields a Path when - entered. - - This result is not cached, and any temporary files that are created - are deleted when the context is exited. - """ - return as_file(self.files.joinpath(*segments)) - - @cache # noqa: B019 - def cached(self, *segments) -> Path: - """Ensure data is available as a :class:`~pathlib.Path`. - - Any temporary files that are created remain available throughout - the duration of the program, and are deleted when Python exits. - - Results are cached so that multiple calls do not unpack the same - data multiple times, but the cache is sensitive to the specific - argument(s) passed. - """ - return self.exit_stack.enter_context(as_file(self.files.joinpath(*segments))) - - __call__ = cached diff --git a/templateflow/api.py b/templateflow/api.py index 304fa5e9..a193e2e6 100644 --- a/templateflow/api.py +++ b/templateflow/api.py @@ -159,6 +159,9 @@ def get(template, raise_empty=False, **kwargs): if raise_empty and not out_file: raise Exception('No results found') + # Truncate possible S3 error files from previous attempts + _truncate_s3_errors(out_file) + # Try DataLad first dl_missing = [p for p in out_file if not p.is_file()] if TF_USE_DATALAD and dl_missing: @@ -320,6 +323,8 @@ def _s3_get(filepath): print(f'Downloading {url}', file=stderr) # Streaming, so we can iterate over the response. r = requests.get(url, stream=True, timeout=TF_GET_TIMEOUT) + if r.status_code != 200: + raise RuntimeError(f'Failed to download {url} with status code {r.status_code}') # Total size in bytes. total_size = int(r.headers.get('content-length', 0)) @@ -393,3 +398,20 @@ def _normalize_ext(value): if isinstance(value, str): return f'{"" if value.startswith(".") else "."}{value}' return [_normalize_ext(v) for v in value] + + +def _truncate_s3_errors(filepaths): + """ + Truncate XML error bodies saved by previous versions of TemplateFlow. + + Parameters + ---------- + filepaths : list of Path + List of file paths to check and truncate if necessary. + """ + for filepath in filepaths: + if filepath.is_file(follow_symlinks=False) and 0 < filepath.stat().st_size < 1024: + with open(filepath, 'rb') as f: + content = f.read(100) + if content.startswith(b'' in content: + filepath.write_bytes(b'') # Truncate file to zero bytes diff --git a/templateflow/cli.py b/templateflow/cli.py index 59453ced..6df312d6 100644 --- a/templateflow/cli.py +++ b/templateflow/cli.py @@ -31,7 +31,7 @@ from click.decorators import FC, Option, _param_memo from templateflow import __package__, api -from templateflow._loader import Loader as _Loader +from acres import Loader as _Loader from templateflow.conf import TF_AUTOUPDATE, TF_HOME, TF_USE_DATALAD load_data = _Loader(__package__) @@ -58,7 +58,7 @@ def _nulls(s): def entity_opts(): """Attaches all entities as options to the command.""" - entities = json.loads(Path(load_data('conf/config.json')).read_text())['entities'] + entities = json.loads(load_data('conf/config.json').read_text())['entities'] args = [ (f'--{e["name"]}', *ENTITY_SHORTHANDS.get(e['name'], ())) diff --git a/templateflow/conf/__init__.py b/templateflow/conf/__init__.py index 85940474..ab045240 100644 --- a/templateflow/conf/__init__.py +++ b/templateflow/conf/__init__.py @@ -7,9 +7,9 @@ from pathlib import Path from warnings import warn -from .._loader import Loader +from acres import Loader -load_data = Loader(__package__) +load_data = Loader(__spec__.name) def _env_to_bool(envvar: str, default: bool) -> bool: diff --git a/templateflow/tests/data/__init__.py b/templateflow/tests/data/__init__.py new file mode 100644 index 00000000..461c9781 --- /dev/null +++ b/templateflow/tests/data/__init__.py @@ -0,0 +1,3 @@ +from acres import Loader + +load_data = Loader(__spec__.name) diff --git a/templateflow/tests/data/error_response.xml b/templateflow/tests/data/error_response.xml new file mode 100644 index 00000000..d2ba1905 --- /dev/null +++ b/templateflow/tests/data/error_response.xml @@ -0,0 +1,2 @@ + +InvalidArgumentInvalid version id specifiedversionIdtestBKT12MP069SFQGH3DIljS3MUsLCEa27wSyqAxsZZE3MhqEWYf3lRbH2Rl19VV0pGe/61Hh3MzSBeS45VltnZDzliHaTMxjnGPvKOOk+SY/it3Ond \ No newline at end of file diff --git a/templateflow/tests/test_s3.py b/templateflow/tests/test_s3.py index e8984438..8c904e23 100644 --- a/templateflow/tests/test_s3.py +++ b/templateflow/tests/test_s3.py @@ -25,10 +25,14 @@ from importlib import reload from pathlib import Path +import pytest import requests +from templateflow import api as tf from templateflow import conf as tfc +from .data import load_data + def test_get_skel_file(tmp_path, monkeypatch): """Exercise the skeleton file generation.""" @@ -87,3 +91,61 @@ def test_update_s3(tmp_path, monkeypatch): for p in (newhome / 'tpl-MNI152NLin6Sym').glob('*.nii.gz'): p.unlink() assert tfc._s3.update(newhome, local=False, overwrite=False) + + +def mock_get(*args, **kwargs): + class MockResponse: + status_code = 400 + + return MockResponse() + + +def test_s3_400_error(monkeypatch): + """Simulate a 400 error when fetching the skeleton file.""" + + reload(tfc) + reload(tf) + + monkeypatch.setattr(requests, 'get', mock_get) + with pytest.raises(RuntimeError, match=r'Failed to download .* code 400'): + tf._s3_get( + Path(tfc.TF_LAYOUT.root) + / 'tpl-MNI152NLin2009cAsym/tpl-MNI152NLin2009cAsym_res-02_T1w.nii.gz' + ) + + +def test_bad_skeleton(tmp_path, monkeypatch): + newhome = (tmp_path / 's3-update').resolve() + monkeypatch.setattr(tfc, 'TF_USE_DATALAD', False) + monkeypatch.setattr(tfc, 'TF_HOME', newhome) + monkeypatch.setattr(tfc, 'TF_LAYOUT', None) + + tfc._init_cache() + tfc.init_layout() + + assert tfc.TF_LAYOUT is not None + assert tfc.TF_LAYOUT.root == str(newhome) + + # Instead of reloading + monkeypatch.setattr(tf, 'TF_LAYOUT', tfc.TF_LAYOUT) + + paths = tf.ls('MNI152NLin2009cAsym', resolution='02', suffix='T1w', desc=None) + assert paths + path = Path(paths[0]) + assert path.read_bytes() == b'' + + error_file = load_data.readable('error_response.xml') + path.write_bytes(error_file.read_bytes()) + + # Test directly before testing through API paths + tf._truncate_s3_errors(paths) + assert path.read_bytes() == b'' + + path.write_bytes(error_file.read_bytes()) + + monkeypatch.setattr(requests, 'get', mock_get) + with pytest.raises(RuntimeError): + tf.get('MNI152NLin2009cAsym', resolution='02', suffix='T1w', desc=None) + + # Running get clears bad files before attempting to download + assert path.read_bytes() == b''