From 6ac4c26ab9497a92d0863718f2291d3797e7ce55 Mon Sep 17 00:00:00 2001 From: "Christopher J. Markiewicz" Date: Tue, 7 Oct 2025 12:50:12 -0400 Subject: [PATCH 01/20] rf: Factor templateflow.api into classes --- templateflow/cli.py | 2 +- templateflow/client.py | 380 +++++++++++++++++++++++++++++++++++++ templateflow/conf/cache.py | 137 +++++++++++++ templateflow/conf/env.py | 37 ++++ 4 files changed, 555 insertions(+), 1 deletion(-) create mode 100644 templateflow/client.py create mode 100644 templateflow/conf/cache.py create mode 100644 templateflow/conf/env.py diff --git a/templateflow/cli.py b/templateflow/cli.py index 6df312d6..f1a2c8a1 100644 --- a/templateflow/cli.py +++ b/templateflow/cli.py @@ -28,10 +28,10 @@ from pathlib import Path import click +from acres import Loader as _Loader from click.decorators import FC, Option, _param_memo from templateflow import __package__, api -from acres import Loader as _Loader from templateflow.conf import TF_AUTOUPDATE, TF_HOME, TF_USE_DATALAD load_data = _Loader(__package__) diff --git a/templateflow/client.py b/templateflow/client.py new file mode 100644 index 00000000..ac7fa4c7 --- /dev/null +++ b/templateflow/client.py @@ -0,0 +1,380 @@ +# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- +# vi: set ft=python sts=4 ts=4 sw=4 et: +# +# Copyright 2024 The NiPreps Developers +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# We support and encourage derived works from this project, please read +# about our expectations at +# +# https://www.nipreps.org/community/licensing/ +# +"""TemplateFlow's Python Client.""" + +from __future__ import annotations + +import sys +from json import loads +from pathlib import Path + +from bids.layout import Query + +from .conf.cache import CacheConfig, TemplateFlowCache + + +class TemplateFlowClient: + def __init__(self, cache=None, config=None): + if cache is None: + if config is None: + config = CacheConfig() + cache = TemplateFlowCache(config) + self.cache = cache + + def __getattr__(self, name: str): + name = name.replace('ls_', 'get_') + try: + if name.startswith('get_') and name not in dir(self.cache.layout): + return getattr(self.cache.layout, name) + except AttributeError: + pass + msg = f"'{self.__class__.__name__}' object has no attribute '{name}'" + raise AttributeError(msg) from None + + def ls(self, template, **kwargs): + """ + List files pertaining to one or more templates. + + Parameters + ---------- + template : str + A template identifier (e.g., ``MNI152NLin2009cAsym``). + + Keyword Arguments + ----------------- + resolution: int or None + Index to an specific spatial resolution of the template. 
+ suffix : str or None + BIDS suffix + atlas : str or None + Name of a particular atlas + hemi : str or None + Hemisphere + space : str or None + Space template is mapped to + density : str or None + Surface density + desc : str or None + Description field + + Examples + -------- + >>> ls('MNI152Lin', resolution=1, suffix='T1w', desc=None) # doctest: +ELLIPSIS + [PosixPath('.../tpl-MNI152Lin/tpl-MNI152Lin_res-01_T1w.nii.gz')] + + >>> ls('MNI152Lin', resolution=2, suffix='T1w', desc=None) # doctest: +ELLIPSIS + [PosixPath('.../tpl-MNI152Lin/tpl-MNI152Lin_res-02_T1w.nii.gz')] + + >>> ls('MNI152Lin', suffix='T1w', desc=None) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + [PosixPath('.../tpl-MNI152Lin/tpl-MNI152Lin_res-01_T1w.nii.gz'), + PosixPath('.../tpl-MNI152Lin/tpl-MNI152Lin_res-02_T1w.nii.gz')] + + >>> ls('fsLR', space=None, hemi='L', + ... density='32k', suffix='sphere') # doctest: +ELLIPSIS + [PosixPath('.../tpl-fsLR_hemi-L_den-32k_sphere.surf.gii')] + + >>> ls('fsLR', space='madeup') + [] + + """ + # Normalize extensions to always have leading dot + if 'extension' in kwargs: + kwargs['extension'] = _normalize_ext(kwargs['extension']) + + return [ + Path(p) + for p in self.cache.layout.get( + template=Query.ANY if template is None else template, return_type='file', **kwargs + ) + ] + + def get(self, template, raise_empty=False, **kwargs): + """ + Pull files pertaining to one or more templates down. + + Parameters + ---------- + template : str + A template identifier (e.g., ``MNI152NLin2009cAsym``). + raise_empty : bool, optional + Raise exception if no files were matched + + Keyword Arguments + ----------------- + resolution: int or None + Index to an specific spatial resolution of the template. 
+ suffix : str or None + BIDS suffix + atlas : str or None + Name of a particular atlas + hemi : str or None + Hemisphere + space : str or None + Space template is mapped to + density : str or None + Surface density + desc : str or None + Description field + + Examples + -------- + >>> str(get('MNI152Lin', resolution=1, suffix='T1w', desc=None)) # doctest: +ELLIPSIS + '.../tpl-MNI152Lin/tpl-MNI152Lin_res-01_T1w.nii.gz' + + >>> str(get('MNI152Lin', resolution=2, suffix='T1w', desc=None)) # doctest: +ELLIPSIS + '.../tpl-MNI152Lin/tpl-MNI152Lin_res-02_T1w.nii.gz' + + >>> [str(p) for p in get( + ... 'MNI152Lin', suffix='T1w', desc=None)] # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + ['.../tpl-MNI152Lin/tpl-MNI152Lin_res-01_T1w.nii.gz', + '.../tpl-MNI152Lin/tpl-MNI152Lin_res-02_T1w.nii.gz'] + + >>> str(get('fsLR', space=None, hemi='L', + ... density='32k', suffix='sphere')) # doctest: +ELLIPSIS + '.../tpl-fsLR_hemi-L_den-32k_sphere.surf.gii' + + >>> get('fsLR', space='madeup') + [] + + >>> get('fsLR', raise_empty=True, space='madeup') # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + Exception: + ... 
+ + """ + # List files available + out_file = self.ls(template, **kwargs) + + if raise_empty and not out_file: + raise Exception('No results found') + + # Truncate possible S3 error files from previous attempts + _truncate_s3_errors(out_file) + + # Try DataLad first + dl_missing = [p for p in out_file if not p.is_file()] + if self.cache.config.use_datalad and dl_missing: + for filepath in dl_missing: + _datalad_get(self.cache.config, filepath) + dl_missing.remove(filepath) + + # Fall-back to S3 if some files are still missing + s3_missing = [p for p in out_file if p.is_file() and p.stat().st_size == 0] + for filepath in s3_missing + dl_missing: + _s3_get(self.cache.config, filepath) + + not_fetched = [str(p) for p in out_file if not p.is_file() or p.stat().st_size == 0] + + if not_fetched: + msg = 'Could not fetch template files: {}.'.format(', '.join(not_fetched)) + if dl_missing and not self.cache.config.use_datalad: + msg += f"""\ + The $TEMPLATEFLOW_HOME folder {self.cache.config.root} seems to contain an initiated DataLad \ + dataset, but the environment variable $TEMPLATEFLOW_USE_DATALAD is not \ + set or set to one of (false, off, 0). Please set $TEMPLATEFLOW_USE_DATALAD \ + on (possible values: true, on, 1).""" + + if s3_missing and self.cache.config.use_datalad: + msg += f"""\ + The $TEMPLATEFLOW_HOME folder {self.cache.layout.root} seems to contain an plain \ + dataset, but the environment variable $TEMPLATEFLOW_USE_DATALAD is \ + set to one of (true, on, 1). Please set $TEMPLATEFLOW_USE_DATALAD \ + off (possible values: false, off, 0).""" + + raise RuntimeError(msg) + + if len(out_file) == 1: + return out_file[0] + return out_file + + def get_metadata(self, template): + """ + Fetch one file from one template. + + Parameters + ---------- + template : str + A template identifier (e.g., ``MNI152NLin2009cAsym``). 
+ + Examples + -------- + >>> get_metadata('MNI152Lin')['Name'] + 'Linear ICBM Average Brain (ICBM152) Stereotaxic Registration Model' + + """ + tf_home = Path(self.cache.layout.root) + filepath = tf_home / (f'tpl-{template}') / 'template_description.json' + + # Ensure that template is installed and file is available + if not filepath.is_file(): + _datalad_get(filepath) + return loads(filepath.read_text()) + + def get_citations(self, template, bibtex=False): + """ + Fetch template citations + + Parameters + ---------- + template : :obj:`str` + A template identifier (e.g., ``MNI152NLin2009cAsym``). + bibtex : :obj:`bool`, optional + Generate citations in BibTeX format. + + """ + data = self.get_metadata(template) + refs = data.get('ReferencesAndLinks', []) + if isinstance(refs, dict): + refs = list(refs.values()) + + if not bibtex: + return refs + + return [ + _to_bibtex(ref, template, idx, self.cache.config.timeout).rstrip() + for idx, ref in enumerate(refs, 1) + ] + + +def _datalad_get(config: CacheConfig, filepath: Path): + if not filepath: + return + + from datalad import api + from datalad.support.exceptions import IncompleteResultsError + + try: + api.get(filepath, dataset=config.root) + except IncompleteResultsError as exc: + if exc.failed[0]['message'] == 'path not associated with any dataset': + api.install(path=config.root, source=config.origin, recursive=True) + api.get(filepath, dataset=config.root) + else: + raise + + +def _s3_get(config: CacheConfig, filepath: Path): + from sys import stderr + from urllib.parse import quote + + import requests + from tqdm import tqdm + + path = quote(filepath.relative_to(config.root).as_posix()) + url = f'{config.http_root}/{path}' + + print(f'Downloading {url}', file=stderr) + # Streaming, so we can iterate over the response. 
+ r = requests.get(url, stream=True, timeout=config.timeout) + if r.status_code != 200: + raise RuntimeError(f'Failed to download {url} with status code {r.status_code}') + + # Total size in bytes. + total_size = int(r.headers.get('content-length', 0)) + block_size = 1024 + wrote = 0 + if not filepath.is_file(): + filepath.unlink() + + with filepath.open('wb') as f: + with tqdm(total=total_size, unit='B', unit_scale=True) as t: + for data in r.iter_content(block_size): + wrote = wrote + len(data) + f.write(data) + t.update(len(data)) + + if total_size != 0 and wrote != total_size: + raise RuntimeError('ERROR, something went wrong') + + +def _to_bibtex(doi, template, idx, timeout): + if 'doi.org' not in doi: + return doi + + # Is a DOI URL + import requests + + response = requests.post( + doi, + headers={'Accept': 'application/x-bibtex; charset=utf-8'}, + timeout=timeout, + ) + if not response.ok: + print( + f'Failed to convert DOI <{doi}> to bibtex, returning URL.', + file=sys.stderr, + ) + return doi + + # doi.org may not honor requested charset, to safeguard force a bytestream with + # response.content, then decode into UTF-8. + bibtex = response.content.decode() + + # doi.org / crossref may still point to the no longer preferred proxy service + return bibtex.replace('http://dx.doi.org/', 'https://doi.org/') + + +def _normalize_ext(value): + """ + Normalize extensions to have a leading dot. 
+ + Examples + -------- + >>> _normalize_ext(".nii.gz") + '.nii.gz' + >>> _normalize_ext("nii.gz") + '.nii.gz' + >>> _normalize_ext(("nii", ".nii.gz")) + ['.nii', '.nii.gz'] + >>> _normalize_ext(("", ".nii.gz")) + ['', '.nii.gz'] + >>> _normalize_ext((None, ".nii.gz")) + [None, '.nii.gz'] + >>> _normalize_ext([]) + [] + + """ + + if not value: + return value + + if isinstance(value, str): + return f'{"" if value.startswith(".") else "."}{value}' + return [_normalize_ext(v) for v in value] + + +def _truncate_s3_errors(filepaths): + """ + Truncate XML error bodies saved by previous versions of TemplateFlow. + + Parameters + ---------- + filepaths : list of Path + List of file paths to check and truncate if necessary. + """ + for filepath in filepaths: + if filepath.is_file(follow_symlinks=False) and 0 < filepath.stat().st_size < 1024: + with open(filepath, 'rb') as f: + content = f.read(100) + if content.startswith(b'' in content: + filepath.write_bytes(b'') # Truncate file to zero bytes diff --git a/templateflow/conf/cache.py b/templateflow/conf/cache.py new file mode 100644 index 00000000..3e757147 --- /dev/null +++ b/templateflow/conf/cache.py @@ -0,0 +1,137 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from functools import cached_property +from pathlib import Path +from warnings import warn + +from .bids import Layout +from .env import env_to_bool, get_templateflow_home + + +@dataclass +class CacheConfig: + root: Path = field(default_factory=get_templateflow_home) + origin: str = field(default='https://github.com/templateflow/templateflow.git') + http_root: str = field(default='https://templateflow.s3.amazonaws.com') + use_datalad: bool = field(default_factory=env_to_bool('TEMPLATEFLOW_USE_DATALAD', False)) + autoupdate: bool = field(default_factory=env_to_bool('TEMPLATEFLOW_AUTOUPDATE', True)) + timeout: int = field(default=10) + + def __post_init__(self): + if self.use_datalad: + from importlib.util import find_spec + + 
self.use_datalad = find_spec('datalad') is not None + + +@dataclass +class S3Manager: + http_root: str + + def install(self, path: Path, overwrite: bool, timeout: int): + from ._s3 import update + + update(path, local=True, overwrite=overwrite, silent=True, timeout=timeout) + + def update(self, path: Path, local: bool, overwrite: bool, silent: bool, timeout: int) -> bool: + from ._s3 import update as _update_s3 + + return _update_s3(path, local=local, overwrite=overwrite, silent=silent, timeout=timeout) + + def wipe(self, path: Path): + from shutil import rmtree + + def _onerror(func, path, excinfo): + from pathlib import Path + + if Path(path).exists(): + print(f'Warning: could not delete <{path}>, please clear the cache manually.') + + rmtree(path, onerror=_onerror) + + +@dataclass +class DataladManager: + source: str + + def install(self, path: Path, overwrite: bool, timeout: int): + from datalad.api import install + + install(path=path, source=self.source, recursive=True) + + def update(self, path: Path, local: bool, overwrite: bool, silent: bool, timeout: int) -> bool: + from datalad.api import update + + print('Updating TEMPLATEFLOW_HOME using DataLad ...') + try: + update(dataset=path, recursive=True, merge=True) + except Exception as e: # noqa: BLE001 + warn( + f"Error updating TemplateFlow's home directory (using DataLad): {e}", + stacklevel=2, + ) + return False + return True + + def wipe(self, path: Path): + print('TemplateFlow is configured in DataLad mode, wipe() has no effect') + + +@dataclass +class TemplateFlowCache: + config: CacheConfig + precached: bool = field(init=False) + manager: DataladManager | S3Manager = field(init=False) + + def __post_init__(self): + self.manager = ( + DataladManager(self.config.origin) + if self.config.use_datalad + else S3Manager(self.config.http_root) + ) + # cache.cached checks live, precached stores state at init + self.precached = self.cached + + @property + def cached(self) -> bool: + return 
self.config.root.is_dir() and any(self.config.root.iterdir()) + + @cached_property + def layout(self) -> Layout: + import re + + from bids.layout.index import BIDSLayoutIndexer + + self.ensure() + return Layout( + self.config.root, + validate=False, + config='templateflow', + indexer=BIDSLayoutIndexer( + validate=False, + ignore=(re.compile(r'scripts/'), re.compile(r'/\.'), re.compile(r'^\.')), + ), + ) + + def ensure(self): + if not self.cached: + self.manager.install( + self.config.root, overwrite=self.config.autoupdate, timeout=self.config.timeout + ) + + def update(self, local: bool = False, overwrite: bool = True, silent: bool = False): + if self.manager.update( + self.config.root, + local=local, + overwrite=overwrite, + silent=silent, + timeout=self.config.timeout, + ): + self.__dict__.pop('layout', None) # Uncache property + return True + return False + + def wipe(self): + self.__dict__.pop('layout', None) # Uncache property + self.manager.wipe(self.config.root) diff --git a/templateflow/conf/env.py b/templateflow/conf/env.py new file mode 100644 index 00000000..d4bd6155 --- /dev/null +++ b/templateflow/conf/env.py @@ -0,0 +1,37 @@ +import os +from functools import partial +from pathlib import Path +from typing import Callable + +from platformdirs import user_cache_dir + + +def _env_to_bool(envvar: str, default: bool) -> bool: + """Check for environment variable switches and convert to booleans.""" + switches = { + 'on': {'true', 'on', '1', 'yes', 'y'}, + 'off': {'false', 'off', '0', 'no', 'n'}, + } + + val = os.getenv(envvar, default) + if isinstance(val, str): + if val.lower() in switches['on']: + return True + elif val.lower() in switches['off']: + return False + else: + # TODO: Create templateflow logger + print( + f'{envvar} is set to unknown value <{val}>. 
' + f'Falling back to default value <{default}>' + ) + return default + return bool(val) + + +def get_templateflow_home() -> Path: + return Path(os.getenv('TEMPLATEFLOW_HOME', user_cache_dir('templateflow'))).absolute() + + +def env_to_bool(envvar: str, default: bool) -> Callable[[], bool]: + return partial(_env_to_bool, envvar, default) From 6bf7af1480f3e09a0a4349ae3002c695eb182241 Mon Sep 17 00:00:00 2001 From: "Christopher J. Markiewicz" Date: Tue, 7 Oct 2025 16:21:51 -0400 Subject: [PATCH 02/20] chore: Add platformdirs to dependencies --- docs/environment.yml | 1 + pyproject.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/environment.yml b/docs/environment.yml index 5d0f69ff..89a46f85 100644 --- a/docs/environment.yml +++ b/docs/environment.yml @@ -202,6 +202,7 @@ dependencies: - nibabel==3.2.2 - nipreps-versions==1.0.3 - pandas==1.4.2 + - platformdirs - pybids==0.15.2 - sqlalchemy==1.3.24 - hatchling diff --git a/pyproject.toml b/pyproject.toml index 6d06522f..1f749bb3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ license = {file = "LICENSE"} requires-python = ">=3.9" dependencies = [ "acres >= 0.5.0", + "platformdirs >= 4.0", "pybids >= 0.15.2", "requests", "tqdm", From aa13e0eeacf7d56e47d9e46a7d9c85f72410f7bb Mon Sep 17 00:00:00 2001 From: "Christopher J. 
Markiewicz" Date: Wed, 8 Oct 2025 08:01:43 -0400 Subject: [PATCH 03/20] rf: Remove unnecessary global constants from _s3 --- templateflow/conf/_s3.py | 22 ++++++++------- templateflow/conf/bids.py | 3 ++- templateflow/tests/test_s3.py | 51 +++++++++++++++++++---------------- 3 files changed, 42 insertions(+), 34 deletions(-) diff --git a/templateflow/conf/_s3.py b/templateflow/conf/_s3.py index eda04909..9be194b6 100644 --- a/templateflow/conf/_s3.py +++ b/templateflow/conf/_s3.py @@ -25,34 +25,35 @@ from pathlib import Path from tempfile import mkstemp -from templateflow.conf import TF_GET_TIMEOUT, load_data +from acres import Loader + +load_data = Loader(__spec__.parent) TF_SKEL_URL = ( 'https://raw.githubusercontent.com/templateflow/python-client/' '{release}/templateflow/conf/templateflow-skel.{ext}' ).format -TF_SKEL_PATH = load_data('templateflow-skel.zip') -TF_SKEL_MD5 = load_data.readable('templateflow-skel.md5').read_text() -def update(dest, local=True, overwrite=True, silent=False): +def update(dest, local=True, overwrite=True, silent=False, *, timeout: int): """Update an S3-backed TEMPLATEFLOW_HOME repository.""" - skel_file = Path((_get_skeleton_file() if not local else None) or TF_SKEL_PATH) + skel_zip = load_data('templateflow-skel.zip') + skel_file = Path((_get_skeleton_file(timeout) if not local else None) or skel_zip) retval = _update_skeleton(skel_file, dest, overwrite=overwrite, silent=silent) - if skel_file != TF_SKEL_PATH: + if skel_file != skel_zip: skel_file.unlink() return retval -def _get_skeleton_file(): +def _get_skeleton_file(timeout: int): import requests try: r = requests.get( TF_SKEL_URL(release='master', ext='md5'), allow_redirects=True, - timeout=TF_GET_TIMEOUT, + timeout=timeout, ) except requests.exceptions.ConnectionError: return @@ -60,11 +61,12 @@ def _get_skeleton_file(): if not r.ok: return - if r.content.decode().split()[0] != TF_SKEL_MD5: + md5 = load_data.readable('templateflow-skel.md5').read_bytes() + if r.content != 
md5: r = requests.get( TF_SKEL_URL(release='master', ext='zip'), allow_redirects=True, - timeout=TF_GET_TIMEOUT, + timeout=timeout, ) if r.ok: from os import close diff --git a/templateflow/conf/bids.py b/templateflow/conf/bids.py index a143e4de..46108669 100644 --- a/templateflow/conf/bids.py +++ b/templateflow/conf/bids.py @@ -22,9 +22,10 @@ # """Extending pyBIDS for querying TemplateFlow.""" +from acres import Loader from bids.layout import BIDSLayout, add_config_paths -from templateflow.conf import load_data +load_data = Loader(__spec__.parent) add_config_paths(templateflow=load_data('config.json')) diff --git a/templateflow/tests/test_s3.py b/templateflow/tests/test_s3.py index 8c904e23..66d54669 100644 --- a/templateflow/tests/test_s3.py +++ b/templateflow/tests/test_s3.py @@ -44,31 +44,36 @@ def test_get_skel_file(tmp_path, monkeypatch): # First execution, the S3 stub is created (or datalad install) reload(tfc) - local_md5 = tfc._s3.TF_SKEL_MD5 - monkeypatch.setattr(tfc._s3, 'TF_SKEL_MD5', 'invent') - new_skel = tfc._s3._get_skeleton_file() + md5content = b'anything' + + def mock_get(*args, **kwargs): + class MockResponse: + status_code = 200 + ok = True + content = md5content + + return MockResponse() + + monkeypatch.setattr(requests, 'get', mock_get) + + # Mismatching the local MD5 causes an update + new_skel = tfc._s3._get_skeleton_file(timeout=10) assert new_skel is not None assert Path(new_skel).exists() - assert Path(new_skel).stat().st_size > 0 + assert Path(new_skel).read_bytes() == b'anything' - latest_md5 = ( - requests.get( - tfc._s3.TF_SKEL_URL(release='master', ext='md5', allow_redirects=True), timeout=10 - ) - .content.decode() - .split()[0] - ) - monkeypatch.setattr(tfc._s3, 'TF_SKEL_MD5', latest_md5) - assert tfc._s3._get_skeleton_file() is None + md5content = tfc._s3.load_data.readable('templateflow-skel.md5').read_bytes() + # Matching the local MD5 skips the update + assert tfc._s3._get_skeleton_file(timeout=10) is None - 
monkeypatch.setattr(tfc._s3, 'TF_SKEL_MD5', local_md5) + # Bad URL fails to update monkeypatch.setattr(tfc._s3, 'TF_SKEL_URL', 'http://weird/{release}/{ext}'.format) - assert tfc._s3._get_skeleton_file() is None + assert tfc._s3._get_skeleton_file(timeout=10) is None monkeypatch.setattr( tfc._s3, 'TF_SKEL_URL', tfc._s3.TF_SKEL_URL(release='{release}', ext='{ext}z').format ) - assert tfc._s3._get_skeleton_file() is None + assert tfc._s3._get_skeleton_file(timeout=10) is None def test_update_s3(tmp_path, monkeypatch): @@ -78,19 +83,19 @@ def test_update_s3(tmp_path, monkeypatch): monkeypatch.setenv('TEMPLATEFLOW_USE_DATALAD', 'off') monkeypatch.setenv('TEMPLATEFLOW_HOME', str(newhome)) - assert tfc._s3.update(newhome) - assert not tfc._s3.update(newhome, overwrite=False) + assert tfc._s3.update(newhome, timeout=10) + assert not tfc._s3.update(newhome, overwrite=False, timeout=10) for p in (newhome / 'tpl-MNI152NLin6Sym').glob('*.nii.gz'): p.unlink() - assert tfc._s3.update(newhome, overwrite=False) + assert tfc._s3.update(newhome, overwrite=False, timeout=10) # This should cover the remote zip file fetching - monkeypatch.setattr(tfc._s3, 'TF_SKEL_MD5', 'invent') - assert tfc._s3.update(newhome, local=False) - assert not tfc._s3.update(newhome, local=False, overwrite=False) + # monkeypatch.setattr(tfc._s3, 'TF_SKEL_MD5', 'invent') + assert tfc._s3.update(newhome, local=False, timeout=10) + assert not tfc._s3.update(newhome, local=False, overwrite=False, timeout=10) for p in (newhome / 'tpl-MNI152NLin6Sym').glob('*.nii.gz'): p.unlink() - assert tfc._s3.update(newhome, local=False, overwrite=False) + assert tfc._s3.update(newhome, local=False, overwrite=False, timeout=10) def mock_get(*args, **kwargs): From c8c8ad4086595909efbdc5ce03b2e75a321b7ab4 Mon Sep 17 00:00:00 2001 From: "Christopher J. 
Markiewicz" Date: Wed, 8 Oct 2025 08:02:45 -0400 Subject: [PATCH 04/20] rf: Use client throughout, make tf.api one auto-instance --- templateflow/api.py | 403 ++-------------------------------- templateflow/cli.py | 35 +-- templateflow/conf/__init__.py | 203 ++++------------- templateflow/tests/test_s3.py | 31 ++- 4 files changed, 94 insertions(+), 578 deletions(-) diff --git a/templateflow/api.py b/templateflow/api.py index a193e2e6..15829591 100644 --- a/templateflow/api.py +++ b/templateflow/api.py @@ -22,396 +22,23 @@ # """TemplateFlow's Python Client.""" -import sys -from json import loads -from pathlib import Path +__all__ = [ + 'get', + 'ls', + 'get_metadata', + 'get_citations', +] -from bids.layout import Query +from .client import TemplateFlowClient +from .conf import _cache -from templateflow.conf import ( - TF_GET_TIMEOUT, - TF_LAYOUT, - TF_S3_ROOT, - TF_USE_DATALAD, - requires_layout, -) +_client = TemplateFlowClient(cache=_cache) -_layout_dir = tuple(item for item in dir(TF_LAYOUT) if item.startswith('get_')) - - -@requires_layout -def ls(template, **kwargs): - """ - List files pertaining to one or more templates. - - Parameters - ---------- - template : str - A template identifier (e.g., ``MNI152NLin2009cAsym``). - - Keyword Arguments - ----------------- - resolution: int or None - Index to an specific spatial resolution of the template. 
- suffix : str or None - BIDS suffix - atlas : str or None - Name of a particular atlas - hemi : str or None - Hemisphere - space : str or None - Space template is mapped to - density : str or None - Surface density - desc : str or None - Description field - - Examples - -------- - >>> ls('MNI152Lin', resolution=1, suffix='T1w', desc=None) # doctest: +ELLIPSIS - [PosixPath('.../tpl-MNI152Lin/tpl-MNI152Lin_res-01_T1w.nii.gz')] - - >>> ls('MNI152Lin', resolution=2, suffix='T1w', desc=None) # doctest: +ELLIPSIS - [PosixPath('.../tpl-MNI152Lin/tpl-MNI152Lin_res-02_T1w.nii.gz')] - - >>> ls('MNI152Lin', suffix='T1w', desc=None) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE - [PosixPath('.../tpl-MNI152Lin/tpl-MNI152Lin_res-01_T1w.nii.gz'), - PosixPath('.../tpl-MNI152Lin/tpl-MNI152Lin_res-02_T1w.nii.gz')] - - >>> ls('fsLR', space=None, hemi='L', - ... density='32k', suffix='sphere') # doctest: +ELLIPSIS - [PosixPath('.../tpl-fsLR_hemi-L_den-32k_sphere.surf.gii')] - - >>> ls('fsLR', space='madeup') - [] - - """ - # Normalize extensions to always have leading dot - if 'extension' in kwargs: - kwargs['extension'] = _normalize_ext(kwargs['extension']) - - return [ - Path(p) - for p in TF_LAYOUT.get( - template=Query.ANY if template is None else template, return_type='file', **kwargs - ) - ] - - -@requires_layout -def get(template, raise_empty=False, **kwargs): - """ - Pull files pertaining to one or more templates down. - - Parameters - ---------- - template : str - A template identifier (e.g., ``MNI152NLin2009cAsym``). - raise_empty : bool, optional - Raise exception if no files were matched - - Keyword Arguments - ----------------- - resolution: int or None - Index to an specific spatial resolution of the template. 
- suffix : str or None - BIDS suffix - atlas : str or None - Name of a particular atlas - hemi : str or None - Hemisphere - space : str or None - Space template is mapped to - density : str or None - Surface density - desc : str or None - Description field - - Examples - -------- - >>> str(get('MNI152Lin', resolution=1, suffix='T1w', desc=None)) # doctest: +ELLIPSIS - '.../tpl-MNI152Lin/tpl-MNI152Lin_res-01_T1w.nii.gz' - - >>> str(get('MNI152Lin', resolution=2, suffix='T1w', desc=None)) # doctest: +ELLIPSIS - '.../tpl-MNI152Lin/tpl-MNI152Lin_res-02_T1w.nii.gz' - - >>> [str(p) for p in get( - ... 'MNI152Lin', suffix='T1w', desc=None)] # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE - ['.../tpl-MNI152Lin/tpl-MNI152Lin_res-01_T1w.nii.gz', - '.../tpl-MNI152Lin/tpl-MNI152Lin_res-02_T1w.nii.gz'] - - >>> str(get('fsLR', space=None, hemi='L', - ... density='32k', suffix='sphere')) # doctest: +ELLIPSIS - '.../tpl-fsLR_hemi-L_den-32k_sphere.surf.gii' - - >>> get('fsLR', space='madeup') - [] - - >>> get('fsLR', raise_empty=True, space='madeup') # doctest: +IGNORE_EXCEPTION_DETAIL - Traceback (most recent call last): - Exception: - ... 
- - """ - # List files available - out_file = ls(template, **kwargs) - - if raise_empty and not out_file: - raise Exception('No results found') - - # Truncate possible S3 error files from previous attempts - _truncate_s3_errors(out_file) - - # Try DataLad first - dl_missing = [p for p in out_file if not p.is_file()] - if TF_USE_DATALAD and dl_missing: - for filepath in dl_missing: - _datalad_get(filepath) - dl_missing.remove(filepath) - - # Fall-back to S3 if some files are still missing - s3_missing = [p for p in out_file if p.is_file() and p.stat().st_size == 0] - for filepath in s3_missing + dl_missing: - _s3_get(filepath) - - not_fetched = [str(p) for p in out_file if not p.is_file() or p.stat().st_size == 0] - - if not_fetched: - msg = 'Could not fetch template files: {}.'.format(', '.join(not_fetched)) - if dl_missing and not TF_USE_DATALAD: - msg += f"""\ -The $TEMPLATEFLOW_HOME folder {TF_LAYOUT.root} seems to contain an initiated DataLad \ -dataset, but the environment variable $TEMPLATEFLOW_USE_DATALAD is not \ -set or set to one of (false, off, 0). Please set $TEMPLATEFLOW_USE_DATALAD \ -on (possible values: true, on, 1).""" - - if s3_missing and TF_USE_DATALAD: - msg += f"""\ -The $TEMPLATEFLOW_HOME folder {TF_LAYOUT.root} seems to contain an plain \ -dataset, but the environment variable $TEMPLATEFLOW_USE_DATALAD is \ -set to one of (true, on, 1). Please set $TEMPLATEFLOW_USE_DATALAD \ -off (possible values: false, off, 0).""" - - raise RuntimeError(msg) - - if len(out_file) == 1: - return out_file[0] - return out_file - - -@requires_layout -def templates(**kwargs): - """ - Return a list of available templates. - - Keyword Arguments - ----------------- - resolution: int or None - Index to an specific spatial resolution of the template. 
- suffix : str or None - BIDS suffix - atlas : str - Name of a particular atlas - desc : str - Description field - - Examples - -------- - >>> base = ['MNI152Lin', 'MNI152NLin2009cAsym', 'NKI', 'OASIS30ANTs'] - >>> tpls = templates() - >>> all([t in tpls for t in base]) - True - - >>> sorted(set(base).intersection(templates(suffix='PD'))) - ['MNI152Lin', 'MNI152NLin2009cAsym'] - - """ - return sorted(TF_LAYOUT.get_templates(**kwargs)) - - -@requires_layout -def get_metadata(template): - """ - Fetch one file from one template. - - Parameters - ---------- - template : str - A template identifier (e.g., ``MNI152NLin2009cAsym``). - - Examples - -------- - >>> get_metadata('MNI152Lin')['Name'] - 'Linear ICBM Average Brain (ICBM152) Stereotaxic Registration Model' - - """ - tf_home = Path(TF_LAYOUT.root) - filepath = tf_home / (f'tpl-{template}') / 'template_description.json' - - # Ensure that template is installed and file is available - if not filepath.is_file(): - _datalad_get(filepath) - return loads(filepath.read_text()) - - -def get_citations(template, bibtex=False): - """ - Fetch template citations - - Parameters - ---------- - template : :obj:`str` - A template identifier (e.g., ``MNI152NLin2009cAsym``). - bibtex : :obj:`bool`, optional - Generate citations in BibTeX format. 
- - """ - data = get_metadata(template) - refs = data.get('ReferencesAndLinks', []) - if isinstance(refs, dict): - refs = list(refs.values()) - - if not bibtex: - return refs - - return [_to_bibtex(ref, template, idx).rstrip() for idx, ref in enumerate(refs, 1)] - - -@requires_layout -def __getattr__(key: str): - key = key.replace('ls_', 'get_') - if ( - key.startswith('get_') - and key not in ('get_metadata', 'get_citations') - and key not in _layout_dir - ): - return TF_LAYOUT.__getattr__(key) - - # Spit out default message if we get this far - raise AttributeError(f"module '{__name__}' has no attribute '{key}'") - - -def _datalad_get(filepath): - if not filepath: - return - - from datalad import api - from datalad.support.exceptions import IncompleteResultsError +def __getattr__(name: str): + if name == 'TF_LAYOUT': + return _cache.layout try: - api.get(filepath, dataset=str(TF_LAYOUT.root)) - except IncompleteResultsError as exc: - if exc.failed[0]['message'] == 'path not associated with any dataset': - from .conf import TF_GITHUB_SOURCE - - api.install(path=TF_LAYOUT.root, source=TF_GITHUB_SOURCE, recursive=True) - api.get(filepath, dataset=str(TF_LAYOUT.root)) - else: - raise - - -def _s3_get(filepath): - from sys import stderr - from urllib.parse import quote - - import requests - from tqdm import tqdm - - path = quote(filepath.relative_to(TF_LAYOUT.root).as_posix()) - url = f'{TF_S3_ROOT}/{path}' - - print(f'Downloading {url}', file=stderr) - # Streaming, so we can iterate over the response. - r = requests.get(url, stream=True, timeout=TF_GET_TIMEOUT) - if r.status_code != 200: - raise RuntimeError(f'Failed to download {url} with status code {r.status_code}') - - # Total size in bytes. 
- total_size = int(r.headers.get('content-length', 0)) - block_size = 1024 - wrote = 0 - if not filepath.is_file(): - filepath.unlink() - - with filepath.open('wb') as f: - with tqdm(total=total_size, unit='B', unit_scale=True) as t: - for data in r.iter_content(block_size): - wrote = wrote + len(data) - f.write(data) - t.update(len(data)) - - if total_size != 0 and wrote != total_size: - raise RuntimeError('ERROR, something went wrong') - - -def _to_bibtex(doi, template, idx): - if 'doi.org' not in doi: - return doi - - # Is a DOI URL - import requests - - response = requests.post( - doi, - headers={'Accept': 'application/x-bibtex; charset=utf-8'}, - timeout=TF_GET_TIMEOUT, - ) - if not response.ok: - print( - f'Failed to convert DOI <{doi}> to bibtex, returning URL.', - file=sys.stderr, - ) - return doi - - # doi.org may not honor requested charset, to safeguard force a bytestream with - # response.content, then decode into UTF-8. - bibtex = response.content.decode() - - # doi.org / crossref may still point to the no longer preferred proxy service - return bibtex.replace('http://dx.doi.org/', 'https://doi.org/') - - -def _normalize_ext(value): - """ - Normalize extensions to have a leading dot. - - Examples - -------- - >>> _normalize_ext(".nii.gz") - '.nii.gz' - >>> _normalize_ext("nii.gz") - '.nii.gz' - >>> _normalize_ext(("nii", ".nii.gz")) - ['.nii', '.nii.gz'] - >>> _normalize_ext(("", ".nii.gz")) - ['', '.nii.gz'] - >>> _normalize_ext((None, ".nii.gz")) - [None, '.nii.gz'] - >>> _normalize_ext([]) - [] - - """ - - if not value: - return value - - if isinstance(value, str): - return f'{"" if value.startswith(".") else "."}{value}' - return [_normalize_ext(v) for v in value] - - -def _truncate_s3_errors(filepaths): - """ - Truncate XML error bodies saved by previous versions of TemplateFlow. - - Parameters - ---------- - filepaths : list of Path - List of file paths to check and truncate if necessary. 
- """ - for filepath in filepaths: - if filepath.is_file(follow_symlinks=False) and 0 < filepath.stat().st_size < 1024: - with open(filepath, 'rb') as f: - content = f.read(100) - if content.startswith(b'' in content: - filepath.write_bytes(b'') # Truncate file to zero bytes + return getattr(_client, name) + except AttributeError: + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") from None diff --git a/templateflow/cli.py b/templateflow/cli.py index f1a2c8a1..08b1da23 100644 --- a/templateflow/cli.py +++ b/templateflow/cli.py @@ -31,10 +31,9 @@ from acres import Loader as _Loader from click.decorators import FC, Option, _param_memo -from templateflow import __package__, api -from templateflow.conf import TF_AUTOUPDATE, TF_HOME, TF_USE_DATALAD +from templateflow.client import TemplateFlowClient -load_data = _Loader(__package__) +load_data = _Loader(__spec__.parent) ENTITY_SHORTHANDS = { # 'template': ('--tpl', '-t'), @@ -48,7 +47,13 @@ 'segmentation': ('--seg',), } ENTITY_EXCLUDE = {'template', 'description'} -TEMPLATE_LIST = api.get_templates() + +CLIENT = TemplateFlowClient() +CACHE = CLIENT.cache +CONFIG = CACHE.config +CACHE.ensure() + +TEMPLATE_LIST = [d.name[4:] for d in CONFIG.root.iterdir() if d.name.startswith('tpl-')] def _nulls(s): @@ -86,30 +91,30 @@ def config(): """Print-out configuration.""" click.echo(f"""Current TemplateFlow settings: - TEMPLATEFLOW_HOME={TF_HOME} - TEMPLATEFLOW_USE_DATALAD={'on' if TF_USE_DATALAD else 'off'} - TEMPLATEFLOW_AUTOUPDATE={'on' if TF_AUTOUPDATE else 'off'} + TEMPLATEFLOW_HOME={CONFIG.root} + TEMPLATEFLOW_USE_DATALAD={'on' if CONFIG.use_datalad else 'off'} + TEMPLATEFLOW_AUTOUPDATE={'on' if CONFIG.autoupdate else 'off'} """) @main.command() def wipe(): """Wipe out a local S3 (direct-download) TemplateFlow Archive.""" - click.echo(f'This will wipe out all data downloaded into {TF_HOME}.') + click.echo(f'This will wipe out all data downloaded into {CONFIG.root}.') if click.confirm('Do you want to 
continue?'): value = click.prompt( - f'Please write the path of your local archive ({TF_HOME})', + f'Please write the path of your local archive ({CONFIG.root})', default='(abort)', show_default=False, ) - if value.strip() == str(TF_HOME): + if value.strip() == str(CONFIG.root): from templateflow.conf import wipe wipe() - click.echo(f'{TF_HOME} was wiped out.') + click.echo(f'{CONFIG.root} was wiped out.') return - click.echo(f'Aborted! {TF_HOME} WAS NOT wiped out.') + click.echo(f'Aborted! {CONFIG.root} WAS NOT wiped out.') @main.command() @@ -120,7 +125,7 @@ def update(local, overwrite): from templateflow.conf import update as _update click.echo( - f'Successfully updated local TemplateFlow Archive: {TF_HOME}.' + f'Successfully updated local TemplateFlow Archive: {CONFIG.root}.' if _update(local=local, overwrite=overwrite) else 'TemplateFlow Archive not updated.' ) @@ -132,7 +137,7 @@ def update(local, overwrite): def ls(template, **kwargs): """List the assets corresponding to template and optional filters.""" entities = {k: _nulls(v) for k, v in kwargs.items() if v != ''} - click.echo('\n'.join(f'{match}' for match in api.ls(template, **entities))) + click.echo('\n'.join(f'{match}' for match in CLIENT.ls(template, **entities))) @main.command() @@ -141,7 +146,7 @@ def ls(template, **kwargs): def get(template, **kwargs): """Fetch the assets corresponding to template and optional filters.""" entities = {k: _nulls(v) for k, v in kwargs.items() if v != ''} - paths = api.get(template, **entities) + paths = CLIENT.get(template, **entities) filenames = [str(paths)] if isinstance(paths, Path) else [str(file) for file in paths] click.echo('\n'.join(filenames)) diff --git a/templateflow/conf/__init__.py b/templateflow/conf/__init__.py index ab045240..488afaf1 100644 --- a/templateflow/conf/__init__.py +++ b/templateflow/conf/__init__.py @@ -1,81 +1,52 @@ """Configuration and settings.""" -import re -from contextlib import suppress from functools import wraps -from os import 
getenv -from pathlib import Path from warnings import warn from acres import Loader -load_data = Loader(__spec__.name) +from .cache import CacheConfig, TemplateFlowCache +from .env import _env_to_bool +load_data = Loader(__spec__.name) -def _env_to_bool(envvar: str, default: bool) -> bool: - """Check for environment variable switches and convert to booleans.""" - switches = { - 'on': {'true', 'on', '1', 'yes', 'y'}, - 'off': {'false', 'off', '0', 'no', 'n'}, - } - - val = getenv(envvar, default) - if isinstance(val, str): - if val.lower() in switches['on']: - return True - elif val.lower() in switches['off']: - return False - else: - # TODO: Create templateflow logger - print( - f'{envvar} is set to unknown value <{val}>. ' - f'Falling back to default value <{default}>' - ) - return default - return bool(val) - - -TF_DEFAULT_HOME = Path.home() / '.cache' / 'templateflow' -TF_HOME = Path(getenv('TEMPLATEFLOW_HOME', str(TF_DEFAULT_HOME))).absolute() -TF_GITHUB_SOURCE = 'https://github.com/templateflow/templateflow.git' -TF_S3_ROOT = 'https://templateflow.s3.amazonaws.com' -TF_USE_DATALAD = _env_to_bool('TEMPLATEFLOW_USE_DATALAD', False) -TF_AUTOUPDATE = _env_to_bool('TEMPLATEFLOW_AUTOUPDATE', True) -TF_CACHED = True -TF_GET_TIMEOUT = 10 - -if TF_USE_DATALAD: - try: - from datalad.api import install - except ImportError: - warn('DataLad is not installed ➔ disabled.', stacklevel=2) - TF_USE_DATALAD = False - -if not TF_USE_DATALAD: - from templateflow.conf._s3 import update as _update_s3 - - -def _init_cache(): - global TF_CACHED - - if not TF_HOME.exists() or not list(TF_HOME.iterdir()): - TF_CACHED = False - warn( - f"""\ -TemplateFlow: repository not found at <{TF_HOME}>. Populating a new TemplateFlow stub. 
+_cache = TemplateFlowCache(config=CacheConfig()) + + +def __getattr__(name: str): + if name == 'TF_HOME': + return _cache.config.root + elif name == 'TF_GITHUB_SOURCE': + return _cache.config.origin + elif name == 'TF_S3_ROOT': + return _cache.config.http_root + elif name == 'TF_USE_DATALAD': + return _cache.config.use_datalad + elif name == 'TF_AUTOUPDATE': + return _cache.config.autoupdate + elif name == 'TF_CACHED': + return _cache.precached + elif name == 'TF_GET_TIMEOUT': + return _cache.config.timeout + elif name == 'TF_LAYOUT': + return _cache.layout + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") + + +if _env_to_bool('TEMPLATEFLOW_USE_DATALAD', False) and not _cache.config.use_datalad: + warn('DataLad is not installed ➔ disabled.', stacklevel=2) + + +if not _cache.precached: + warn( + f"""\ +TemplateFlow: repository not found at <{_cache.config.root}>. Populating a new TemplateFlow stub. If the path reported above is not the desired location for TemplateFlow, \ please set the TEMPLATEFLOW_HOME environment variable.""", - ResourceWarning, - stacklevel=2, - ) - if TF_USE_DATALAD: - TF_HOME.parent.mkdir(exist_ok=True, parents=True) - install(path=str(TF_HOME), source=TF_GITHUB_SOURCE, recursive=True) - else: - _update_s3(TF_HOME, local=True, overwrite=TF_AUTOUPDATE, silent=True) - - -_init_cache() + ResourceWarning, + stacklevel=2, + ) + _cache.ensure() def requires_layout(func): @@ -83,9 +54,7 @@ def requires_layout(func): @wraps(func) def wrapper(*args, **kwargs): - from templateflow.conf import TF_LAYOUT - - if TF_LAYOUT is None: + if _cache.layout is None: from bids import __version__ raise RuntimeError(f'A layout with PyBIDS <{__version__}> could not be initiated') @@ -94,101 +63,17 @@ def wrapper(*args, **kwargs): return wrapper -def update(local=False, overwrite=True, silent=False): - """Update an existing DataLad or S3 home.""" - if TF_USE_DATALAD: - success = _update_datalad() - else: - from ._s3 import update as _update_s3 - 
- success = _update_s3(TF_HOME, local=local, overwrite=overwrite, silent=silent) - - # update Layout only if necessary - if success and TF_LAYOUT is not None: - init_layout() - # ensure the api uses the updated layout - import importlib - - from .. import api - - importlib.reload(api) - return success - - -def wipe(): - """Clear the cache if functioning in S3 mode.""" - - if TF_USE_DATALAD: - print('TemplateFlow is configured in DataLad mode, wipe() has no effect') - return - - import importlib - from shutil import rmtree - - from templateflow import api - - def _onerror(func, path, excinfo): - from pathlib import Path - - if Path(path).exists(): - print(f'Warning: could not delete <{path}>, please clear the cache manually.') - - rmtree(TF_HOME, onerror=_onerror) - _init_cache() - - importlib.reload(api) +update = _cache.update +wipe = _cache.wipe def setup_home(force=False): """Initialize/update TF's home if necessary.""" - if not force and not TF_CACHED: + if not force and not _cache.precached: print( f"""\ -TemplateFlow was not cached (TEMPLATEFLOW_HOME={TF_HOME}), \ +TemplateFlow was not cached (TEMPLATEFLOW_HOME={_cache.config.root}), \ a fresh initialization was done.""" ) return False - return update(local=True, overwrite=False) - - -def _update_datalad(): - from datalad.api import update - - print('Updating TEMPLATEFLOW_HOME using DataLad ...') - try: - update(dataset=str(TF_HOME), recursive=True, merge=True) - except Exception as e: # noqa: BLE001 - warn( - f"Error updating TemplateFlow's home directory (using DataLad): {e}", - stacklevel=2, - ) - return False - return True - - -TF_LAYOUT = None - - -def init_layout(): - from bids.layout.index import BIDSLayoutIndexer - - from templateflow.conf.bids import Layout - - global TF_LAYOUT - TF_LAYOUT = Layout( - TF_HOME, - validate=False, - config='templateflow', - indexer=BIDSLayoutIndexer( - validate=False, - ignore=( - re.compile(r'scripts/'), - re.compile(r'/\.'), - re.compile(r'^\.'), - ), - ), - ) - - 
-with suppress(ImportError): - init_layout() + return _cache.update(local=True, overwrite=False) diff --git a/templateflow/tests/test_s3.py b/templateflow/tests/test_s3.py index 66d54669..0dac396a 100644 --- a/templateflow/tests/test_s3.py +++ b/templateflow/tests/test_s3.py @@ -28,6 +28,8 @@ import pytest import requests +import templateflow +import templateflow.conf._s3 from templateflow import api as tf from templateflow import conf as tfc @@ -113,28 +115,25 @@ def test_s3_400_error(monkeypatch): monkeypatch.setattr(requests, 'get', mock_get) with pytest.raises(RuntimeError, match=r'Failed to download .* code 400'): - tf._s3_get( + templateflow.client._s3_get( + tfc._cache.config, Path(tfc.TF_LAYOUT.root) - / 'tpl-MNI152NLin2009cAsym/tpl-MNI152NLin2009cAsym_res-02_T1w.nii.gz' + / 'tpl-MNI152NLin2009cAsym/tpl-MNI152NLin2009cAsym_res-02_T1w.nii.gz', ) def test_bad_skeleton(tmp_path, monkeypatch): newhome = (tmp_path / 's3-update').resolve() - monkeypatch.setattr(tfc, 'TF_USE_DATALAD', False) - monkeypatch.setattr(tfc, 'TF_HOME', newhome) - monkeypatch.setattr(tfc, 'TF_LAYOUT', None) - - tfc._init_cache() - tfc.init_layout() - - assert tfc.TF_LAYOUT is not None - assert tfc.TF_LAYOUT.root == str(newhome) + client = templateflow.client.TemplateFlowClient( + config=tfc.cache.CacheConfig( + root=newhome, + use_datalad=False, + ) + ) - # Instead of reloading - monkeypatch.setattr(tf, 'TF_LAYOUT', tfc.TF_LAYOUT) + assert client.cache.layout.root == str(newhome) - paths = tf.ls('MNI152NLin2009cAsym', resolution='02', suffix='T1w', desc=None) + paths = client.ls('MNI152NLin2009cAsym', resolution='02', suffix='T1w', desc=None) assert paths path = Path(paths[0]) assert path.read_bytes() == b'' @@ -143,14 +142,14 @@ def test_bad_skeleton(tmp_path, monkeypatch): path.write_bytes(error_file.read_bytes()) # Test directly before testing through API paths - tf._truncate_s3_errors(paths) + templateflow.client._truncate_s3_errors(paths) assert path.read_bytes() == b'' 
path.write_bytes(error_file.read_bytes()) monkeypatch.setattr(requests, 'get', mock_get) with pytest.raises(RuntimeError): - tf.get('MNI152NLin2009cAsym', resolution='02', suffix='T1w', desc=None) + client.get('MNI152NLin2009cAsym', resolution='02', suffix='T1w', desc=None) # Running get clears bad files before attempting to download assert path.read_bytes() == b'' From b57b031f685d19c220f77aae933e5a44d8d6e5d6 Mon Sep 17 00:00:00 2001 From: "Christopher J. Markiewicz" Date: Wed, 8 Oct 2025 13:21:47 -0500 Subject: [PATCH 05/20] test: monkeypatch cache --- templateflow/tests/test_conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templateflow/tests/test_conf.py b/templateflow/tests/test_conf.py index c30ea877..009fbcd2 100644 --- a/templateflow/tests/test_conf.py +++ b/templateflow/tests/test_conf.py @@ -156,7 +156,7 @@ def mock_import(name, globals=None, locals=None, fromlist=(), level=0): # noqa: return oldimport(name, globals=globals, locals=locals, fromlist=fromlist, level=level) with monkeypatch.context() as m: - m.setattr(tfc, 'TF_LAYOUT', None) + m.setattr(tfc._cache, 'layout', None) with pytest.raises(RuntimeError): myfunc() From 25045a454fc5efb1893ce79cc16d6eaf9b5f9856 Mon Sep 17 00:00:00 2001 From: "Christopher J. 
Markiewicz" Date: Thu, 9 Oct 2025 07:35:47 -0500 Subject: [PATCH 06/20] test: Update doctests --- .circleci/config.yml | 2 +- templateflow/client.py | 32 +++++++++++++++++++------------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index cd1217da..25c6840c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -109,7 +109,7 @@ jobs: export TEMPLATEFLOW_USE_DATALAD=on python -m pytest \ --junit-xml=~/tests/datalad.xml --cov templateflow --doctest-modules \ - templateflow/api.py + templateflow/client.py coverage run --append -m templateflow.cli config coverage run --append -m templateflow.cli ls MNI152NLin2009cAsym --suffix T1w diff --git a/templateflow/client.py b/templateflow/client.py index ac7fa4c7..3a260b43 100644 --- a/templateflow/client.py +++ b/templateflow/client.py @@ -79,21 +79,23 @@ def ls(self, template, **kwargs): Examples -------- - >>> ls('MNI152Lin', resolution=1, suffix='T1w', desc=None) # doctest: +ELLIPSIS + >>> client = TemplateFlowClient() + + >>> client.ls('MNI152Lin', resolution=1, suffix='T1w', desc=None) # doctest: +ELLIPSIS [PosixPath('.../tpl-MNI152Lin/tpl-MNI152Lin_res-01_T1w.nii.gz')] - >>> ls('MNI152Lin', resolution=2, suffix='T1w', desc=None) # doctest: +ELLIPSIS + >>> client.ls('MNI152Lin', resolution=2, suffix='T1w', desc=None) # doctest: +ELLIPSIS [PosixPath('.../tpl-MNI152Lin/tpl-MNI152Lin_res-02_T1w.nii.gz')] - >>> ls('MNI152Lin', suffix='T1w', desc=None) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + >>> client.ls('MNI152Lin', suffix='T1w', desc=None) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE [PosixPath('.../tpl-MNI152Lin/tpl-MNI152Lin_res-01_T1w.nii.gz'), PosixPath('.../tpl-MNI152Lin/tpl-MNI152Lin_res-02_T1w.nii.gz')] - >>> ls('fsLR', space=None, hemi='L', - ... density='32k', suffix='sphere') # doctest: +ELLIPSIS + >>> client.ls('fsLR', space=None, hemi='L', + ... 
density='32k', suffix='sphere') # doctest: +ELLIPSIS [PosixPath('.../tpl-fsLR_hemi-L_den-32k_sphere.surf.gii')] - >>> ls('fsLR', space='madeup') + >>> client.ls('fsLR', space='madeup') [] """ @@ -138,25 +140,27 @@ def get(self, template, raise_empty=False, **kwargs): Examples -------- - >>> str(get('MNI152Lin', resolution=1, suffix='T1w', desc=None)) # doctest: +ELLIPSIS + >>> client = TemplateFlowClient() + + >>> str(client.get('MNI152Lin', resolution=1, suffix='T1w', desc=None)) # doctest: +ELLIPSIS '.../tpl-MNI152Lin/tpl-MNI152Lin_res-01_T1w.nii.gz' - >>> str(get('MNI152Lin', resolution=2, suffix='T1w', desc=None)) # doctest: +ELLIPSIS + >>> str(client.get('MNI152Lin', resolution=2, suffix='T1w', desc=None)) # doctest: +ELLIPSIS '.../tpl-MNI152Lin/tpl-MNI152Lin_res-02_T1w.nii.gz' - >>> [str(p) for p in get( + >>> [str(p) for p in client.get( ... 'MNI152Lin', suffix='T1w', desc=None)] # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE ['.../tpl-MNI152Lin/tpl-MNI152Lin_res-01_T1w.nii.gz', '.../tpl-MNI152Lin/tpl-MNI152Lin_res-02_T1w.nii.gz'] - >>> str(get('fsLR', space=None, hemi='L', + >>> str(client.get('fsLR', space=None, hemi='L', ... density='32k', suffix='sphere')) # doctest: +ELLIPSIS '.../tpl-fsLR_hemi-L_den-32k_sphere.surf.gii' - >>> get('fsLR', space='madeup') + >>> client.get('fsLR', space='madeup') [] - >>> get('fsLR', raise_empty=True, space='madeup') # doctest: +IGNORE_EXCEPTION_DETAIL + >>> client.get('fsLR', raise_empty=True, space='madeup') # doctest: +IGNORE_EXCEPTION_DETAIL Traceback (most recent call last): Exception: ... @@ -218,7 +222,9 @@ def get_metadata(self, template): Examples -------- - >>> get_metadata('MNI152Lin')['Name'] + >>> client = TemplateFlowClient() + + >>> client.get_metadata('MNI152Lin')['Name'] 'Linear ICBM Average Brain (ICBM152) Stereotaxic Registration Model' """ From 45e7a5b59af218c9629a0207b07c78d870a77048 Mon Sep 17 00:00:00 2001 From: "Christopher J. 
Markiewicz" Date: Thu, 9 Oct 2025 10:43:54 -0500 Subject: [PATCH 07/20] fix: Re-add client.templates() --- templateflow/client.py | 47 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/templateflow/client.py b/templateflow/client.py index 3a260b43..0aa4465c 100644 --- a/templateflow/client.py +++ b/templateflow/client.py @@ -79,7 +79,10 @@ def ls(self, template, **kwargs): Examples -------- - >>> client = TemplateFlowClient() + + .. testsetup:: + + >>> client = TemplateFlowClient() >>> client.ls('MNI152Lin', resolution=1, suffix='T1w', desc=None) # doctest: +ELLIPSIS [PosixPath('.../tpl-MNI152Lin/tpl-MNI152Lin_res-01_T1w.nii.gz')] @@ -140,7 +143,10 @@ def get(self, template, raise_empty=False, **kwargs): Examples -------- - >>> client = TemplateFlowClient() + + .. testsetup:: + + >>> client = TemplateFlowClient() >>> str(client.get('MNI152Lin', resolution=1, suffix='T1w', desc=None)) # doctest: +ELLIPSIS '.../tpl-MNI152Lin/tpl-MNI152Lin_res-01_T1w.nii.gz' @@ -211,6 +217,38 @@ def get(self, template, raise_empty=False, **kwargs): return out_file[0] return out_file + def templates(self, **kwargs): + """ + Return a list of available templates. + + Keyword Arguments + ----------------- + resolution: int or None + Index to an specific spatial resolution of the template. + suffix : str or None + BIDS suffix + atlas : str + Name of a particular atlas + desc : str + Description field + + Examples + -------- + + .. testsetup:: + + >>> client = TemplateFlowClient() + + >>> base = ['MNI152Lin', 'MNI152NLin2009cAsym', 'NKI', 'OASIS30ANTs'] + >>> tpls = client.templates() + >>> all([t in tpls for t in base]) + True + + >>> sorted(set(base).intersection(client.templates(suffix='PD'))) + ['MNI152Lin', 'MNI152NLin2009cAsym'] + """ + return sorted(self.get_templates(**kwargs)) + def get_metadata(self, template): """ Fetch one file from one template. 
@@ -222,7 +260,10 @@ def get_metadata(self, template): Examples -------- - >>> client = TemplateFlowClient() + + .. testsetup:: + + >>> client = TemplateFlowClient() >>> client.get_metadata('MNI152Lin')['Name'] 'Linear ICBM Average Brain (ICBM152) Stereotaxic Registration Model' From 4df98537d0af6a157ff137f465fe89d4c5f60f86 Mon Sep 17 00:00:00 2001 From: "Christopher J. Markiewicz" Date: Thu, 9 Oct 2025 10:50:54 -0500 Subject: [PATCH 08/20] doc/rf: Import Client class into root, update docs --- docs/api.rst | 1 + templateflow/__init__.py | 2 ++ templateflow/api.py | 32 ++++++++++++++++++++++++-------- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 4e1ca2d8..90f17651 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -5,5 +5,6 @@ Information on specific functions, classes, and methods. .. toctree:: api/templateflow.cli + api/templateflow.client api/templateflow.api api/templateflow.conf diff --git a/templateflow/__init__.py b/templateflow/__init__.py index ee150506..f407eaf8 100644 --- a/templateflow/__init__.py +++ b/templateflow/__init__.py @@ -40,12 +40,14 @@ del PackageNotFoundError from templateflow import api +from templateflow.client import TemplateFlowClient from templateflow.conf import update __all__ = [ '__copyright__', '__packagename__', '__version__', + 'TemplateFlowClient', 'api', 'update', ] diff --git a/templateflow/api.py b/templateflow/api.py index 15829591..3b0be03a 100644 --- a/templateflow/api.py +++ b/templateflow/api.py @@ -20,14 +20,30 @@ # # https://www.nipreps.org/community/licensing/ # -"""TemplateFlow's Python Client.""" - -__all__ = [ - 'get', - 'ls', - 'get_metadata', - 'get_citations', -] +"""TemplateFlow's Python Client. + +``templateflow.api`` provides a global, high-level interface to query the TemplateFlow archive. 
+ +There are two methods to initialize a client: + + >>> from templateflow import api as client + + >>> from templateflow import TemplateFlowClient + >>> client = TemplateFlowClient() + +The latter method allows additional configuration for the client, +while ``templateflow.api`` is only configurable through environment variables. + +.. autofunction:: get + +.. autofunction:: ls + +.. autofunction:: templates + +.. autofunction:: get_metadata + +.. autofunction:: get_citations +""" from .client import TemplateFlowClient from .conf import _cache From b738afa49a1df7612fc0f28121ccfdd079a8ce7b Mon Sep 17 00:00:00 2001 From: "Christopher J. Markiewicz" Date: Thu, 9 Oct 2025 11:44:22 -0500 Subject: [PATCH 09/20] rf: Improve TemplateFlowClient init signature --- templateflow/client.py | 19 +++++++++++++++---- templateflow/tests/test_s3.py | 7 +------ 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/templateflow/client.py b/templateflow/client.py index 0aa4465c..b4b03384 100644 --- a/templateflow/client.py +++ b/templateflow/client.py @@ -24,6 +24,7 @@ from __future__ import annotations +import os import sys from json import loads from pathlib import Path @@ -34,11 +35,21 @@ class TemplateFlowClient: - def __init__(self, cache=None, config=None): + def __init__( + self, + root: os.PathLike[str] | str | None = None, + *, + cache: TemplateFlowCache | None = None, + **config_kwargs, + ): if cache is None: - if config is None: - config = CacheConfig() - cache = TemplateFlowCache(config) + if root: + config_kwargs['root'] = root + cache = TemplateFlowCache(CacheConfig(**config_kwargs)) + elif root or config_kwargs: + raise ValueError( + 'If `cache` is provided, `root` and other config kwargs cannot be used.' 
+ ) self.cache = cache def __getattr__(self, name: str): diff --git a/templateflow/tests/test_s3.py b/templateflow/tests/test_s3.py index 0dac396a..7dca67b5 100644 --- a/templateflow/tests/test_s3.py +++ b/templateflow/tests/test_s3.py @@ -124,12 +124,7 @@ def test_s3_400_error(monkeypatch): def test_bad_skeleton(tmp_path, monkeypatch): newhome = (tmp_path / 's3-update').resolve() - client = templateflow.client.TemplateFlowClient( - config=tfc.cache.CacheConfig( - root=newhome, - use_datalad=False, - ) - ) + client = templateflow.client.TemplateFlowClient(root=newhome, use_datalad=False) assert client.cache.layout.root == str(newhome) From 7c805f1aa8f83a7b75ab6bae117f2027596cec69 Mon Sep 17 00:00:00 2001 From: "Christopher J. Markiewicz" Date: Thu, 9 Oct 2025 12:51:42 -0500 Subject: [PATCH 10/20] test: More cleanup --- templateflow/tests/test_conf.py | 17 ++++++++--------- templateflow/tests/test_s3.py | 7 ------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/templateflow/tests/test_conf.py b/templateflow/tests/test_conf.py index 009fbcd2..53ab9b81 100644 --- a/templateflow/tests/test_conf.py +++ b/templateflow/tests/test_conf.py @@ -23,12 +23,15 @@ """Tests the config module.""" from importlib import reload +from importlib.util import find_spec from shutil import rmtree import pytest from templateflow import conf as tfc +have_datalad = find_spec('datalad') is not None + def _find_message(lines, msg, reverse=True): if isinstance(lines, str): @@ -61,10 +64,6 @@ def test_conf_init(monkeypatch, tmp_path, use_datalad): def test_setup_home(monkeypatch, tmp_path, capsys, use_datalad): """Check the correct functioning of the installation hook.""" - if use_datalad == 'on': - # ImportError if not installed - pass - home = (tmp_path / f'setup-home-{use_datalad}').absolute() monkeypatch.setenv('TEMPLATEFLOW_USE_DATALAD', use_datalad) monkeypatch.setenv('TEMPLATEFLOW_HOME', str(home)) @@ -76,7 +75,7 @@ def test_setup_home(monkeypatch, tmp_path, capsys, 
use_datalad): reload(tfc) # Ensure mocks are up-to-date - assert tfc.TF_USE_DATALAD is (use_datalad == 'on') + assert tfc.TF_USE_DATALAD is (use_datalad == 'on' and have_datalad) assert str(tfc.TF_HOME) == str(home) # First execution, the S3 stub is created (or datalad install) assert tfc.TF_CACHED is False @@ -92,11 +91,11 @@ def test_setup_home(monkeypatch, tmp_path, capsys, use_datalad): out = capsys.readouterr()[0] assert _find_message(out, 'TemplateFlow was not cached') is False - if use_datalad == 'on': + if use_datalad == 'on' and have_datalad: assert _find_message(out, 'Updating TEMPLATEFLOW_HOME using DataLad') assert updated is True - elif use_datalad == 'off': + else: # At this point, S3 should be up-to-date assert updated is False assert _find_message(out, 'TEMPLATEFLOW_HOME directory (S3 type) was up-to-date.') @@ -114,11 +113,11 @@ def test_setup_home(monkeypatch, tmp_path, capsys, use_datalad): out = capsys.readouterr()[0] assert not _find_message(out, 'TemplateFlow was not cached') - if use_datalad == 'on': + if use_datalad == 'on' and have_datalad: assert _find_message(out, 'Updating TEMPLATEFLOW_HOME using DataLad') assert updated is True - elif use_datalad == 'off': + else: # At this point, S3 should be up-to-date assert updated is False assert _find_message(out, 'TEMPLATEFLOW_HOME directory (S3 type) was up-to-date.') diff --git a/templateflow/tests/test_s3.py b/templateflow/tests/test_s3.py index 7dca67b5..a6ae43dc 100644 --- a/templateflow/tests/test_s3.py +++ b/templateflow/tests/test_s3.py @@ -40,11 +40,6 @@ def test_get_skel_file(tmp_path, monkeypatch): """Exercise the skeleton file generation.""" home = (tmp_path / 's3-skel-file').resolve() - monkeypatch.setenv('TEMPLATEFLOW_USE_DATALAD', 'off') - monkeypatch.setenv('TEMPLATEFLOW_HOME', str(home)) - - # First execution, the S3 stub is created (or datalad install) - reload(tfc) md5content = b'anything' @@ -82,8 +77,6 @@ def test_update_s3(tmp_path, monkeypatch): """Exercise updating the S3 
skeleton.""" newhome = (tmp_path / 's3-update').resolve() - monkeypatch.setenv('TEMPLATEFLOW_USE_DATALAD', 'off') - monkeypatch.setenv('TEMPLATEFLOW_HOME', str(newhome)) assert tfc._s3.update(newhome, timeout=10) assert not tfc._s3.update(newhome, overwrite=False, timeout=10) From 14b3c3295662fabddda10766515bff9e5cc4e0a8 Mon Sep 17 00:00:00 2001 From: "Christopher J. Markiewicz" Date: Thu, 9 Oct 2025 13:22:36 -0500 Subject: [PATCH 11/20] rename: http_root -> s3_root --- templateflow/client.py | 2 +- templateflow/conf/__init__.py | 2 +- templateflow/conf/cache.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/templateflow/client.py b/templateflow/client.py index b4b03384..e72f41a8 100644 --- a/templateflow/client.py +++ b/templateflow/client.py @@ -339,7 +339,7 @@ def _s3_get(config: CacheConfig, filepath: Path): from tqdm import tqdm path = quote(filepath.relative_to(config.root).as_posix()) - url = f'{config.http_root}/{path}' + url = f'{config.s3_root}/{path}' print(f'Downloading {url}', file=stderr) # Streaming, so we can iterate over the response. 
diff --git a/templateflow/conf/__init__.py b/templateflow/conf/__init__.py index 488afaf1..0756ca3d 100644 --- a/templateflow/conf/__init__.py +++ b/templateflow/conf/__init__.py @@ -19,7 +19,7 @@ def __getattr__(name: str): elif name == 'TF_GITHUB_SOURCE': return _cache.config.origin elif name == 'TF_S3_ROOT': - return _cache.config.http_root + return _cache.config.s3_root elif name == 'TF_USE_DATALAD': return _cache.config.use_datalad elif name == 'TF_AUTOUPDATE': diff --git a/templateflow/conf/cache.py b/templateflow/conf/cache.py index 3e757147..a73156c8 100644 --- a/templateflow/conf/cache.py +++ b/templateflow/conf/cache.py @@ -13,7 +13,7 @@ class CacheConfig: root: Path = field(default_factory=get_templateflow_home) origin: str = field(default='https://github.com/templateflow/templateflow.git') - http_root: str = field(default='https://templateflow.s3.amazonaws.com') + s3_root: str = field(default='https://templateflow.s3.amazonaws.com') use_datalad: bool = field(default_factory=env_to_bool('TEMPLATEFLOW_USE_DATALAD', False)) autoupdate: bool = field(default_factory=env_to_bool('TEMPLATEFLOW_AUTOUPDATE', True)) timeout: int = field(default=10) @@ -27,7 +27,7 @@ def __post_init__(self): @dataclass class S3Manager: - http_root: str + s3_root: str def install(self, path: Path, overwrite: bool, timeout: int): from ._s3 import update @@ -88,7 +88,7 @@ def __post_init__(self): self.manager = ( DataladManager(self.config.origin) if self.config.use_datalad - else S3Manager(self.config.http_root) + else S3Manager(self.config.s3_root) ) # cache.cached checks live, precached stores state at init self.precached = self.cached From c2547da55604f62067afdf9f78a8bcaad49252df Mon Sep 17 00:00:00 2001 From: "Christopher J. 
Markiewicz" Date: Sun, 19 Oct 2025 11:32:52 -0400 Subject: [PATCH 12/20] rf: Defer pybids imports --- templateflow/client.py | 4 ++-- templateflow/conf/cache.py | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/templateflow/client.py b/templateflow/client.py index e72f41a8..36524288 100644 --- a/templateflow/client.py +++ b/templateflow/client.py @@ -29,8 +29,6 @@ from json import loads from pathlib import Path -from bids.layout import Query - from .conf.cache import CacheConfig, TemplateFlowCache @@ -113,6 +111,8 @@ def ls(self, template, **kwargs): [] """ + from bids.layout import Query + # Normalize extensions to always have leading dot if 'extension' in kwargs: kwargs['extension'] = _normalize_ext(kwargs['extension']) diff --git a/templateflow/conf/cache.py b/templateflow/conf/cache.py index a73156c8..30251abe 100644 --- a/templateflow/conf/cache.py +++ b/templateflow/conf/cache.py @@ -5,9 +5,12 @@ from pathlib import Path from warnings import warn -from .bids import Layout from .env import env_to_bool, get_templateflow_home +TYPE_CHECKING = False +if TYPE_CHECKING: + from bids.layout import BIDSLayout + @dataclass class CacheConfig: @@ -98,11 +101,13 @@ def cached(self) -> bool: return self.config.root.is_dir() and any(self.config.root.iterdir()) @cached_property - def layout(self) -> Layout: + def layout(self) -> BIDSLayout: import re from bids.layout.index import BIDSLayoutIndexer + from .bids import Layout + self.ensure() return Layout( self.config.root, From afb3587880c6dba7334305c075d16d14ba1c8bcf Mon Sep 17 00:00:00 2001 From: "Christopher J. 
Markiewicz" Date: Sun, 19 Oct 2025 11:58:21 -0400 Subject: [PATCH 13/20] rf: Warn on all failed attempts to configure use_datalad --- templateflow/cli.py | 3 ++- templateflow/conf/__init__.py | 5 ----- templateflow/conf/cache.py | 24 +++++++++++++++++++----- templateflow/tests/test_conf.py | 2 +- 4 files changed, 22 insertions(+), 12 deletions(-) diff --git a/templateflow/cli.py b/templateflow/cli.py index 08b1da23..3d479f78 100644 --- a/templateflow/cli.py +++ b/templateflow/cli.py @@ -32,6 +32,7 @@ from click.decorators import FC, Option, _param_memo from templateflow.client import TemplateFlowClient +from templateflow.conf import _cache load_data = _Loader(__spec__.parent) @@ -48,7 +49,7 @@ } ENTITY_EXCLUDE = {'template', 'description'} -CLIENT = TemplateFlowClient() +CLIENT = TemplateFlowClient(cache=_cache) CACHE = CLIENT.cache CONFIG = CACHE.config CACHE.ensure() diff --git a/templateflow/conf/__init__.py b/templateflow/conf/__init__.py index 0756ca3d..14ff35e8 100644 --- a/templateflow/conf/__init__.py +++ b/templateflow/conf/__init__.py @@ -6,7 +6,6 @@ from acres import Loader from .cache import CacheConfig, TemplateFlowCache -from .env import _env_to_bool load_data = Loader(__spec__.name) @@ -33,10 +32,6 @@ def __getattr__(name: str): raise AttributeError(f"module '{__name__}' has no attribute '{name}'") -if _env_to_bool('TEMPLATEFLOW_USE_DATALAD', False) and not _cache.config.use_datalad: - warn('DataLad is not installed ➔ disabled.', stacklevel=2) - - if not _cache.precached: warn( f"""\ diff --git a/templateflow/conf/cache.py b/templateflow/conf/cache.py index 30251abe..89db59f8 100644 --- a/templateflow/conf/cache.py +++ b/templateflow/conf/cache.py @@ -1,7 +1,7 @@ from __future__ import annotations from dataclasses import dataclass, field -from functools import cached_property +from functools import cache, cached_property from pathlib import Path from warnings import warn @@ -12,6 +12,19 @@ from bids.layout import BIDSLayout +# The first CacheConfig 
is initialized during import, so we need a higher +# level of indirection for warnings to point to the user code. +# After that, we will set the stack level to point to the CacheConfig() caller. +STACKLEVEL = 6 + + +@cache +def _have_datalad(): + import importlib.util + + return importlib.util.find_spec('datalad') is not None + + @dataclass class CacheConfig: root: Path = field(default_factory=get_templateflow_home) @@ -22,10 +35,11 @@ class CacheConfig: timeout: int = field(default=10) def __post_init__(self): - if self.use_datalad: - from importlib.util import find_spec - - self.use_datalad = find_spec('datalad') is not None + global STACKLEVEL + if self.use_datalad and not _have_datalad(): + self.use_datalad = False + warn('DataLad is not installed ➔ disabled.', stacklevel=STACKLEVEL) + STACKLEVEL = 3 @dataclass diff --git a/templateflow/tests/test_conf.py b/templateflow/tests/test_conf.py index 53ab9b81..59aeb86c 100644 --- a/templateflow/tests/test_conf.py +++ b/templateflow/tests/test_conf.py @@ -68,7 +68,7 @@ def test_setup_home(monkeypatch, tmp_path, capsys, use_datalad): monkeypatch.setenv('TEMPLATEFLOW_USE_DATALAD', use_datalad) monkeypatch.setenv('TEMPLATEFLOW_HOME', str(home)) - use_post = tfc._env_to_bool('TEMPLATEFLOW_USE_DATALAD', False) + use_post = tfc.env._env_to_bool('TEMPLATEFLOW_USE_DATALAD', False) assert use_post is (use_datalad == 'on') with capsys.disabled(): From bc449e4ba2ec8480dab48855df7d2e4db5174136 Mon Sep 17 00:00:00 2001 From: "Christopher J. 
Markiewicz" Date: Sun, 19 Oct 2025 12:06:53 -0400 Subject: [PATCH 14/20] typ: Update type annotations --- templateflow/client.py | 21 +++++++++------------ templateflow/conf/cache.py | 20 ++++++++++---------- 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/templateflow/client.py b/templateflow/client.py index 36524288..0f4b9c3b 100644 --- a/templateflow/client.py +++ b/templateflow/client.py @@ -60,7 +60,7 @@ def __getattr__(self, name: str): msg = f"'{self.__class__.__name__}' object has no attribute '{name}'" raise AttributeError(msg) from None - def ls(self, template, **kwargs): + def ls(self, template, **kwargs) -> list[Path]: """ List files pertaining to one or more templates. @@ -124,7 +124,7 @@ def ls(self, template, **kwargs): ) ] - def get(self, template, raise_empty=False, **kwargs): + def get(self, template, raise_empty=False, **kwargs) -> list[Path]: """ Pull files pertaining to one or more templates down. @@ -228,7 +228,7 @@ def get(self, template, raise_empty=False, **kwargs): return out_file[0] return out_file - def templates(self, **kwargs): + def templates(self, **kwargs) -> list[str]: """ Return a list of available templates. @@ -260,7 +260,7 @@ def templates(self, **kwargs): """ return sorted(self.get_templates(**kwargs)) - def get_metadata(self, template): + def get_metadata(self, template) -> dict[str, str]: """ Fetch one file from one template. 
@@ -288,7 +288,7 @@ def get_metadata(self, template): _datalad_get(filepath) return loads(filepath.read_text()) - def get_citations(self, template, bibtex=False): + def get_citations(self, template, bibtex=False) -> list[str]: """ Fetch template citations @@ -308,13 +308,10 @@ def get_citations(self, template, bibtex=False): if not bibtex: return refs - return [ - _to_bibtex(ref, template, idx, self.cache.config.timeout).rstrip() - for idx, ref in enumerate(refs, 1) - ] + return [_to_bibtex(ref, template, self.cache.config.timeout).rstrip() for ref in refs] -def _datalad_get(config: CacheConfig, filepath: Path): +def _datalad_get(config: CacheConfig, filepath: Path) -> None: if not filepath: return @@ -331,7 +328,7 @@ def _datalad_get(config: CacheConfig, filepath: Path): raise -def _s3_get(config: CacheConfig, filepath: Path): +def _s3_get(config: CacheConfig, filepath: Path) -> None: from sys import stderr from urllib.parse import quote @@ -365,7 +362,7 @@ def _s3_get(config: CacheConfig, filepath: Path): raise RuntimeError('ERROR, something went wrong') -def _to_bibtex(doi, template, idx, timeout): +def _to_bibtex(doi: str, template: str, timeout: float) -> str: if 'doi.org' not in doi: return doi diff --git a/templateflow/conf/cache.py b/templateflow/conf/cache.py index 89db59f8..238405c3 100644 --- a/templateflow/conf/cache.py +++ b/templateflow/conf/cache.py @@ -19,7 +19,7 @@ @cache -def _have_datalad(): +def _have_datalad() -> bool: import importlib.util return importlib.util.find_spec('datalad') is not None @@ -34,7 +34,7 @@ class CacheConfig: autoupdate: bool = field(default_factory=env_to_bool('TEMPLATEFLOW_AUTOUPDATE', True)) timeout: int = field(default=10) - def __post_init__(self): + def __post_init__(self) -> None: global STACKLEVEL if self.use_datalad and not _have_datalad(): self.use_datalad = False @@ -46,7 +46,7 @@ def __post_init__(self): class S3Manager: s3_root: str - def install(self, path: Path, overwrite: bool, timeout: int): + def 
install(self, path: Path, overwrite: bool, timeout: int) -> None: from ._s3 import update update(path, local=True, overwrite=overwrite, silent=True, timeout=timeout) @@ -56,7 +56,7 @@ def update(self, path: Path, local: bool, overwrite: bool, silent: bool, timeout return _update_s3(path, local=local, overwrite=overwrite, silent=silent, timeout=timeout) - def wipe(self, path: Path): + def wipe(self, path: Path) -> None: from shutil import rmtree def _onerror(func, path, excinfo): @@ -72,7 +72,7 @@ def _onerror(func, path, excinfo): class DataladManager: source: str - def install(self, path: Path, overwrite: bool, timeout: int): + def install(self, path: Path, overwrite: bool, timeout: int) -> None: from datalad.api import install install(path=path, source=self.source, recursive=True) @@ -91,7 +91,7 @@ def update(self, path: Path, local: bool, overwrite: bool, silent: bool, timeout return False return True - def wipe(self, path: Path): + def wipe(self, path: Path) -> None: print('TemplateFlow is configured in DataLad mode, wipe() has no effect') @@ -101,7 +101,7 @@ class TemplateFlowCache: precached: bool = field(init=False) manager: DataladManager | S3Manager = field(init=False) - def __post_init__(self): + def __post_init__(self) -> None: self.manager = ( DataladManager(self.config.origin) if self.config.use_datalad @@ -133,13 +133,13 @@ def layout(self) -> BIDSLayout: ), ) - def ensure(self): + def ensure(self) -> None: if not self.cached: self.manager.install( self.config.root, overwrite=self.config.autoupdate, timeout=self.config.timeout ) - def update(self, local: bool = False, overwrite: bool = True, silent: bool = False): + def update(self, local: bool = False, overwrite: bool = True, silent: bool = False) -> bool: if self.manager.update( self.config.root, local=local, @@ -151,6 +151,6 @@ def update(self, local: bool = False, overwrite: bool = True, silent: bool = Fal return True return False - def wipe(self): + def wipe(self) -> None: 
self.__dict__.pop('layout', None) # Uncache property self.manager.wipe(self.config.root) From 03ca64e2db2d98e9f676909aa3dcb2022370a9b9 Mon Sep 17 00:00:00 2001 From: "Christopher J. Markiewicz" Date: Sun, 19 Oct 2025 12:11:52 -0400 Subject: [PATCH 15/20] chore: Address flake8 complaints --- templateflow/client.py | 21 +++++++++------------ templateflow/tests/test_s3.py | 2 -- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/templateflow/client.py b/templateflow/client.py index 0f4b9c3b..7e018aea 100644 --- a/templateflow/client.py +++ b/templateflow/client.py @@ -93,18 +93,17 @@ def ls(self, template, **kwargs) -> list[Path]: >>> client = TemplateFlowClient() - >>> client.ls('MNI152Lin', resolution=1, suffix='T1w', desc=None) # doctest: +ELLIPSIS + >>> client.ls('MNI152Lin', resolution=1, suffix='T1w', desc=None) [PosixPath('.../tpl-MNI152Lin/tpl-MNI152Lin_res-01_T1w.nii.gz')] - >>> client.ls('MNI152Lin', resolution=2, suffix='T1w', desc=None) # doctest: +ELLIPSIS + >>> client.ls('MNI152Lin', resolution=2, suffix='T1w', desc=None) [PosixPath('.../tpl-MNI152Lin/tpl-MNI152Lin_res-02_T1w.nii.gz')] - >>> client.ls('MNI152Lin', suffix='T1w', desc=None) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + >>> client.ls('MNI152Lin', suffix='T1w', desc=None) [PosixPath('.../tpl-MNI152Lin/tpl-MNI152Lin_res-01_T1w.nii.gz'), PosixPath('.../tpl-MNI152Lin/tpl-MNI152Lin_res-02_T1w.nii.gz')] - >>> client.ls('fsLR', space=None, hemi='L', - ... 
density='32k', suffix='sphere') # doctest: +ELLIPSIS + >>> client.ls('fsLR', space=None, hemi='L', density='32k', suffix='sphere') [PosixPath('.../tpl-fsLR_hemi-L_den-32k_sphere.surf.gii')] >>> client.ls('fsLR', space='madeup') @@ -159,25 +158,23 @@ def get(self, template, raise_empty=False, **kwargs) -> list[Path]: >>> client = TemplateFlowClient() - >>> str(client.get('MNI152Lin', resolution=1, suffix='T1w', desc=None)) # doctest: +ELLIPSIS + >>> str(client.get('MNI152Lin', resolution=1, suffix='T1w', desc=None)) '.../tpl-MNI152Lin/tpl-MNI152Lin_res-01_T1w.nii.gz' - >>> str(client.get('MNI152Lin', resolution=2, suffix='T1w', desc=None)) # doctest: +ELLIPSIS + >>> str(client.get('MNI152Lin', resolution=2, suffix='T1w', desc=None)) '.../tpl-MNI152Lin/tpl-MNI152Lin_res-02_T1w.nii.gz' - >>> [str(p) for p in client.get( - ... 'MNI152Lin', suffix='T1w', desc=None)] # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + >>> [str(p) for p in client.get('MNI152Lin', suffix='T1w', desc=None)] ['.../tpl-MNI152Lin/tpl-MNI152Lin_res-01_T1w.nii.gz', '.../tpl-MNI152Lin/tpl-MNI152Lin_res-02_T1w.nii.gz'] - >>> str(client.get('fsLR', space=None, hemi='L', - ... density='32k', suffix='sphere')) # doctest: +ELLIPSIS + >>> str(client.get('fsLR', space=None, hemi='L', density='32k', suffix='sphere')) '.../tpl-fsLR_hemi-L_den-32k_sphere.surf.gii' >>> client.get('fsLR', space='madeup') [] - >>> client.get('fsLR', raise_empty=True, space='madeup') # doctest: +IGNORE_EXCEPTION_DETAIL + >>> client.get('fsLR', raise_empty=True, space='madeup') Traceback (most recent call last): Exception: ... 
diff --git a/templateflow/tests/test_s3.py b/templateflow/tests/test_s3.py index a6ae43dc..7b0288ed 100644 --- a/templateflow/tests/test_s3.py +++ b/templateflow/tests/test_s3.py @@ -39,8 +39,6 @@ def test_get_skel_file(tmp_path, monkeypatch): """Exercise the skeleton file generation.""" - home = (tmp_path / 's3-skel-file').resolve() - md5content = b'anything' def mock_get(*args, **kwargs): From a60d9268bf332a9dd0e9aab697b4f05e171133e7 Mon Sep 17 00:00:00 2001 From: "Christopher J. Markiewicz" Date: Sun, 19 Oct 2025 12:42:19 -0400 Subject: [PATCH 16/20] feat: Add repr to indicate location and type of cache --- templateflow/client.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/templateflow/client.py b/templateflow/client.py index 7e018aea..46f5006d 100644 --- a/templateflow/client.py +++ b/templateflow/client.py @@ -50,6 +50,10 @@ def __init__( ) self.cache = cache + def __repr__(self) -> str: + cache_type = 'DataLad' if self.cache.config.use_datalad else 'S3' + return f'<{self.__class__.__name__}[{cache_type}] cache="{self.cache.config.root}">' + def __getattr__(self, name: str): name = name.replace('ls_', 'get_') try: From 1924477397ab6b66130fcfe33e6490da7e1269f3 Mon Sep 17 00:00:00 2001 From: "Christopher J. Markiewicz" Date: Sun, 19 Oct 2025 13:02:53 -0400 Subject: [PATCH 17/20] doc: Add docstring for TemplateFlowClient --- templateflow/client.py | 43 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/templateflow/client.py b/templateflow/client.py index 46f5006d..71dded4d 100644 --- a/templateflow/client.py +++ b/templateflow/client.py @@ -33,6 +33,49 @@ class TemplateFlowClient: + """TemplateFlow client for querying and retrieving template files. 
+ + If instantiated without arguments, uses the default cache, which is + located at a platform-dependent location (e.g., ``$HOME/.cache/templateflow`` on + most Unix-like systems), or at the location specified by the ``TEMPLATEFLOW_HOME`` + environment variable: + + >>> client = TemplateFlowClient() + >>> client + <TemplateFlowClient[S3] cache="..."> + + To select a custom cache location, provide the ``root`` argument: + + >>> client = TemplateFlowClient(root='/path/to/templateflow_cache') + + Additional configuration options can be provided as keyword arguments. + + Parameters + ---------- + root: :class:`os.PathLike` or :class:`str`, optional + Path to the root of the TemplateFlow cache (will be created if it does not exist). + + Keyword Arguments + ----------------- + use_datalad: :class:`bool`, optional + Whether to use DataLad for managing the cache. Defaults to ``False`` or + the value of the ``TEMPLATEFLOW_USE_DATALAD`` environment variable + (1/True/on/yes to enable, 0/False/off/no to disable). + autoupdate: :class:`bool`, optional + Whether to automatically update the cache on first load. + Defaults to ``True`` or the value of the ``TEMPLATEFLOW_AUTOUPDATE`` + environment variable (1/True/on/yes to enable, 0/False/off/no to disable). + timeout: :class:`float`, optional + Timeout in seconds for network operations. Default is ``10.0`` seconds. + origin: :class:`str`, optional + Git repository URL for DataLad installations. Default is + <https://github.com/templateflow/templateflow.git>. + s3_root: :class:`str`, optional + Base URL for S3 downloads. Default is <https://templateflow.s3.amazonaws.com>. + cache: :class:`TemplateFlowCache`, optional + A pre-configured TemplateFlowCache instance. If provided, `root` and other + configuration keyword arguments cannot be used. + """ def __init__( self, root: os.PathLike[str] | str | None = None, From e7c9aae888c16f69d8617eeb602c104b43597db0 Mon Sep 17 00:00:00 2001 From: "Christopher J. 
Markiewicz" Date: Mon, 20 Oct 2025 12:58:21 -0400 Subject: [PATCH 18/20] fix: Coerce root to Path on init --- templateflow/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templateflow/client.py b/templateflow/client.py index 71dded4d..b10fcf08 100644 --- a/templateflow/client.py +++ b/templateflow/client.py @@ -85,7 +85,7 @@ def __init__( ): if cache is None: if root: - config_kwargs['root'] = root + config_kwargs['root'] = Path(root) cache = TemplateFlowCache(CacheConfig(**config_kwargs)) elif root or config_kwargs: raise ValueError( From 050fbf6c7ae1513c335e4a0c9d58982b94311e87 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Mon, 20 Oct 2025 17:08:02 -0400 Subject: [PATCH 19/20] Apply suggestions from code review Co-authored-by: Oscar Esteban --- templateflow/client.py | 2 +- templateflow/conf/__init__.py | 2 +- templateflow/conf/cache.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/templateflow/client.py b/templateflow/client.py index b10fcf08..365e90e6 100644 --- a/templateflow/client.py +++ b/templateflow/client.py @@ -29,7 +29,7 @@ from json import loads from pathlib import Path -from .conf.cache import CacheConfig, TemplateFlowCache +from templateflow.conf.cache import CacheConfig, TemplateFlowCache class TemplateFlowClient: diff --git a/templateflow/conf/__init__.py b/templateflow/conf/__init__.py index 14ff35e8..ab7c24f2 100644 --- a/templateflow/conf/__init__.py +++ b/templateflow/conf/__init__.py @@ -5,7 +5,7 @@ from acres import Loader -from .cache import CacheConfig, TemplateFlowCache +from templateflow.conf.cache import CacheConfig, TemplateFlowCache load_data = Loader(__spec__.name) diff --git a/templateflow/conf/cache.py b/templateflow/conf/cache.py index 238405c3..56cd1a52 100644 --- a/templateflow/conf/cache.py +++ b/templateflow/conf/cache.py @@ -5,7 +5,7 @@ from pathlib import Path from warnings import warn -from .env import env_to_bool, get_templateflow_home +from templateflow.conf.env 
import env_to_bool, get_templateflow_home TYPE_CHECKING = False if TYPE_CHECKING: From 050132de2444058f57dc305bdc3a4f54ce03bc5c Mon Sep 17 00:00:00 2001 From: "Christopher J. Markiewicz" Date: Mon, 20 Oct 2025 17:09:17 -0400 Subject: [PATCH 20/20] Add banner to conf.cache, use current year --- templateflow/client.py | 2 +- templateflow/conf/cache.py | 22 ++++++++++++++++++++++ templateflow/conf/env.py | 22 ++++++++++++++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/templateflow/client.py b/templateflow/client.py index 365e90e6..e0fc33ee 100644 --- a/templateflow/client.py +++ b/templateflow/client.py @@ -1,7 +1,7 @@ # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- # vi: set ft=python sts=4 ts=4 sw=4 et: # -# Copyright 2024 The NiPreps Developers +# Copyright 2025 The NiPreps Developers # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/templateflow/conf/cache.py b/templateflow/conf/cache.py index 56cd1a52..62e6ea1b 100644 --- a/templateflow/conf/cache.py +++ b/templateflow/conf/cache.py @@ -1,3 +1,25 @@ +# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- +# vi: set ft=python sts=4 ts=4 sw=4 et: +# +# Copyright 2025 The NiPreps Developers +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# We support and encourage derived works from this project, please read +# about our expectations at +# +# https://www.nipreps.org/community/licensing/ +# from __future__ import annotations from dataclasses import dataclass, field diff --git a/templateflow/conf/env.py b/templateflow/conf/env.py index d4bd6155..3623e56d 100644 --- a/templateflow/conf/env.py +++ b/templateflow/conf/env.py @@ -1,3 +1,25 @@ +# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- +# vi: set ft=python sts=4 ts=4 sw=4 et: +# +# Copyright 2025 The NiPreps Developers +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# We support and encourage derived works from this project, please read +# about our expectations at +# +# https://www.nipreps.org/community/licensing/ +# import os from functools import partial from pathlib import Path