diff --git a/MANIFEST.in b/MANIFEST.in index cb31a0e4..69533d73 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,7 @@ # data include templateflow/conf/config.json include templateflow/conf/templateflow-skel.zip +include templateflow/conf/templateflow-skel.md5 # misc include requirements.txt diff --git a/setup.cfg b/setup.cfg index d1df7d1e..b58c5f02 100644 --- a/setup.cfg +++ b/setup.cfg @@ -31,11 +31,6 @@ test_requires = packages = find: include_package_data = True -[options.package_data] -templateflow = - conf/config.json - conf/templateflow-skel.zip - [options.packages.find] exclude = *.tests diff --git a/setup.py b/setup.py index bb5b9966..e0a0512d 100644 --- a/setup.py +++ b/setup.py @@ -19,8 +19,8 @@ def make_cmdclass(basecmd): base_run = basecmd.run def new_run(self): - from templateflow.conf import update_home - update_home() + from templateflow.conf import setup_home + setup_home() base_run(self) basecmd.run = new_run diff --git a/templateflow/api.py b/templateflow/api.py index 964278e9..1a8898a9 100644 --- a/templateflow/api.py +++ b/templateflow/api.py @@ -1,6 +1,4 @@ -""" -TemplateFlow's Python Client -""" +"""TemplateFlow's Python Client.""" from json import loads from pathlib import Path import re diff --git a/templateflow/conf/__init__.py b/templateflow/conf/__init__.py index 0c3aa4ff..e6514d75 100644 --- a/templateflow/conf/__init__.py +++ b/templateflow/conf/__init__.py @@ -2,7 +2,6 @@ from os import getenv from warnings import warn from pathlib import Path -from pkg_resources import resource_filename TF_DEFAULT_HOME = Path.home() / '.cache' / 'templateflow' TF_HOME = Path(getenv('TEMPLATEFLOW_HOME', str(TF_DEFAULT_HOME))) @@ -29,48 +28,37 @@ install(path=str(TF_HOME), source=TF_GITHUB_SOURCE, recursive=True) if not TF_USE_DATALAD: - from zipfile import ZipFile - TF_HOME.mkdir(exist_ok=True, parents=True) - with ZipFile(resource_filename('templateflow', - 'conf/templateflow-skel.zip'), 'r') as zipref: - zipref.extractall(str(TF_HOME)) + from ._s3 import update as _update_s3 + _update_s3(TF_HOME, local=True, overwrite=True) -def update_home(force=False): +def update(local=False, overwrite=True): """Update an existing DataLad or S3 home.""" + if TF_USE_DATALAD and _update_datalad(): + return True + + from ._s3 import update as _update_s3 + return _update_s3(TF_HOME, local=local, overwrite=overwrite) + + +def setup_home(force=False): + """Initialize/update TF's home if necessary.""" if not force and not TF_CACHED: - print("""\ -TemplateFlow was not cached (TEMPLATEFLOW_HOME=%s), \ -a fresh initialization was done.""" % TF_HOME) + print(f"""\ +TemplateFlow was not cached (TEMPLATEFLOW_HOME={TF_HOME}), \ +a fresh initialization was done.""") return False + return update(local=True, overwrite=False) - if TF_USE_DATALAD: - from datalad.api import update - print("Updating TemplateFlow's HOME using DataLad ...") - try: - update(str(TF_HOME), recursive=True, merge=True) - except Exception as e: - warn("""Error updating TemplateFlow's home directory (using DataLad): -%s""" % str(e)) - return True - - # This is an S3 type of installation - from zipfile import ZipFile - with ZipFile(resource_filename('templateflow', - 'conf/templateflow-skel.zip'), 'r') as zipref: - allfiles = zipref.namelist() - current_files = [s.relative_to(TF_HOME) for s in TF_HOME.glob('**/*')] - existing = sorted(set(['%s/' % s.parent for s in current_files])) + \ - [str(s) for s in current_files] - newfiles = sorted(set(allfiles) - set(existing)) - if newfiles: - print("Updating TemplateFlow's HOME using S3. " - "Adding: \n%s" % "\n".join(newfiles)) - zipref.extractall(str(TF_HOME), members=newfiles) - return True - print("TemplateFlow's HOME directory (S3 type) was up-to-date.") - return False +def _update_datalad(): + from datalad.api import update + print("Updating TEMPLATEFLOW_HOME using DataLad ...") + try: + update(str(TF_HOME), recursive=True, merge=True) + except Exception as e: + warn(f"Error updating TemplateFlow's home directory (using DataLad): {e}") + return True TF_LAYOUT = None @@ -78,6 +66,6 @@ def update_home(force=False): from .bids import Layout TF_LAYOUT = Layout( TF_HOME, validate=False, config='templateflow', - ignore=['.git', '.datalad', '.gitannex', '.gitattributes', 'scripts']) + ignore=['.git', '.datalad', '.gitannex', '.gitattributes', '.github', 'scripts']) except ImportError: pass diff --git a/templateflow/conf/_s3.py b/templateflow/conf/_s3.py new file mode 100644 index 00000000..bf4abb39 --- /dev/null +++ b/templateflow/conf/_s3.py @@ -0,0 +1,70 @@ +"""Tooling to handle S3 downloads.""" +from pathlib import Path +from tempfile import mkstemp +from pkg_resources import resource_filename + +TF_SKEL_URL = ("https://raw.githubusercontent.com/templateflow/python-client/" + "{release}/templateflow/conf/templateflow-skel.{ext}").format +TF_SKEL_PATH = Path(resource_filename('templateflow', 'conf/templateflow-skel.zip')) +TF_SKEL_MD5 = Path(resource_filename( + 'templateflow', 'conf/templateflow-skel.md5')).read_text() + + +def update(dest, local=True, overwrite=True): + """Update an S3-backed TEMPLATEFLOW_HOME repository.""" + skel_file = Path( + (_get_skeleton_file() if not local else None) or TF_SKEL_PATH + ) + + retval = _update_skeleton( + skel_file, + dest, + overwrite=overwrite + ) + if skel_file != TF_SKEL_PATH: + skel_file.unlink() + return retval + + +def _get_skeleton_file(): + import requests + try: + r = requests.get(TF_SKEL_URL(release="master", ext="md5", allow_redirects=True)) + except requests.exceptions.ConnectionError: + return + + if not r.ok: + return + + if r.content.decode().split()[0] != TF_SKEL_MD5: + r = requests.get(TF_SKEL_URL(release="master", ext="zip", allow_redirects=True)) + if r.ok: + from os import close + fh, skel_file = mkstemp(suffix=".zip") + Path(skel_file).write_bytes(r.content) + close(fh) + return skel_file + + +def _update_skeleton(skel_file, dest, overwrite=True): + from zipfile import ZipFile + + dest = Path(dest) + dest.mkdir(exist_ok=True, parents=True) + with ZipFile(skel_file, 'r') as zipref: + if overwrite: + zipref.extractall(str(dest)) + return True + + allfiles = zipref.namelist() + current_files = [s.relative_to(dest) for s in dest.glob('**/*')] + existing = sorted(set(['%s/' % s.parent for s in current_files])) + \ + [str(s) for s in current_files] + newfiles = sorted(set(allfiles) - set(existing)) + if newfiles: + print("Updating TEMPLATEFLOW_HOME using S3. " + "Adding: \n%s" % "\n".join(newfiles)) + zipref.extractall(str(dest), members=newfiles) + return True + print("TEMPLATEFLOW_HOME directory (S3 type) was up-to-date.") + return False diff --git a/templateflow/conf/tests/test_conf.py b/templateflow/conf/tests/test_conf.py index e04e4244..70c174b6 100644 --- a/templateflow/conf/tests/test_conf.py +++ b/templateflow/conf/tests/test_conf.py @@ -24,7 +24,7 @@ def test_conf_init(monkeypatch, tmp_path, capsys, use_datalad): @pytest.mark.parametrize('use_datalad', ['off', 'on']) -def test_update_home(monkeypatch, tmp_path, capsys, use_datalad): +def test_setup_home(monkeypatch, tmp_path, capsys, use_datalad): """Check the correct functioning of the installation hook.""" home = (tmp_path / '-'.join(('tf', 'dl', use_datalad))).resolve() monkeypatch.setenv('TEMPLATEFLOW_USE_DATALAD', use_datalad) @@ -33,54 +33,54 @@ def test_update_home(monkeypatch, tmp_path, capsys, use_datalad): reload(tfc) # First execution, the S3 stub is created (or datalad install) assert tfc.TF_CACHED is False - assert tfc.update_home() is False + assert tfc.setup_home() is False out = capsys.readouterr()[0] assert out.startswith('TemplateFlow was not cached') assert ('TEMPLATEFLOW_HOME=%s' % home) in out assert home.exists() assert len(list(home.iterdir())) > 0 - updated = tfc.update_home(force=True) # Templateflow is now cached + updated = tfc.setup_home(force=True) # Templateflow is now cached out = capsys.readouterr()[0] assert not out.startswith('TemplateFlow was not cached') if use_datalad == 'on': - assert out.startswith("Updating TemplateFlow's HOME using DataLad") + assert out.startswith("Updating TEMPLATEFLOW_HOME using DataLad") assert updated is True elif use_datalad == 'off': # At this point, S3 should be up-to-date assert updated is False - assert out.startswith("TemplateFlow's HOME directory (S3 type) was up-to-date.") + assert out.startswith("TEMPLATEFLOW_HOME directory (S3 type) was up-to-date.") # Let's force an update rmtree(str(home / 'tpl-MNI152NLin2009cAsym')) - updated = tfc.update_home(force=True) + updated = tfc.setup_home(force=True) out = capsys.readouterr()[0] assert updated is True - assert out.startswith("Updating TemplateFlow's HOME using S3.") + assert out.startswith("Updating TEMPLATEFLOW_HOME using S3.") reload(tfc) assert tfc.TF_CACHED is True - updated = tfc.update_home() # Templateflow is now cached + updated = tfc.setup_home() # Templateflow is now cached out = capsys.readouterr()[0] assert not out.startswith('TemplateFlow was not cached') if use_datalad == 'on': - assert out.startswith("Updating TemplateFlow's HOME using DataLad") + assert out.startswith("Updating TEMPLATEFLOW_HOME using DataLad") assert updated is True elif use_datalad == 'off': # At this point, S3 should be up-to-date assert updated is False - assert out.startswith("TemplateFlow's HOME directory (S3 type) was up-to-date.") + assert out.startswith("TEMPLATEFLOW_HOME directory (S3 type) was up-to-date.") # Let's force an update rmtree(str(home / 'tpl-MNI152NLin2009cAsym')) - updated = tfc.update_home() + updated = tfc.setup_home() out = capsys.readouterr()[0] assert updated is True - assert out.startswith("Updating TemplateFlow's HOME using S3.") + assert out.startswith("Updating TEMPLATEFLOW_HOME using S3.") def test_layout(monkeypatch, tmp_path): diff --git a/templateflow/conf/tests/test_s3.py b/templateflow/conf/tests/test_s3.py new file mode 100644 index 00000000..5de26aba --- /dev/null +++ b/templateflow/conf/tests/test_s3.py @@ -0,0 +1,48 @@ +# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- +# vi: set ft=python sts=4 ts=4 sw=4 et: +"""Check S3-type repo tooling.""" +# import pytest +from pathlib import Path +import requests +from .. import _s3 as s3 + + +def test_get_skel_file(monkeypatch): + """Exercise the skeleton file generation.""" + local_md5 = s3.TF_SKEL_MD5 + monkeypatch.setattr(s3, 'TF_SKEL_MD5', "invent") + new_skel = s3._get_skeleton_file() + assert new_skel is not None + assert Path(new_skel).exists() + assert Path(new_skel).stat().st_size > 0 + + latest_md5 = requests.get(s3.TF_SKEL_URL( + release="master", ext="md5", allow_redirects=True)).content.decode().split()[0] + monkeypatch.setattr(s3, 'TF_SKEL_MD5', latest_md5) + assert s3._get_skeleton_file() is None + + monkeypatch.setattr(s3, 'TF_SKEL_MD5', local_md5) + monkeypatch.setattr(s3, 'TF_SKEL_URL', "http://weird/{release}/{ext}".format) + assert s3._get_skeleton_file() is None + + monkeypatch.setattr(s3, 'TF_SKEL_URL', + s3.TF_SKEL_URL(release="{release}", ext="{ext}z").format) + assert s3._get_skeleton_file() is None + + +def test_update_s3(tmp_path, monkeypatch): + """Exercise updating the S3 skeleton.""" + newhome = tmp_path / 'templateflow' + assert s3.update(newhome) + assert not s3.update(newhome, overwrite=False) + for p in (newhome / 'tpl-MNI152NLin6Sym').glob("*.nii.gz"): + p.unlink() + assert s3.update(newhome, overwrite=False) + + # This should cover the remote zip file fetching + monkeypatch.setattr(s3, 'TF_SKEL_MD5', "invent") + assert s3.update(newhome, local=False) + assert not s3.update(newhome, local=False, overwrite=False) + for p in (newhome / 'tpl-MNI152NLin6Sym').glob("*.nii.gz"): + p.unlink() + assert s3.update(newhome, local=False, overwrite=False)