diff --git a/.circleci/auto-cache-timestamp b/.circleci/auto-cache-timestamp deleted file mode 100644 index 50b266a34f..0000000000 --- a/.circleci/auto-cache-timestamp +++ /dev/null @@ -1 +0,0 @@ -2019-04-19 15:05:58.522213 \ No newline at end of file diff --git a/.circleci/clean-cache.py b/.circleci/clean-cache.py index 0b05f6f4d1..297df18b0a 100755 --- a/.circleci/clean-cache.py +++ b/.circleci/clean-cache.py @@ -7,7 +7,7 @@ def update_cache_timestamp(timestamp_filename): - """ Updates the contents of the manual-cache-timestamp file + """ Updates the contents of the docs-cache-timestamp file with current timestamp. Returns @@ -22,5 +22,5 @@ def update_cache_timestamp(timestamp_filename): if __name__ == '__main__': - update_cache_timestamp('manual-cache-timestamp') - update_cache_timestamp('auto-cache-timestamp') + update_cache_timestamp('docs-cache-timestamp') + update_cache_timestamp('packages-cache-timestamp') diff --git a/.circleci/config.yml b/.circleci/config.yml index 9ec84ad53f..094db28481 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -3,42 +3,46 @@ # It doesn't operate on master branch. New branches are always built from scratch. # full-build always rebuilds from scratch, without any cache. Only for changes in master branch. -version: 2 +version: 2.1 -jobs: - quick-build: - docker: - - image: circleci/python:3.6 - environment: - DISTRIB: "conda" - PYTHON_VERSION: "3.6" - NUMPY_VERSION: "*" - SCIPY_VERSION: "*" - SCIKIT_LEARN_VERSION: "*" - JOBLIB_VERSION: "*" - MATPLOTLIB_VERSION: "*" +commands: + preinstall: + description: "Cleans up unused packages; Updates system packages" + steps: + - run: + name: Remove conflicting packages + command: | + # Get rid of existing virtualenvs on circle ci as they conflict with conda. 
+ # Trick found here: + # https://discuss.circleci.com/t/disable-autodetection-of-project-or-application-of-python-venv/235/10 + cd && rm -rf ~/.pyenv && rm -rf ~/virtualenvs + + # We need to remove conflicting texlive packages. + sudo -E apt-get -yq remove texlive-binaries --purge + - run: + name: Install packages for make -C doc check + command: | + # Installing required packages for `make -C doc check command` to work. + sudo -E apt-get -yq update + sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install dvipng texlive-latex-base texlive-latex-extra + + restore_from_cache: + description: "Restores the cache of previously built docs & python packages if present" steps: - - checkout - # Get rid of existing virtualenvs on circle ci as they conflict with conda. - # Trick found here: - # https://discuss.circleci.com/t/disable-autodetection-of-project-or-application-of-python-venv/235/10 - - run: cd && rm -rf ~/.pyenv && rm -rf ~/virtualenvs - # We need to remove conflicting texlive packages. - - run: sudo -E apt-get -yq remove texlive-binaries --purge - # Installing required packages for `make -C doc check command` to work. - - run: sudo -E apt-get -yq update - - run: sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install dvipng texlive-latex-base texlive-latex-extra - run: - name: Today & Week # Saving today's date and current week of the year in files to generate daily & weekly new cache key respectively. 
+ name: Generate cache keys from today's date for built docs & week number for python packages command: | date +%F > today date +%U > week_num - restore_cache: - key: v1-packages+datasets-{{ checksum "week_num" }} + key: v1-packages+datasets-{{ checksum "week_num" }}-{{ checksum ".circleci/packages-cache-timestamp" }} - restore_cache: - key: v1-docs-{{ .Branch }}-{{ checksum "today" }}-{{ checksum ".circleci/manual-cache-timestamp" }} + key: v1-docs-{{ .Branch }}-{{ checksum "today" }}-{{ checksum ".circleci/docs-cache-timestamp" }} + cache_aware_conda_setup: + description: "Downloads & installs conda if not restored by cache" + steps: - run: name: Download & install conda if absent command: | @@ -64,91 +68,109 @@ jobs: else conda create -n testenv -yq fi + + cache_ignorant_conda_setup: + description: "Downloads & installs only the fresh copy of conda." + steps: + - run: + name: setup conda afresh + command: | + wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh + chmod +x ~/miniconda.sh && ~/miniconda.sh -b + echo 'export PATH="$HOME/miniconda3/bin:$PATH"' >> $BASH_ENV + - run: conda create -n testenv + + install_dependencies: + description: "Installs the necessary Python packages" + steps: - run: name: Install packages in conda env command: | - conda install -n testenv python=3.6 numpy scipy scikit-learn matplotlib pandas \ + conda install -n testenv python=3.7 numpy scipy scikit-learn matplotlib pandas \ lxml mkl sphinx numpydoc pillow pandas -yq conda install -n testenv nibabel sphinx-gallery junit-xml -c conda-forge -yq + + build_docs: + description: "Installs Nilearn & builds documentation using Sphinx's make html-strict" + steps: - run: - name: Running CircleCI test (make html) + name: Building documentation command: | source activate testenv pip install -e . 
set -o pipefail && cd doc && make html-strict 2>&1 | tee log.txt no_output_timeout: 7h + + store_results: + description: "Stores build times and artifacts" + steps: - store_test_results: path: doc/_build/test-results - store_artifacts: path: doc/_build/test-results + - store_artifacts: + path: doc/_build/html + - store_artifacts: + path: coverage + - store_artifacts: + path: doc/log.txt + save_to_cache: + description: "Caches the downloaded packages & built docs." + steps: - save_cache: key: v1-packages+datasets-{{ checksum "week_num" }} paths: - ../nilearn_data - ../miniconda3 - save_cache: - key: v1-docs-{{ .Branch }}-{{ checksum "today" }}-{{ checksum ".circleci/manual-cache-timestamp" }} + key: v1-docs-{{ .Branch }}-{{ checksum "today" }}-{{ checksum ".circleci/docs-cache-timestamp" }} paths: - doc - - store_artifacts: - path: doc/_build/html - - store_artifacts: - path: coverage - - store_artifacts: - path: doc/log.txt +jobs: - full-build: + quick-build: docker: - - image: circleci/python:3.6 + - image: circleci/python:3.7 environment: DISTRIB: "conda" - PYTHON_VERSION: "3.6" + PYTHON_VERSION: "3.7" NUMPY_VERSION: "*" SCIPY_VERSION: "*" SCIKIT_LEARN_VERSION: "*" + JOBLIB_VERSION: "*" MATPLOTLIB_VERSION: "*" steps: - checkout - # Get rid of existing virtualenvs on circle ci as they conflict with conda. - # Trick found here: - # https://discuss.circleci.com/t/disable-autodetection-of-project-or-application-of-python-venv/235/10 - - run: cd && rm -rf ~/.pyenv && rm -rf ~/virtualenvs - # We need to remove conflicting texlive packages. - - run: sudo -E apt-get -yq remove texlive-binaries --purge - # Installing required packages for `make -C doc check command` to work. 
- - run: sudo -E apt-get -yq update - - run: sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install dvipng texlive-latex-base texlive-latex-extra - - run: wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh - - run: chmod +x ~/miniconda.sh && ~/miniconda.sh -b - - run: echo 'export PATH="$HOME/miniconda3/bin:$PATH"' >> $BASH_ENV - - run: - name: Install packages in conda env - command: | - conda create -n testenv python=3.6 numpy scipy scikit-learn matplotlib pandas \ - lxml mkl sphinx numpydoc pillow pandas -yq - conda install -n testenv nibabel sphinx-gallery junit-xml -c conda-forge -yq - - run: - name: Running CircleCI test (make html) - command: | - source activate testenv - pip install -e . - set -o pipefail && cd doc && make html-strict 2>&1 | tee log.txt - no_output_timeout: 7h - - store_test_results: - path: doc/_build/test-results - - store_artifacts: - path: doc/_build/test-results + - preinstall + - restore_from_cache + - cache_aware_conda_setup + - install_dependencies + - build_docs + - store_results + - save_to_cache - - store_artifacts: - path: doc/_build/html - - store_artifacts: - path: coverage - - store_artifacts: - path: doc/log.txt + full-build: + docker: + - image: circleci/python:3.7 + environment: + DISTRIB: "conda" + PYTHON_VERSION: "3.7" + NUMPY_VERSION: "*" + SCIPY_VERSION: "*" + SCIKIT_LEARN_VERSION: "*" + MATPLOTLIB_VERSION: "*" + + steps: + - checkout + - preinstall + - cache_ignorant_conda_setup + - install_dependencies + - build_docs + - store_results workflows: diff --git a/.circleci/docs-cache-timestamp b/.circleci/docs-cache-timestamp new file mode 100644 index 0000000000..972236cedd --- /dev/null +++ b/.circleci/docs-cache-timestamp @@ -0,0 +1 @@ +2019-11-07 13:17:41.900352 \ No newline at end of file diff --git a/.circleci/manual-cache-timestamp b/.circleci/manual-cache-timestamp deleted file mode 100644 index e3790b2eeb..0000000000 --- 
a/.circleci/manual-cache-timestamp +++ /dev/null @@ -1 +0,0 @@ -2019-04-19 15:05:58.522064 \ No newline at end of file diff --git a/.circleci/packages-cache-timestamp b/.circleci/packages-cache-timestamp new file mode 100644 index 0000000000..ed02d172e6 --- /dev/null +++ b/.circleci/packages-cache-timestamp @@ -0,0 +1 @@ +2019-11-07 13:17:41.900617 \ No newline at end of file diff --git a/doc/Makefile b/doc/Makefile index f5e4288c71..f62e78c9c6 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -2,7 +2,7 @@ # # You can set these variables from the command line. -SPHINXOPTS = +SPHINXOPTS = -v SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build diff --git a/nilearn/datasets/func.py b/nilearn/datasets/func.py index 390b149c98..d3e0307130 100644 --- a/nilearn/datasets/func.py +++ b/nilearn/datasets/func.py @@ -1910,7 +1910,8 @@ def _fetch_development_fmri_participants(data_dir, url, verbose): return participants -def _fetch_development_fmri_functional(participants, data_dir, url, verbose): +def _fetch_development_fmri_functional(participants, data_dir, url, resume, + verbose): """Helper function to fetch_development_fmri. This function helps in downloading functional MRI data in Nifti @@ -1932,6 +1933,9 @@ def _fetch_development_fmri_functional(participants, data_dir, url, verbose): Override download URL. Used for test only (or if you setup a mirror of the data). Default: None + resume: bool, optional (default True) + Whether to resume download of a partly-downloaded file. + verbose: int Defines the level of verbosity of the output. 
@@ -1981,13 +1985,15 @@ def _fetch_development_fmri_functional(participants, data_dir, url, verbose): func_url = url.format(this_osf_id['key_b'][0]) func_file = [(func.format(participant_id, participant_id), func_url, {'move': func.format(participant_id)})] - path_to_func = _fetch_files(data_dir, func_file, verbose=verbose)[0] + path_to_func = _fetch_files(data_dir, func_file, resume=resume, + verbose=verbose)[0] funcs.append(path_to_func) return funcs, regressors def fetch_development_fmri(n_subjects=None, reduce_confounds=True, - data_dir=None, resume=True, verbose=1): + data_dir=None, resume=True, verbose=1, + age_group='both'): """Fetch movie watching based brain development dataset (fMRI) The data is downsampled to 4mm resolution for convenience. The origin of @@ -2019,6 +2025,12 @@ def fetch_development_fmri(n_subjects=None, reduce_confounds=True, verbose: int, optional (default 1) Defines the level of verbosity of the output. + age_group: str, optional (default 'both') + Which age group to fetch + - 'adults' = fetch adults only (n=33, ages 18-39) + - 'child' = fetch children only (n=122, ages 3-12) + - 'both' = fetch full sample (n=155) + Returns ------- data: Bunch @@ -2044,6 +2056,10 @@ def fetch_development_fmri(n_subjects=None, reduce_confounds=True, Preprocessing details: https://osf.io/wjtyq/ + Note that if n_subjects > 2, and age_group is 'both', + fetcher will return a ratio of children and adults representative + of the total sample. + References ---------- Please cite this paper if you are using this dataset. @@ -2070,52 +2086,104 @@ def fetch_development_fmri(n_subjects=None, reduce_confounds=True, url=None, verbose=verbose) - max_subjects = len(participants) - if n_subjects is None: - n_subjects = max_subjects - - if (isinstance(n_subjects, numbers.Number) and - ((n_subjects > max_subjects) or (n_subjects < 1))): - warnings.warn("Wrong value for n_subjects={0}. 
The maximum " - "value will be used instead n_subjects={1}" - .format(n_subjects, max_subjects)) - n_subjects = max_subjects + adult_count, child_count = _filter_func_regressors_by_participants( + participants, age_group) # noqa: E126 + max_subjects = adult_count + child_count - # Download functional and regressors based on participants - child_count = participants['Child_Adult'].tolist().count('child') - adult_count = participants['Child_Adult'].tolist().count('adult') + n_subjects = _set_invalid_n_subjects_to_max(n_subjects, + max_subjects, + age_group) # To keep the proportion of children versus adults - n_child = np.round(float(n_subjects) / max_subjects * child_count).astype(int) - n_adult = np.round(float(n_subjects) / max_subjects * adult_count).astype(int) + percent_total = float(n_subjects) / max_subjects + n_child = np.round(percent_total * child_count).astype(int) + n_adult = np.round(percent_total * adult_count).astype(int) - # First, restrict the csv files to the adequate number of subjects - child_ids = participants[participants['Child_Adult'] == - 'child']['participant_id'][:n_child] - adult_ids = participants[participants['Child_Adult'] == - 'adult']['participant_id'][:n_adult] - ids = np.hstack([child_ids, adult_ids]) - participants = participants[np.in1d(participants['participant_id'], - ids)] + # We want to return adults by default (i.e., `age_group=both`) or + # if explicitly requested. 
+ if (age_group != 'child') and (n_subjects == 1): + n_adult, n_child = 1, 0 + + if (age_group == 'both') and (n_subjects == 2): + n_adult, n_child = 1, 1 + + participants = _filter_csv_by_n_subjects(participants, n_adult, n_child) funcs, regressors = _fetch_development_fmri_functional(participants, data_dir=data_dir, url=None, + resume=resume, verbose=verbose) if reduce_confounds: - reduced_regressors = [] - for in_file in regressors: - out_file = in_file.replace('desc-confounds', - 'desc-reducedConfounds') - if not os.path.isfile(out_file): - confounds = np.recfromcsv(in_file, delimiter='\t') - selected_confounds = confounds[keep_confounds] - header = '\t'.join(selected_confounds.dtype.names) - np.savetxt(out_file, np.array(selected_confounds.tolist()), - header=header, delimiter='\t', comments='') - reduced_regressors.append(out_file) - regressors = reduced_regressors - + regressors = _reduce_confounds(regressors, keep_confounds) return Bunch(func=funcs, confounds=regressors, phenotypic=participants, description=fdescr) + + +def _filter_func_regressors_by_participants(participants, age_group): + """ Filter functional and regressors based on participants + """ + valid_age_groups = ('both', 'child', 'adult') + if age_group not in valid_age_groups: + raise ValueError("Wrong value for age_group={0}. 
" + "Valid arguments are: {1}".format(age_group, + valid_age_groups) + ) + + child_adult = participants['Child_Adult'].tolist() + + if age_group != 'adult': + child_count = child_adult.count('child') + else: + child_count = 0 + + if age_group != 'child': + adult_count = child_adult.count('adult') + else: + adult_count = 0 + return adult_count, child_count + + +def _filter_csv_by_n_subjects(participants, n_adult, n_child): + """Restrict the csv files to the adequate number of subjects + """ + child_ids = participants[participants['Child_Adult'] == + 'child']['participant_id'][:n_child] + adult_ids = participants[participants['Child_Adult'] == + 'adult']['participant_id'][:n_adult] + ids = np.hstack([adult_ids, child_ids]) + participants = participants[np.in1d(participants['participant_id'], ids)] + participants = participants[np.argsort(participants, order='Child_Adult')] + return participants + + +def _set_invalid_n_subjects_to_max(n_subjects, max_subjects, age_group): + """ If n_subjects is invalid, sets it to max. + """ + if n_subjects is None: + n_subjects = max_subjects + + if (isinstance(n_subjects, numbers.Number) and + ((n_subjects > max_subjects) or (n_subjects < 1))): + warnings.warn("Wrong value for n_subjects={0}. 
The maximum " + "value (for age_group={1}) will be used instead: " + "n_subjects={2}" + .format(n_subjects, age_group, max_subjects)) + n_subjects = max_subjects + return n_subjects + + +def _reduce_confounds(regressors, keep_confounds): + reduced_regressors = [] + for in_file in regressors: + out_file = in_file.replace('desc-confounds', + 'desc-reducedConfounds') + if not os.path.isfile(out_file): + confounds = np.recfromcsv(in_file, delimiter='\t') + selected_confounds = confounds[keep_confounds] + header = '\t'.join(selected_confounds.dtype.names) + np.savetxt(out_file, np.array(selected_confounds.tolist()), + header=header, delimiter='\t', comments='') + reduced_regressors.append(out_file) + return reduced_regressors diff --git a/nilearn/datasets/tests/test_func.py b/nilearn/datasets/tests/test_func.py index 8f7de18ea4..d9ad7e19e5 100644 --- a/nilearn/datasets/tests/test_func.py +++ b/nilearn/datasets/tests/test_func.py @@ -6,10 +6,13 @@ import os import uuid + import numpy as np import json import nibabel import gzip + +import pytest from sklearn.utils import check_random_state from nose import with_setup @@ -609,6 +612,7 @@ def test_fetch_development_fmri_functional(): funcs, confounds = func._fetch_development_fmri_functional(csv, data_dir=tst.tmpdir, url=local_url, + resume=True, verbose=1) assert_equal(len(funcs), 8) assert_equal(len(confounds), 8) @@ -633,3 +637,47 @@ def test_fetch_development_fmri(): verbose=1) confounds = np.recfromcsv(data.confounds[0], delimiter='\t') assert_equal(len(confounds[0]), 28) + + # check first subject is an adult + data = func.fetch_development_fmri(n_subjects=1, reduce_confounds=False, + verbose=1) + age_group = data.phenotypic['Child_Adult'][0] + assert_equal(age_group, 'adult') + + # check first subject is an child if requested with age_group + data = func.fetch_development_fmri(n_subjects=1, reduce_confounds=False, + verbose=1, age_group='child') + age_group = data.phenotypic['Child_Adult'][0] + assert_equal(age_group, 
'child') + + # check one of each age group returned if n_subjects == 2 + # and age_group == 'both' + data = func.fetch_development_fmri(n_subjects=2, reduce_confounds=False, + verbose=1, age_group='both') + age_group = data.phenotypic['Child_Adult'] + assert(all(age_group == ['adult', 'child'])) + + # check age_group + data = func.fetch_development_fmri(n_subjects=2, reduce_confounds=False, + verbose=1, age_group='child') + assert(all([x == 'child' for x in data.phenotypic['Child_Adult']])) + + +@with_setup(tst.setup_tmpdata, tst.teardown_tmpdata) +def test_fetch_development_fmri_invalid_n_subjects(): + max_subjects = 155 + n_subjects = func._set_invalid_n_subjects_to_max(n_subjects=None, + max_subjects=max_subjects, + age_group='adult') + assert n_subjects == max_subjects + with pytest.warns(UserWarning, match='Wrong value for n_subjects='): + func._set_invalid_n_subjects_to_max(n_subjects=-1, + max_subjects=max_subjects, + age_group='adult') + + +@with_setup(tst.setup_tmpdata, tst.teardown_tmpdata) +def test_fetch_development_fmri_exception(): + with pytest.raises(ValueError, match='Wrong value for age_group'): + func._filter_func_regressors_by_participants(participants='junk', + age_group='junk for test') diff --git a/setup.cfg b/setup.cfg index 0a9f856f10..63789ed5bf 100644 --- a/setup.cfg +++ b/setup.cfg @@ -7,7 +7,8 @@ doc-files = doc # For PEP8 error codes see # http://pep8.readthedocs.org/en/latest/intro.html#error-codes # E402: module level import not at top of file -ignore=E402 +# W504: line break after binary operator +ignore=E402, W504 [tool:pytest] doctest_optionflags = NORMALIZE_WHITESPACE ELLIPSIS