From 432a0c8cace7bc3bcf225afac7611961a5a035b9 Mon Sep 17 00:00:00 2001 From: Benjamin Poldrack Date: Mon, 27 Aug 2018 09:00:37 +0200 Subject: [PATCH 1/9] Remove *_event.tsv files prefilled by heudiconv with a comment only --- datalad_hirni/commands/spec2bids.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/datalad_hirni/commands/spec2bids.py b/datalad_hirni/commands/spec2bids.py index 553caf3e..581c2137 100644 --- a/datalad_hirni/commands/spec2bids.py +++ b/datalad_hirni/commands/spec2bids.py @@ -25,6 +25,7 @@ from datalad.utils import assure_list from datalad.utils import rmtree +from datalad.coreapi import Remove from datalad_container import containers_run import datalad_hirni.support.hirni_heuristic as heuristic import logging @@ -166,8 +167,8 @@ def __call__(specfile, dataset=None, anonymize=False): for r in dataset.containers_run( ['heudiconv', - # XXX absolute path will make rerun on other system - # impossible -- hard to avoid + # XXX absolute path will make rerun on other + # system impossible -- hard to avoid '-f', heuristic.__file__, # leaves identifying info in run record '-s', replacements['bids_subject'], @@ -191,7 +192,7 @@ def __call__(specfile, dataset=None, anonymize=False): "conversion"), inputs=[replacements['location'], rel_spec_path], outputs=[dataset.path], - message="Convert DICOM data for subject {}" + message="[HIRNI] Convert DICOM data for subject {}" "".format(replacements['bids_subject']), return_type='generator', ): @@ -220,6 +221,14 @@ def __call__(specfile, dataset=None, anonymize=False): # remove superfluous heudiconv output rmtree(opj(dataset.path, rel_trash_path)) + # remove empty *_events.tsv files created by heudiconv + from glob import glob + dataset.remove(glob('**/*_events.tsv'), + recursive=True, + check=False, + message="[HIRNI] Remove empty *_event.tsv " + "files") + # run heudiconv only once ran_heudiconv = True From 63f781a5e2683ae79196c491dec4e7ec3bc15ad5 Mon Sep 17 00:00:00 2001 From: Benjamin Poldrack Date: Mon, 27 Aug 2018 10:58:03 +0200 Subject: [PATCH 2/9] RF: Use GitRepo.set_gitattributes() with 'setup_bids_dataset' procedure --- .../resources/procedures/setup_bids_dataset.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/datalad_hirni/resources/procedures/setup_bids_dataset.py b/datalad_hirni/resources/procedures/setup_bids_dataset.py index dcc7dcc2..dc5818a0 100644 --- a/datalad_hirni/resources/procedures/setup_bids_dataset.py +++ b/datalad_hirni/resources/procedures/setup_bids_dataset.py @@ -14,6 +14,8 @@ check_installed=True, purpose='BIDS dataset setup') +# TODO: This looks like it was supposed to be a default README but isn't used +# ATM. README_code = """\ All custom code goes into the directory. All scripts should be written such that they can be executed from the root of the dataset, and are only using @@ -36,18 +38,8 @@ ds.run_procedure(['cfg_metadatatypes', 'bids', 'nifti']) # amend gitattributes -for path in force_in_git: - abspath = op.join(ds.path, path) - d = op.dirname(abspath) - ga_path = op.join(d, '.gitattributes') \ - if op.exists(d) else op.join(ds.path, '.gitattributes') - with open(ga_path, 'a') as gaf: - gaf.write('{} annex.largefiles=nothing\n'.format( - op.relpath(abspath, start=d) if op.exists(d) else path)) - to_add.add(ga_path) +ds.repo.set_gitattributes([(path, {'annex.largefiles': 'nothing'}) + for path in force_in_git]) # leave clean -ds.add( - to_add, - message="Default BIDS dataset setup", -) +ds.add('.gitattributes', message="[HIRNI] Default BIDS dataset setup") From d8753f54664b266d4b15b0d49519507627649690 Mon Sep 17 00:00:00 2001 From: Benjamin Poldrack Date: Mon, 27 Aug 2018 11:53:20 +0200 Subject: [PATCH 3/9] RF: Deprecate create-study and replace with a respective procedure. (Closes #46) --- datalad_hirni/commands/create_study.py | 6 +++ .../procedures/setup_study_dataset.py | 54 +++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 datalad_hirni/resources/procedures/setup_study_dataset.py diff --git a/datalad_hirni/commands/create_study.py b/datalad_hirni/commands/create_study.py index 4f99991a..7f890b8e 100644 --- a/datalad_hirni/commands/create_study.py +++ b/datalad_hirni/commands/create_study.py @@ -44,6 +44,12 @@ def __call__( from datalad.distribution.dataset import Dataset from datalad.distribution.install import Install from datalad.distribution.siblings import Siblings + from datalad.support.exceptions import DeprecatedError + + raise DeprecatedError(new="use 'datalad create' and 'datalad " + "run-procedure setup_study_dataset' instead " + "to setup a HIRNI study dataset.", + msg="'hirni-create-study is deprecated.") import os diff --git a/datalad_hirni/resources/procedures/setup_study_dataset.py b/datalad_hirni/resources/procedures/setup_study_dataset.py new file mode 100644 index 00000000..0f1fa861 --- /dev/null +++ b/datalad_hirni/resources/procedures/setup_study_dataset.py @@ -0,0 +1,54 @@ +"""Procedure to apply a sensible default setup to a study dataset +""" + +import sys +from datalad.distribution.dataset import require_dataset + +# bound dataset methods +import datalad.distribution.add +import datalad.interface.save +from datalad.plugin.add_readme import AddReadme + +ds = require_dataset( + sys.argv[1], + check_installed=True, + purpose='study dataset setup') + + +force_in_git = [ + 'README', + 'CHANGES', + 'dataset_description.json', + '**/{}'.format(ds.config.get("datalad.hirni.studyspec.filename", + "studyspec.json")), +] + +# except for hand-picked global metadata, we want anything +# to go into the annex to be able to retract files after +# publication +ds.repo.set_gitattributes([('**', {'annex.largefiles': 'anything'})]) +ds.repo.set_gitattributes([(p, {'annex.largefiles': 'nothing'}) + for p in force_in_git]) + + +# TODO: +# Note: This default is using the DICOM's PatientID as the acquisition ID +# (directory name in the study dataset). That approach works for values +# accessible via the DICOM metadata directly. We probably want a way to apply +# more sophisticated rules, which could be achieved by a String Formatter +# providing more sophisticated operations like slicing (prob. to be shared with +# datalad's --output-format logic) or by apply specification rules prior to +# determining final location of the imported subdataset. The latter might lead +# to a mess, since import and specification routines would then be quite +# twisted. +ds.config.add('datalad.hirni.import.acquisition-format', + "{PatientID}", where='dataset') + +ds.save(message='[HIRNI] Default study dataset setup') + +# Include the most basic README to prevent heudiconv from adding one +ds.add_readme(filename='README', existing='fail') + + +# TODO: Reconsider using an import container and if so, link it herein. See +# now-deprecated hirni-create-study command From 026f5784de7de329ec3097a8fa7dfe3ae89f8b88 Mon Sep 17 00:00:00 2001 From: Benjamin Poldrack Date: Mon, 27 Aug 2018 13:28:09 +0200 Subject: [PATCH 4/9] BF: Correct import statement; use glob with recursive --- datalad_hirni/commands/spec2bids.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/datalad_hirni/commands/spec2bids.py b/datalad_hirni/commands/spec2bids.py index 581c2137..59f2f36e 100644 --- a/datalad_hirni/commands/spec2bids.py +++ b/datalad_hirni/commands/spec2bids.py @@ -25,7 +25,7 @@ from datalad.utils import assure_list from datalad.utils import rmtree -from datalad.coreapi import Remove +from datalad.coreapi import remove from datalad_container import containers_run import datalad_hirni.support.hirni_heuristic as heuristic import logging @@ -222,9 +222,8 @@ def __call__(specfile, dataset=None, anonymize=False): # remove superfluous heudiconv output rmtree(opj(dataset.path, rel_trash_path)) # remove empty *_events.tsv files created by heudiconv - from glob import glob - dataset.remove(glob('**/*_events.tsv'), - recursive=True, + import glob + dataset.remove(glob.glob('**/*_events.tsv', recursive=True), check=False, message="[HIRNI] Remove empty *_event.tsv " "files") From 5edf387a7ce00cf833a37482487182e50d7790e9 Mon Sep 17 00:00:00 2001 From: Benjamin Poldrack Date: Mon, 27 Aug 2018 14:21:27 +0200 Subject: [PATCH 5/9] BF: PY2 glob() doesn't know recursive and '**' --- datalad_hirni/commands/spec2bids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datalad_hirni/commands/spec2bids.py b/datalad_hirni/commands/spec2bids.py index 59f2f36e..3bbef53c 100644 --- a/datalad_hirni/commands/spec2bids.py +++ b/datalad_hirni/commands/spec2bids.py @@ -223,7 +223,7 @@ def __call__(specfile, dataset=None, anonymize=False): rmtree(opj(dataset.path, rel_trash_path)) # remove empty *_events.tsv files created by heudiconv import glob - dataset.remove(glob.glob('**/*_events.tsv', recursive=True), + dataset.remove(glob.glob('*/*/*_events.tsv'), check=False, message="[HIRNI] Remove empty *_event.tsv " "files") From 7eda528059da830199540cda3c1da1fb0faea81b Mon Sep 17 00:00:00 2001 From: Benjamin Poldrack Date: Mon, 27 Aug 2018 15:29:33 +0200 Subject: [PATCH 6/9] ENH: dicom2spec now sets 'converter' to None for duplicate runs except the on with highest ID --- datalad_hirni/commands/dicom2spec.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/datalad_hirni/commands/dicom2spec.py b/datalad_hirni/commands/dicom2spec.py index 2b3f625d..c1a9b820 100644 --- a/datalad_hirni/commands/dicom2spec.py +++ b/datalad_hirni/commands/dicom2spec.py @@ -242,6 +242,25 @@ def __call__(path=None, spec=None, dataset=None, subject=None, logger=lgr) return + # ignore duplicates (prob. reruns of aborted runs) + # -> convert highest id only + import datalad_hirni.support.hirni_heuristic as heuristic + spec = sorted(spec_series_list, + key=lambda x: heuristic.get_specval(x, 'id')) + for i in range(len(spec)): + if spec[i]["type"] == "dicomseries" and \ + heuristic.has_specval(spec[i], "converter") and \ + heuristic.get_specval(spec[i], "bids_run") in \ + [heuristic.get_specval(s, "bids_run") + for s in spec[i + 1:] + if heuristic.get_specval(s, + "description") == heuristic.get_specval( + spec[i], "description") and \ + heuristic.get_specval(s, + "id") > heuristic.get_specval( + spec[i], "id")]: + spec[i]["converter"] = dict(approved=True, value=None) + lgr.debug("Storing specification (%s)", spec) # store as a stream (one record per file) to be able to # easily concat files without having to parse them, or From 0b5d39fff6fb5ff73ebd4a2c697ace25040073f3 Mon Sep 17 00:00:00 2001 From: Benjamin Poldrack Date: Wed, 29 Aug 2018 14:20:43 +0200 Subject: [PATCH 7/9] Add debug message when ignoring dicomseries that seem to be repeated --- datalad_hirni/commands/dicom2spec.py | 1 + 1 file changed, 1 insertion(+) diff --git a/datalad_hirni/commands/dicom2spec.py b/datalad_hirni/commands/dicom2spec.py index c1a9b820..ca0bdc9e 100644 --- a/datalad_hirni/commands/dicom2spec.py +++ b/datalad_hirni/commands/dicom2spec.py @@ -259,6 +259,7 @@ def __call__(path=None, spec=None, dataset=None, subject=None, heuristic.get_specval(s, "id") > heuristic.get_specval( spec[i], "id")]: + lgr.debug("Set converter to None for SeriesNumber %s" % i) spec[i]["converter"] = dict(approved=True, value=None) lgr.debug("Storing specification (%s)", spec) From 28ebabfe4dc3e5a2bfe6ff46f6c4d22d5a51090e Mon Sep 17 00:00:00 2001 From: Benjamin Poldrack Date: Wed, 29 Aug 2018 15:05:55 +0200 Subject: [PATCH 8/9] BF: Name conflict --- datalad_hirni/commands/dicom2spec.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/datalad_hirni/commands/dicom2spec.py b/datalad_hirni/commands/dicom2spec.py index ca0bdc9e..41a3df28 100644 --- a/datalad_hirni/commands/dicom2spec.py +++ b/datalad_hirni/commands/dicom2spec.py @@ -245,22 +245,22 @@ def __call__(path=None, spec=None, dataset=None, subject=None, # ignore duplicates (prob. reruns of aborted runs) # -> convert highest id only import datalad_hirni.support.hirni_heuristic as heuristic - spec = sorted(spec_series_list, - key=lambda x: heuristic.get_specval(x, 'id')) - for i in range(len(spec)): - if spec[i]["type"] == "dicomseries" and \ - heuristic.has_specval(spec[i], "converter") and \ - heuristic.get_specval(spec[i], "bids_run") in \ + spec_series_list = sorted(spec_series_list, + key=lambda x: heuristic.get_specval(x, 'id')) + for i in range(len(spec_series_list)): + if spec_series_list[i]["type"] == "dicomseries" and \ + heuristic.has_specval(spec_series_list[i], "converter") and \ + heuristic.get_specval(spec_series_list[i], "bids_run") in \ [heuristic.get_specval(s, "bids_run") - for s in spec[i + 1:] + for s in spec_series_list[i + 1:] if heuristic.get_specval(s, "description") == heuristic.get_specval( - spec[i], "description") and \ + spec_series_list[i], "description") and \ heuristic.get_specval(s, "id") > heuristic.get_specval( - spec[i], "id")]: + spec_series_list[i], "id")]: lgr.debug("Set converter to None for SeriesNumber %s" % i) - spec[i]["converter"] = dict(approved=True, value=None) + spec_series_list[i]["converter"] = dict(approved=True, value=None) lgr.debug("Storing specification (%s)", spec) # store as a stream (one record per file) to be able to From 8955eb990dcdad077605dcf9d4b8ebc38b55be64 Mon Sep 17 00:00:00 2001 From: Benjamin Poldrack Date: Thu, 30 Aug 2018 14:03:44 +0200 Subject: [PATCH 9/9] BF: Don't use absolute paths in commit message of spec4anything --- datalad_hirni/commands/spec4anything.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/datalad_hirni/commands/spec4anything.py b/datalad_hirni/commands/spec4anything.py index df3220c7..2f10e5f6 100644 --- a/datalad_hirni/commands/spec4anything.py +++ b/datalad_hirni/commands/spec4anything.py @@ -269,8 +269,9 @@ def __call__(path, dataset=None, spec_file=None, properties=None, from os import linesep message = "[HIRNI] Add specification {n_snippets} for: {paths}".format( n_snippets=single_or_plural("snippet", "snippets", len(paths)), - paths=linesep.join(" - " + p['path'] for p in paths) - if len(paths) > 1 else paths[0]['path']) + paths=linesep.join(" - " + op.relpath(p['path'], dataset.path) + for p in paths) + if len(paths) > 1 else op.relpath(paths[0]['path'], dataset.path)) for r in dataset.add( updated_files, to_git=True,