Skip to content
This repository has been archived by the owner on Nov 2, 2021. It is now read-only.

Commit

Permalink
Merge pull request #62 from bpoldrack/enh-conversion
Browse files Browse the repository at this point in the history
ENH: conversion
  • Loading branch information
bpoldrack committed Aug 30, 2018
2 parents ab102ac + 8955eb9 commit e9fe249
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 18 deletions.
6 changes: 6 additions & 0 deletions datalad_hirni/commands/create_study.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ def __call__(
from datalad.distribution.dataset import Dataset
from datalad.distribution.install import Install
from datalad.distribution.siblings import Siblings
from datalad.support.exceptions import DeprecatedError

raise DeprecatedError(new="use 'datalad create' and 'datalad "
"run-procedure setup_study_dataset' instead "
"to setup a HIRNI study dataset.",
msg="'hirni-create-study is deprecated.")

import os

Expand Down
20 changes: 20 additions & 0 deletions datalad_hirni/commands/dicom2spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,26 @@ def __call__(path=None, spec=None, dataset=None, subject=None,
logger=lgr)
return

# ignore duplicates (prob. reruns of aborted runs)
# -> convert highest id only
import datalad_hirni.support.hirni_heuristic as heuristic
spec_series_list = sorted(spec_series_list,
key=lambda x: heuristic.get_specval(x, 'id'))
for i in range(len(spec_series_list)):
if spec_series_list[i]["type"] == "dicomseries" and \
heuristic.has_specval(spec_series_list[i], "converter") and \
heuristic.get_specval(spec_series_list[i], "bids_run") in \
[heuristic.get_specval(s, "bids_run")
for s in spec_series_list[i + 1:]
if heuristic.get_specval(s,
"description") == heuristic.get_specval(
spec_series_list[i], "description") and \
heuristic.get_specval(s,
"id") > heuristic.get_specval(
spec_series_list[i], "id")]:
lgr.debug("Set converter to None for SeriesNumber %s" % i)
spec_series_list[i]["converter"] = dict(approved=True, value=None)

lgr.debug("Storing specification (%s)", spec)
# store as a stream (one record per file) to be able to
# easily concat files without having to parse them, or
Expand Down
14 changes: 11 additions & 3 deletions datalad_hirni/commands/spec2bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from datalad.utils import assure_list
from datalad.utils import rmtree

from datalad.coreapi import remove
from datalad_container import containers_run
import datalad_hirni.support.hirni_heuristic as heuristic
import logging
Expand Down Expand Up @@ -166,8 +167,8 @@ def __call__(specfile, dataset=None, anonymize=False):

for r in dataset.containers_run(
['heudiconv',
# XXX absolute path will make rerun on other system
# impossible -- hard to avoid
# XXX absolute path will make rerun on other
# system impossible -- hard to avoid
'-f', heuristic.__file__,
# leaves identifying info in run record
'-s', replacements['bids_subject'],
Expand All @@ -191,7 +192,7 @@ def __call__(specfile, dataset=None, anonymize=False):
"conversion"),
inputs=[replacements['location'], rel_spec_path],
outputs=[dataset.path],
message="Convert DICOM data for subject {}"
message="[HIRNI] Convert DICOM data for subject {}"
"".format(replacements['bids_subject']),
return_type='generator',
):
Expand Down Expand Up @@ -220,6 +221,13 @@ def __call__(specfile, dataset=None, anonymize=False):

# remove superfluous heudiconv output
rmtree(opj(dataset.path, rel_trash_path))
# remove empty *_events.tsv files created by heudiconv
import glob
dataset.remove(glob.glob('*/*/*_events.tsv'),
check=False,
message="[HIRNI] Remove empty *_event.tsv "
"files")

# run heudiconv only once
ran_heudiconv = True

Expand Down
5 changes: 3 additions & 2 deletions datalad_hirni/commands/spec4anything.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,8 +269,9 @@ def __call__(path, dataset=None, spec_file=None, properties=None,
from os import linesep
message = "[HIRNI] Add specification {n_snippets} for: {paths}".format(
n_snippets=single_or_plural("snippet", "snippets", len(paths)),
paths=linesep.join(" - " + p['path'] for p in paths)
if len(paths) > 1 else paths[0]['path'])
paths=linesep.join(" - " + op.relpath(p['path'], dataset.path)
for p in paths)
if len(paths) > 1 else op.relpath(paths[0]['path'], dataset.path))
for r in dataset.add(
updated_files,
to_git=True,
Expand Down
18 changes: 5 additions & 13 deletions datalad_hirni/resources/procedures/setup_bids_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
check_installed=True,
purpose='BIDS dataset setup')

# TODO: This looks like it was supposed to be a default README but isn't used
# ATM.
README_code = """\
All custom code goes into the directory. All scripts should be written such
that they can be executed from the root of the dataset, and are only using
Expand All @@ -36,18 +38,8 @@
ds.run_procedure(['cfg_metadatatypes', 'bids', 'nifti'])

# amend gitattributes
for path in force_in_git:
abspath = op.join(ds.path, path)
d = op.dirname(abspath)
ga_path = op.join(d, '.gitattributes') \
if op.exists(d) else op.join(ds.path, '.gitattributes')
with open(ga_path, 'a') as gaf:
gaf.write('{} annex.largefiles=nothing\n'.format(
op.relpath(abspath, start=d) if op.exists(d) else path))
to_add.add(ga_path)
ds.repo.set_gitattributes([(path, {'annex.largefiles': 'nothing'})
for path in force_in_git])

# leave clean
ds.add(
to_add,
message="Default BIDS dataset setup",
)
ds.add('.gitattributes', message="[HIRNI] Default BIDS dataset setup")
54 changes: 54 additions & 0 deletions datalad_hirni/resources/procedures/setup_study_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""Procedure to apply a sensible default setup to a study dataset

Intended to be run via datalad's procedure mechanism; the target dataset's
path is passed as the first command line argument (``sys.argv[1]``).
"""

import sys
from datalad.distribution.dataset import require_dataset

# bound dataset methods
import datalad.distribution.add
import datalad.interface.save
from datalad.plugin.add_readme import AddReadme

# Resolve the dataset from the command line argument; raises if the
# dataset is not installed.
ds = require_dataset(
    sys.argv[1],
    check_installed=True,
    purpose='study dataset setup')


# Paths/patterns that should be committed directly to git rather than the
# annex. The study specification filename is configurable via
# 'datalad.hirni.studyspec.filename' and defaults to 'studyspec.json';
# the '**/' pattern matches it anywhere in the dataset tree.
force_in_git = [
    'README',
    'CHANGES',
    'dataset_description.json',
    '**/{}'.format(ds.config.get("datalad.hirni.studyspec.filename",
                                 "studyspec.json")),
]

# except for hand-picked global metadata, we want anything
# to go into the annex to be able to retract files after
# publication
ds.repo.set_gitattributes([('**', {'annex.largefiles': 'anything'})])
# Exempt the hand-picked files above from annexing; written after the
# catch-all rule (presumably so the more specific entries take
# precedence in .gitattributes — later matching rules win).
ds.repo.set_gitattributes([(p, {'annex.largefiles': 'nothing'})
                           for p in force_in_git])


# TODO:
# Note: This default is using the DICOM's PatientID as the acquisition ID
# (directory name in the study dataset). That approach works for values
# accessible via the DICOM metadata directly. We probably want a way to apply
# more sophisticated rules, which could be achieved by a String Formatter
# providing more sophisticated operations like slicing (prob. to be shared with
# datalad's --output-format logic) or by apply specification rules prior to
# determining final location of the imported subdataset. The latter might lead
# to a mess, since import and specification routines would then be quite
# twisted.
ds.config.add('datalad.hirni.import.acquisition-format',
              "{PatientID}", where='dataset')

# Commit the configuration/.gitattributes changes made so far.
ds.save(message='[HIRNI] Default study dataset setup')

# Include the most basic README to prevent heudiconv from adding one
# ('existing=fail' guards against clobbering a pre-existing README).
ds.add_readme(filename='README', existing='fail')


# TODO: Reconsider using an import container and if so, link it herein. See
# now-deprecated hirni-create-study command

0 comments on commit e9fe249

Please sign in to comment.