Skip to content
This repository has been archived by the owner on Nov 2, 2021. It is now read-only.

Commit

Permalink
Merge pull request #62 from bpoldrack/enh-conversion
Browse files Browse the repository at this point in the history
ENH: conversion
  • Loading branch information
bpoldrack committed Aug 30, 2018
2 parents ab102ac + 8955eb9 commit e9fe249
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 18 deletions.
6 changes: 6 additions & 0 deletions datalad_hirni/commands/create_study.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ def __call__(
from datalad.distribution.dataset import Dataset
from datalad.distribution.install import Install
from datalad.distribution.siblings import Siblings
from datalad.support.exceptions import DeprecatedError

raise DeprecatedError(new="use 'datalad create' and 'datalad "
"run-procedure setup_study_dataset' instead "
"to setup a HIRNI study dataset.",
msg="'hirni-create-study is deprecated.")

import os

Expand Down
20 changes: 20 additions & 0 deletions datalad_hirni/commands/dicom2spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,26 @@ def __call__(path=None, spec=None, dataset=None, subject=None,
logger=lgr)
return

# ignore duplicates (prob. reruns of aborted runs)
# -> convert highest id only
import datalad_hirni.support.hirni_heuristic as heuristic
spec_series_list = sorted(spec_series_list,
key=lambda x: heuristic.get_specval(x, 'id'))
for i in range(len(spec_series_list)):
if spec_series_list[i]["type"] == "dicomseries" and \
heuristic.has_specval(spec_series_list[i], "converter") and \
heuristic.get_specval(spec_series_list[i], "bids_run") in \
[heuristic.get_specval(s, "bids_run")
for s in spec_series_list[i + 1:]
if heuristic.get_specval(s,
"description") == heuristic.get_specval(
spec_series_list[i], "description") and \
heuristic.get_specval(s,
"id") > heuristic.get_specval(
spec_series_list[i], "id")]:
lgr.debug("Set converter to None for SeriesNumber %s" % i)
spec_series_list[i]["converter"] = dict(approved=True, value=None)

lgr.debug("Storing specification (%s)", spec)
# store as a stream (one record per file) to be able to
# easily concat files without having to parse them, or
Expand Down
14 changes: 11 additions & 3 deletions datalad_hirni/commands/spec2bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from datalad.utils import assure_list
from datalad.utils import rmtree

from datalad.coreapi import remove
from datalad_container import containers_run
import datalad_hirni.support.hirni_heuristic as heuristic
import logging
Expand Down Expand Up @@ -166,8 +167,8 @@ def __call__(specfile, dataset=None, anonymize=False):

for r in dataset.containers_run(
['heudiconv',
# XXX absolute path will make rerun on other system
# impossible -- hard to avoid
# XXX absolute path will make rerun on other
# system impossible -- hard to avoid
'-f', heuristic.__file__,
# leaves identifying info in run record
'-s', replacements['bids_subject'],
Expand All @@ -191,7 +192,7 @@ def __call__(specfile, dataset=None, anonymize=False):
"conversion"),
inputs=[replacements['location'], rel_spec_path],
outputs=[dataset.path],
message="Convert DICOM data for subject {}"
message="[HIRNI] Convert DICOM data for subject {}"
"".format(replacements['bids_subject']),
return_type='generator',
):
Expand Down Expand Up @@ -220,6 +221,13 @@ def __call__(specfile, dataset=None, anonymize=False):

# remove superfluous heudiconv output
rmtree(opj(dataset.path, rel_trash_path))
# remove empty *_events.tsv files created by heudiconv
import glob
dataset.remove(glob.glob('*/*/*_events.tsv'),
check=False,
message="[HIRNI] Remove empty *_event.tsv "
"files")

# run heudiconv only once
ran_heudiconv = True

Expand Down
5 changes: 3 additions & 2 deletions datalad_hirni/commands/spec4anything.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,8 +269,9 @@ def __call__(path, dataset=None, spec_file=None, properties=None,
from os import linesep
message = "[HIRNI] Add specification {n_snippets} for: {paths}".format(
n_snippets=single_or_plural("snippet", "snippets", len(paths)),
paths=linesep.join(" - " + p['path'] for p in paths)
if len(paths) > 1 else paths[0]['path'])
paths=linesep.join(" - " + op.relpath(p['path'], dataset.path)
for p in paths)
if len(paths) > 1 else op.relpath(paths[0]['path'], dataset.path))
for r in dataset.add(
updated_files,
to_git=True,
Expand Down
18 changes: 5 additions & 13 deletions datalad_hirni/resources/procedures/setup_bids_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
check_installed=True,
purpose='BIDS dataset setup')

# TODO: This looks like it was supposed to be a default README but isn't used
# ATM.
README_code = """\
All custom code goes into the directory. All scripts should be written such
that they can be executed from the root of the dataset, and are only using
Expand All @@ -36,18 +38,8 @@
ds.run_procedure(['cfg_metadatatypes', 'bids', 'nifti'])

# amend gitattributes
for path in force_in_git:
abspath = op.join(ds.path, path)
d = op.dirname(abspath)
ga_path = op.join(d, '.gitattributes') \
if op.exists(d) else op.join(ds.path, '.gitattributes')
with open(ga_path, 'a') as gaf:
gaf.write('{} annex.largefiles=nothing\n'.format(
op.relpath(abspath, start=d) if op.exists(d) else path))
to_add.add(ga_path)
ds.repo.set_gitattributes([(path, {'annex.largefiles': 'nothing'})
for path in force_in_git])

# leave clean
ds.add(
to_add,
message="Default BIDS dataset setup",
)
ds.add('.gitattributes', message="[HIRNI] Default BIDS dataset setup")
54 changes: 54 additions & 0 deletions datalad_hirni/resources/procedures/setup_study_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""Procedure to apply a sensible default setup to a study dataset

Intended to be run via datalad's procedure mechanism; the target dataset's
path is passed as the first command line argument (``sys.argv[1]``).
"""

import sys
from datalad.distribution.dataset import require_dataset

# bound dataset methods
import datalad.distribution.add
import datalad.interface.save
from datalad.plugin.add_readme import AddReadme

# Resolve the dataset from the command line argument; raises if the
# dataset is not installed.
ds = require_dataset(
    sys.argv[1],
    check_installed=True,
    purpose='study dataset setup')


# Paths/patterns that should be committed directly to git rather than the
# annex. The study specification filename is configurable via
# 'datalad.hirni.studyspec.filename' and defaults to 'studyspec.json';
# the '**/' pattern matches it anywhere in the dataset tree.
force_in_git = [
    'README',
    'CHANGES',
    'dataset_description.json',
    '**/{}'.format(ds.config.get("datalad.hirni.studyspec.filename",
                                 "studyspec.json")),
]

# except for hand-picked global metadata, we want anything
# to go into the annex to be able to retract files after
# publication
ds.repo.set_gitattributes([('**', {'annex.largefiles': 'anything'})])
# Exempt the hand-picked files above from annexing; written after the
# catch-all rule (presumably so the more specific entries take
# precedence in .gitattributes — later matching rules win).
ds.repo.set_gitattributes([(p, {'annex.largefiles': 'nothing'})
                           for p in force_in_git])


# TODO:
# Note: This default is using the DICOM's PatientID as the acquisition ID
# (directory name in the study dataset). That approach works for values
# accessible via the DICOM metadata directly. We probably want a way to apply
# more sophisticated rules, which could be achieved by a String Formatter
# providing more sophisticated operations like slicing (prob. to be shared with
# datalad's --output-format logic) or by apply specification rules prior to
# determining final location of the imported subdataset. The latter might lead
# to a mess, since import and specification routines would then be quite
# twisted.
ds.config.add('datalad.hirni.import.acquisition-format',
              "{PatientID}", where='dataset')

# Commit the configuration/.gitattributes changes made so far.
ds.save(message='[HIRNI] Default study dataset setup')

# Include the most basic README to prevent heudiconv from adding one
# ('existing=fail' guards against clobbering a pre-existing README).
ds.add_readme(filename='README', existing='fail')


# TODO: Reconsider using an import container and if so, link it herein. See
# now-deprecated hirni-create-study command

0 comments on commit e9fe249

Please sign in to comment.