CAMH studies with single-shell or multi-shell scans acquired in separate images include:
- COGBDO
- COGBDY
- DBDC
- DTI15T
- DTI3T
- PACTMD
- PASD01
- RTMSWM

What combinations of scans do we expect from each study? Write doctests to cover them.
```
['.../ds001/sub-01/ses-01/dwi/sub-01_ses-01_acq-multishelldir30b1000_dwi.nii.gz',
 '.../ds001/sub-01/ses-01/dwi/sub-01_ses-01_acq-multishelldir30b3000_dwi.nii.gz',
 '.../ds001/sub-01/ses-01/dwi/sub-01_ses-01_acq-multishelldir30b4500_dwi.nii.gz',
 '.../ds001/sub-01/ses-01/dwi/sub-01_ses-01_acq-singleshelldir60b1000_dwi.nii.gz']
['.../ds001/sub-01/ses-01/dwi/sub-01_ses-01_acq-singleshelldir21b1000_dwi.nii.gz',
 '.../ds001/sub-01/ses-01/dwi/sub-01_ses-01_acq-singleshelldir22b1000_dwi.nii.gz',
 '.../ds001/sub-01/ses-01/dwi/sub-01_ses-01_acq-singleshelldir23b1000_dwi.nii.gz',
 '.../ds001/sub-01/ses-02/dwi/sub-01_ses-02_acq-singleshelldir21b1000_dwi.nii.gz',
 '.../ds001/sub-01/ses-02/dwi/sub-01_ses-02_acq-singleshelldir22b1000_dwi.nii.gz',
 '.../ds001/sub-01/ses-02/dwi/sub-01_ses-02_acq-singleshelldir23b1000_dwi.nii.gz']
['.../ds001/sub-01/ses-01/dwi/sub-01_ses-01_acq-singleshelldir20b1000_run-1_dwi.nii.gz',
 '.../ds001/sub-01/ses-01/dwi/sub-01_ses-01_acq-singleshelldir20b1000_run-2_dwi.nii.gz',
 '.../ds001/sub-01/ses-01/dwi/sub-01_ses-01_acq-singleshelldir20b1000_run-3_dwi.nii.gz']
```

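Group scans by, in order:
1. session (`session_label`, the `ses-<label>` entity)
2. acquisition (the `_acq-<label>` entity)
3. run (the `_run-<index>` entity)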

Some scans have `NumberofAverages` stored in the header.

In [23]:
import os

from bids import BIDSLayout
from nipype.pipeline import engine as pe
from nipype.interfaces import BIDSDataGrabber, utility as niu

In [24]:
class BIDSError(ValueError):
    """ValueError whose message is framed by a banner naming the BIDS root.

    Attributes
    ----------
    msg : str
        The fully formatted, multi-line message (also passed to ValueError).
    bids_root
        The value given as ``bids_root``, stored unchanged.
    """

    def __init__(self, message, bids_root):
        width = 10
        rule = "-" * width
        # Top banner: dashes on both sides of the quoted root folder.
        banner = '{0} BIDS root folder: "{1}" {0}'.format(rule, bids_root)
        # The message body is indented one column past the leading rule.
        padding = " " * (width + 1)
        # The closing rule is exactly as wide as the banner.
        closing = "-" * len(banner)
        self.msg = "\n{0}\n{1}{2}\n{3}".format(banner, padding, message, closing)
        super().__init__(self.msg)
        self.bids_root = bids_root


class BIDSWarning(RuntimeWarning):
    """Warning category for non-fatal problems with a BIDS dataset."""

In [25]:
def collect_participants(bids_dir, participant_label=None, strict=False, bids_validate=True):
    """List the participants of a BIDS dataset, optionally filtered by label.

    Parameters
    ----------
    bids_dir : BIDSLayout or path-like
        An existing layout, or a dataset root from which a layout is built.
    participant_label : str or list of str, optional
        Label(s) to select, with or without the ``sub-`` prefix. When empty
        or ``None`` every participant in the dataset is selected.
    strict : bool
        If True, raise when any requested label is missing instead of warning.
    bids_validate : bool
        Passed as ``validate`` to ``BIDSLayout`` when a layout has to be built.

    Returns
    -------
    all_participants : set of str
        Every subject label reported by the layout.
    found_label : list of str
        Sorted labels that were requested and exist in the dataset (all
        participants when no label was requested).

    Raises
    ------
    BIDSError
        If the dataset has no participants, if none of the requested labels
        exist, or (with ``strict=True``) if some requested labels are missing.
    """
    # Imported here because the module's import cell does not bring it in.
    import warnings

    if isinstance(bids_dir, BIDSLayout):
        layout = bids_dir
    else:
        layout = BIDSLayout(str(bids_dir), validate=bids_validate)

    all_participants = set(layout.get_subjects())

    # Error: bids_dir does not contain subjects
    if not all_participants:
        raise BIDSError(
            "Could not find participants. Please make sure the BIDS data "
            "structure is present and correct. Datasets can be validated online "
            "using the BIDS Validator (http://bids-standard.github.io/bids-validator/).\n"
            "If you are using Docker for Mac or Docker for Windows, you "
            'may need to adjust your "File sharing" preferences.',
            bids_dir,
        )

    # No --participant-label was set, return all. The shape matches the
    # filtered return below so callers can always unpack two values.
    if not participant_label:
        return all_participants, sorted(all_participants)

    if isinstance(participant_label, str):
        participant_label = [participant_label]

    # Drop sub- prefixes and remove duplicates
    requested = {
        sub[4:] if sub.startswith("sub-") else sub for sub in participant_label
    }

    # Remove labels not found
    found_label = sorted(requested & all_participants)
    if not found_label:
        raise BIDSError(
            "Could not find participants [{}]".format(", ".join(sorted(requested))),
            bids_dir,
        )

    # Warn (or raise, when strict) if some IDs were not found
    notfound_label = sorted(requested - all_participants)
    if notfound_label:
        exc = BIDSError(
            "Some participants were not found: {}".format(", ".join(notfound_label)),
            bids_dir,
        )
        if strict:
            raise exc
        warnings.warn(exc.msg, BIDSWarning)

    return all_participants, found_label

In [48]:
def collect_data(bids_dir, participant_label, session_label=None, bids_validate=True):
    """Query a BIDS dataset for one participant's fmap, dwi and T1w images.

    Parameters
    ----------
    bids_dir : BIDSLayout or path-like
        An existing layout, or a dataset root from which a layout is built.
    participant_label
        Passed as the ``subject`` filter of ``layout.get``.
    session_label : optional
        Passed as the ``session`` filter; when falsy, every session reported
        by ``layout.get_sessions()`` is used.
    bids_validate : bool
        Passed as ``validate`` to ``BIDSLayout`` when a layout has to be built.

    Returns
    -------
    subj_data : dict
        Maps ``'fmap'``, ``'dwi'`` and ``'t1w'`` to sorted lists of file paths.
    layout : BIDSLayout
        The layout that was queried.
    """
    layout = (
        bids_dir
        if isinstance(bids_dir, BIDSLayout)
        else BIDSLayout(str(bids_dir), validate=bids_validate)
    )

    # Entity filters for each kind of image we want back.
    queries = {
        'fmap': {'datatype': 'fmap'},
        'dwi': {'datatype': 'dwi', 'suffix': 'dwi'},
        't1w': {'datatype': 'anat', 'suffix': 'T1w'},
    }

    # Fall back to every session in the dataset when none was requested.
    sessions = session_label or layout.get_sessions()

    subj_data = {}
    for dtype, query in queries.items():
        matches = layout.get(
            return_type='file',
            subject=participant_label,
            session=sessions,
            extension=['nii', 'nii.gz'],
            **query
        )
        subj_data[dtype] = sorted(matches)

    return subj_data, layout

In [55]:
# Index the dataset once; collect_participants and collect_data both accept
# an existing BIDSLayout in place of a path.
bids_dir = '/archive/data/DTI3T/data/bids'
layout = BIDSLayout(bids_dir)

In [62]:
# Selection used by the cells below.
participant_label = 'CMHH166'
session_label = '01'
# Acquisition labels; group_dwis matches these as substrings of the file names
# and collects the matching scans into one group.
concat_dwis = ["multishelldir30b1000", "multishelldir30b3000", "multishelldir30b4500"]

In [57]:
# With a participant label given, collect_participants returns
# (all_participants, found_label).
all_subjects, subject_list = collect_participants(layout, participant_label)

In [59]:
# collect_data returns a (subj_data, layout) tuple, so subject_data holds that
# whole tuple. Each iteration overwrites the previous one, leaving only the
# last subject's result after the loop.
for subject_id in subject_list:
    subject_data = collect_data(layout, subject_id)

In [61]:
subject_data

({'fmap': [],
  'dwi': ['/archive/data/DTI3T/data/bids/sub-CMHH166/ses-01/dwi/sub-CMHH166_ses-01_acq-multishelldir30b1000_run-1_dwi.nii.gz',
   '/archive/data/DTI3T/data/bids/sub-CMHH166/ses-01/dwi/sub-CMHH166_ses-01_acq-multishelldir30b3000_run-1_dwi.nii.gz',
   '/archive/data/DTI3T/data/bids/sub-CMHH166/ses-01/dwi/sub-CMHH166_ses-01_acq-multishelldir30b4500_run-1_dwi.nii.gz',
   '/archive/data/DTI3T/data/bids/sub-CMHH166/ses-01/dwi/sub-CMHH166_ses-01_acq-singleshelldir60b1000_run-1_dwi.nii.gz'],
  't1w': ['/archive/data/DTI3T/data/bids/sub-CMHH166/ses-01/anat/sub-CMHH166_ses-01_run-1_T1w.nii.gz']},
 BIDS Layout: .../archive/data/DTI3T/data/bids | Subjects: 124 | Sessions: 126 | Runs: 146)

In [None]:
# subject_data is the (subj_data, layout) tuple returned by collect_data, so
# the per-datatype file lists live in element 0; indexing the tuple with 'dwi'
# directly raises TypeError.
# group_multisession is defined in a later cell and must be run first.
dwi_files = subject_data[0]['dwi']
if any('_ses-' in dwi for dwi in dwi_files):
    subject_data[0]['dwi'] = group_multisession(dwi_files)

In [None]:
def group_multisession(dwi_sess):
    """Group consecutive files that differ only in their ``_echo-<n>`` entity.

    Parameters
    ----------
    dwi_sess : iterable of str
        File paths. Only *adjacent* entries are grouped, so pass them sorted.

    Returns
    -------
    list
        Files without an ``_echo-`` entity, and series of one or two echoes,
        appear as plain strings. A series of three or more echoes appears as
        a single nested list of its files.
    """
    import re
    from itertools import groupby

    def _grp_echos(x):
        # Files that differ only in echo number share the same key.
        if '_echo-' not in x:
            return x
        echo = re.search(r"_echo-\d*", x).group(0)
        return x.replace(echo, "_echo-?")

    ses_uids = []
    for _, series in groupby(dwi_sess, key=_grp_echos):
        series = list(series)
        # If single- or dual-echo, flatten list; keep list otherwise.
        if len(series) > 2:
            ses_uids.append(series)
        else:
            ses_uids.extend(series)
    return ses_uids

In [19]:
def group_dwis(dwi_files, sessions, concat_dwis):
    """Split DWI files into stand-alone scans and one group to concatenate.

    Parameters
    ----------
    dwi_files : list of str
        Paths of the DWI images.
    sessions : list of str or None
        Session labels. A file belongs to a session when ``ses-<label>``
        occurs in its path. When empty or ``None`` every file is considered.
    concat_dwis : list of str
        Substrings (e.g. acquisition labels); a file containing any of them
        is placed in the concatenation group.

    Returns
    -------
    list
        The stand-alone file paths, followed by one final element: the list
        of files to concatenate (possibly empty).
    """
    if sessions:
        batches = [
            [img for img in dwi_files if 'ses-{}'.format(session) in img]
            for session in sessions
        ]
    else:
        # No session level: treat the whole file list as a single batch
        # (the original referenced an undefined name here).
        batches = [list(dwi_files)]

    all_dwis = []
    concat_group = []
    for batch in batches:
        for f in batch:
            if any(acq in f for acq in concat_dwis):
                concat_group.append(f)
            else:
                all_dwis.append(f)

    # NOTE(review): matches from every session end up in this one group,
    # as in the original; confirm whether one group per session is wanted.
    all_dwis.append(concat_group)

    return all_dwis