In [31]:
import os
import logging
import re
from subprocess import call
import json
from time import sleep

In [32]:
# Regular expression for reproin-style scan names such as
# "func-bold_task-rest" or "anat-T1w_acq-multiecho_echo-1".
# Every group is optional, so the pattern matches any string; callers have
# to check whether "modality" was actually captured.
SCAN_EXPR = (
    "^(?P<rec_ex>PU:)?"
    "(?P<modality>[a-z]+)?"
    "(-(?P<label>[a-zA-Z0-9]+))?"
    "(_task-(?P<task>[a-zA-Z0-9]+))?"
    "(_acq-(?P<acq>[a-zA-Z0-9]+))?"
    "(_ce-(?P<ce>[a-zA-Z0-9]+))?"
    "(_rec-(?P<rec>[a-zA-Z0-9]+))?"
    "(_dir-(?P<dir>[a-zA-Z0-9]+))?"
    "(_run-(?P<run>[a-zA-Z0-9]+))?"
    "(_echo-(?P<echo>[0-9]+))?"
)

In [33]:
def parse_json(json_file):
    """
    Load the conversion spec from a JSON file and check its top-level keys.

    Parameters
    ----------
    json_file: string
        Path to the JSON file describing which subjects/sessions/scans to
        process and where to store the files.

    JSON Keys
    ---------
    destination: string
        (mandatory) Directory to construct the BIDS structure
    scan_dict: dictionary
        (optional) a dictionary/hash table where the keys are the scan
        names on xnat and the values are the reproin style scan names
    session_labels: list
        (optional) (non-BIDS) If you want to replace the names of the
        sessions on xnat with your own list of scans.
    scan_labels: list
        (optional) a list of the scans you want to download (if you don't
        want to download all the scans).
    subjects, num_digits, sub_dict, sub_label_prefix
        (optional) also accepted without a warning.

    Returns
    -------
    input_dict:
        A dictionary containing the parameters specified in the JSON file

    Raises
    ------
    KeyError
        If a mandatory key is absent from the file.
    """
    import json

    mandatory_keys = ['destination']
    optional_keys = ['session_labels', 'subjects', 'scan_labels',
                     'scan_dict', 'num_digits', 'sub_dict', 'sub_label_prefix']
    supported = set(mandatory_keys) | set(optional_keys)

    with open(json_file) as handle:
        input_dict = json.load(handle)

    provided = set(input_dict.keys())

    # keys the spec does not know about are reported but tolerated
    unsupported = list(provided - supported)
    if unsupported:
        logging.warning('JSON spec key(s) not supported: %s' % str(unsupported))

    # a missing mandatory key is fatal
    absent = list(set(mandatory_keys) - provided)
    if absent:
        raise KeyError('option(s) need to be specified in input file: '
                       '%s' % str(absent))

    return input_dict

In [41]:
# Load the conversion spec; "scan_dict" is an optional key, so
# scan_repl_dict is None when the spec does not provide it.
input_dict = parse_json("bids_conversion_info.json")
scan_repl_dict = input_dict.get('scan_dict')

In [42]:
# Display the parsed conversion spec.
input_dict

{'destination': '/Users/liza/data/NIH_SDN_BIDS',
 'scan_dict': {'anat_t1w_sag_mp_rage_1mm': 'anat-T1w',
  'sag_cube_t2': 'anat-T2w',
  'resting_epi_3mm_iso_rs': 'func-bold_task-rest',
  'edti_2mm_cdif45_ap': 'dwi_acq-AP',
  'edti_2mm_cdif45_pa': 'dwi_acq-PA',
  'me_mp_rage_1mm_promo_echo-1': 'anat-T1w_acq-multiecho_echo-1',
  'me_mp_rage_1mm_promo_echo-2': 'anat-T1w_acq-multiecho_echo-2',
  'me_mp_rage_1mm_promo_echo-3': 'anat-T1w_acq-multiecho_echo-3',
  'me_mp_rage_1mm_promo_echo-4': 'anat-T1w_acq-multiecho_echo-4',
  'reverse_blip_resting_epi_3mm_iso': 'anat-T2star',
  't2_1.7mm_fat_sat': 'anat-T2w_acq-fatsat',
  'orig_anat_t1w_sag_mp_rage_1mm': 'anat-T1w_rec-orig',
  'orig_sag_cube_t2': 'anat-T2w_rec-orig',
  'orig_me_mp_rage_1mm_promo_echo-1': 'anat-T1w_acq-multiecho_rec-orig_echo-1',
  'orig_me_mp_rage_1mm_promo_echo-2': 'anat-T1w_acq-multiecho_rec-orig_echo-2',
  'orig_me_mp_rage_1mm_promo_echo-3': 'anat-T1w_acq-multiecho_rec-orig_echo-3',
  'orig_me_mp_rage_1mm_promo_echo-4': '

In [43]:
# Display the scan-name -> reproin-name mapping taken from the spec.
scan_repl_dict

{'anat_t1w_sag_mp_rage_1mm': 'anat-T1w',
 'sag_cube_t2': 'anat-T2w',
 'resting_epi_3mm_iso_rs': 'func-bold_task-rest',
 'edti_2mm_cdif45_ap': 'dwi_acq-AP',
 'edti_2mm_cdif45_pa': 'dwi_acq-PA',
 'me_mp_rage_1mm_promo_echo-1': 'anat-T1w_acq-multiecho_echo-1',
 'me_mp_rage_1mm_promo_echo-2': 'anat-T1w_acq-multiecho_echo-2',
 'me_mp_rage_1mm_promo_echo-3': 'anat-T1w_acq-multiecho_echo-3',
 'me_mp_rage_1mm_promo_echo-4': 'anat-T1w_acq-multiecho_echo-4',
 'reverse_blip_resting_epi_3mm_iso': 'anat-T2star',
 't2_1.7mm_fat_sat': 'anat-T2w_acq-fatsat',
 'orig_anat_t1w_sag_mp_rage_1mm': 'anat-T1w_rec-orig',
 'orig_sag_cube_t2': 'anat-T2w_rec-orig',
 'orig_me_mp_rage_1mm_promo_echo-1': 'anat-T1w_acq-multiecho_rec-orig_echo-1',
 'orig_me_mp_rage_1mm_promo_echo-2': 'anat-T1w_acq-multiecho_rec-orig_echo-2',
 'orig_me_mp_rage_1mm_promo_echo-3': 'anat-T1w_acq-multiecho_rec-orig_echo-3',
 'orig_me_mp_rage_1mm_promo_echo-4': 'anat-T1w_acq-multiecho_rec-orig_echo-4'}

In [12]:
def convert_scan_unformatted(self, scan, dest, scan_repl_dict, bids_num_len,
                             sub_repl_dict=None, sub_label_prefix=None,
                             overwrite_nii=False):
    """
    Download the dicoms of one scan (unless they are already on disk) and
    convert them to a BIDS-named nifti with dcm2niix.

    Parameters
    ----------
    scan: string
        Name of the scan; used as the key into ``self.scan_dict`` and
        ``scan_repl_dict``.
    dest: string
        Root output directory. Dicoms are looked for / downloaded under
        <dest>/sourcedata/<session_label>/scans/<scan_id>-<scan_name>/resources/DICOM/files
        and the nifti is written to <dest>/<sub>/<ses>/<modality>.
    scan_repl_dict: dict
        Dictionary containing terms to match the scan name on xnat with
        the reproin name of the scan
    bids_num_len: int
        width the subject label is zero-padded to (only used when
        ``sub_repl_dict`` is empty)
    sub_repl_dict: dict
        dictionary to change the subject label based on its representation on xnat
    sub_label_prefix: string
        prefix to add to the subject label (e.g. "AMBI")
    overwrite_nii: bool
        overwrite the output nifti file if it already exists

    Returns
    -------
    0 when the scan is skipped (unknown scan, no reproin name, or no
    modality in the reproin name); None otherwise.

    Raises
    ------
    TypeError
        If every download attempt raised TypeError. Any other exception
        raised by the download propagates immediately.
    """
    from glob import glob

    if scan not in self.scan_dict:
        print('{scan} is not available for download'.format(scan=scan))
        return 0

    if scan not in scan_repl_dict:
        print('{scan} not a part of dictionary, skipping'.format(scan=scan))
        return 0

    # No easy way to check for complete download
    scan_obj = self.scan_dict[scan]
    scan_par = scan_obj.parent()
    # the session label (e.g. 20180613)
    # ^soon to be sub-01_ses-01
    ses_dir = scan_par.label()
    # the number id given to a scan (1, 2, 3, 400, 500)
    scan_id = scan_obj.id()

    bids_scan = scan_repl_dict[scan]
    # PU:task-rest_bold -> PU_task_rest_bold
    scan_fmt = re.sub(r'[^\w]', '_', scan)
    scan_dir = scan_id + '-' + scan_fmt

    dcm_outdir = os.path.join(dest, 'sourcedata')
    if not os.path.isdir(dcm_outdir):
        os.makedirs(dcm_outdir)

    # directory handed to dcm2niix
    dcm_dir = os.path.join(dcm_outdir, ses_dir, 'scans', scan_dir)

    potential_files = glob(os.path.join(dcm_dir,
                                        'resources/DICOM/files/*.dcm'))
    if potential_files:
        msg = """
                  dicoms were already found in the output directory: {}
                  """.format(potential_files[0])
        print(msg)
    else:
        # attempt to download dicoms (with a max of 5 tries)
        max_retries = 5
        for attempt in range(1, max_retries + 1):
            try:
                scan_par.scans().download(dest_dir=dcm_outdir,
                                          type=scan_id,
                                          name=scan_fmt,
                                          extract=True,
                                          removeZip=True)
            except TypeError as exc:
                # Only TypeError is retried. A try/except/else is used
                # instead of `break` inside `finally`, which would silently
                # discard every other exception type.
                print('download attempt {n} failed'.format(n=attempt))
                if attempt == max_retries:
                    raise TypeError("Could not download dicom") from exc
                sleep(5)
            else:
                break

    # subject label: explicit replacement if given, otherwise zero-padded
    xnat_sub_label = self.sub_obj.attrs.get('label')
    if sub_repl_dict:
        sub_name = 'sub-' + sub_repl_dict[xnat_sub_label]
    else:
        sub_name = 'sub-' + xnat_sub_label.zfill(bids_num_len)

    if self.ses_name_dict:
        ses_name = 'ses-' + self.ses_name_dict[ses_dir]
    else:
        # To capture cases where the session is named 20180508_2
        ses_name = 'ses-' + ses_dir.replace('_', 's')

    # every group in SCAN_EXPR is optional, so search() always matches
    scan_pattern_dict = re.compile(SCAN_EXPR).search(bids_scan).groupdict()
    # check if the modality is empty
    if scan_pattern_dict['modality'] is None:
        print('{scan} is not in BIDS, not converting'.format(scan=scan))
        return 0

    # adding additional information to scan label (such as GE120)
    if sub_label_prefix is not None:
        sub_label = sub_name.split('-')[1]
        sub_name = 'sub-' + sub_label_prefix + sub_label

    # build up the bids directory
    bids_dir = os.path.join(dest, sub_name, ses_name, scan_pattern_dict['modality'])
    if not os.path.isdir(bids_dir):
        os.makedirs(bids_dir)

    # name the bids file: sub, ses, then the entities in this fixed order
    name_parts = [sub_name, ses_name]
    bids_keys_order = ['task', 'acq', 'ce', 'rec', 'rec_ex', 'dir', 'run', 'echo']
    for key in bids_keys_order:
        value = scan_pattern_dict[key]
        if value is None:
            continue
        if key == 'rec_ex':
            # a "PU:" prefix is recorded as rec-pu
            key, value = 'rec', 'pu'
        name_parts.append(key + '-' + value)

    # add the label (e.g. _bold); fall back to the modality when absent
    if scan_pattern_dict['label'] is None:
        name_parts.append(scan_pattern_dict['modality'])
    else:
        name_parts.append(scan_pattern_dict['label'])
    fname = '_'.join(name_parts)

    print('the dcm dir is {dcm_dir}'.format(dcm_dir=dcm_dir))
    bids_outfile = os.path.join(bids_dir, fname + '.nii.gz')
    if not os.path.exists(bids_outfile) or overwrite_nii:
        # argument list instead of a shell string so that paths containing
        # spaces or shell metacharacters are passed through unchanged
        dcm2niix = ['dcm2niix', '-o', bids_dir, '-f', fname,
                    '-z', 'y', '-b', 'y', dcm_dir]
        try:
            exit_code = call(dcm2niix)
        except OSError as exc:
            print('dcm2niix could not be started for {scan}: {exc}'.format(
                scan=scan, exc=exc))
        else:
            if exit_code != 0:
                print('dcm2niix exited with status {code} for {scan}'.format(
                    code=exit_code, scan=scan))
    else:
        print('It appears the nifti file already exists for {scan}'.format(scan=scan))



In [13]:
# Try the scan-name pattern on a sample reproin name.
bids_scan = 'anat-T1rho_acq-SL50'
scan_pattern = re.compile(SCAN_EXPR)
scan_pattern_dict = scan_pattern.search(bids_scan).groupdict()



In [14]:
# Display the groups captured from the sample name.
scan_pattern_dict

{'rec_ex': None,
 'modality': 'anat',
 'label': 'T1rho',
 'task': None,
 'acq': 'SL50',
 'ce': None,
 'rec': None,
 'dir': None,
 'run': None,
 'echo': None}

In [15]:
# Directory of the session being converted: README-Study.txt is read from
# here and each series directory is resolved relative to it.
# NOTE(review): looks like a placeholder absolute path — set it to the real
# session directory before running.
curr_dir = "/Users/liza/data/LASTNAME_FIRSTNAME_MIDDLENAME_MR-0XXXXXXX/YYYYMMDD_XXXXXX/"

In [16]:
# Parse the README-Study.txt file into a dictionary that maps each series directory to its description

In [17]:
def extract_mr_dir_description(readme_path):
    """
    Map each series directory listed in a README-Study.txt file to its
    description.

    Every line is split on its first run of whitespace into a leading
    token and the remainder. Lines whose leading token contains "Series"
    are treated as series entries: the directory name is the part of the
    token after the first ":" up to the next ",", and the description is
    whatever follows the last ")" in the third comma-separated field of
    the remainder.

    Parameters
    ----------
    readme_path: string
        Path to the README-Study.txt file to read.

    Returns
    -------
    mr_dir_dict: dict
        Series directory name -> series description.

    Raises
    ------
    IndexError
        If a series entry has no ":" in its leading token or fewer than
        three comma-separated fields.
    """
    readme_dict_orig = {}
    # read the file that was passed in rather than a path built from a global
    with open(readme_path) as fh:
        for line in fh:
            parts = line.strip().split(None, 1)
            # blank lines and single-token lines carry no description
            if len(parts) < 2:
                continue
            command, description = parts
            readme_dict_orig[command] = description.strip()

    mr_dir_dict = {}
    for key, value in readme_dict_orig.items():
        if "Series" not in key:
            continue
        mr_dir = key.split(":")[1].split(",")[0]
        metadata = [field.lstrip() for field in value.split(",")]
        mr_dir_dict[mr_dir] = metadata[2].split(")")[-1]
    return mr_dir_dict

In [18]:
# Build the series-directory -> description mapping for the current session.
mr_dir_dict = extract_mr_dir_description(os.path.join(curr_dir, "README-Study.txt"))

In [19]:
# Store the subject and session labels next to the series entries; the
# conversion loop reads them back from these two keys.
mr_dir_dict.update(subject='09XRZ', session='01')

In [20]:
# Give every multi-echo series a distinct description by appending a running
# echo number (in dictionary order); the plain and "orig_" variants are
# counted separately.
i, j = 1, 1
for key, description in mr_dir_dict.items():
    if description == "me_mp_rage_1mm_promo":
        mr_dir_dict[key] = "me_mp_rage_1mm_promo_echo-{0}".format(i)
        i += 1
    elif description == "orig_me_mp_rage_1mm_promo":
        mr_dir_dict[key] = "orig_me_mp_rage_1mm_promo_echo-{0}".format(j)
        j += 1

In [21]:
# Display the series mapping after adding subject/session and echo numbers.
mr_dir_dict

{'mr_0001': '3_plane_localizer',
 'mr_0002': 'sag_t1_spin_echo',
 'mr_0003': 'ax_t2_frfse',
 'mr_0004': 'ax_t2_flair',
 'mr_0005': 'asset_pure_calibration',
 'mr_0006': 'anat_t1w_sag_mp_rage_1mm',
 'mr_0007': 'sag_cube_t2',
 'mr_0008': 'me_mp_rage_1mm_promo_echo-1',
 'mr_0008-e02': 'me_mp_rage_1mm_promo_echo-2',
 'mr_0008-e03': 'me_mp_rage_1mm_promo_echo-3',
 'mr_0008-e04': 'me_mp_rage_1mm_promo_echo-4',
 'mr_0009': 'sagittal_ref_pa_fr8',
 'mr_0010': 'sagittal_ref_body_fr8',
 'mr_0012': 'resting_epi_3mm_iso_rs',
 'mr_0013': 'reverse_blip_resting_epi_3mm_iso',
 'mr_0015': 'edti_2mm_cdif45_ap',
 'mr_0016': 'edti_2mm_cdif45_pa',
 'mr_0017': 't2_1.7mm_fat_sat',
 'mr_40006': 'orig_anat_t1w_sag_mp_rage_1mm',
 'mr_40007': 'orig_sag_cube_t2',
 'mr_40008': 'orig_me_mp_rage_1mm_promo_echo-1',
 'mr_40008-e02': 'orig_me_mp_rage_1mm_promo_echo-2',
 'mr_40008-e03': 'orig_me_mp_rage_1mm_promo_echo-3',
 'mr_40008-e04': 'orig_me_mp_rage_1mm_promo_echo-4',
 'sc_0000': 'requisition',
 'sc_20005': 'screen

In [22]:
# Display the scan-name -> reproin-name mapping used by the conversion loop.
# NOTE(review): the execution counts suggest the spec was reloaded after this
# cell last ran, so the saved output here may be stale — re-run to confirm.
scan_repl_dict

{'anat_t1w_sag_mp_rage_1mm': 'anat-T1w',
 'sag_cube_t2': 'anat-T2w',
 'resting_epi_3mm_iso_rs': 'func-bold_task-rest',
 'edti_2mm_cdif45_ap': 'dwi_rec-PA',
 'me_mp_rage_1mm_promo_echo-1': 'anat-T1w_acq-multiecho_echo-1',
 'me_mp_rage_1mm_promo_echo-2': 'anat-T1w_acq-multiecho_echo-2',
 'me_mp_rage_1mm_promo_echo-3': 'anat-T1w_acq-multiecho_echo-3',
 'me_mp_rage_1mm_promo_echo-4': 'anat-T1w_acq-multiecho_echo-4',
 'reverse_blip_resting_epi_3mm_iso': 'anat-T2star',
 't2_1.7mm_fat_sat': 'anat-T2w_acq-fatsat',
 'orig_anat_t1w_sag_mp_rage_1mm': 'anat-T1w_rec-orig',
 'orig_sag_cube_t2': 'anat-T2w_rec-orig',
 'orig_me_mp_rage_1mm_promo_echo-1': 'anat-T1w_acq-multiecho_rec-orig_echo-1',
 'orig_me_mp_rage_1mm_promo_echo-2': 'anat-T1w_acq-multiecho_rec-orig_echo-2',
 'orig_me_mp_rage_1mm_promo_echo-3': 'anat-T1w_acq-multiecho_rec-orig_echo-3',
 'orig_me_mp_rage_1mm_promo_echo-4': 'anat-T1w_acq-multiecho_rec-orig_echo-4'}

In [48]:
# Convert every series that has a reproin name in scan_repl_dict into a
# BIDS-named nifti with dcm2niix.

# Set to True to re-run dcm2niix even when the nifti already exists.
# (Previously this name was never defined in the notebook, so the existence
# check raised NameError as soon as an output file was present.)
overwrite_nii = False

# values that do not change between series
sub_name = 'sub-' + mr_dir_dict['subject']
ses_name = 'ses-' + mr_dir_dict['session']
dest = input_dict['destination']
scan_pattern = re.compile(SCAN_EXPR)
bids_keys_order = ['task', 'acq', 'ce', 'rec', 'rec_ex', 'dir', 'run', 'echo']

for mr_key, scan in mr_dir_dict.items():
    # entries without a reproin name (including the 'subject' and 'session'
    # entries stored in the same dictionary) are not converted
    if scan not in scan_repl_dict:
        continue

    dcm_dir = os.path.join(curr_dir, mr_key)
    bids_scan = scan_repl_dict[scan]

    # every group in SCAN_EXPR is optional, so search() always matches
    scan_pattern_dict = scan_pattern.search(bids_scan).groupdict()
    if scan_pattern_dict['modality'] is None:
        print('{scan} is not in BIDS, not converting'.format(scan=scan))
        continue

    # build up the bids directory
    bids_dir = os.path.join(dest, sub_name, ses_name, scan_pattern_dict['modality'])
    if not os.path.isdir(bids_dir):
        os.makedirs(bids_dir)

    # name the bids file: sub, ses, then the entities in a fixed order
    name_parts = [sub_name, ses_name]
    for key in bids_keys_order:
        label = scan_pattern_dict[key]
        if label is None:
            continue
        if key == 'rec_ex':
            # a "PU:" prefix is recorded as rec-pu
            key, label = 'rec', 'pu'
        name_parts.append(key + '-' + label)

    # add the label (e.g. _bold); fall back to the modality when absent
    if scan_pattern_dict['label'] is None:
        label = scan_pattern_dict['modality']
    else:
        label = scan_pattern_dict['label']
    name_parts.append(label)
    fname = '_'.join(name_parts)

    bids_outfile = os.path.join(bids_dir, fname + '.nii.gz')
    print(bids_outfile)
    if not os.path.exists(bids_outfile) or overwrite_nii:
        # argument list instead of a shell string so that paths containing
        # spaces or shell metacharacters are passed through unchanged
        dcm2niix = ['dcm2niix', '-o', bids_dir, '-f', fname,
                    '-z', 'y', '-b', 'y', dcm_dir]
        try:
            exit_code = call(dcm2niix)
        except OSError as exc:
            print('dcm2niix could not be started for {scan}: {exc}'.format(
                scan=scan, exc=exc))
        else:
            if exit_code != 0:
                print('dcm2niix exited with status {code} for {scan}'.format(
                    code=exit_code, scan=scan))
    else:
        print('It appears the nifti file already exists for {scan}'.format(scan=scan))



/Users/liza/data/NIH_SDN_BIDS/sub-09XRZ/ses-01/anat/sub-09XRZ_ses-01_T1w.nii.gz
/Users/liza/data/NIH_SDN_BIDS/sub-09XRZ/ses-01/anat/sub-09XRZ_ses-01_T2w.nii.gz
/Users/liza/data/NIH_SDN_BIDS/sub-09XRZ/ses-01/anat/sub-09XRZ_ses-01_acq-multiecho_echo-1_T1w.nii.gz
/Users/liza/data/NIH_SDN_BIDS/sub-09XRZ/ses-01/anat/sub-09XRZ_ses-01_acq-multiecho_echo-2_T1w.nii.gz
/Users/liza/data/NIH_SDN_BIDS/sub-09XRZ/ses-01/anat/sub-09XRZ_ses-01_acq-multiecho_echo-3_T1w.nii.gz
/Users/liza/data/NIH_SDN_BIDS/sub-09XRZ/ses-01/anat/sub-09XRZ_ses-01_acq-multiecho_echo-4_T1w.nii.gz
/Users/liza/data/NIH_SDN_BIDS/sub-09XRZ/ses-01/func/sub-09XRZ_ses-01_task-rest_bold.nii.gz
/Users/liza/data/NIH_SDN_BIDS/sub-09XRZ/ses-01/anat/sub-09XRZ_ses-01_T2star.nii.gz
/Users/liza/data/NIH_SDN_BIDS/sub-09XRZ/ses-01/dwi/sub-09XRZ_ses-01_acq-AP_dwi.nii.gz
/Users/liza/data/NIH_SDN_BIDS/sub-09XRZ/ses-01/dwi/sub-09XRZ_ses-01_acq-PA_dwi.nii.gz
/Users/liza/data/NIH_SDN_BIDS/sub-09XRZ/ses-01/anat/sub-09XRZ_ses-01_acq-fatsat_T2w.nii.gz