In [1]:
%pylab inline

import matplotlib

import os
import sys
import re

import numpy as np
import pandas as pd

import nibabel 
import subprocess
import nilearn as nlr
import nipy

from nilearn._utils.compat import _basestring
import glob

from sklearn.externals.joblib import Parallel, delayed, Memory
from nipype.caching import Memory as NipypeMemory
import nipype.interfaces.spm as spm

from nipy.modalities.fmri.glm import FMRILinearModel

Populating the interactive namespace from numpy and matplotlib


In [2]:
options = {'protocol': 'SOCIAL', 
           'dataset_description': 'HCP SOCIAL experiment', 
#            'dataset_dir': 'C:/data/graduate_thesis/hcp_data_test_gz/',  
           'dataset_dir': 'C:\\data\\graduate_thesis\\hcp_data_test_gz\\',  
           'subject_dirs': '*', 
           'output_dir': 'output', 
#            'session_1_func': 'MNINonLinear/Results/tfMRI_%protocol%_RL/tfMRI_%protocol%_RL.nii.gz', 
#            'session_2_func': 'MNINonLinear/Results/tfMRI_%protocol%_LR/tfMRI_%protocol%_LR.nii.gz', 
           'session_2_func': 'MNINonLinear\\Results\\tfMRI_SOCIAL_RL\\tfMRI_SOCIAL_RL.nii.gz', 
           'session_1_func': 'MNINonLinear\\Results\\tfMRI_SOCIAL_LR\\tfMRI_SOCIAL_LR.nii.gz', 
           'caching': True, 
           'deleteorient': False, 
           'disable_distortion_correction': True, 
           'disable_slice_timing': True, 
           'TR': 0.72, 
           'TA': 'TR * (1 - 1 / nslices)', 
           'slice_order': 'ascending', 
           'interleaved': False, 
           'refslice': '1', 
           'slice_timing_software': 'spm', 
           'disable_realign': True, 
           'register_to_mean': True, 
           'realign_reslice': False, 
           'realign_software': 'spm', 
           'disable_coregister': True, 
           'coreg_func_to_anat': True, 
           'coregister_reslice': False, 
           'coregister_software': 'spm', 
           'disable_segment': True,
           'segment_software': 'spm', 
           'newsegment': True, 
           'disable_normalize': True, 
           'template': 'MNI', 
           'func_write_voxel_sizes': [2.0, 2.0, 2.0], 
           'anat_write_voxel_size': [1.0, 1.0, 1.0], 
           'dartel': False, 
           'normalize_software': 'spm', 
           'fwhm': 4.0, 
           'smooth_software': 'spm',
           'report': False, 
           'plot_tsdiffana': True,
           'slicer': 'ortho',   # slicer of activation maps QA
           'cut_coords': None, 
           'threshold': 3, 
           'cluster_th': 15,   # minimum number of voxels in reported clusters
           'n_jobs': 32,
           'scratch': 'scratch', 
#            'spm_dir': 'C:/Program Files/MATLAB/spm12' 
           'spm_dir': 'C:\\Program Files\\MATLAB\\spm12', 
#            'matlab_exec': 'C:/Program Files/MATLAB/R2017a'
           'matlab_exec': 'C:\\Program Files\\MATLAB\\R2017a', 
           'nsubjects':3
          }

In [3]:
assert options['dataset_dir']
assert options['output_dir']

In [4]:
def expand_path(path, relative_to=None):
    # cd to reference directory
    if relative_to is None:
        relative_to = os.getcwd()
    else:
        relative_to = expand_path(relative_to)
        if not os.path.exists(relative_to):
            raise OSError(
                "Reference path %s doesn't exist" % relative_to)
    old_cwd = os.getcwd()
    os.chdir(relative_to)

    _path = path
#     if _path.startswith('..'):
#         if _path == '..':
#             _path = os.path.dirname(os.getcwd())
#         else:
#             match = re.match('(?P<head>(?:\.{2}\/)+)(?P<tail>.*)', _path)
#             if match:
#                 _path = os.getcwd()
#                 for _ in range(len(match.group('head')) // 3):
#                     _path = os.path.dirname(_path)
#                 _path = os.path.join(_path, match.group('tail'))
#             else:
#                 _path = None
#     elif _path.startswith('./'): #'.\\'
#         _path = _path[2:]
#     elif _path.startswith('.'):
#         _path = _path[1:]
#     elif _path.startswith('~'):
#         if _path == '~':
#             _path = os.environ['HOME']
#         else:
#             _path = os.path.join(os.environ['HOME'], _path[2:])

    if not _path is None:
        _path = os.path.abspath(_path)

    # restore cwd
    os.chdir(old_cwd)
    return _path

In [5]:
def get_relative_path(ancestor, descendant):
    if ancestor == descendant:
        return ""

    ancestor = ancestor.rstrip("\\")
    descendant = descendant.rstrip("\\")
    right_part = descendant[len(ancestor):].lstrip("\\")
    if right_part is None:
        return None
    else:
        return right_part
    
    
def get_abspath_relative_to_file(filename, ref_filename):
    
    assert os.path.isfile(ref_filename)

    old_cwd = os.getcwd()  # save CWD
    os.chdir(os.path.dirname(ref_filename))  # in context
    abspath = os.path.abspath(filename)  # bing0!
    os.chdir(old_cwd)  # restore CWD

    return abspath

  

In [6]:
# check dataset_dir
dataset_dir = expand_path(options['dataset_dir'])
print(dataset_dir)
if not os.path.isdir(dataset_dir):
    raise OSError("dataset_dir '%s' doesn't exist" % dataset_dir)

C:\data\graduate_thesis\hcp_data_test_gz


In [7]:
# check output_dir
output_dir = expand_path(options['output_dir'], relative_to=dataset_dir)
if output_dir is None:
    raise OSError(
        ("Could not expand 'output_dir' : invalid"
         " path %s (relative to directory %s)") % (options['output_dir'],
                                                   dataset_dir))

# check scratch
scratch = expand_path(options['scratch'], relative_to=dataset_dir)
if scratch is None:
    raise OSError(
        ("Could not expand 'scratch' : invalid"
         " path %s (relative to directory %s)") % (options['scratch'],
                                                   dataset_dir))


In [8]:
# check subject_dirs
subject_dirs = expand_path(options['subject_dirs'], relative_to=dataset_dir)
print(subject_dirs)
if subject_dirs is None:
    raise OSError(
        ("Could not expand 'output_dir' : invalid"
         " path %s (relative to directory %s)") % (options['subject_dirs'],
                                                   dataset_dir))

C:\data\graduate_thesis\hcp_data_test_gz\*


In [9]:
# preproc parameters
preproc_params = {
    'spm_dir': options['spm_dir'],
    'matlab_exec': options['matlab_exec'],
    'report': options['report'],
    'output_dir': options['output_dir'],
    'scratch': options['scratch'],
    'dataset_id': options['dataset_dir'],
    'n_jobs': options['n_jobs'],
    'caching': options['caching'],
    'tsdiffana': options['plot_tsdiffana'],
    'dataset_description': options['dataset_description'],
    'slice_timing_software': options['slice_timing_software'],
    'realign_software': options['realign_software'],
    'coregister_software': options['coregister_software'],
    'smooth_software': options['smooth_software'], 
    'deleteorient': options['deleteorient'], 
    'slice_timing': options['disable_slice_timing']}

In [10]:
if preproc_params['slice_timing']:
    preproc_params.update(dict((k, options.get(k, None))
                               for k in ['TR', 'TA', 'slice_order',
                                         'interleaved']))
    if preproc_params['TR'] is None:
        preproc_params['slice_timing'] = False
        
# configure motion correction node
preproc_params['realign'] = not options.get('disable_realign', False)
if preproc_params['realign']:
    preproc_params['realign_reslice'] = options.get('reslice_realign',
                                                    False)
    preproc_params['register_to_mean'] = options.get('register_to_mean',
                                                     True)

# configure coregistration node
preproc_params['coregister'] = not options.get('disable_coregister',
                                               False)
if preproc_params['coregister']:
    preproc_params['coregister_reslice'] = options.get(
        'coregister_reslice')
    preproc_params['coreg_anat_to_func'] = not options.get(
        'coreg_func_to_anat', True)

# configure tissue segmentation node
preproc_params['segment'] = not options.get('disable_segment', False)
preproc_params['newsegment'] = options.get(
    'newsegment', False) and preproc_params['segment']

# configure normalization node
preproc_params['normalize'] = not options.get(
    'disable_normalize', False)


# configure output voxel sizes
for brain in ['func', 'anat']:
    k = '%s_write_voxel_size' % brain
    ks = k + 's'
    if k in options:
        assert not ks in options, (
            'Both %s and %s specified in ini file. Please use only one of '
            'them, they mean thesame thing!')
        options[ks] = options.pop(k)
    preproc_params[ks] = options.get(
        ks, [[3, 3, 3], [1, 1, 1]][brain == 'anat'])

In [11]:
# configure dartel
preproc_params['dartel'] = options.get('dartel', False)
preproc_params['output_modulated_tpms'] = options.get(
    'output_modulated_tpms', False)

# can't do dartel without newsegment!
if not preproc_params['newsegment']:
    preproc_params['newsegment'] = preproc_params['dartel']

# configure smoothing node
preproc_params['fwhm'] = options.get('fwhm', 0.)
preproc_params['anat_fwhm'] = options.get('anat_fwhm', 0.)

In [12]:
# how many subjects ?
subjects = []
nsubjects = options.get('nsubjects', np.inf)
exclude_these_subject_ids = options.get(
    'exclude_these_subject_ids', [])
include_only_these_subject_ids = options.get(
    'include_only_these_subject_ids', [])

In [13]:
subject_data_dirs = options.get("subject_dirs", "*")
if isinstance(subject_dirs, _basestring):
    subject_dir_wildcard = os.path.join(dataset_dir, subject_dirs)
    subject_data_dirs = [x for x in sorted(glob.glob(subject_dir_wildcard))
                         if os.path.isdir(x)]
    subject_data_dirs = [os.path.join(x, re.findall(r'\d{6}', x)[0]) for x in subject_data_dirs]
else:
    # list of subjects or subject wildcards
    subject_data_dirs = [os.path.join(dataset_dir, x)
                         for x in subject_data_dirs]
    subject_dir_wildcard = subject_data_dirs
    aux = []
    for subject_data_dir in subject_data_dirs:
        for x in sorted(glob.glob(subject_data_dir)):
            if os.path.isdir(x):
                aux.append(x)
    subject_data_dirs = aux

In [14]:
subject_data_dirs

['C:\\data\\graduate_thesis\\hcp_data_test_gz\\100307_3T_tfMRI_SOCIAL_preproc\\100307',
 'C:\\data\\graduate_thesis\\hcp_data_test_gz\\103414_3T_tfMRI_SOCIAL_preproc\\103414',
 'C:\\data\\graduate_thesis\\hcp_data_test_gz\\105115_3T_tfMRI_SOCIAL_preproc\\105115']

In [15]:
if not subject_data_dirs:
    warnings.warn("No subject directories found for wildcard: %s" % (
        subject_dir_wildcard))
else:
    print(len(subject_data_dirs) == nsubjects)

True


In [16]:
sess_func_wildcards = [key for key in list(options.keys())
                           if re.match("session_.+_func", key)]
sess_onset_wildcards = [key for key in list(options.keys())
                        if re.match("session_.+_onset", key)]
sess_ids = [re.match("session_(.+)_func", session).group(1)
                for session in sess_func_wildcards]

In [17]:
sess_func_wildcards

['session_1_func', 'session_2_func']

In [18]:
class SubjectData(object):
    def __init__(self, func=None, anat=None, subject_id="100307",
                 session_ids=None, output_dir=None, session_output_dirs=None,
                 anat_output_dir=None, scratch=None, warpable=None, **kwargs):
        if warpable is None:
            warpable = ['anat', 'func']
        self.func = func
        self.anat = anat
        self.subject_id = subject_id
        self.session_ids = session_ids
        self.n_sessions = None
        self.output_dir = output_dir
        self.anat_output_dir = anat_output_dir
        self.session_output_dirs = session_output_dirs
        self.warpable = warpable
        self.failed = False
        self.warpable = warpable
        self.nipype_results = {}
        self._set_items(**kwargs)
        self.scratch = output_dir if scratch is None else scratch
        self.anat_scratch_dir = anat_output_dir if scratch is None else scratch
        self.session_scratch_dirs = (session_output_dirs if scratch is None
                                     else [scratch] * len(session_output_dirs))

    def _set_items(self, **kwargs):
        for k, v in list(kwargs.items()):
            setattr(self, k, v)
            
    def __getitem__(self, key):
        return self.__dict__[key]
    
    def _sanitize_output_dir(self, output_dir):
        if output_dir is not None:
            output_dir = os.path.abspath(output_dir)
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
        return output_dir

    def _sanitize_session_output_dirs(self):
        if self.session_output_dirs is None:
            if self.n_sessions is None:
                return
            self.session_output_dirs = [None] * self.n_sessions

        # session-wise func output directories
        for sess, sess_output_dir in enumerate(self.session_output_dirs):
            if sess_output_dir is None:
                if self.n_sessions > 1:
                    sess_output_dir = os.path.join(
                        self.output_dir, self.session_ids[sess])
                else:
                    sess_output_dir = self.output_dir
            else:
                sess_output_dir = sess_output_dir
            self.session_output_dirs[sess] = self._sanitize_output_dir(
                sess_output_dir)

    def _sanitize_session_scratch_dirs(self):
        if self.session_scratch_dirs is None:
            if self.n_sessions is None:
                return
            self.session_scratch_dirs = [None] * self.n_sessions

        # session-wise func scratch directories
        for sess, sess_scratch_dir in enumerate(self.session_scratch_dirs):
            if sess_scratch_dir is None:
                if self.n_sessions > 1:
                    sess_scratch_dir = os.path.join(
                        self.scratch, self.session_ids[sess])
                else:
                    sess_scratch_dir = self.scratch
            self.session_scratch_dirs[sess] = self._sanitize_output_dir(
                sess_scratch_dir)

    def _sanitize_output_dirs(self):
        # output dir
        self.output_dir = self._sanitize_output_dir(self.output_dir)

        # anat output dir
        if self.anat_output_dir is None:
            self.anat_output_dir = self.output_dir
        self.anat_output_dir = self._sanitize_output_dir(self.anat_output_dir)

        # sanitize per-session func output dirs
        self._sanitize_session_output_dirs()

    def _sanitize_scratch_dirs(self):
        # scratch dir
        self.scratch = self._sanitize_output_dir(self.scratch)

        # anat scratch dir
        if self.anat_scratch_dir is None:
            self.anat_scratch_dir = self.scratch
        self.anat_scratch_dir =\
            self._sanitize_output_dir(self.anat_scratch_dir)

        # sanitize per-session func scratch dirs
        self._sanitize_session_scratch_dirs()

    def _niigz2nii(self):
        if self.scratch is None:
            self.scratch = self.output_dir
        cache_dir = os.path.join(self.scratch, 'cache_dir')
        mem = Memory(cache_dir, verbose=100)
        self._sanitize_session_output_dirs()
        self._sanitize_session_scratch_dirs()
        if None not in [self.func, self.n_sessions,
                        self.session_scratch_dirs]:
            self.func = [mem.cache(do_niigz2nii)(
                self.func[sess], output_dir=self.session_scratch_dirs[sess])
                for sess in range(self.n_sessions)]
        if self.anat is not None:
            self.anat = mem.cache(do_niigz2nii)(
                self.anat, output_dir=self.anat_scratch_dir)

In [20]:
for subject_data_dir in subject_data_dirs:
    print(subject_data_dir)

C:\data\graduate_thesis\hcp_data_test_gz\100307_3T_tfMRI_SOCIAL_preproc\100307
C:\data\graduate_thesis\hcp_data_test_gz\103414_3T_tfMRI_SOCIAL_preproc\103414
C:\data\graduate_thesis\hcp_data_test_gz\105115_3T_tfMRI_SOCIAL_preproc\105115


In [19]:
# os.path.join(subject_data_dirs[0],options[sess_func_wildcards[0]])        

In [21]:
for subject_data_dir in subject_data_dirs:
    if len(subjects) == nsubjects:
        # we've had enough subjects already; end
        break
    subject_id = os.path.basename(subject_data_dir)
    print(subject_id)
    subject_output_dir = os.path.join(output_dir, subject_id)
    if scratch is not None:
        subject_scratch = os.path.join(scratch, subject_id)
    else:
        subject_scratch = None
    
    
    # grab functional data
    func = []
    sess_output_dirs = []
    # skip_subject = False
    onset = []
    
    for s, sess_func_wildcard in enumerate(sess_func_wildcards):
        o = None
        if s < len(sess_onset_wildcards):
            sess_onset_wildcard = sess_onset_wildcards[s]
            sess_onset_wildcard = options[sess_onset_wildcard]
            sess_onset_wildcard = os.path.join(subject_data_dir,
                                               sess_onset_wildcard)
            sess_onset = sorted(glob.glob(sess_onset_wildcard))
            if len(sess_onset) > 1:
                raise ValueError
            if len(sess_onset) > 0:
                o = sess_onset[0]
        onset.append(o)
        
        sess_func_wildcard = options[sess_func_wildcard]  
        
#         if expand_path(sess_func_wildcard, relative_to=dataset_dir) is None:
#             raise OSError(
#                 ("Could not expand 'sess_func_wildcard' : invalid"
#                  " path %s (relative to directory %s)") % (options['sess_func_wildcard'],
#                                                            dataset_dir))
#         else: 
#         subject_data_dir = os.path.join(subject_data_dir,
#                                           sess_func_wildcard) 
        
#         sess_func = os.path.join(subject_data_dir,
#                                           sess_func_wildcard)    
        sess_func = os.path.join(subject_data_dir,
                                          sess_func_wildcard) 
        
        # sess_func = sorted(glob.glob(sess_func_wildcard))
    
        # skip session if no data found
        if not sess_func:
            warnings.warn(
                ("subject %s: No func images found for"
                 " wildcard %s" % (subject_id, sess_func_wildcard)))
            continue
        
        sess_dir = os.path.dirname(sess_func)
#         if len(sess_func) == 1:
#             sess_func = sess_func_wildcard[0]
        func.append(sess_func)
        print(sess_func)

        # session output dir
        if os.path.basename(sess_dir) != os.path.basename(
                subject_output_dir):
            sess_output_dir = os.path.join(subject_output_dir,
                                           get_relative_path(
                                               subject_data_dir, sess_dir))
        else:
            sess_output_dir = subject_output_dir
        if not os.path.exists(sess_output_dir):
            os.makedirs(sess_output_dir)
        sess_output_dirs.append(sess_output_dir)
        

#     # something is wrong with this guy, skip
#     if skip_subject:
#         warnings.warn("Skipping subject %s" % subject_id)
#         continue

    # grab anat
    anat = None
    if not options.get("anat", None) is None:
        # grap anat file(s)
        anat_wildcard = os.path.join(subject_data_dir, options['anat'])
        anat = glob.glob(anat_wildcard)
        # skip subject if anat absent
        if len(anat) < 1:
            print((
                "subject %s: anat image matching %s not found!; skipping"
                " subject" % (subject_id, anat_wildcard)))
            continue

        # we need just 1 anat volume
        anat = anat[0]
        anat_dir = os.path.dirname(anat)
    else:
        anat = None
        anat_dir = ""

    # anat output dir
    anat_output_dir = None
    if anat_dir:
        anat_output_dir = os.path.join(subject_output_dir,
                                       get_relative_path(subject_data_dir,
                                                         anat_dir))
        if not os.path.exists(anat_output_dir):
            os.makedirs(anat_output_dir)

    # make subject data
    subject_data = SubjectData(
        subject_id=subject_id, 
        func=func, 
        anat=anat,
        output_dir=subject_output_dir,
        scratch=subject_scratch,
        session_output_dirs=sess_output_dirs,
        anat_output_dir=anat_output_dir,
        session_id=sess_ids,
        data_dir=subject_data_dir,
        onset=onset,
        TR=options.get('TR', None),
        drift_model='Cosine',
        hrf_model=options.get('hrf_model', 'spm + derivative'),
        hfcut=options.get("hfcut", 128.),
        time_units=options.get("time_units", "seconds"))

    subjects.append(subject_data)
    
if not subjects:
    warnings.warn(
        "No subjects globbed (dataset_dir=%s, subject_dir_wildcard=%s" % (
            dataset_dir, subject_dir_wildcard))


100307
C:\data\graduate_thesis\hcp_data_test_gz\100307_3T_tfMRI_SOCIAL_preproc\100307\MNINonLinear\Results\tfMRI_SOCIAL_LR\tfMRI_SOCIAL_LR.nii.gz
C:\data\graduate_thesis\hcp_data_test_gz\100307_3T_tfMRI_SOCIAL_preproc\100307\MNINonLinear\Results\tfMRI_SOCIAL_RL\tfMRI_SOCIAL_RL.nii.gz
103414
C:\data\graduate_thesis\hcp_data_test_gz\103414_3T_tfMRI_SOCIAL_preproc\103414\MNINonLinear\Results\tfMRI_SOCIAL_LR\tfMRI_SOCIAL_LR.nii.gz
C:\data\graduate_thesis\hcp_data_test_gz\103414_3T_tfMRI_SOCIAL_preproc\103414\MNINonLinear\Results\tfMRI_SOCIAL_RL\tfMRI_SOCIAL_RL.nii.gz
105115
C:\data\graduate_thesis\hcp_data_test_gz\105115_3T_tfMRI_SOCIAL_preproc\105115\MNINonLinear\Results\tfMRI_SOCIAL_LR\tfMRI_SOCIAL_LR.nii.gz
C:\data\graduate_thesis\hcp_data_test_gz\105115_3T_tfMRI_SOCIAL_preproc\105115\MNINonLinear\Results\tfMRI_SOCIAL_RL\tfMRI_SOCIAL_RL.nii.gz


In [22]:
subjects[0].data_dir

'C:\\data\\graduate_thesis\\hcp_data_test_gz\\100307_3T_tfMRI_SOCIAL_preproc\\100307'

In [23]:
slicer = 'ortho'  # slicer of activation maps QA
cut_coords = None
threshold = 3.
cluster_th = 15  # minimum number of voxels in reported clusters
protocol = 'SOCIAL'

In [24]:
# regex for contrasts
CON_REAL_REGX = ("set fmri\(con_real(?P<con_num>\d+?)\.(?P<ev_num>\d+?)\)"
            " (?P<con_val>\S+)")

# regex for "Number of EVs"
NUM_EV_REGX = """set fmri\(evs_orig\) (?P<evs_orig>\d+)
set fmri\(evs_real\) (?P<evs_real>\d+)
set fmri\(evs_vox\) (?P<evs_vox>\d+)"""

# regex for "Number of contrasts"
NUM_CON_REGX = """set fmri\(ncon_orig\) (?P<ncon>\d+)
set fmri\(ncon_real\) (?P<ncon_real>\d+)"""

# regex for "# EV %i title"
EV_TITLE_REGX = """set fmri\(evtitle\d+?\) \"(?P<evtitle>.+)\""""

# regex for "Title for contrast_real %i"
CON_TITLE_REGX = """set fmri\(conname_real\.\d+?\) \"(?P<conname_real>.+)\""""

# regex for "Basic waveform shape (EV %i)"
# 0 : Square
# 1 : Sinusoid
# 2 : Custom (1 entry per volume)
# 3 : Custom (3 column format)
# 4 : Interaction
# 10 : Empty (all zeros)
EV_SHAPE_REGX = """set fmri\(shape\d+\) (?P<shape>[0|1|3])"""

# regex for "Custom EV file (EV %i)"
EV_CUSTOM_FILE_REGX = """set fmri\(custom\d+?\) \"(?P<custom>.+)\""""


def read_fsl_design_file(design_filename):
    # read design file
    design_conf = open(design_filename, 'r').read()

    # scrape n_conditions and n_contrasts
    n_conditions_orig = int(re.search(NUM_EV_REGX,
                                      design_conf).group("evs_orig"))
    n_conditions = int(re.search(NUM_EV_REGX, design_conf).group("evs_real"))
    n_contrasts = int(re.search(NUM_CON_REGX, design_conf).group("ncon_real"))

    # initialize 2D array of contrasts
    contrasts = np.zeros((n_contrasts, n_conditions))

    # lookup EV titles
    conditions = [item.group("evtitle") for item in re.finditer(
                  EV_TITLE_REGX, design_conf)]
    assert len(conditions) == n_conditions_orig

    # lookup contrast titles
    contrast_ids = [item.group("conname_real")for item in re.finditer(
                    CON_TITLE_REGX, design_conf)]
    assert len(contrast_ids) == n_contrasts

    # # lookup EV (condition) shapes
    # condition_shapes = [int(item.group("shape")) for item in re.finditer(
    #         EV_SHAPE_REGX, design_conf)]
    # print(condition_shapes)

    # lookup EV (condition) custom files
    timing_files = [get_abspath_relative_to_file(item.group("custom"),
                                                  design_filename)
                    for item in re.finditer(EV_CUSTOM_FILE_REGX, design_conf)]

    # lookup the contrast values
    count = 0
    for item in re.finditer(CON_REAL_REGX, design_conf):
        count += 1
        value = float(item.group('con_val'))

        i = int(item.group('con_num')) - 1
        j = int(item.group('ev_num')) - 1

        # roll-call
        assert 0 <= i < n_contrasts, item.group()
        assert 0 <= j < n_conditions, item.group()

        contrasts[i, j] = value

    # roll-call
    assert count == n_contrasts * n_conditions, count

    return conditions, timing_files, contrast_ids, contrasts


def niigz2nii(ifilename, output_dir=None):
    if isinstance(ifilename, list):
        return [niigz2nii(x, output_dir=output_dir) for x in ifilename]
    else:
        if not isinstance(ifilename, _basestring):
            raise RuntimeError(
                "ifilename must be string or list of strings, got %s" % type(
                    ifilename))

    if not ifilename.endswith('.nii.gz'):
        return ifilename

    ofilename = ifilename[:-3]
    if not output_dir is None:
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        ofilename = os.path.join(output_dir, os.path.basename(ofilename))

    nibabel.save(nibabel.load(ifilename), ofilename)

    return ofilename

In [67]:
from pypreprocess.external.nistats.design_matrix import make_design_matrix

def make_paradigm_from_timing_files(timing_files, condition_ids=None):
    if not condition_ids is None:
        assert len(condition_ids) == len(timing_files)

    onsets = []
    durations = []
    amplitudes = []
    _condition_ids = []
    count = 0
    for timing_file in timing_files:
        timing = np.loadtxt(timing_file)
        if timing.ndim == 1:
            timing = timing[np.newaxis, :]
        if condition_ids is None:
            condition_id = os.path.basename(timing_file).lower(
                ).split('.')[0]
            
        else:
            condition_id = condition_ids[count]
        _condition_ids = _condition_ids + [condition_id
                                           ] * timing.shape[0]

        count += 1

        if timing.shape[1]  == 3:
            onsets = onsets + list(timing[..., 0])
            durations = durations + list(timing[..., 1])
            amplitudes = amplitudes + list(timing[..., 2])
        elif timing.shape[1]  == 2:
            onsets = onsets + list(timing[..., 0])
            durations = durations + list(timing[..., 1])
            amplitudes = durations + list(np.ones(len(timing)))
        elif timing.shape[1] == 1:
            onsets = onsets + list(timing[..., 0])
            durations = durations + list(np.zeros(len(timing)))
            amplitudes = durations + list(np.ones(len(timing)))
        else:
            raise TypeError(
                "Timing info must either be 1D array of onsets of 2D "
                "array with 2 or 3 columns: the first column is for "
                "the onsets, the second for the durations, and the "
                "third --if present-- if for the amplitudes; got %s" % timing)
        print('--------')
        print(onsets)
        print(durations)
        print(amplitudes)
        print(_condition_ids)
        print('--------')
    return pd.DataFrame({'name': _condition_ids,
                         'onset': onsets,
                         'duration': durations,
                         'modulation': amplitudes})


def make_dmtx_from_timing_files(timing_files, condition_ids=None,
                                frametimes=None, n_scans=None, tr=None,
                                add_regs_file=None,
                                add_reg_names=None,
                                **make_dmtx_kwargs):
    # make paradigm
    paradigm = make_paradigm_from_timing_files(timing_files,
                                               condition_ids=condition_ids)

    # make frametimes
    if frametimes is None:
#         assert not n_scans is None, ("frametimes not specified, especting a "
#                                      "value for n_scans")
#         assert not tr is None, ("frametimes not specified, especting a "
#                                 "value for tr")
        frametimes = np.linspace(0, (n_scans - 1) * tr, n_scans)
#     else:
#         assert n_scans is None, ("frametimes specified, not especting a "
#                                  "value for n_scans")
#         assert tr is None, ("frametimes specified, not especting a "
#                                  "value for tr")

    # load addition regressors from file
    if not add_regs_file is None:
        if isinstance(add_regs_file, np.ndarray):
            add_regs = add_regs_file
        else:
            assert os.path.isfile(add_regs_file), (
                "add_regs_file %s doesn't exist")
            add_regs = np.loadtxt(add_regs_file)
        assert add_regs.ndim == 2, (
            "Bad add_regs_file: %s (must contain a 2D array, each column "
            "representing the values of a single regressor)" % add_regs_file)
        if add_reg_names is None:
            add_reg_names = ["R%i" % (col + 1) for col in range(
                    add_regs.shape[-1])]
        else:
            assert len(add_reg_names) == add_regs.shape[1], (
                "Expecting %i regressor names, got %i" % (
                    add_regs.shape[1], len(add_reg_names)))

        make_dmtx_kwargs["add_reg_names"] = add_reg_names
        make_dmtx_kwargs["add_regs"] = add_regs

    # make design matrix
    design_matrix = make_design_matrix(frame_times=frametimes,
                                       paradigm=paradigm,
                                       **make_dmtx_kwargs)

    # return output
    return design_matrix, paradigm, frametimes



### проверка

In [29]:
subject_data.design_files = [os.path.join(
            subject_data.data_dir, ("MNINonLinear\\Results\\tfMRI_%s_%s\\"
                                    "tfMRI_%s_%s_hp200_s4_level1.fsf") % (
                protocol, direction, protocol, direction))
            for direction in ['LR', 'RL']]

In [30]:
fsl_condition_ids, timing_files, fsl_contrast_ids, contrast_values = \
            read_fsl_design_file(subject_data.design_files[0])

In [34]:
contrast_values

array([[ 1.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.],
       [ 1.,  0., -1.,  0.],
       [-1.,  0.,  0.,  0.],
       [ 0.,  0., -1.,  0.],
       [-1.,  0.,  1.,  0.]])

In [32]:
timing_files

['C:\\data\\graduate_thesis\\hcp_data_test_gz\\105115_3T_tfMRI_SOCIAL_preproc\\105115\\MNINonLinear\\Results\\EVs\\rnd.txt',
 'C:\\data\\graduate_thesis\\hcp_data_test_gz\\105115_3T_tfMRI_SOCIAL_preproc\\105115\\MNINonLinear\\Results\\EVs\\mental.txt']

In [33]:
fsl_contrast_ids

['RANDOM', 'TOM', 'RANDOM-TOM', 'neg_RANDOM', 'neg_TOM', 'TOM-RANDOM']

In [35]:
fsl_condition_ids

['RANDOM', 'TOM']

In [36]:
timing_files = [tf.replace("EVs", "tfMRI_%s_%s\\EVs" % (
                    "SOCIAL", "LR")) for tf in timing_files]

In [37]:
timing_files

['C:\\data\\graduate_thesis\\hcp_data_test_gz\\105115_3T_tfMRI_SOCIAL_preproc\\105115\\MNINonLinear\\Results\\tfMRI_SOCIAL_LR\\EVs\\rnd.txt',
 'C:\\data\\graduate_thesis\\hcp_data_test_gz\\105115_3T_tfMRI_SOCIAL_preproc\\105115\\MNINonLinear\\Results\\tfMRI_SOCIAL_LR\\EVs\\mental.txt']

In [68]:
paradigm = make_paradigm_from_timing_files(timing_files)
paradigm

--------
[84.031000000000006, 160.08000000000001]
[23.0, 23.0]
[1.0, 1.0]
['rnd', 'rnd']
--------
--------
[84.031000000000006, 160.08000000000001, 8.2100000000000009, 46.006999999999998, 122.056]
[23.0, 23.0, 23.0, 23.0, 23.0]
[1.0, 1.0, 1.0, 1.0, 1.0]
['rnd', 'rnd', 'mental', 'mental', 'mental']
--------


Unnamed: 0,duration,modulation,name,onset
0,23.0,1.0,rnd,84.031
1,23.0,1.0,rnd,160.08
2,23.0,1.0,mental,8.21
3,23.0,1.0,mental,46.007
4,23.0,1.0,mental,122.056


In [69]:
n = nibabel.load(subject_data.func[1]).shape[-1]
design_matrix, paradigm, frametimes = make_dmtx_from_timing_files(
            timing_files,fsl_condition_ids, n_scans = int(n), tr=0.72)

--------
[84.031000000000006, 160.08000000000001]
[23.0, 23.0]
[1.0, 1.0]
['RANDOM', 'RANDOM']
--------
--------
[84.031000000000006, 160.08000000000001, 8.2100000000000009, 46.006999999999998, 122.056]
[23.0, 23.0, 23.0, 23.0, 23.0]
[1.0, 1.0, 1.0, 1.0, 1.0]
['RANDOM', 'RANDOM', 'TOM', 'TOM', 'TOM']
--------


In [71]:
design_matrix.shape

(274, 5)

In [44]:
n_scans = nibabel.load(subject_data.func[1]).shape[-1]
tr = 0.72
a  = np.linspace(0, (n_scans - 1) * tr, n_scans)

In [45]:
a

array([   0.  ,    0.72,    1.44,    2.16,    2.88,    3.6 ,    4.32,
          5.04,    5.76,    6.48,    7.2 ,    7.92,    8.64,    9.36,
         10.08,   10.8 ,   11.52,   12.24,   12.96,   13.68,   14.4 ,
         15.12,   15.84,   16.56,   17.28,   18.  ,   18.72,   19.44,
         20.16,   20.88,   21.6 ,   22.32,   23.04,   23.76,   24.48,
         25.2 ,   25.92,   26.64,   27.36,   28.08,   28.8 ,   29.52,
         30.24,   30.96,   31.68,   32.4 ,   33.12,   33.84,   34.56,
         35.28,   36.  ,   36.72,   37.44,   38.16,   38.88,   39.6 ,
         40.32,   41.04,   41.76,   42.48,   43.2 ,   43.92,   44.64,
         45.36,   46.08,   46.8 ,   47.52,   48.24,   48.96,   49.68,
         50.4 ,   51.12,   51.84,   52.56,   53.28,   54.  ,   54.72,
         55.44,   56.16,   56.88,   57.6 ,   58.32,   59.04,   59.76,
         60.48,   61.2 ,   61.92,   62.64,   63.36,   64.08,   64.8 ,
         65.52,   66.24,   66.96,   67.68,   68.4 ,   69.12,   69.84,
         70.56,   71

In [113]:
# def run_suject_level1_glm(subject_data,
#                           readout_time=.01392,  # seconds
#                           tr=.72,
#                           dc=True,
#                           hrf_model="spm + derivative",
#                           drift_model="Cosine",
#                           hfcut=100,
#                           regress_motion=True,
#                           slicer='ortho',
#                           cut_coords=None,
#                           threshold=3.,
#                           cluster_th=15,
#                           normalize=True,
#                           fwhm=0.,
#                           protocol="SOCIAL",
#                           func_write_voxel_sizes=None,
#                           anat_write_voxel_sizes=None,
#                           **other_preproc_kwargs
#                           ):
#     """
#     Function to do preproc + analysis for a single HCP subject (task fMRI)

#     """

# #     add_regs_files = None
# #     n_regressions = 6
#     subject_data.n_sessions = 2

#     subject_data.tmp_output_dir = os.path.join(subject_data.output_dir, "tmp")
#     if not os.path.exists(subject_data.tmp_output_dir):
#         os.makedirs(subject_data.tmp_output_dir)

#     if not os.path.exists(subject_data.output_dir):
#         os.makedirs(subject_data.output_dir)

#     mem = Memory(os.path.join(subject_data.output_dir, "cache_dir"),
#                  verbose=100)

#     # glob design files (.fsf)
#     subject_data.design_files = [os.path.join(
#             subject_data.data_dir, ("MNINonLinear\\Results\\tfMRI_%s_%s\\"
#                                     "tfMRI_%s_%s_hp200_s4_level1.fsf") % (
#                 protocol, direction, protocol, direction))
#             for direction in ['LR', 'RL']]

#     assert len(subject_data.design_files) == 2
    
#     for df in subject_data.design_files:
#         if not os.path.isfile(df):
#             return
#     print(subject_data.design_files)
#     if 0x0:
#         subject_data = _do_fmri_distortion_correction(
#             subject_data, dc=dc, fwhm=fwhm, readout_time=readout_time,
#             **other_preproc_kwargs)

#     # chronometry
# #     stats_start_time = pretty_time()

#     # merged lists
#     paradigms = []
#     frametimes_list = []
#     design_matrices = []
#     # fmri_files = []
#     n_scans = []
#     # for direction, direction_index in zip(['LR', 'RL'], xrange(2)):
#     for sess in xrange(subject_data.n_sessions):
#         direction = ['LR', 'RL'][sess]
#         # glob the design file
#         # design_file = os.path.join(# _subject_data_dir, "tfMRI_%s_%s" % (
#                 # protocol, direction),
#         design_file = subject_data.design_files[sess]
#                 #                    "tfMRI_%s_%s_hp200_s4_level1.fsf" % (
#                 # protocol, direction))
#         if not os.path.isfile(design_file):
#             print("Can't find design file %s; skipping subject %s" %
#                   design_file, subject_data.subject_id)
#             return

#         # read the experimental setup
#         print("Reading experimental setup from %s ..." % design_file)
#         fsl_condition_ids, timing_files, fsl_contrast_ids, contrast_values = \
#             read_fsl_design_file(design_file)
#         print("... done.\r\n")

#         # fix timing filenames
#         timing_files = [tf.replace("EVs", "tfMRI_%s_%s\\EVs" % (
#                     protocol, direction)) for tf in timing_files]

#         # make design matrix
#         print("Constructing design matrix for direction %s ..." % direction)
#         _n_scans = nibabel.load(subject_data.func[sess]).shape[-1]
#         n_scans.append(_n_scans)
# #         add_regs_file = add_regs_files[
# #             sess] if not add_regs_files is None else None
#         design_matrix, paradigm, frametimes = make_dmtx_from_timing_files(
#             timing_files, fsl_condition_ids, n_scans=_n_scans, tr=tr,
#             hrf_model=hrf_model, drift_model=drift_model, hfcut=hfcut,
#             add_regs_file=add_regs_file,
#             add_reg_names= None
#             )

#         print("... done.")
#         paradigms.append(paradigm)
#         frametimes_list.append(frametimes)
#         design_matrices.append(design_matrix)

#         # convert contrasts to dict
#         contrasts = dict((contrast_id,
#                           # append zeros to end of contrast to match design
#                           np.hstack((contrast_value, np.zeros(len(
#                                 design_matrix.names) - len(contrast_value)))))

#                          for contrast_id, contrast_value in zip(
#                 fsl_contrast_ids, contrast_values))


#         contrasts = dict((k, v) for k, v in contrasts.items() if "-" in k)

#     # replicate contrasts across sessions
#     contrasts = dict((cid, [cval] * 2)
#                      for cid, cval in contrasts.items())

#     cache_dir = cache_dir = os.path.join(subject_data.output_dir,
#                                          'cache_dir')
#     if not os.path.exists(cache_dir):
#         os.makedirs(cache_dir)
#     nipype_mem = NipypeMemory(base_dir=cache_dir)

#     if 0x0:
#         if np.sum(fwhm) > 0.:
#             subject_data.func = nipype_mem.cache(spm.Smooth)(
#                 in_files=subject_data.func,
#                 fwhm=fwhm,
#                 ignore_exception=False,
#                 ).outputs.smoothed_files

#     # compute native-space maps and mask
#     stuff = mem.cache(tortoise)(
#         subject_data.func, subject_data.anat)
#     if stuff is None:
#         return None
#     effects_maps, z_maps, mask_path, map_dirs = stuff

#     # remove repeated contrasts
#     contrasts = dict((cid, cval[0]) for cid, cval in contrasts.items())
#     import json
#     json.dump(dict((k, list(v)) for k, v in contrasts.items()),
#               open(os.path.join(subject_data.tmp_output_dir,
#                                 "contrasts.json"), "w"))
#     subject_data.contrasts = contrasts

#     if normalize:
#         assert hasattr(subject_data, "parameter_file")

#         subject_data.native_effects_maps = effects_maps
#         subject_data.native_z_maps = z_maps
#         subject_data.native_mask_path = mask_path

#         # warp effects maps and mask from native to standard space (MNI)
#         apply_to_files = [
#             v for _, v in subject_data.native_effects_maps.items()
#             ] + [subject_data.native_mask_path]
#         tmp = nipype_mem.cache(spm.Normalize)(
#             parameter_file=getattr(subject_data, "parameter_file"),
#             apply_to_files=apply_to_files,
#             write_bounding_box=[[-78, -112, -50], [78, 76, 85]],
#             write_voxel_sizes=func_write_voxel_sizes,
#             write_wrap=[0, 0, 0],
#             write_interp=1,
#             jobtype='write',
#             ignore_exception=False,
#             ).outputs.normalized_files

#         subject_data.mask = hard_link(tmp[-1], subject_data.output_dir)
#         subject_data.effects_maps = dict(zip(effects_maps.keys(), hard_link(
#                     tmp[:-1], map_dirs["effects"])))

#         # warp anat image
#         subject_data.anat = hard_link(nipype_mem.cache(spm.Normalize)(
#                 parameter_file=getattr(subject_data, "parameter_file"),
#                 apply_to_files=subject_data.anat,
#                 write_bounding_box=[[-78, -112, -50], [78, 76, 85]],
#                 write_voxel_sizes=anat_write_voxel_sizes,
#                 write_wrap=[0, 0, 0],
#                 write_interp=1,
#                 jobtype='write',
#                 ignore_exception=False,
#                 ).outputs.normalized_files, subject_data.anat_output_dir)
#     else:
#         subject_data.mask = mask_path
#         subject_data.effects_maps = effects_maps
#         subject_data.z_maps = z_maps

#     return subject_data



# # fit GLM
# def tortoise(*args):
#     print(args)
#     print(
#         'Fitting a "Fixed Effect" GLM for merging LR and RL '
#         'phase-encoding directions for subject %s ...' %
#         subject_data.subject_id)
#     fmri_glm = FMRILinearModel(subject_data.func,
#                                [design_matrix.matrix
#                                 for design_matrix in design_matrices],
#                                mask='compute'
#                                )
#     fmri_glm.fit(do_scaling=True, model='ar1')
#     print("... done.\r\n")

#     # save computed mask
#     mask_path = os.path.join(subject_data.output_dir, "mask.nii")
#     print("Saving mask image to %s ..." % mask_path)
#     nibabel.save(fmri_glm.mask, mask_path)
#     print("... done.\r\n")

#     z_maps = {}
#     effects_maps = {}
#     map_dirs = {}
#     try:
#         for contrast_id, contrast_val in contrasts.items():
#             print("\tcontrast id: %s" % contrast_id)
#             z_map, eff_map = fmri_glm.contrast(
#                 contrast_val,
#                 con_id=contrast_id,
#                 output_z=True,
#                 output_effects=True
#                 )

#             # store stat maps to disk
#             for map_type, out_map in zip(['z', 'effects'],
#                                          [z_map, eff_map]):
#                 map_dir = os.path.join(
#                     subject_data.output_dir, '%s_maps' % map_type)
#                 map_dirs[map_type] = map_dir
#                 if not os.path.exists(map_dir):
#                     os.makedirs(map_dir)
#                 map_path = os.path.join(
#                     map_dir, '%s_%s.nii' % (map_type, contrast_id))
#                 print("\t\tWriting %s ..." % map_path)

#                 nibabel.save(out_map, map_path)

#                 # collect zmaps for contrasts we're interested in
#                 if map_type == 'z':
#                     z_maps[contrast_id] = map_path

#                 if map_type == 'effects':
#                     effects_maps[contrast_id] = map_path

#         return effects_maps, z_maps, mask_path, map_dirs
#     except:
#         return None

In [26]:
# # for subject in subjects:
# fwhm = preproc_params.get("fwhm")
# task_output_dir = os.path.join(os.path.dirname(subjects[0].output_dir))

# kwargs = {"regress_motion": True,
#           "slicer": slicer,
#           "threshold": threshold,
#           "cluster_th": cluster_th,
#           "protocol": protocol,
#           "dc": not preproc_params.get(
#          "disable_distortion_correction", False),
#           "realign": preproc_params["realign"],
#           "coregister": preproc_params["coregister"],
#           "segment": preproc_params["segment"],
#           "normalize": preproc_params["normalize"],
#           'func_write_voxel_sizes': preproc_params[
#          'func_write_voxel_sizes'],
#           'anat_write_voxel_sizes': preproc_params[
#          'anat_write_voxel_sizes'],
#           "fwhm": fwhm
#           }

# # n_jobs = int(os.environ.get('N_JOBS', 1))
# # if n_jobs > 1:
# #     subjects = Parallel(
# #         n_jobs=n_jobs, verbose=100)(delayed(
# #             run_suject_level1_glm)(
# #                 subject_data,
# #                 **kwargs) for subject_data in subjects)
# # else:


# # subjects = [run_suject_level1_glm(subject_data,
# #                                       **kwargs)
# #                 for subject_data in subjects]
# # subjects = [subject for subject in subjects if subject]

 

In [47]:
   # # level 2
    # stats_start_time = pretty_time()
    # mask_images = [subject_data.mask for subject_data in subjects]
    # group_mask = nibabel.Nifti1Image(
    #     intersect_masks(mask_images).astype(np.int8),
    #     nibabel.load(mask_images[0]).get_affine())
    # nibabel.save(group_mask, os.path.join(
    #         task_output_dir, "mask.nii.gz"))

    # print("... done.\r\n")
    # print("Group GLM")
    # contrasts = subjects[0].contrasts
    # subjects_effects_maps = [subject_data.effects_maps
    #                          for subject_data in subjects]

    # group_one_sample_t_test(
    #     mask_images,
    #     subjects_effects_maps,
    #     contrasts,
    #     task_output_dir,
    #     threshold=threshold,
    #     cluster_th=cluster_th,
    #     start_time=stats_start_time,
    #     subjects=[subject_data.subject_id for subject_data in subjects],
    #     title='Group GLM for HCP fMRI %s protocol (%i subjects)' % (
    #         protocol, len(subjects)),
    #     slicer=slicer
    #     )

1