## Globals

In [8]:
from __future__ import print_function

import sys
import os
import re
import glob
import subprocess
import shutil
import json

#root_dir = '/home/preclineu/yartoe'
root_dir = '/project_cephfs/3022017.02/'
cluster_log_dir = os.path.join(root_dir,'logs')
aroma_path = '/home/preclineu/andmar/sfw/ICA-AROMA/ICA_AROMA.py'  

cmd_qsub_base = ['/home/preclineu/andmar/DCCN/Scripts/Torque/SubmitToCluster.py',
                 '-length', '102400',
                 '-memory', '32gb',
                 '-logfiledir', cluster_log_dir
                ]

abcd_dict = os.path.join(root_dir,'downloads','abcd_modalities.json')

# read the dictionary
with open(abcd_dict,'r') as f:
   modalities = json.load(f) 
    
# get a list of subjects to process
sub_dirs = sorted(glob.glob(os.path.join(root_dir,'subjects','sub-NDARINV*')))

#sub_dirs = sub_dirs[0:15]
#print('\n'.join(sub_dirs))

print('Dictionary read from:', abcd_dict)
print('Found', len(sub_dirs), 'subjects to process')


Dictionary read from: /project_cephfs/3022017.02/downloads/abcd_modalities.json
Found 11856 subjects to process


['/project_cephfs/3022017.02/subjects/sub-NDARINV003RTV85',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV005V6D2C',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV007W6H7B',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV00BD7VDC',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV00CY2MDM',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV00HEV6HB',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV00J52GPG',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV00LH735Y',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV00LJVZK2',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV00NPMHND']

## run fsl_anat

In [17]:
verbose = False

sub_batch = sub_dirs[:10]#[10000:]

submitted = 0
already_processed = 0
for s in sub_batch:
    subid = os.path.splitext(os.path.basename(s))[0]
    if verbose:
        print('processing subject',subid)

    job_name = 'ABCD_fsl_anat_' + subid
    
    sessions = glob.glob(os.path.join(s, 'ses-*'))
    for sess in sessions:
        sdir = os.path.join(sess, 'anat')
        if not os.path.exists(sdir):
            continue
        
        # get all subdirectories 
        t1_all = [os.path.join(sdir, o) for o in os.listdir(sdir) if os.path.isdir(os.path.join(sdir, o))]

        for t1_dir in t1_all:
            if os.path.exists(os.path.join(t1_dir,'T1.anat','T1_biascorr_brain.nii.gz')):
                if verbose: 
                    print(' >', subid, ' anatomical data already processed. Doing nothing')
                already_processed += 1
                continue
    
            cmd_fsl_anat = ['fsl_anat -i ', os.path.join(t1_dir,'T1.nii.gz')]
            cmd_str = '"%s"' % str(' '.join(cmd_fsl_anat)) 
            cmd_qsub = cmd_qsub_base + ['-name', job_name,'-command', cmd_str]

            #subprocess.Popen(' '.join(cmd_qsub), shell=True)
            
            submitted += 1
    
print('number of jobs submitted =', submitted)
print('number of T1s already processed =', already_processed)

number of jobs submitted = 4
number of T1s already processed = 24


## Select subjects (e.g. only those having 4 year follow-up)

In [32]:
target_ses = 'ses-4YearFollowUpYArm1'
target_ses = 'ses-2YearFollowUpYArm1'
#target_ses = 'ses-baselineYear1Arm1'

sub_list = []
sub_dirs_target = []
for s in sub_dirs:
    subid = os.path.splitext(os.path.basename(s))[0] 
    ses_dirs = glob.glob(os.path.join(s, 'ses-*'))
    
    sessions = list(map(os.path.basename, ses_dirs))
    if target_ses in sessions:
        sub_list.append(subid)
        sub_dirs_target.append(s)

print(len(sub_list), 'subjects found')

7829 subjects found


In [30]:
sub_dirs_target

['/project_cephfs/3022017.02/subjects/sub-NDARINV00CY2MDM',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV00HEV6HB',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV00LH735Y',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV00LJVZK2',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV00U4FTRU',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV00X2TBWJ',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV010ZM3H9',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV014RTM1V',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV0191C80U',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV019DXLU4',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV01AJ15N9',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV01D03VR7',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV01ELX9L6',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV01EN91PG',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV01NAYMZH',
 '/project_cephfs/3022017.02/subjects/sub-NDARINV01RGTWD2',
 '/project_cephfs/3022017.02/subjects/su

## Run Freesurfer (cross-sectional)

In [None]:
num_processed = 0
num_failed = 0
num_submitted = 0
sub_no_t1 = []

verbose = False

default_t1_type = 'ABCD-T1-NORM'
log_prefix = 'ABCD_fs_'

#sub_batch = sub_dirs[100:]
sub_batch = sub_dirs_target

#fs_env = 'export FREESURFER_HOME=/opt/freesurfer/7.3.2'
fs_env = 'module load freesurfer/7.3.2'

cmd_qsub_fs = ['/home/preclineu/andmar/DCCN/Scripts/Torque/SubmitToCluster.py',
                 '-length', '202400',
                 '-memory', '5gb',
                 '-logfiledir', cluster_log_dir,
                 '-queue verylong'
                ]

print('Processing', len(sub_batch), 'subjects ...')
for s in sub_batch:
    subid = os.path.splitext(os.path.basename(s))[0] 
    
    sessions = glob.glob(os.path.join(s, 'ses-*'))
    for sess in sessions:
        sdir = os.path.join(sess, 'anat')
        sesid = os.path.basename(sess)
        full_subid = '_'.join((subid, sesid))
        
        if not os.path.exists(sdir):
            continue
        
        # get all subdirectories 
        t1_all = [os.path.join(sdir, o) for o in os.listdir(sdir) if os.path.isdir(os.path.join(sdir, o))]
        
        if len(t1_all) > 1:
            if verbose:
                print(' > Multiple t1s found for', full_subid, '(',len(t1_all), ') using default')
            
            # sub-select only ABCD-T1-NORM
            t1_all = [k for k in t1_all if default_t1_type in k]
            if len(t1_all) > 1:
                if verbose:
                    print(' > Failed.')
                continue;                        
        
        for t1_dir in t1_all:
            t1_file = os.path.join(t1_dir,'T1.nii.gz')            
            out_dir = os.path.join(root_dir, 'freesurfer', full_subid)
            
            if os.path.exists(out_dir):
                if os.path.exists(os.path.join(out_dir, 'stats', 'rh.w-g.pct.stats')):
                    # analysis is complete
                    if verbose:
                        print(' > Freesurfer already run for', full_subid,'doing nothing')
                    num_processed += 1
                    continue
                else:
                    # analysis did not complete
                    num_failed +=1
                    if verbose:
                        print(' > Freesurfer subject',full_subid,'found but failed. Continuing analysis')
                    cmd_fs = ['recon-all -s', full_subid, '-all','-qcache']
            else:
                # no analysis has been run
                if not os.path.exists(t1_file):
                    if verbose: 
                        print(' > No T1 found for sub', full_subid, '. Doing nothing')
                    sub_no_t1.append(subid)
                    continue
        
                cmd_fs = ['recon-all -i ', t1_file, '-s', full_subid, '-all','-qcache']
            
            #print(' '.join(cmd_fs))

            cmd_env = [fs_env + ' ; ',
                       'export SUBJECTS_DIR=' + os.path.join(root_dir,'freesurfer ; ')]
            cmd_str = '"%s"' % str(' '.join(cmd_env + cmd_fs)) 
            cmd_qsub = cmd_qsub_fs + ['-name', log_prefix + str(full_subid),'-command', cmd_str]
    
            num_submitted += 1
            #print(' '.join(cmd_qsub))
            subprocess.Popen(' '.join(cmd_qsub), shell=True)

print('jobs submitted =', num_submitted)       
print('number of subjects already processed =',num_processed)
print('number failed =',num_failed)
print('number without t1 =',len(sub_no_t1))

## Create Freesurfer longitudinal templates

Note: this needs to be run more carefully than the cross sectional approach above because it just clobbers the template directory. 

In [35]:
bl

'/project_cephfs/3022017.02/subjects/sub-NDARINVZZZP87KR/ses-baselineYear1Arm1'

In [33]:
num_processed = 0
num_failed = 0
num_submitted = 0

verbose = False

log_prefix = 'ABCD_longfs_'

#sub_batch = sub_dirs[10:]
sub_batch = sub_dirs_target[5900:]

#fs_env = 'export FREESURFER_HOME=/opt/freesurfer/7.3.2'
fs_env = 'module load freesurfer/7.3.2'

cmd_qsub_fs = ['/home/preclineu/andmar/DCCN/Scripts/Torque/SubmitToCluster.py',
                 '-length', '202400',
                 '-memory', '5gb',
                 '-logfiledir', cluster_log_dir,
                 '-queue verylong'
                ]

print('Processing', len(sub_batch), 'subjects ...')
for s in sub_batch:
    subid = os.path.splitext(os.path.basename(s))[0] 
    
    cmd_fs = ['recon-all -base ', subid + '_long',]
    
    sessions = sorted(glob.glob(os.path.join(s, 'ses-*')))
    
    # make sure the baseline session is first 
    for i in range(len(sessions)):
        if 'baseline' in sessions[i]:
            bl = sessions[i]
            del sessions[i]
            sessions.insert(0, bl)
            continue
    
    good_sessions = 0 
    for sess in sessions:
        sesid = os.path.basename(sess)
        full_subid = '_'.join((subid, sesid))  
        fs_sess_dir = os.path.join(root_dir, 'freesurfer', full_subid)
        
        fs_sessions = []
        if os.path.exists(os.path.join(fs_sess_dir, 'stats', 'rh.w-g.pct.stats')):
            fs_sessions.append(full_subid)
        else:
            print(' > Freesurfer did not complete for', full_subid)
            continue
        
        cmd_fs += ['-tp', full_subid]
        good_sessions += 1
        
    if good_sessions < 2:
        print(' > Only one timepoint for', full_subid, 'aborting.')
        continue
        
    cmd_fs += ['-all', '-qcache']
        
    print(' '.join(cmd_fs))

    cmd_env = [fs_env + ' ; ',
               'export SUBJECTS_DIR=' + os.path.join(root_dir,'freesurfer ; ')]
    cmd_str = '"%s"' % str(' '.join(cmd_env + cmd_fs)) 
    cmd_qsub = cmd_qsub_fs + ['-name', log_prefix + str(subid + '_long'),'-command', cmd_str]
    num_submitted += 1
    print(' '.join(cmd_qsub))
    
    #subprocess.Popen(' '.join(cmd_qsub), shell=True)

print('jobs submitted =', num_submitted)       
print('number of subjects already processed =',num_processed)
print('number failed =',num_failed)

Processing 1929 subjects ...
 > Freesurfer did not complete for sub-NDARINVR1927JG7_ses-2YearFollowUpYArm1
 > Only one timepoint for sub-NDARINVR1927JG7_ses-2YearFollowUpYArm1 aborting.
recon-all -base  sub-NDARINVR1JFZ70M_long -tp sub-NDARINVR1JFZ70M_ses-baselineYear1Arm1 -tp sub-NDARINVR1JFZ70M_ses-2YearFollowUpYArm1 -tp sub-NDARINVR1JFZ70M_ses-4YearFollowUpYArm1 -all -qcache
/home/preclineu/andmar/DCCN/Scripts/Torque/SubmitToCluster.py -length 202400 -memory 5gb -logfiledir /project_cephfs/3022017.02/logs -queue verylong -name ABCD_longfs_sub-NDARINVR1JFZ70M_long -command "module load freesurfer/7.3.2 ;  export SUBJECTS_DIR=/project_cephfs/3022017.02/freesurfer ;  recon-all -base  sub-NDARINVR1JFZ70M_long -tp sub-NDARINVR1JFZ70M_ses-baselineYear1Arm1 -tp sub-NDARINVR1JFZ70M_ses-2YearFollowUpYArm1 -tp sub-NDARINVR1JFZ70M_ses-4YearFollowUpYArm1 -all -qcache"
recon-all -base  sub-NDARINVR1KT1MAU_long -tp sub-NDARINVR1KT1MAU_ses-baselineYear1Arm1 -tp sub-NDARINVR1KT1MAU_ses-2YearFollowU

In [34]:
full_subid

'sub-NDARINVZZZP87KR_ses-2YearFollowUpYArm1'

## Select subjects

In [4]:
fs_dir = os.path.join(root_dir,'freesurfer')
sub_dirs = glob.glob(os.path.join(fs_dir, '*_long'))

sub_list_target = []
for s in sub_dirs:
    subid = os.path.splitext(os.path.basename(s))[0].split('_')[0]
    
    sub_list_target.append(subid)
        
print(len(sub_list_target), 'subjects found')
sub_list_target[:10]

6684 subjects found


['sub-NDARINVZKBLKU16',
 'sub-NDARINVGY4WAXPR',
 'sub-NDARINVB3L1EM9T',
 'sub-NDARINV65HEDB50',
 'sub-NDARINVCB2Z3BZD',
 'sub-NDARINV4YH8H7KU',
 'sub-NDARINVE3MEFJL5',
 'sub-NDARINVTAX3MN8C',
 'sub-NDARINVNGAXVYPV',
 'sub-NDARINV2RD4CZ7T']

## Run longitudinal freesurfer

In [31]:
num_processed = 0
num_failed = 0
num_submitted = 0
sub_no_t1 = []

verbose = True

log_prefix = 'ABCD_longfs2_'

sub_id_batch = sub_list_target[6000:]

#fs_env = 'export FREESURFER_HOME=/opt/freesurfer/7.3.2'
fs_env = 'module load freesurfer/7.3.2'

cmd_qsub_fs = ['/home/preclineu/andmar/DCCN/Scripts/Torque/SubmitToCluster.py',
                 '-length', '202400',
                 '-memory', '5gb',
                 '-logfiledir', cluster_log_dir,
                 '-queue verylong'
                ]

print('Processing', len(sub_id_batch), 'subjects ...')
for subid in sub_id_batch:
        
    sessions = glob.glob(os.path.join(fs_dir, f'{subid}_ses-*'))
    
    for full_subid in sessions:
        sesid = os.path.basename(full_subid)
        
        #if not os.path.exists(os.path.join(fs_dir, full_subid)):
        #    continue
                  
        out_dir = os.path.join(root_dir, 'freesurfer', full_subid)
            
        #if os.path.exists(out_dir):
        #    if os.path.exists(os.path.join(out_dir, 'stats', 'rh.w-g.pct.stats')):
        #        # analysis is complete
        #        if verbose:
        #            print(' > Freesurfer already run for', full_subid,'doing nothing')
        #        num_processed += 1
        #        continue
        #    else:
        #         # analysis did not complete
        #        num_failed +=1
        #        if verbose:
        #            print(' > Freesurfer subject',full_subid,'found but failed. Continuing analysis')
        #        cmd_fs = ['recon-all -long ', full_subid, f'{sub_id}_long', '-all','-qcache']
        #else:
            # no analysis has been run
            #if not os.path.exists(t1_file):
            #    if verbose: 
            #        print(' > No T1 found for sub', full_subid, '. Doing nothing')
            #    sub_no_t1.append(subid)
            #    continue
        
        cmd_fs = ['recon-all -long ', full_subid, f'{subid}_long', '-all','-qcache']
            
        #print(' '.join(cmd_fs))

        cmd_env = [fs_env + ' ; ',
                    'export SUBJECTS_DIR=' + fs_dir + ' ; ']
        cmd_str = '"%s"' % str(' '.join(cmd_env + cmd_fs)) 
        cmd_qsub = cmd_qsub_fs + ['-name', log_prefix + str(full_subid),'-command', cmd_str]
    
        num_submitted += 1
        print(' '.join(cmd_qsub))
        #subprocess.Popen(' '.join(cmd_qsub), shell=True)

print('jobs submitted =', num_submitted)       
print('number of subjects already processed =',num_processed)
print('number failed =',num_failed)
print('number without t1 =',len(sub_no_t1))

NameError: name 'sub_list_target' is not defined