### Create dsQ job submission array
#### Commands: 


ml load dSQ/1.05

1: dsq --job-file 1_preproc_dsq_job_array.txt --mem-per-cpu 50g -t 6:00:00 --mail-type ALL --partition gpu --cpus-per-task=1 --gres=gpu:2

2: dsq --job-file 2_csd_dsq_job_array.txt --mem-per-cpu 10g -t 50:00:00 --mail-type ALL --partition verylong


4: dsq --job-file 4_fixel_job_array.txt --mem-per-cpu 10g -t 10:00:00 --mail-type ALL --partition verylong


5: dsq --job-file 5_tract_dsq_job_array.txt --mem-per-cpu 50g -t 20:00:00 --mail-type ALL --partition verylong

7: dsq --job-file 7_fixelAnalysis_dsq_job_array.txt --mem-per-cpu 10g -t 2:00:00 --mail-type ALL --partition verylong


In [3]:
import pandas as pd
import csv
from glob import glob

In [4]:
home = '/home/lms233/Github/Diffusion'
candpath = '/gpfs/milgram/pi/gee_dylan/candlab'
data = candpath + '/analyses/shapes/dwi/data'
dwi = candpath + '/analyses/shapes/dwi'

In [5]:
subject_info = pd.Series(glob(data + '/1_Preprocessed_Data/sub*'))
subs = subject_info.replace(data + '/1_Preprocessed_Data/', '')
all_subjects = subs.tolist()
merge_sublist = pd.DataFrame(subs, columns = ['Subject'])
merge_sublist['Subject'] = merge_sublist['Subject'].str.replace('sub-', '')

In [6]:
#Subjects with RI and DTI data under motion threshold
ri_sublist = pd.read_csv(dwi + '/subjectlist_preDiss_motion0.75_n107_2021-03-29.csv', names = ['index', 'Subject'], header = 0)
print('{} subs had DTI and RI data, and motion under threshold'.format(len(ri_sublist)))
risubs = 'sub-' + ri_sublist['Subject']

106 subs had DTI and RI data, and motion under threshold


In [7]:
print(risubs.tolist())

['sub-A217', 'sub-A218', 'sub-A222', 'sub-A223', 'sub-A225', 'sub-A228', 'sub-A229', 'sub-A230', 'sub-A231', 'sub-A232', 'sub-A233', 'sub-A234', 'sub-A236', 'sub-A237', 'sub-A238', 'sub-A240', 'sub-A242', 'sub-A246', 'sub-A248', 'sub-A250', 'sub-A251', 'sub-A253', 'sub-A255', 'sub-A256', 'sub-A257', 'sub-A258', 'sub-A260', 'sub-A262', 'sub-A266', 'sub-A268', 'sub-A271', 'sub-A272', 'sub-A276', 'sub-A279', 'sub-A280', 'sub-A281', 'sub-A283', 'sub-A285', 'sub-A286', 'sub-A288', 'sub-A291', 'sub-A293', 'sub-A294', 'sub-A548', 'sub-A553', 'sub-A554', 'sub-A555', 'sub-A556', 'sub-A557', 'sub-A592', 'sub-A593', 'sub-A609', 'sub-A611', 'sub-A619', 'sub-A620', 'sub-A621', 'sub-A622', 'sub-A637', 'sub-A643', 'sub-A646', 'sub-A650', 'sub-A651', 'sub-A653', 'sub-A656', 'sub-A660', 'sub-A661', 'sub-A663', 'sub-A664', 'sub-A665', 'sub-A666', 'sub-A680', 'sub-A682', 'sub-A686', 'sub-A687', 'sub-A688', 'sub-A689', 'sub-A692', 'sub-A694', 'sub-A695', 'sub-A696', 'sub-A698', 'sub-A704', 'sub-A707', 'su

In [8]:
#Generate subs who didn't process fully (MSMT CSD)
subjslist = []
for i in range(0, len(all_subjects)):
    sub = all_subjects[i]
    newlist = pd.Series(glob(data+ '/4_Deconvolution/*'))
    newsubs = newlist.replace(data + '/4_Deconvolution/', '').tolist()
    if sub in newsubs:
        pass
    else:
        subjslist.append(sub)

notrun_subjects = list(set(subjslist))

  """


In [12]:
# #Generate subs who didn't process fully (Tractography)
# notrun_tractsubs = []
# for i in range(0, len(ri_sublist)):
#     sub = 'sub-' + ri_sublist['Subject'].tolist()[i]
#     newlist = pd.Series(glob(data + '/tract_output/*/*.csv'))
#     for j in range(0, len(newlist)):
#         newsub = newlist[j].replace(data + '/tract_output/', '')
#         newsub2 = newsub.split('/')[0]
#         newlist.append(stnewsub2))
#     if sub in newsublist:
#         pass
#     else:
#         notrun_tractsubs.append(sub)

In [16]:
print(len(subs))
print(len(notrun_subjects))
print(len(notrun_tractsubs))

106
106
27


### Create batch file for preprocessing

In [10]:
#Set subjects
subjects = risubs
commands_preproc = []

for i in range(0, len(subjects)):
    sub = subjects[i]
    commands_preproc.append('sh 1_sbatch_preproc.sh {}'.format(sub))

In [11]:
out = pd.DataFrame(commands_preproc)
out.to_csv(home + '/1_preproc_dsq_job_array.txt', sep = '\t', header = False, index=False, 
           quoting=csv.QUOTE_NONE)

### Create batch file for MS CSD

In [19]:
#Set subjects
subjects = risubs

commands_csd = []

for i in range(0, len(subjects)):
    sub = subjects[i]
    commands_csd.append('sh 2_sbatch_csd.sh {}'.format(sub))

In [20]:
out = pd.DataFrame(commands_csd)
out.to_csv(home + '/2_csd_dsq_job_array.txt', sep = '\t', header = False, index=False, 
           quoting=csv.QUOTE_NONE)

### Create batch file for fixel registration

In [33]:
#Set subjects
subjects = ri_sublist['Subject'].tolist()

commands_fixel = []

for i in range(0, len(subjects)):
    sub = subjects[i]
    commands_fixel.append('sh 4_sbatch_register_FOD.sh sub-{}'.format(sub))

In [34]:
out = pd.DataFrame(commands_fixel)
print(len(out))
out.to_csv(home + '/4_fixel_job_array.txt', sep = '\t', header = False, index=False, 
           quoting=csv.QUOTE_NONE)

117


### Create batch file for final fixel extraction

In [13]:
subjects = ri_sublist['Subject'].tolist()
commands_fixan = []

for i in range(0, len(subjects)):
    sub = subjects[i]
    commands_fixan.append('sh 7_sbatch_TransformSubjectFODtoTemplate.sh sub-{}'.format(sub))

In [14]:
out_tract = pd.DataFrame(commands_fixan)
out_tract.to_csv(home + '/7_fixelAnalysis_dsq_job_array.txt', sep = '\t', header = False, index=False, 
           quoting=csv.QUOTE_NONE)

### Create batch file for TractSeg

In [13]:
subjects = risubs
commands_tract = []

for i in range(0, len(subjects)):
    sub = subjects[i]
    commands_tract.append('sh 5_TractSegFlow.sh {}'.format(sub))

In [14]:
out_tract = pd.DataFrame(commands_tract)
out_tract.to_csv(home + '/5_tract_dsq_job_array.txt', sep = '\t', header = False, index=False, 
           quoting=csv.QUOTE_NONE)