In [1]:
# REQUIREMENTS FOR THIS PIPELINE
#  the recon output will be named e.g. "unedit.FS6_02". After manually editing
#  please rename this recon folder to "edit.FS6_02". Then the next time 
#  pipeline is run, it will re-submit the recon for this folder and create symlink.

#  the following notes need to be updated upon each batch download so this info can be piped:
#  files: LEADS_#####_date.csv (csv download file), Mayo_ADRIL_MRI_Quality_date.csv

        #details:
        #MOST IMPORTANT: once a new download has been initiated, load the CSV download file into
        #/autofs/cluster/animal/scan_data/leads/spreadsheets/LONI_DOWNLOADS/
        #if it is a new image collection on loni, it will create a separate CSV file, 
        #otherwise will replace old one with concatenated data of that image collection. So archive
        #any csvs that are old versions of downloaded collections.
        
        #must also download Mayo_ADRIL_MRI_Quality_date.csv and save to:
        #/autofs/cluster/animal/scan_data/leads/spreadsheets/MRIQUALITY
        #this file will replace the old one (concatenates all data on loni about QC).

# if any files need to be re-run / re-processed, delete the files in the 
# folder. This pipeline does not overwrite anything.

# TO DO
# send a recon job- just do for ones that do not have output?
# decide if I want to have the option to re-run edits manually of implement here?
# not sure why but import_loni_notes is creates a new column in the dataframe scannotes?


In [2]:
# import modules
import io, os, sys, types # needed
import glob # needed
from nipype.pipeline.engine import Workflow, Node, MapNode # needed
from nipype.interfaces.utility import Function, IdentityInterface
import nipype.interfaces.io as nio
import nipype.pipeline.engine as pe
from nipype.interfaces.freesurfer import MRIConvert
from nipype.interfaces.freesurfer import ReconAll
from nipype import config
import pandas as pd
import re
import shutil
import pathlib
import pydicom
from pydicom.tag import Tag

In [3]:
# Clean and update spreadsheets

# these do not concatenate, must do this
downloadsdir = '/autofs/cluster/animal/scan_data/leads/spreadsheets/LONI_DOWNLOADS/'

# vertically concatenate all csvs
downloadlist = glob.glob(downloadsdir+'*.csv')
downloadlist.remove('/autofs/cluster/animal/scan_data/leads/spreadsheets/LONI_DOWNLOADS/combined_downloads.csv')

# vertically concatenate all csvs
combined_csv = pd.concat( [ pd.read_csv(f) for f in downloadlist ] , sort=False)

# drop all non-MPRAGES, sort dataframe by subject column, drop all duplicates
combined_csv = combined_csv[combined_csv.Description == 'Accelerated Sagittal MPRAGE']
combined_csv = combined_csv.sort_values(by=['Downloaded'])
combined_csv = combined_csv.drop_duplicates(['Image Data ID'], keep='last')

# # save combined download file
combined_csv.to_csv("/autofs/cluster/animal/scan_data/leads/spreadsheets/LONI_DOWNLOADS/combined_downloads.csv", index=False,)

# # these download already concatenating all sessions ; use latest
qualitydir = '/autofs/cluster/animal/scan_data/leads/spreadsheets/MRIQUALITY/'

list_of_qualityfiles = glob.glob(qualitydir+'*.csv')
MRIQUALITY = max(list_of_qualityfiles, key=os.path.getctime)

In [4]:
def scan(subject):
    subsessions = glob.glob(dicomdir+subject+'/Accelerated_Sagittal_MPRAGE/*/*/')
    repeat_tag = '-'
    for num in range(len(subsessions)):
        # look to see if more than one session on the same date
        # then look to see if more than one date (or both)
        parentfolder = subsessions[num].split('/')[8]
        filename = os.listdir(subsessions[num])[0] # dicom name to extract date
        #extract date from dicom:
        ds = pydicom.read_file(subsessions[num]+'/'+filename)
        date = str(ds[0x08, 0x22].value)
        #date = re.search('raw_'+r'+[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]', filename).group()[4:]
        if num == len(subsessions)-1:
            namedate = dicomdir+subject+'_'+date
            os.rename(dicomdir+subject, namedate)
        else:
            namedateplus = dicomdir+subject+'_'+date+repeat_tag+'/Accelerated_Sagittal_MPRAGE/'+parentfolder
            pathlib.Path(namedateplus).mkdir(parents=True, exist_ok=True)
            shutil.move(subsessions[num], namedateplus)
            if not os.listdir(dicomdir+subject+'/Accelerated_Sagittal_MPRAGE/'+parentfolder):
                os.rmdir(dicomdir+subject+'/Accelerated_Sagittal_MPRAGE/'+parentfolder)
            repeat_tag = repeat_tag+'-'

In [5]:
# specify variables
leadsdir = '/cluster/animal/scan_data/leads/'
os.chdir(leadsdir)
dicomdir = "/cluster/animal/scan_data/leads/LEADS/"
unpacklog = "/autofs/cluster/animal/scan_data/leads/recon/unpack.log"
#recondir = '/autofs/cluster/animal/scan_data/leads/recon_nip/'
recondir = '/autofs/cluster/animal/scan_data/leads/recon_nip/RECON_FLAIR/'
recondir_3t = '/autofs/cluster/animal/scan_data/leads/recon_nip/RECON_3T/'
recondir_edit = '/autofs/cluster/animal/scan_data/leads/recon_nip/RECON_FLAIR_EDITED/'
folders = [x for x in os.listdir(dicomdir) if not x.startswith(".")]
subjlist = [f for f in os.listdir(dicomdir) if (("_") not in f) and (not f.startswith('.') and ("duplicate" not in f))]

# wipe clean batch.recon.list
open(recondir+'batch.recon.list', 'w').close()

# unwravel all subjects with multiple sessions and rename to include date
for sub in subjlist:
    scan(sub)

# now just make a list of subjects by ID (this will be the input to the nodes)
# will define dicom path within node
sh_dicomlist = [f for f in os.listdir(dicomdir) if (("_" in f) and ("REPEAT_RUNS" not in f))]

# define workflow
leads_workflow = Workflow(name='leads_workflow') #, base_dir = '/autofs/cluster/animal/scan_data/leads/recon_nip/') # add  base_dir='/shared_fs/nipype_scratch'

# configure to stop on first crash
cfg = dict(execution={'stop_on_first_crash': True})
config.update_config(cfg)

In [6]:
# for debugging

sh_dicomlist = ['LDS3600069_20190426']

# ['LDS0370005_20180802','LDS0370020_20181212',  'LDS0670070_20190305',  'LDS3600067_20190320', \
# 'LDS0110022_20181129','LDS0370006_20180726','LDS0370029_20181210','LDS0670076_20190312','LDS3600068_20190325', \
# 'LDS0110040_20190123','LDS0370007_20180801','LDS0370034_20190107','LDS0670077_20190318','LDS3600087_20190417', \
# 'LDS0110041_20190212','LDS0370008_20180815','LDS0370037_20181218','LDS0670080_20190315','LDS9410023_20181127', \
# 'LDS0110052_20190205','LDS0370009_20180816','LDS0370038_20181217','LDS0670085_20190327','LDS9410025_20181128', \
# 'LDS0110053_20190227','LDS0370010_20180815','LDS0370042_20190108','LDS0730001_20180619','LDS9410027_20181105', \
# 'LDS0110078_20190320','LDS0370011_20180822','LDS0370047_20190226','LDS0730024_20181107','LDS9410028_20181109', \
# 'LDS0220026_20181109','LDS0370012_20180824','LDS0370058_20190318','LDS0730044_20190129','LDS9410035_20181126', \
# 'LDS0220031_20181130','LDS0370013_20180822','LDS0370061_20190219','LDS0730051_20190313','LDS9410036_20181203', \
# 'LDS0220050_20190208','LDS0370014_20180913','LDS0370065_20190227','LDS0730055_20190304','LDS9410049_20190118', \
# 'LDS0220062_20190225','LDS0370015_20181113','LDS0370073_20190308','LDS1770064_20190222','LDS9410060_20190219', \
# 'LDS0220071_20190311','LDS0370016_20180912','LDS0370074_20190404','LDS3600030_20181219','LDS9410066_20190227', \
# 'LDS0220081_20190322','LDS0370017_20181001','LDS0370086_20190329','LDS3600032_20190123', \
# 'LDS0370001_20180509','LDS0370018_20181015','LDS0370089_20190403','LDS3600043_20190118', \
# 'LDS0370002_20180606','LDS0370019_20181121','LDS0670048_20190320','LDS3600056_20190323']


In [7]:
# TEST NODE : PASSSWORDS

def credentials(): # combined with find_dicom
    import getpass
    USER = getpass.getuser()
    print('Please enter your PASSWORD for launchpad access: ')
    PASS= getpass.getpass()
    return USER, PASS

PASSWORDS = pe.Node(Function(input_names=["user", "pw"],
                         output_names=["USER","PASS"], # actual dicom (redundant to create unpacking node visualization)
                         function=credentials),
                        name='PASSWORDS')

In [8]:
# NODE : CREATEDIR
def createdir(val, USER, PASS):
    import os
    import re
    import glob
    import pydicom
    from pydicom.tag import Tag
    val = val.split('/')[-1]
    dicomdir = "/autofs/cluster/animal/scan_data/leads/LEADS/"
    pipelines = ['RECON_3T', 'RECON_FLAIR']
    for pipe in pipelines:
        recondir = '/autofs/cluster/animal/scan_data/leads/recon_nip/'+pipe+'/' 
        reconpath = recondir+val+'/'
        imgpath = reconpath+'mri/orig/'
        if not glob.glob(reconpath + '/**/*mri', recursive=True): # changed recondir to reconpath
            os.makedirs(imgpath) # edited this part!
    dumplocation = imgpath+'001.mgz'
    flairdumplocation = imgpath+'FLAIR.mgz'
    subject = val.split('_')[0]
    MPRAGE_path = glob.glob(dicomdir+val+'/Accelerated_Sagittal_MPRAGE/*/*')[0]
    pickdicom = glob.glob(dicomdir+val+'/Accelerated_Sagittal_MPRAGE/*/*/*')[0]
    pickflair = glob.glob(dicomdir+val+'/Sagittal_3D_FLAIR/*/*/*')[0]
    ds = pydicom.read_file(pickdicom)
    date = str(ds[0x08, 0x22].value)
    sessionid = MPRAGE_path.split("/")[-1]
    return reconpath, MPRAGE_path, pickdicom, dumplocation, recondir, USER, PASS, imgpath, date, flairdumplocation, pickflair
        
CREATEDIR = pe.Node(Function(input_names=["val", "USER", "PASS"],
                         output_names=["createdir_out1","createdir_out2", "createdir_out3", "createdir_out4", "createdir_out5", "USER", "PASS", "createdir_out6", "date", "flairdumplocation","pickflair"], # actual dicom (redundant to create unpacking node visualization)
                         function=createdir),
                        name='CREATEDIR')

In [9]:
# NODE : IMPORT_LONI_INFO

def import_loni_notes(dicomname, date, subjectdir):
    import pandas as pd
    import glob
    import os
    import re
    # download info
    download_df = pd.read_csv('/autofs/cluster/animal/scan_data/leads/spreadsheets/LONI_DOWNLOADS/combined_downloads.csv')
    recon_dir = '/autofs/cluster/animal/scan_data/leads/recon_nip/RECON_NOTES/'
    mgh_subs = '/autofs/cluster/animal/scan_data/leads/spreadsheets/IDENTIFICATION/MGH_SUBJECTS.csv'
    notes_dir = recon_dir+subjectdir.split('/')[-2]
    dicom = dicomname.split("/")[-1]
    subid = dicom.split("_")[1]
    imageid = dicom.split("_")[12][1:-4]
    scannotes_df = pd.read_csv(notes_dir+'/scannotes.csv')
    try:
        download_date = download_df.loc[download_df['Image Data ID'] == float(imageid), 'Downloaded'].values[0]
    except(IndexError):
        pass
    # loni notes
    qualitydir = '/autofs/cluster/animal/scan_data/leads/spreadsheets/MRIQUALITY/'
    list_qc_files = glob.glob(qualitydir+'*.csv')
    MRIQUALITY = max(list_qc_files, key=os.path.getctime)
    # these download already concatenating all sessions ; use latest
    quality_df = pd.read_csv(MRIQUALITY)
    try:
        loni_overallpass = quality_df.loc[quality_df['loni_image'] == float(imageid), 'study_overallpass'].values[0]
        if loni_overallpass == 1:
            qc_pass = '1'
        elif loni_overallpass == 4:
            qc_pass = '0'
        else:
            qc_pass = ''
        study_comments = quality_df.loc[quality_df['loni_image'] == float(imageid), 'study_comments'].values[0]
        study_protocol_comment = quality_df.loc[quality_df['loni_image'] == float(imageid), 'study_protocol_comment'].values[0]
        protocol_comments = quality_df.loc[quality_df['loni_image'] == float(imageid), 'protocol_comments'].values[0]
        series_comments = quality_df.loc[quality_df['loni_image'] == float(imageid), 'series_comments'].values[0]
        series_quality = quality_df.loc[quality_df['loni_image'] == float(imageid), 'series_quality'].values[0]# if 3, needs review; if 2 it is ok
        if series_quality == 2:
            s_quality = 'Scan quality is acceptable according to MAYO. '
        elif series_quality == 3:
            s_quality = 'Scan quality is questionable according to MAYO and needs review. '
        elif series_quality == 4:
            s_quality = 'Scan quality is poor according to MAYO and needs review. '
        else:
            s_quality = 'No scan quality data recorded from MAYO. '
        study_rescan_requested = quality_df.loc[quality_df['loni_image'] == float(imageid), 'study_rescan_requested'].values[0]
        if study_rescan_requested == 'TRUE':
            rescan_requested = ' Study rescan has been requested. '
        else:
            rescan_requested = '. No study rescans have been requested. '

        # delete duplicates within list, delete nans
        comments_list = [s_quality,str(study_comments),str(study_protocol_comment),str(series_comments),str(protocol_comments),rescan_requested]
        cleanedList = [x for x in comments_list if (x != 'nan')]
        concat_comments = ''.join(cleanedList)+" QC_pass from original site is "+qc_pass+" ."
        xnat_upload = '0'
    except(IndexError):
        if subid[0:6] == 'LDS360':   # if its MGH data # line 57
            mgh_df = pd.read_csv(mgh_subs,index_col=False)
            concat_comments = mgh_df.loc[mgh_df['leadsid'] == subid+'_'+date, 'notes'].values[0]
            xnat_upload = mgh_df.loc[mgh_df['leadsid'] == subid+'_'+date, 'XNAT_upload'].values[0]
            if str(mgh_df.loc[mgh_df['leadsid'] == subid+'_'+date, 'notes'].values[0]) == 'nan':
                concat_comments = 'No comments from MAYO. '
            qc_pass = "No data."
        else:
            concat_comments = 'No comments from MAYO. '
            qc_pass = 'No data.'
            xnat_upload = '0'
            
    # add recon path by taking newest edit folder (if exists)
    try:
        edit_dir = '/autofs/cluster/animal/scan_data/leads/recon_nip/RECON_FLAIR_EDITED/'+subjectdir.split('/')[-2]
        edit_folders = [f for f in os.listdir(edit_dir) if f.startswith("edit.")]
        ext_folders = [edit_dir + s for s in edit_folders]
        try:
            recon_folder = max(ext_folders, key=os.path.getctime)
        except(ValueError):
            recon_folder = ''
    except(FileNotFoundError):
        recon_folder = ''
        
    scannotes_df.loc[scannotes_df.index[0], 'xnat_upload'] = xnat_upload
    scannotes_df.loc[scannotes_df.index[0], 'recon_path'] = recon_folder
    scannotes_df.loc[scannotes_df.index[0], 'loni_overallpass'] = qc_pass
    scannotes_df.loc[scannotes_df.index[0], 'scan_notes'] = concat_comments
    scannotes_df.loc[scannotes_df.index[0], 'download_date'] = download_date
    scannotes_df.to_csv(notes_dir+'/scannotes.csv')
    return dicomname, subjectdir, imageid
    
IMPORT_LONI_INFO = pe.Node(Function(input_names=["dicomname", "date", "subjectdir"],
                        output_names = ["dicomname", "subjectdir", "imageid"], 
                        function=import_loni_notes),
                        name='IMPORT_LONI_INFO')


In [10]:
# NODE : PREPARE_4_REDCAP
# note that QC notes and status are manually recorded in scannotes

def prepare_redcap(dicomname, subjectdir, imageid, pickdicom, fsversion):
    import pandas as pd
    import pydicom
    from pydicom.tag import Tag
    import csv
    subject = subjectdir.split("/")[-1]
    convert_sex = '/autofs/cluster/animal/scan_data/leads/spreadsheets/IDENTIFICATION/DEMOGRAPHIC_IDS.csv'
    demo_form = '/autofs/cluster/animal/scan_data/leads/spreadsheets/LONI_DOWNLOADS/combined_downloads.csv'
    site_conversion = '/autofs/cluster/animal/scan_data/leads/spreadsheets/IDENTIFICATION/SITE_IDS.csv'
    notes_dir = subjectdir.replace('RECON_FLAIR','RECON_NOTES')
    scannotes = pd.read_csv(notes_dir+'scannotes.csv')
    download_df = pd.read_csv(demo_form)
    reader = csv.reader(open(convert_sex))
    ds = pydicom.read_file(pickdicom)
    d={}
    for row in reader:
        d[row[0]]=row[1:][0]
    sex = str(ds[0x10,0x40].value)
    SEX = d.get(sex)
    AQ_DATE = ds[0x08,0x22].value
    age = ds[0x00101010].value
    AGE = str(int(age[:-1]))
    
    try:
        SITE = str(ds[0x08, 0x80].value) #Institution Name
    except(KeyError):
        d={}
        reader = csv.reader(open(site_conversion))
        for row in reader:
            d[row[0]]=row[1:][0]
        t = subject[3:6]
        SITE = d.get(t)

    dicom_path = pickdicom.strip(pickdicom.split('/')[-1]) # or use scaninfo ?
    GROUP = download_df.loc[download_df['Image Data ID'] == float(imageid), 'Group'].values[0]
    GEN_NOTES = ''
    RECON_PATH = scannotes.loc[scannotes.index[0], 'recon_path'] 
    FS_VERSION = fsversion
    
    #Can be found in scannotes:
    DN_DATE = scannotes.loc[scannotes.index[0], 'download_date']
    XNAT = scannotes.loc[scannotes.index[0], 'xnat_upload']
    
    acq_notes = scannotes.loc[scannotes.index[0], 'scan_notes']
    post_notes = scannotes.loc[scannotes.index[0], 'recon_notes'] 
    overallpass = scannotes.loc[scannotes.index[0], 'dickerson_overallpass'] 
    if str(overallpass) == '1':
        QC_STATUS = '1' # pass
    elif str(overallpass) == '0':
        QC_STATUS = '0' # fail
    else:
        QC_STATUS = '2' # in_progress
    if not str(post_notes) == 'nan':
        post_notes = " Post-aquisition / recon notes: "+post_notes
        SESSION_NOTES = acq_notes+post_notes+" Dickerson Lab overall pass is "+str(overallpass)+"."
    else:
        SESSION_NOTES = acq_notes   # qc_notes in redcap


PREPARE_4_REDCAP = pe.Node(Function(input_names=["dicomname","subjectdir", "imageid", "pickdicom", "fsversion"],
                        function=prepare_redcap),
                          name='PREPARE_4_REDCAP')

In [11]:
# NODE : UNPACK

def unpack(subjectdir, MPRAGE_path):
    from os import system
    import csv
    import pandas as pd
    import os.path
    notesdir = subjectdir.replace('RECON_FLAIR',"RECON_NOTES")
    if not os.path.isfile(notesdir+'scan.info'): 
        cmdstring = 'unpacksdcmdir -src %s -targ %s -scanonly %s/scan.info' % (MPRAGE_path, notesdir, notesdir)
        system(cmdstring)
    if not os.path.isfile(notesdir+'scaninfo.csv'):
        with open(notesdir+'/scan.info', 'r') as in_file:
            for line in in_file:
                editline = line.split()
                with open(notesdir+'/scaninfo.csv', 'w') as result:
                    wr = csv.writer(result, dialect='excel')
                    wr.writerow(editline)
                result.close()
            in_file.close()
    scan_info = notesdir+'/scaninfo.csv'
    subname = notesdir.split('/')[-2]
    return subname, subjectdir, scan_info

UNPACK = pe.Node(Function(input_names=["subjectdir","MPRAGE_path"],
                         output_names=["unpack_out1","unpack_out2", "unpack_out3"], # actual dicom (redundant to create unpacking node visualization)
                         function=unpack),
                        name='UNPACK')


In [12]:
# (first option) # # NODE : CONVERT2MGZ (only runs if .mgz is not available)

def convert_dicom(in_file, out_file, reconpath):
    import os
    import glob
    from os import system
    #import time # just see if this works if waits
    # check for a file called 001.mgz
    if not glob.glob(reconpath + '/**/*001.mgz', recursive=True):
        cmdstring = 'mri_convert %s %s' % (in_file, out_file)
        system(cmdstring)
        complete = 1
    else:
        complete = 1

    return complete

CONVERT2MGZ = pe.Node(Function(input_names=["in_file", "out_file", "reconpath"],
                         output_names=["out_file"],
                         function=convert_dicom),
                        name='CONVERT2MGZ')

In [13]:
def convert_flair(pickflair, flairdumplocation, reconpath, out_file):
    import os
    import glob
    from os import system
    import shutil
    reconpath_3t = reconpath.replace('RECON_FLAIR','RECON_3T')
#     if not glob.glob(reconpath + '/**/*FLAIR.mgz', recursive=True):
#         cmdstring = 'mri_convert %s %s' % (pickflair, reconpath)
#         system(cmdstring)
#         complete = 1
#     else:
#         complete = 1  

    # put back after copying all flairs ad hoc
    if not glob.glob(reconpath + '/**/*FLAIR.mgz', recursive=True):
        cmdstring = 'mri_convert %s %s' % (pickflair, flairdumplocation)
        system(cmdstring)
        complete = 1
    else:
        complete = 1  
# #     # copy 001 to 3T folder
    if not glob.glob(reconpath_3t + '/**/*001.mgz', recursive=True):
        for root, dirs, files in os.walk(reconpath): 
            for file in files:  
                if file == '001.mgz': 
                    shutil.copyfile(root+'/'+str(file), reconpath_3t+'/mri/orig/001.mgz')
                    complete = 1
    else:
        complete = 1
    return complete

CONVERTFLAIR = pe.Node(Function(input_names=["pickflair", "flairdumplocation", "reconpath", "out_file"],
                         output_names=["out_file"],
                         function=convert_flair),
                        name='CONVERTFLAIR')

In [14]:
# NODE SCAN_AND_LOG
# note: decided to add this afterward precaution to increase efficiency because there are few errors
# and want to run the unpack and convert2mgz in parallel)

def scan_and_log(subjectdir, scan_info, mgz, reconfolder, subname):
    import re
    import os
    import pandas as pd
    notesdir = subjectdir.replace('RECON_FLAIR',"RECON_NOTES")
    # load in the scaninfo file
    dicomdir = "/cluster/animal/scan_data/leads/LEADS/"
    scaninfo = pd.read_csv(scan_info, header=None)
    check = scaninfo.iloc[0,2] # first row (only row); second col (validity)
    if check != 'ok':
        with open(reconfolder+'/scanerrors', "a") as efile:
            efile.write(scaninfo.iloc[0,7]) # log for errors in dicoms (or any ommitted scans)
    else:
        with open(reconfolder+'/batch.recon.list', "a") as bfile:
            bfile.write(subname)
        with open(reconfolder+'/unpack.log', "a") as ufile:
            ufile.write(subname)
        # should I makea scannotes? (will add info after recon)
        Elements = {'scan_notes': [''],'loni_overallpass': [''], 'download_date':[''], 'xnat_upload':[''], 'recon_path':[''],'recon_notes':[''], 'dickerson_overallpass':['']}
        df = pd.DataFrame(Elements, columns= ['scan_notes', 'loni_overallpass', 'download_date','xnat_upload','recon_path','recon_notes','dickerson_overallpass'])
        df.to_csv(notesdir+'/scannotes.csv')
    return subjectdir, subname

SCAN_AND_LOG = pe.Node(Function(input_names=["subjectdir","scan_info",'mgz', 'reconfolder', 'subname'],
                         output_names=["subjectdir", "subname"],
                         function=scan_and_log),
                        name='SCAN_AND_LOG')

In [15]:
# NODE RECON_JOB

def recon_job(subjectname, USER, PASS): # add in username, pass, and subjectname
    # add condition :: run this only is FS_XX, or scripts does not exist!!
    import os
    import glob
    from paramiko import SSHClient
    analyses_pipes = ['RECON_FLAIR','RECON_3T']
    for pipeline in analyses_pipes:
        reconpath = '/autofs/cluster/animal/scan_data/leads/recon_nip/'+pipeline
        if not glob.glob('/autofs/cluster/animal/scan_data/leads/recon_nip/'+pipeline+'/'+subjectname + '/**/*scripts', recursive=True):
            host="launchpad"
            user=USER
            pw=PASS
            client=SSHClient()
            client.load_system_host_keys()
            client.connect(host,username=user,password=pw, look_for_keys=False)
            tmpstr = '(cd /autofs/cluster/animal/scan_data/leads/analyses_nip/%s; setenv p %s ; ./batch.recon.sh)' % (pipeline, subjectname)
            stdin, stdout, stderr = client.exec_command(tmpstr)
            #stin = print("stdin: ", stdin.readlines())
            err = "stderr: ", stderr.readlines()
            out = "pwd: ", stdout.readlines()
            if len(err) < 1:
                warning = '0'
            else:
                warning = '1'
            #print err, out warning to text file in both recon dirs
            with open(reconpath+'log_nip.txt','a') as outf:
                outf.write(tmpstr)
        else:
            err = ""
            out = ""
            warning = "na"
            
    return err, out, warning, subjectname

RECON_JOB = pe.Node(Function(input_names=["subjectname","USER", "PASS"], 
                        output_names=[ 'err', 'out', 'warning','subjectname'],
                         function=recon_job),
                        name='RECON_JOB')


In [16]:
# NODE: GATHER_FS_DETAILS (this part only after recon is done)

def gather_FS_details(subjectname): # add in username, pass, and subjectname
    import csv
    import os
    recondir = '/autofs/cluster/animal/scan_data/leads/recon_nip/RECON_FLAIR/'
    # if you can access the status (complete) and in directory
    if os.path.isfile(recondir+subjectname+'/scripts/recon-all.done'): # replaces from old: recon-all.log
        recon_pending = 0
        # obtain FS version
        versionfile = open(recondir+subjectname+'/scripts/build-stamp.txt', 'r')
        versionstring = versionfile.read()
        version = versionstring.split('-')
        result = [i for i in version if i.startswith('v')][0]
        long = result[1:]
        
        #obtain short verison of long
        size = len(long)
        x = 0
        while x ==0:
            if (long[-1] == '0') or (long[-1] == '.'): # shave off any . or 0s from the end of version number.
                long = long[:-1]
            else:
                x =1
        vlabel = 'FS'+long

        # obtain run number
        notesdir = recondir.replace("RECON_FLAIR","RECON_NOTES")
        with open(recondir+subjectname+'/scaninfo.csv','r') as f:
            reader = csv.reader(f)
            scan_list = list(reader)
            runstring = scan_list[0][0] # run
            if len(runstring) == 1:
                runstring = '0'+runstring
        recon_name = vlabel+'_'+runstring
    else: # otherwise incomplete, not run yet, or already moved
        recon_pending = 1
        print(subjectname+" Recon either already organized or not ready yet.")
        recon_name = ''
        long = ''
    return subjectname, recon_name, recon_pending, long
        
FS_DETAILS = pe.Node(Function(input_names=["subjectname"], 
                        output_names=[ 'subjectname', 'recon_name', 'recon_pending', 'long'],
                         function=gather_FS_details),
                        name='FS_DETAILS')

In [17]:
# NODE : MAKE_ORIG_FOLDER

def create_orig_folder(subjectname, recon_name, recon_pending):
    import os
    import shutil
    recondir = '/autofs/cluster/animal/scan_data/leads/recon_nip/RECON_FLAIR/'
    freesurfer_dirs = ['mri', 'stats', 'tmp', 'trash', 'touch', 'label', 'surf', 'scripts']
    if recon_pending == 0:
        # move all subfolders into this recon_name folder
        for fsdir in freesurfer_dirs:
            if os.path.isdir(recondir+subjectname+'/'+fsdir):
                shutil.move(recondir+subjectname+'/'+fsdir, recondir+subjectname+'/'+recon_name+'/'+fsdir) # does this create FS6_02?
    else:
        print(subjectname+" files moves already, or not yet prepared.")
    return subjectname, recon_name, recondir, recon_pending

MAKE_ORIGINAL_DIR = pe.Node(Function(input_names=["subjectname", "recon_name", "recon_pending"], 
                        output_names=['subjectname', 'recon_name', 'recondir', 'recon_pending'],
                         function=create_orig_folder),
                        name='MAKE_ORIGINAL_DIR')



In [18]:
# NODE : PREPARE_MANEDITS

def preparing_manedits(subjectname, recon_name, recondir, recon_pending):
    import shutil
    import pathlib
    import os
    analysesdir = '/autofs/cluster/animal/scan_data/leads/analyses_nip/RECON_FLAIR/'
    if recon_pending == 0: # otherwise dir already created or not ready)
        recon_name2 = 'unedit.'+recon_name
        shutil.copytree(recondir+subjectname+'/'+recon_name, recondir+subjectname+'/'+recon_name2)
        shutil.copyfile(recondir+subjectname+'/'+recon_name2+'/mri/brain.finalsurfs.mgz', recondir+subjectname+'/'+recon_name2+'/mri/brain.finalsurfs.manedit.mgz')
        with open(recondir+'to_edit.list\n', "a") as myfile:
            myfile.write(subjectname)
        
    # this will symlink before manual editing but ok since renaming unedit >> edit is required
    #Now create symlink to edit. in analysis folder!!
#     try:
#         os.symlink(recondir+subjectname+'/'+recon_name2, analysesdir+sub)
#     except(FileExistsError):
#         print(subjectname+" in analyses is already linked to unedit.recon.")
    return subjectname
        
PREPARE_MANEDITS = pe.Node(Function(input_names=['subjectname', 'recon_name', 'recondir', 'recon_pending'], 
                        output_names=['subjectname'],
                         function=preparing_manedits),
                        name='PREPARE_MANEDITS')

In [19]:
# start the 3T recon

In [20]:
# # # NODE : INFOSOURCE
INFOSOURCE = Node(IdentityInterface(fields=['subject_name'], mandatory_inputs=False),
                  name="INFOSOURCE")

INFOSOURCE.iterables = ('subject_name', sh_dicomlist)

# NODE : SELECTFILES
#templates = dict(dicom=sh_dicomlist[0])    ## THIS WORKED!
templates = {
    "dicom": "{subject_name}" 
    }
SELECTFILES = Node(nio.SelectFiles(templates, base_directory=dicomdir),
                   name="SELECTFILES")

# NODE : DATASINK
DATASINK = Node(nio.DataSink(base_directory=leadsdir,
                container='recon_nip'),
                name="DATASINK")

In [21]:
# Connect all nodes (including INFOSOURCE, SELECTFILES, and DATASINK) to workflow

leads_workflow.connect([(INFOSOURCE, SELECTFILES, [('subject_name', 'subject_name')]),
                (SELECTFILES, CREATEDIR, [('dicom', 'val')]),
                (PASSWORDS, CREATEDIR, [('USER', 'USER')]),
                (PASSWORDS, CREATEDIR, [('PASS', 'PASS')]), 
                (CREATEDIR, IMPORT_LONI_INFO, [('date', 'date')]),
                (CREATEDIR, IMPORT_LONI_INFO, [('createdir_out3', 'dicomname')]),
                (CREATEDIR, IMPORT_LONI_INFO, [('createdir_out1', 'subjectdir')]),     # need actual subjectdir name (in case of repeats)
                (IMPORT_LONI_INFO, PREPARE_4_REDCAP, [('dicomname', 'dicomname')]),
                (IMPORT_LONI_INFO, PREPARE_4_REDCAP, [('subjectdir', 'subjectdir')]),
                (IMPORT_LONI_INFO, PREPARE_4_REDCAP, [('imageid', 'imageid')]),
                (CREATEDIR, PREPARE_4_REDCAP, [('createdir_out3', 'pickdicom')]),
                (CREATEDIR, UNPACK, [('createdir_out1', 'subjectdir')]),
                 (CREATEDIR, UNPACK, [('createdir_out2', 'MPRAGE_path')]),
                 (CREATEDIR, CONVERT2MGZ, [('createdir_out3', 'in_file')]),
                 (CREATEDIR, CONVERT2MGZ, [('createdir_out4', 'out_file')]),
                (CREATEDIR, CONVERT2MGZ, [('createdir_out1', 'reconpath')]),
                (CONVERT2MGZ, CONVERTFLAIR, [('out_file', 'out_file')]), # added 
                (CREATEDIR, CONVERTFLAIR, [('flairdumplocation', 'flairdumplocation')]),
                 (CREATEDIR, CONVERTFLAIR, [('pickflair', 'pickflair')]),
                (CREATEDIR, CONVERTFLAIR, [('createdir_out1', 'reconpath')]),
                (CONVERTFLAIR, SCAN_AND_LOG, [('out_file', 'mgz')]), #changed
                #(CONVERTFLAIR, SCAN_AND_LOG, [('out_file', 'mgz')]),
                (CREATEDIR, SCAN_AND_LOG, [('createdir_out5', 'reconfolder')]),
                (UNPACK, SCAN_AND_LOG, [('unpack_out1', 'subname')]),
                (UNPACK, SCAN_AND_LOG, [('unpack_out2', 'subjectdir')]),
                (UNPACK, SCAN_AND_LOG, [('unpack_out3', 'scan_info')]),
                (CREATEDIR, RECON_JOB, [('USER','USER')]),
                (CREATEDIR, RECON_JOB, [('PASS','PASS')]),
                (SCAN_AND_LOG, RECON_JOB, [('subname','subjectname')]), 
                (RECON_JOB, FS_DETAILS, [('subjectname','subjectname')]),
                (FS_DETAILS, MAKE_ORIGINAL_DIR, [('subjectname','subjectname')]), 
                (FS_DETAILS, MAKE_ORIGINAL_DIR, [('recon_pending','recon_pending')]), 
                (FS_DETAILS, MAKE_ORIGINAL_DIR, [('recon_name','recon_name')]), 
                (MAKE_ORIGINAL_DIR, PREPARE_MANEDITS, [('subjectname','subjectname')]), 
                (MAKE_ORIGINAL_DIR, PREPARE_MANEDITS, [('recon_name','recon_name')]), 
                (MAKE_ORIGINAL_DIR, PREPARE_MANEDITS, [('recondir','recondir')]), 
                (MAKE_ORIGINAL_DIR, PREPARE_MANEDITS, [('recon_pending','recon_pending')]),
                (FS_DETAILS, PREPARE_4_REDCAP, [('long','fsversion')]), 
                (PREPARE_MANEDITS, DATASINK, [('subjectname','backup')])  # backup folder?
                 ])

In [22]:
# Execute your workflow in sequential way
# leads_workflow.run(run(plugin='MultiProc', plugin_args={'n_procs' : 2})
leads_workflow.run()

leads_workflow.write_graph(graph2use='flat')

190429-16:53:44,390 nipype.workflow INFO:
	 Workflow leads_workflow settings: ['check', 'execution', 'logging', 'monitoring']
190429-16:53:44,437 nipype.workflow INFO:
	 Running serially.
190429-16:53:44,439 nipype.workflow INFO:
	 [Node] Setting-up "leads_workflow.SELECTFILES" in "/tmp/tmpt077hxw6/leads_workflow/_subject_name_LDS3600069_20190426/SELECTFILES".
190429-16:53:44,444 nipype.workflow INFO:
	 [Node] Running "SELECTFILES" ("nipype.interfaces.io.SelectFiles")
190429-16:53:44,453 nipype.workflow INFO:
	 [Node] Finished "leads_workflow.SELECTFILES".
190429-16:53:44,455 nipype.workflow INFO:
	 [Node] Setting-up "leads_workflow.PASSWORDS" in "/tmp/tmpzcr345bo/leads_workflow/PASSWORDS".
190429-16:53:44,459 nipype.workflow INFO:
	 [Node] Running "PASSWORDS" ("nipype.interfaces.utility.wrappers.Function")
Please enter your PASSWORD for launchpad access: 
········
190429-16:53:48,335 nipype.workflow INFO:
	 [Node] Finished "leads_workflow.PASSWORDS".
190429-16:53:48,337 nipype.workflo

190429-16:54:03,373 nipype.workflow INFO:
	 ***********************************
190429-16:54:03,373 nipype.workflow ERROR:
	 could not run node: leads_workflow.RECON_JOB.a0
190429-16:54:03,374 nipype.workflow INFO:
	 crashfile: /autofs/homes/002/rje11/crash-20190429-165403-rje11-RECON_JOB.a0-f532211e-ba03-44e0-8ff2-b557923c5e43.pklz
190429-16:54:03,375 nipype.workflow ERROR:
	 could not run node: leads_workflow.IMPORT_LONI_INFO.a0
190429-16:54:03,376 nipype.workflow INFO:
	 crashfile: /autofs/homes/002/rje11/crash-20190429-165403-rje11-IMPORT_LONI_INFO.a0-ffaffee5-ec1e-4b6e-a0bd-1c8d1842fcb8.pklz
190429-16:54:03,376 nipype.workflow INFO:
	 ***********************************


RuntimeError: Workflow did not execute cleanly. Check log for details

In [None]:
leads_workflow.write_graph(graph2use='flat')