This notebook runs the FreeSurfer `recon-all` pipeline on every T1w scan found in the BIDS directory.
The output is written to the `subj_dir` defined in the Setup section below.

# Setup 

In [None]:
from pathlib import Path
import pandas as pd

In [None]:
import re
# Raise pandas' display limits so long scan paths and generated commands
# are shown in full instead of being truncated.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.max_colwidth = 500

In [None]:
## EDIT THESE VARIABLES
# Tag for this analysis run; it is embedded in the names of the conf script,
# the swarm command file and the swarm log directory.
analysis_version = "2017_12_12"
project_dir = Path('/data/NNDSP/') # needs to be pathlib.Path object
# Kept as strings: ncpus is concatenated into the recon-all command (-openmp),
# and both values are interpolated into the swarm submission line (-t / -g).
ncpus = '8'
ram = '16'  # presumably GB per swarm process -- TODO confirm against the swarm docs

In [None]:
# Scripts, options and logs for this analysis all live under freesurfer_dir.
freesurfer_dir = project_dir.joinpath('anal/freesurfer_files_john/other_files')
expert_opts = freesurfer_dir.joinpath('expert.opts')
conf_script = freesurfer_dir.joinpath('conf' + analysis_version + '.sh')
# Final destination of the recon-all results (each job rsyncs its output here).
# The conf script exports the same location as SUBJECTS_DIR.
subj_dir = project_dir / 'derivatives' / 'fs_subj_john'
# Relative path: it is resolved against the working directory at the time the
# scans are globbed, so the notebook must be running from the project directory.
bids_dir = Path('./bids_2017_07_14')
log_dir = freesurfer_dir.joinpath('swarm_output', analysis_version)

swarm_path = freesurfer_dir.joinpath('cross_sectional_recon_' + analysis_version + '.cmd')

# Create the output locations up front. `parents`/`exist_ok` make the cell safe
# to re-run and to use on a fresh project tree.
subj_dir.mkdir(parents=True, exist_ok=True)
log_dir.mkdir(parents=True, exist_ok=True)


In [None]:
# Make project_dir the working directory. Relative paths defined earlier
# (e.g. bids_dir = './bids_2017_07_14') are resolved against it from here on.
%pwd
%cd {project_dir}
%pwd

In [None]:
# Write the shell configuration that every swarm command sources before
# running recon-all. `base` and `SUBJECTS_DIR` are derived from project_dir and
# subj_dir so they cannot drift from the notebook settings; review the module
# list and script paths for your environment.
# Literal shell braces are doubled ({{ }}) because the text goes through str.format.
conf_text = """
umask 0007
export base={base}
export SUBJECTS_DIR={subjects_dir}
module load freesurfer fsl afni
source ${{FREESURFER_HOME}}/SetUpFreeSurfer.sh
export PATH=$base/scripts:/data/DSST/scripts/:${{PATH}}
alias csf="mkdir -p .trash;mv -f cmd.* swarm*[eo] sw*n*.[eo] .trash/"
export ITK_GLOBAL_DEFAULT_NUMBER_OF_THREADS=2
""".format(base=project_dir.as_posix(), subjects_dir=subj_dir.as_posix())
# The thread limit is exported: a plain assignment in a sourced file stays a
# shell variable and is not inherited by child processes such as recon-all.
conf_script.write_text(conf_text)

# Generating subject list

In [None]:
# Collect every T1-weighted image below the BIDS directory, one row per scan.
df_scans = pd.DataFrame(
    [t1w_file.as_posix() for t1w_file in bids_dir.glob('**/*T1w.nii.gz')],
    columns=['scan_path'],
)

In [None]:
# Preview the first few scan paths that were found.
df_scans.head(n=5)

In [None]:
# Parse the BIDS subject and run labels out of each scan path.
# The pattern is a raw string so that `\d` reaches the regex engine unchanged.
# Scans whose path lacks either label get NaN in both new columns.
scan_labels = df_scans.scan_path.str.extract(
    r'.*(?P<subject>sub-\d{2,4}).*(?P<run>run-[0-9]{3}).*',
    expand=True)

# Rebuild from scan_path alone so that re-running this cell does not append
# a second pair of subject/run columns.
df_scans = pd.concat([df_scans[['scan_path']], scan_labels], axis=1)

df_scans.head()

# Running recon-all on all scans

### Function for generating commands

In [None]:
def generate_freesurfer_command(tpNid=None,subj_dir=None,ncpus='4',image=None,conf_script=None,  data_on_lscratch=False):
    """Build the shell command line for one cross-sectional recon-all run.

    The generated command creates a job-local scratch directory, sources the
    conf script, runs ``recon-all -all`` with the scratch directory as its
    subjects directory, rsyncs the results into ``subj_dir`` and finally
    empties the job's scratch space.

    Parameters
    ----------
    tpNid : str or pandas.Series of str
        Subject identifier passed to ``recon-all -s``.
    subj_dir : pathlib.Path
        Directory that receives the finished FreeSurfer output.
    ncpus : str or int
        Thread count passed to ``recon-all -openmp``.
    image : str or pandas.Series of str
        Path of the input image passed to ``recon-all -i``.
    conf_script : pathlib.Path
        Shell script sourced before recon-all is started.
    data_on_lscratch : bool
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    str or pandas.Series of str
        One command per subject. Plain ``+`` concatenation is used throughout
        so that Series arguments broadcast element-wise.
    """
    # (cross sectional):  recon-all -all -s <tpNid> -i path_to_tpN_dcm
    # A single spelling of the Slurm job-id variable is used for both the work
    # directory and the final cleanup.
    scratch_root = '/lscratch/$SLURM_JOB_ID'
    out_dir = (Path(scratch_root) / 'work' / 'out').as_posix()

    cmd = 'mkdir -p ' + out_dir + ';'
    cmd += ('source ' + conf_script.as_posix() + ';'
            + ' recon-all'
            + ' -all'
            + ' -no-isrunning'
            + ' -sd ' + out_dir
            + ' -openmp ' + str(ncpus)  # str() so an integer ncpus is accepted too
            + ' -s ' + tpNid
            + ' -i ' + image)

    # Steps are chained with ';', so the copy-back and cleanup run regardless
    # of recon-all's exit status.
    cmd += '; rsync -a ' + out_dir + '/ ' + subj_dir.absolute().as_posix() + '/;'
    cmd += ' rm -rf ' + scratch_root + '/*'

    return cmd
# generate_singularity_command(output_dir=output_dir, sing_image= sing_image, bids_dir=bids_dir, participant=participant)

### Generate cross-sectional reconall commands

In [None]:
# Attach one recon-all command to every scan. The subject and run labels are
# joined with '_' to form the FreeSurfer subject id; the Series arguments make
# the command builder return one command per row.
df_sing = df_scans.assign(
    cmd=generate_freesurfer_command(
        tpNid=df_scans.subject + '_' + df_scans.run,
        subj_dir=subj_dir,
        conf_script=conf_script,
        image=df_scans.scan_path,
        ncpus=ncpus,
    )
)

# File that will hold one command per line for swarm.
swarm_path = freesurfer_dir / ('cross_sectional_recon_' + analysis_version + '.cmd')

In [None]:
# Write the de-duplicated commands, one per line, to the swarm file.
swarm_path.write_text('\n'.join(df_sing['cmd'].drop_duplicates().tolist()))

# Read the file back and show its first four commands as a sanity check.
swarm_path.read_text().splitlines()[:4]

In [None]:
# (total commands, unique commands) -- the two differ if any command is duplicated.
(df_sing.cmd.shape[0], df_sing.cmd.drop_duplicates().shape[0])

###  Run swarm

In [None]:
# Submit the command file with swarm. IPython's `!` capture returns the
# command's output as a list of lines; {…} interpolates notebook variables.
job_id = !swarm -f {swarm_path} -g {ram} -t {ncpus} --logdir {log_dir} --time 24:00:00 --partition=nimh,norm --gres=lscratch:200
# Keep only the first output line (presumably the job id printed by swarm -- TODO confirm).
job_id = job_id[0]
job_id

###  Exploring possible issues with swarm.

In [None]:
# Optional list of positions to inspect; left empty it defaults to every log found.
files_of_interest = []

# One row per swarm stderr log (*.e) in the log directory.
df_error_files_paths = pd.DataFrame(
    [x.as_posix() for x in log_dir.glob('*.e')],
    columns=['paths'])

# Pull the number that follows the second underscore of 'swarm_<digits>_<digits>.e'
# out of each path and sort by it. The pattern is a raw string and the dot before
# the 'e' suffix is escaped so it only matches a literal '.'.
df_error_files = (
    df_error_files_paths
    .assign(run=lambda df:
            df.paths.str.extract(
                r'/.*swarm_\d*_(\d*)\.e',
                expand=False)
            .astype(int))
    .sort_values('run'))

if not files_of_interest:
    files_of_interest = list(range(len(df_error_files_paths)))


df_error_files.head()

Some files failed (observed on dashboard):

In [None]:
# Positions (in run-sorted order) of the logs of interest. Positional
# selection is done with pandas `.iloc`, so no numpy import is needed.
print('\n\n\n'.join(df_error_files.paths.iloc[[164,119,113,95,14,15]]))

In [None]:
# Read the stderr logs at the selected positions (run-sorted order).
# Positional selection uses pandas `.iloc`, so no numpy import is needed.
error_files = [Path(x).read_text() for x in df_error_files.paths.iloc[[164,119,113,95,14,15]]]

In [None]:
# Show the stderr logs one after another, separated by blank lines.
print(*error_files, sep='\n\n\n')

In [None]:
# Read the stdout logs (.o) that sit next to the selected stderr logs (.e).
# Positional selection uses pandas `.iloc`, so no numpy import is needed.
output_files = [Path(x).with_suffix('.o').read_text() for x in df_error_files.paths.iloc[[164,119,113,95,14,15]]]

In [None]:
# Show the stdout logs one after another, separated by blank lines.
print('\n\n\n'.join(output_files))
