# Description - Create Swarm File to run lsd functional pre-processing pipeline on NC dataset

This script creates the swarm file to run the functional pre-processing pipeline on the lsd (NC) portion of the lemon dataset. 

***

In [1]:
import pandas as pd
import os.path as osp
import os
from datetime import datetime
import getpass
from utils.basics import PRJ_DIR, NOTEBOOKS_DIR, SCRIPTS_DIR, RESOURCES_DINFO_DIR, PREPROCESSING_NOTES_DIR
# Echo every project location resolved by utils.basics so the reader can
# confirm the notebook is pointing at the expected folders.
for template, location in (
    ('++ INFO: Project Dir:                  %s', PRJ_DIR),
    ('++ INFO: Notebooks Dir:                %s', NOTEBOOKS_DIR),
    ('++ INFO: Bash Scripts Dir:             %s', SCRIPTS_DIR),
    ('++ INFO: Resources (Dataset Info) Dir: %s', RESOURCES_DINFO_DIR),
    ('++ INFO: Pre-processing Notes Dir:     %s', PREPROCESSING_NOTES_DIR),
):
    print(template % location)

++ INFO: Project Dir:                  /data/SFIMJGC_Introspec/2023_fc_introspection
++ INFO: Notebooks Dir:                /data/SFIMJGC_Introspec/2023_fc_introspection/code/fc_introspection/notebooks
++ INFO: Bash Scripts Dir:             /data/SFIMJGC_Introspec/2023_fc_introspection/code/fc_introspection/bash
++ INFO: Resources (Dataset Info) Dir: /data/SFIMJGC_Introspec/2023_fc_introspection/resources/dataset_info
++ INFO: Pre-processing Notes Dir:     /data/SFIMJGC_Introspec/2023_fc_introspection/code/fc_introspection/resources/preprocessing_notes


In [2]:
# Login name of whoever is running the notebook; it is used to build
# per-user swarm and log folders.
username = getpass.getuser()
print('++ INFO: user working now --> %s' % (username,))

++ INFO: user working now --> javiergc


In [3]:
# User-specific folders
# =====================
# Swarm files and logs live in per-user folders directly under the project
# directory; the folder names embed the login name.
swarm_folder, logs_folder = (
    osp.join(PRJ_DIR, template.format(username=username))
    for template in ('SwarmFiles.{username}', 'Logs.{username}')
)
# Folder that will hold the logs of this particular swarm job
logdir_path = osp.join(logs_folder, 'S02_NC_run_func_preproc.logs')

In [4]:
# Create the user-specific folders if they are missing
# ====================================================
# Each entry pairs a folder with the message reported when it gets created.
for folder, created_msg in (
    (swarm_folder, '++ INFO: New folder for swarm files created [%s]'),
    (logs_folder,  '++ INFO: New folder for log files created [%s]'),
):
    if osp.exists(folder):
        # Already there: nothing to create, nothing to report
        continue
    os.makedirs(folder)
    print(created_msg % folder)

In [5]:
# Input / output file locations
# =============================
anat_info_path           = osp.join(RESOURCES_DINFO_DIR,'NC_anat_info.pkl')
# List of subjects that failed structural pre-processing. This now points at the
# same file that the notebook actually loads in section 2
# (PREPROCESSING_NOTES_DIR/NC_struct_fail_list.csv); it previously referenced
# 'NC_struc_fail_list.csv' in RESOURCES_DINFO_DIR, a different folder and a
# misspelled file name.
bad_struct_subjects_path = osp.join(PREPROCESSING_NOTES_DIR,'NC_struct_fail_list.csv')
# Swarm file written at the end of this notebook
swarm_path               = osp.join(swarm_folder,'S02_NC_run_func_preproc.SWARM.sh')

***
# 1. Load list of subjects with at least one rest run with accompanying SNYCQ data

In [6]:
# The subject file has no header row, so pandas labels its columns 0, 1, ...;
# the subject IDs are taken from the first column.
sbj_list = (
    pd.read_csv(osp.join(RESOURCES_DINFO_DIR, 'NC_withSNYCQ_subjects.txt'), header=None)
    .iloc[:, 0]
    .tolist()
)
print("++ INFO: Number of subjects: %s" % len(sbj_list))

++ INFO: Number of subjects: 175


***
# 2. Load list of subjects that failed structural pre-processing

In [11]:
# Table of subjects whose structural pre-processing failed. The code below reads
# two of its columns: 'Subject' and 'func_scans'.
bad_struct_sbj_df   = pd.read_csv(osp.join(PREPROCESSING_NOTES_DIR,'NC_struct_fail_list.csv'))
bad_struct_sbj_list = bad_struct_sbj_df['Subject'].tolist()
# NOTE: a bare `bad_struct_sbj_df.head()` used to sit here. Because it was not
# the last expression of the cell its result was silently discarded, so it has
# been removed. Put it in its own cell if a preview of the table is wanted.
print("++ INFO: Number of subjects with incomplete structural pre-processing:                          %d subjects" % len(bad_struct_sbj_list))
print("++ INFO: Number of rest scans that will be removed due to incomplete structural pre-processing: %d scans " % bad_struct_sbj_df['func_scans'].sum())

++ INFO: Number of subjects with incomplete structural pre-processing:                          25 subjects
++ INFO: Number of rest scans that will be removed due to incomplete structural pre-processing: 96 scans 


***
# 3. Don't attempt functional pre-processing on subjects that failed the structural

If the structural pre-processing failed for a subject, we will not be able to complete our analysis. For that reason, we will not attempt functional pre-processing of scans from subjects with failed anatomical scans.

In [12]:
# Drop every subject that failed structural pre-processing. The exclusion list
# is turned into a set once so that each membership test is a hash lookup
# rather than a scan of the whole list. Original subject order is preserved.
excluded_sbjs = set(bad_struct_sbj_list)
sbj_list = [sbj for sbj in sbj_list if sbj not in excluded_sbjs]
print('++ INFO: Number of subjects for which we will attempt functional pre-processing: %d' % len(sbj_list))

++ INFO: Number of subjects for which we will attempt functional pre-processing: 150


***
# 4. Create Log Directory for swarm jobs

In [13]:
# Create the swarm log directory on first use. os.makedirs is used instead of
# os.mkdir so that a missing parent folder is created as well, consistent with
# how the swarm and log folders are created earlier in this notebook.
if not osp.exists(logdir_path):
    os.makedirs(logdir_path)
    print("++ INFO: Log folder created [%s]" % logdir_path)

++ INFO: Log folder created [/data/SFIMJGC_Introspec/2023_fc_introspection/Logs.javiergc/S02_NC_run_func_preproc.logs]


***
# 5. Create Swarm File

In [15]:
# Write the swarm file. The context manager guarantees the handle is closed
# even if one of the writes below raises; the previous open()/close() pair
# would leave the file open in that case.
with open(swarm_path, "w") as swarm_file:
    # Header line 1: date and time when the SWARM file is created
    swarm_file.write('#Create Time: %s' % datetime.now().strftime("%d/%m/%Y %H:%M:%S"))
    swarm_file.write('\n')
    # Header line 2: commented-out swarm command for submitting this file
    swarm_file.write('#swarm -f {swarm_path} -g 32 -t 32 --time 32:00:00 --logdir {logdir_path}'.format(swarm_path=swarm_path, logdir_path=logdir_path))
    swarm_file.write('\n')

    # One line per subject: export the subject ID, then call the
    # functional pre-processing script
    for sbj in sbj_list:
        swarm_file.write("export SBJ={sbj}; sh {scripts_folder}/S02_NC_run_func_preproc.sh".format(sbj=sbj,scripts_folder=SCRIPTS_DIR))
        swarm_file.write('\n')

In [17]:
# Report where the generated swarm file was written
print('++ INFO: Swarm file available at: %s' % (swarm_path,))

++ INFO: Swarm file available at: /data/SFIMJGC_Introspec/2023_fc_introspection/SwarmFiles.javiergc/S02_NC_run_func_preproc.SWARM.sh
