# Description

This notebook creates the swarm jobs that run CPM for 100 iterations on the real data and for 10,000 iterations on randomized data.

This happens separately for each question in the SNYCQ.

In [1]:
import os.path as osp
import os
from datetime import datetime
import getpass
from utils.basics import get_sbj_scan_list
from utils.basics import PRJ_DIR, SCRIPTS_DIR, RESOURCES_CPM_DIR

In [2]:
# CPM configuration shared by every swarm job generated in this notebook
# ======================================================================
# Number of iterations on real data (to evaluate robustness against fold generation)
CPM_NITERATIONS = 100
# Number of iterations used to build a null distribution
CPM_NULL_NITERATIONS = 10000
# Correlation type to use on the edge-selection step
CORR_TYPE = 'pearson'
# How to summarize across selected edges on the final model
E_SUMMARY_METRIC = 'sum'
# Threshold used on the edge-selection step
E_THR_P = 0.01

In [3]:
# Load the subject list, the scan list and the SNYCQ table for the
# 'post_motion' selection (the table's columns are used below as the behaviors)
sbj_list, scan_list, snycq = get_sbj_scan_list(
    return_snycq=True,
    when='post_motion',
)

++ [post_motion] Number of subjects: 133 subjects
++ [post_motion] Number of scans:    471 scans


In [4]:
# Every column of the SNYCQ table is one behavior for which swarm jobs are generated
behaviors = [*snycq.columns]
print(behaviors)

['Positive', 'Negative', 'Future', 'Past', 'Myself', 'People', 'Surroundings', 'Vigilance', 'Images', 'Words', 'Specific', 'Intrusive']


In [5]:
# Name of the account running the notebook; the swarm and log folders below
# are suffixed with it
username = getpass.getuser()
print('++ INFO: user working now --> %s' % (username,))

++ INFO: user working now --> javiergc


We will generate a separate swarm file per question. Similarly, we will keep the swarm jobs that run computations on the real data separate from those that generate the null distribution.

## 1. Swarm Jobs for the real data

In [6]:
# User-specific folders
# =====================
# Swarm scripts and their logs go into project folders suffixed with the user name.
swarm_folder = osp.join(PRJ_DIR, 'SwarmFiles.{username}'.format(username=username))
logs_folder = osp.join(PRJ_DIR, 'Logs.{username}'.format(username=username))

# Per-behavior swarm script and log directory for the real-data runs
swarm_path = {
    beh: osp.join(swarm_folder, 'S15_CPM_{beh}.SWARM.sh'.format(beh=beh))
    for beh in behaviors
}
logdir_path = {
    beh: osp.join(logs_folder, 'S15_CPM_{beh}.logs'.format(beh=beh))
    for beh in behaviors
}

In [7]:
# Create user-specific folders if needed
# ======================================
# The existence check is kept only to decide whether to report a newly created
# folder. exist_ok=True makes the creation itself safe if the folder appears
# between the check and the call (e.g. another session creating it meanwhile).
if not osp.exists(swarm_folder):
    os.makedirs(swarm_folder, exist_ok=True)
    print('++ INFO: New folder for swarm files created [%s]' % swarm_folder)
for behavior in behaviors:
    # One log directory per behavior
    if not osp.exists(logdir_path[behavior]):
        os.makedirs(logdir_path[behavior], exist_ok=True)
        print('++ INFO: New folder for log files created [%s]' % logdir_path[behavior])

In [8]:
# Write one swarm file per behavior for the CPM runs on real data.
# Each file starts with two comment lines (creation time and the swarm command
# used to submit it), followed by one job line per iteration, with NUM_ITER
# running from 1 to CPM_NITERATIONS.
for behavior in behaviors:
    # All iterations of a behavior share the same output folder, so build and
    # create it once per behavior instead of re-checking it on every iteration.
    out_dir = osp.join(RESOURCES_CPM_DIR,'results',behavior,CORR_TYPE,E_SUMMARY_METRIC)
    os.makedirs(out_dir, exist_ok=True)
    # The context manager guarantees the file is closed even if a write fails
    with open(swarm_path[behavior], "w") as swarm_file:
        # Log the date and time when the SWARM file is created
        swarm_file.write('#Create Time: %s' % datetime.now().strftime("%d/%m/%Y %H:%M:%S"))
        swarm_file.write('\n')
        # Insert comment line with SWARM command
        swarm_file.write('#swarm -f {swarm_path} -g 8 -t 8 -b 10 --time 00:15:00 --partition quick,norm --logdir {logdir_path}'.format(swarm_path=swarm_path[behavior],logdir_path=logdir_path[behavior]))
        swarm_file.write('\n')
        # One job line per iteration; only NUM_ITER changes between lines
        for n_iter in range(CPM_NITERATIONS):
            swarm_file.write("export BEHAV_PATH={behav_path} FC_PATH={fc_path} OUT_DIR={output_dir} BEHAVIOR={behavior} NUM_FOLDS={k} NUM_ITER={n_iter} CORR_TYPE={corr_type} E_SUMMARY_METRIC={e_summary_metric} E_THR_P={e_thr_p} VERBOSE=True RANDOMIZE_BEHAVIOR=False; sh {scripts_folder}/S15_cpm_batch.sh".format(scripts_folder = SCRIPTS_DIR,
                               behav_path       = osp.join(RESOURCES_CPM_DIR,'behav_data.csv'),
                               fc_path          = osp.join(RESOURCES_CPM_DIR,'fc_data.csv'),
                               output_dir       = out_dir,
                               behavior         = behavior,
                               k                = 10,
                               n_iter           = n_iter + 1,
                               corr_type        = CORR_TYPE,
                               e_summary_metric = E_SUMMARY_METRIC,
                               e_thr_p          = E_THR_P))
            swarm_file.write('\n')

***
## 2. Swarm jobs for the Null Distributions

In [9]:
# User-specific folders
# =====================
# Swarm scripts and their logs go into project folders suffixed with the user name.
swarm_folder = osp.join(PRJ_DIR, 'SwarmFiles.{username}'.format(username=username))
logs_folder = osp.join(PRJ_DIR, 'Logs.{username}'.format(username=username))

# Per-behavior swarm script and log directory for the null-distribution runs
swarm_path = {
    beh: osp.join(swarm_folder, 'S15_CPM_null_{beh}.SWARM.sh'.format(beh=beh))
    for beh in behaviors
}
logdir_path = {
    beh: osp.join(logs_folder, 'S15_CPM_null_{beh}.logs'.format(beh=beh))
    for beh in behaviors
}

In [10]:
# Create user-specific folders if needed
# ======================================
# The existence check is kept only to decide whether to report a newly created
# folder. exist_ok=True makes the creation itself safe if the folder appears
# between the check and the call (e.g. another session creating it meanwhile).
if not osp.exists(swarm_folder):
    os.makedirs(swarm_folder, exist_ok=True)
    print('++ INFO: New folder for swarm files created [%s]' % swarm_folder)
for behavior in behaviors:
    # One log directory per behavior
    if not osp.exists(logdir_path[behavior]):
        os.makedirs(logdir_path[behavior], exist_ok=True)
        print('++ INFO: New folder for log files created [%s]' % logdir_path[behavior])

In [11]:
# Write one swarm file per behavior for the null-distribution CPM runs
# (job lines set RANDOMIZE_BEHAVIOR=True).
# Each file starts with two comment lines (creation time and the swarm command
# used to submit it), followed by one job line per iteration, with NUM_ITER
# running from 1 to CPM_NULL_NITERATIONS.
for behavior in behaviors:
    # All iterations of a behavior share the same output folder, so build and
    # create it once per behavior instead of re-checking it on every one of
    # the CPM_NULL_NITERATIONS iterations.
    out_dir = osp.join(RESOURCES_CPM_DIR,'null_distribution',behavior,CORR_TYPE,E_SUMMARY_METRIC)
    os.makedirs(out_dir, exist_ok=True)
    # The context manager guarantees the file is closed even if a write fails
    with open(swarm_path[behavior], "w") as swarm_file:
        # Log the date and time when the SWARM file is created
        swarm_file.write('#Create Time: %s' % datetime.now().strftime("%d/%m/%Y %H:%M:%S"))
        swarm_file.write('\n')
        # Insert comment line with SWARM command
        swarm_file.write('#swarm -f {swarm_path} -g 8 -t 8 -b 50 --time 00:04:30 --partition quick,norm --logdir {logdir_path}'.format(swarm_path=swarm_path[behavior],logdir_path=logdir_path[behavior]))
        swarm_file.write('\n')
        # One job line per iteration; only NUM_ITER changes between lines
        for n_iter in range(CPM_NULL_NITERATIONS):
            swarm_file.write("export BEHAV_PATH={behav_path} FC_PATH={fc_path} OUT_DIR={output_dir} BEHAVIOR={behavior} NUM_FOLDS={k} NUM_ITER={n_iter} CORR_TYPE={corr_type} E_SUMMARY_METRIC={e_summary_metric} E_THR_P={e_thr_p} VERBOSE=True RANDOMIZE_BEHAVIOR=True; sh {scripts_folder}/S15_cpm_batch.sh".format(scripts_folder = SCRIPTS_DIR,
                               behav_path       = osp.join(RESOURCES_CPM_DIR,'behav_data.csv'),
                               fc_path          = osp.join(RESOURCES_CPM_DIR,'fc_data.csv'),
                               output_dir       = out_dir,
                               behavior         = behavior,
                               k                = 10,
                               n_iter           = n_iter + 1,
                               corr_type        = CORR_TYPE,
                               e_summary_metric = E_SUMMARY_METRIC,
                               e_thr_p          = E_THR_P))
            swarm_file.write('\n')