In [1]:
import numpy as np, os, sys, subprocess
import matplotlib.pyplot as plt

In [2]:
# write sbatch header outlining job and requirements
# need 10GB to run high/lowT only 
# 15 GB to run hybrid
# need 60GB to run equilibrium training

def write_header(prefix):
    
    sbatch_header = '''#!/bin/bash

#SBATCH --mail-type=ALL
#SBATCH --mail-user=mikejones@uchicago.edu

#SBATCH --job-name=''' + prefix + '''

#SBATCH --output=''' + prefix + '''.out
##SBATCH --partition=broadwl
#SBATCH --partition=gm4-pmext
##SBATCH --partition=fela
#SBATCH --nodes=1 # SET NUM NODES 
#SBATCH --ntasks-per-node=1 # SETS NUM MPI RANKS
#SBATCH --cpus-per-task=10 # SET NUM THREADS (cores)
#SBATCH --mem=60GB # REQUEST MEMORY
##SBATCH --constraint=gold-6248
#SBATCH --qos=gm4-cpu

'''
    return sbatch_header 

def write_header_gpu(prefix):
    
    sbatch_header = '''#!/bin/bash

#SBATCH --mail-type=ALL
#SBATCH --mail-user=mikejones@uchicago.edu

#SBATCH --job-name=GPU''' + prefix + '''
#SBATCH --output=''' + prefix + '''.out   

##SBATCH --partition=fela
#SBATCH --partition=gm4-pmext
#SBATCH --nodes=1 # SET NUM NODES
#SBATCH --gres=gpu:1 # SET NUM GPUS
#SBATCH --ntasks-per-node=1 # SETS NUM MPI RANKS (1 PER GPU)
#SBATCH --cpus-per-task=10 # SET NUM THREADS (cores)
#SBATCH --mem=60GB # REQUEST MEMORY
#SBATCH --qos=gm4

module load cuda
'''
    return sbatch_header 

# write sbatch text given hyper parameters 
def write_runline(load_path, load_npy, n_trajs, n_frames, hde_name, 
            hde_lag, hde_nsm, nClusterCentres, ClusterStride, nMacroStates, save_dir):

    #populate run_line with hyperparams
    run_line =  ('python MSM_pipeline_anyseq.py' + # python run_SnRV_equ_test.py
                ' --load_path ' + load_path + 
                ' --load_npy ' + load_npy + 
                ' --n_traj ' + str(n_trajs) + 
                ' --n_frames ' + str(n_frames) +
                ' --hde_name ' + hde_name +
                ' --hde_lag ' + str(hde_lag) +
                ' --hde_nsm ' + str(hde_nsm) +
                ' --nClusterCentres ' + str(nClusterCentres) +
                ' --ClusterStride ' + str(ClusterStride) +
                ' --nMacroStates ' + str(nMacroStates) + 
                ' --save_dir ' + save_dir + '\n\n')
 
    return run_line

In [3]:
## write and run sbatch

def run_sbatch(name,sbatch_final):
    filename = name.replace('npy', 'sbatch')
    file = open(filename,"w+")

    file.write(sbatch_final)
    file.close()

    subprocess.call(['sbatch', filename])
    print ('ran ' + filename)


In [4]:
# list of parameters corresponding to each sequences
seq_list = [('GC-end-fix_313K_112-150001-190.npy', '100x100k_permute_lag_20_sm_5_ep_30_GC-end-fix_313K_112-150001-190.pkl', 5, 6),
            ('GC-end-fix_313K_112-150001-190.npy', '100x100k_permute_lag_20_sm_5_ep_30_GC-end-fix_313K_112-150001-190.pkl', 5, 5),
            ('GC-end-fix_313K_112-150001-190.npy', '100x100k_permute_lag_20_sm_5_ep_30_GC-end-fix_313K_112-150001-190.pkl', 4, 5),
            ('AT-all_306K_112-165001-190.npy', '100x100k_permute_lag_20_sm_5_ep_30_AT-all_306K_112-165001-190.pkl', 5, 6),
            ('GC-core_321K_112-165001-190.npy', '100x100k_permute_lag_20_sm_3_ep_30_GC-core_321K_112-165001-190.pkl', 3, 3),
            ('GC-mid-fix_319K_112-150001-190.npy', '100x100k_permute_lag_20_sm_2_ep_30_GC-mid-fix_319K_112-150001-190.pkl', 2, 2),
            ('GC-core_321K_112-165001-190.npy', '100x100k_permute_lag_20_sm_3_ep_30_GC-core_321K_112-165001-190.pkl', 3, 4),
            ('GC-mid-fix_319K_112-150001-190.npy', '100x100k_permute_lag_20_sm_2_ep_30_GC-mid-fix_319K_112-150001-190.pkl', 2, 3)]
'''
seq_list = [('GC-mid-fix_319K_112-150001-190.npy', '100x100k_permute_lag_20_sm_2_ep_30_GC-mid-fix_319K_112-150001-190.pkl', 3, 3),
           ('GC-mid-fix_319K_112-150001-190.npy', '100x100k_permute_lag_20_sm_2_ep_30_GC-mid-fix_319K_112-150001-190.pkl', 3, 4)]
'''

for seq in seq_list:
    
    # define arguments to pass into msm
    load_path= '/home/mikejones/scratch-midway2/srv/dna_data/'
    load_npy=seq[0]
    n_trajs=100
    n_frames=100001
    hde_name=seq[1]
    hde_lag=40
    hde_nsm=seq[2]
    nClusterCentres=200
    ClusterStride=10
    nMacroStates=seq[3]
    save_dir='./save_metatrajs/lag_40/'
    
    for nClusterCentres in [200]:
    
        # write sbatch file
        sbatch_text = write_header_gpu(load_npy)

        sbatch_text = sbatch_text + write_runline(load_path, load_npy, n_trajs, n_frames, hde_name, 
                hde_lag, hde_nsm, nClusterCentres, ClusterStride, nMacroStates, save_dir)

        # execute job
        run_sbatch(load_npy, sbatch_text)

ran GC-end-fix_313K_112-150001-190.sbatch
ran GC-end-fix_313K_112-150001-190.sbatch
ran GC-end-fix_313K_112-150001-190.sbatch
ran AT-all_306K_112-165001-190.sbatch
ran GC-core_321K_112-165001-190.sbatch
ran GC-mid-fix_319K_112-150001-190.sbatch
ran GC-core_321K_112-165001-190.sbatch
ran GC-mid-fix_319K_112-150001-190.sbatch
