# Run fitting as a batch job

In [1]:
import numpy as np
import pandas as pd

from myconfig import DATASET_DIR, DATA_DIR

In [2]:
# Directory holding the first simulation dataset and its condition table.
SIM01_DIR = DATASET_DIR / '01-simulation01'

# conditions.tsv is tab-separated; read_table uses '\t' as its default separator.
conditions = pd.read_table(SIM01_DIR / 'conditions.tsv')

In [3]:
# Root entropy mixed into every per-dataset generator below, so the job table
# is regenerated identically on each run.
root_seed = 2147671945
colaml = 'singularity exec -B $(readlink -v data) singularity/sif/jupyter+colaml.6c01617 colaml'
cmd_template = '{colaml} fit mmm -i {infile} -o {outfile} --lmax {lmax} --ncat {ncat} --seed {seed} -q'

# Column order of the job table; `cmd` is the 9th tab-separated field.
header = ('conditionID', 'data_rep', 'fit_rep', 'lmax', 'ncat', 'seed', 'infile', 'outfile', 'cmd')

# One row per (condition, data replicate, fit replicate):
# 10 data replicates per condition and 10 fit replicates per data replicate.
with open('240628-batch-job-array.txt', 'wt') as file:
    file.write('\t'.join(header) + '\n')
    # ntips and nOGs are unpacked only to match the tuple arity; they are not used here.
    for cond_idx, conditionID, lmax, ncat, ntips, nOGs in conditions.itertuples():
        for data_rep in range(1, 11):
            infile = f'datasets/01-simulation01/{conditionID}/{conditionID}-rep{data_rep:02d}.json.gz'
            # A fresh generator per (condition, data replicate) keeps each dataset's
            # fit seeds independent of how many other rows are generated.
            rng = np.random.default_rng([root_seed, cond_idx, data_rep])
            fit_seeds = rng.integers(1, 2**16, size=10)
            for fit_rep, seed in enumerate(fit_seeds, start=1):
                outfile = f'data/batch/01-simulation01/{conditionID}/{conditionID}-rep{data_rep:02d}.{fit_rep:02d}.json.gz'
                fields = {
                    'colaml': colaml,
                    'infile': infile,
                    'outfile': outfile,
                    'lmax': lmax,
                    'ncat': ncat,
                    'seed': seed,
                }
                cmd = cmd_template.format(**fields)
                row = (conditionID, data_rep, fit_rep, lmax, ncat, seed, infile, outfile, cmd)
                file.write('\t'.join(map(str, row)) + '\n')

In [4]:
# Read the generated job table back to preview it (tab is read_table's default separator).
pd.read_table('240628-batch-job-array.txt')

Unnamed: 0,conditionID,data_rep,fit_rep,lmax,ncat,seed,infile,outfile,cmd
0,cond01,1,1,2,3,9571,datasets/01-simulation01/cond01/cond01-rep01.j...,data/batch/01-simulation01/cond01/cond01-rep01...,singularity exec -B $(readlink -v data) singul...
1,cond01,1,2,2,3,52166,datasets/01-simulation01/cond01/cond01-rep01.j...,data/batch/01-simulation01/cond01/cond01-rep01...,singularity exec -B $(readlink -v data) singul...
2,cond01,1,3,2,3,52812,datasets/01-simulation01/cond01/cond01-rep01.j...,data/batch/01-simulation01/cond01/cond01-rep01...,singularity exec -B $(readlink -v data) singul...
3,cond01,1,4,2,3,19655,datasets/01-simulation01/cond01/cond01-rep01.j...,data/batch/01-simulation01/cond01/cond01-rep01...,singularity exec -B $(readlink -v data) singul...
4,cond01,1,5,2,3,22601,datasets/01-simulation01/cond01/cond01-rep01.j...,data/batch/01-simulation01/cond01/cond01-rep01...,singularity exec -B $(readlink -v data) singul...
...,...,...,...,...,...,...,...,...,...
1895,cond19,10,6,6,3,6183,datasets/01-simulation01/cond19/cond19-rep10.j...,data/batch/01-simulation01/cond19/cond19-rep10...,singularity exec -B $(readlink -v data) singul...
1896,cond19,10,7,6,3,19452,datasets/01-simulation01/cond19/cond19-rep10.j...,data/batch/01-simulation01/cond19/cond19-rep10...,singularity exec -B $(readlink -v data) singul...
1897,cond19,10,8,6,3,25105,datasets/01-simulation01/cond19/cond19-rep10.j...,data/batch/01-simulation01/cond19/cond19-rep10...,singularity exec -B $(readlink -v data) singul...
1898,cond19,10,9,6,3,34034,datasets/01-simulation01/cond19/cond19-rep10.j...,data/batch/01-simulation01/cond19/cond19-rep10...,singularity exec -B $(readlink -v data) singul...


In [5]:
!head -n-0 240628-fitting-sbatch*.sh

==> 240628-fitting-sbatch1.sh <==
#!/bin/bash
#SBATCH --array 1-1000%80
#SBATCH --output data/batch/01-simulation01/log/%x.stdxxx-%3a
set -eu -o pipefail

cd $SLURM_SUBMIT_DIR
eval `awk -v ARRAYID=$(( $SLURM_ARRAY_TASK_ID+1 )) -F "\t" 'NR==ARRAYID {print $9}' notebooks/eval-with-simulation/240628-batch-job-array.txt`

==> 240628-fitting-sbatch2.sh <==
#!/bin/bash
#SBATCH --array 1-900%80
#SBATCH --output data/batch/01-simulation01/log/%x.stdxxx-%3a
set -eu -o pipefail

cd $SLURM_SUBMIT_DIR
eval `awk -v ARRAYID=$(( $SLURM_ARRAY_TASK_ID+1001 )) -F "\t" 'NR==ARRAYID {print $9}' notebooks/eval-with-simulation/240628-batch-job-array.txt`


Run the following commands outside this Singularity image, from the project's top-level directory:

`sbatch notebooks/eval-with-simulation/240628-fitting-sbatch1.sh`  
`sbatch notebooks/eval-with-simulation/240628-fitting-sbatch2.sh`

In [6]:
# remove empty log files
!find {DATA_DIR}/batch/01-simulation01/log -type f -empty -delete