# Run fitting as a batch job -- fish dataset / MAP estimation

In [1]:
import numpy as np
import pandas as pd

from myconfig import DATASET_DIR, DATA_DIR

In [2]:
# Table of fitting conditions, one row per condition (tab-separated).
# The job-table cell unpacks each row into five fields
# (conditionID, model, lmax, nmixt, ncat), so the file must have exactly
# those five columns in that order.
conditions = pd.read_table('conditions-main.tsv')

In [3]:
# Root entropy for every per-job seed.  Each (condition, fold) pair seeds its
# own generator with [root_seed, cidx, fold], so the seeds written below are a
# deterministic function of this value and the loop position.
root_seed = 732198749832

# Command prefix that runs the `colaml` CLI inside the Singularity image.
# `$(readlink -v data)` is stored as literal text; it is expanded only when a
# shell later evaluates the command string.
colaml = 'singularity exec -B $(readlink -v data) singularity/sif/jupyter+colaml.6c01617 colaml'

# All models share the same leading and trailing arguments; only the
# model-specific option in the middle differs (`--ncat` for mmm, `--nmixt`
# for mirage, nothing extra for branch).
_fit_head = '{colaml} fit {model} -i {infile} -o {outfile} --lmax {lmax}'
_fit_tail = ' --seed {seed} -q --map'
cmd_template = {
    'mmm'   : _fit_head + ' --ncat {ncat}' + _fit_tail,
    'mirage': _fit_head + ' --nmixt {nmixt}' + _fit_tail,
    'branch': _fit_head + _fit_tail,
}

# Column names of the job table; the command is deliberately the last column.
columns = ('conditionID', 'fold', 'fit_rep', 'model', 'lmax', 'nmixt', 'ncat',
           'seed', 'infile', 'outfile', 'cmd')

# Write one tab-separated row per fitting job:
# every condition x folds 1..5 x 10 replicate fits with distinct fit_rep numbers.
with open('240717-batch-job-array-fish-map.txt', 'wt') as job_table:
    print(*columns, sep='\t', file=job_table)
    # `cidx` is the DataFrame index of the condition row; it feeds the seed.
    for cidx, conditionID, model, lmax, nmixt, ncat in conditions.itertuples():
        template = cmd_template[model]
        for fold in range(1, 6):
            # The training split depends only on lmax and the fold number.
            infile = f'datasets/03-fish/cv5/fish-lmax{lmax}-train{fold}-v0.json.gz'
            # Ten seeds drawn from 1 .. 2**16 - 1 (upper bound is exclusive).
            seeds = np.random.default_rng([root_seed, cidx, fold]).integers(1, 2**16, size=10)
            for fit_rep, seed in enumerate(seeds, start=1):
                outfile = f'data/batch/03-fish-map/{conditionID}/{conditionID}-train{fold}.{fit_rep:02d}.json.gz'
                # Every field is supplied; str.format ignores the ones a
                # given template does not reference.
                fields = {
                    'colaml': colaml,
                    'model': model,
                    'lmax': lmax,
                    'nmixt': nmixt,
                    'ncat': ncat,
                    'seed': seed,
                    'infile': infile,
                    'outfile': outfile,
                }
                cmd = template.format(**fields)
                print(conditionID, fold, fit_rep, model, lmax, nmixt, ncat,
                      seed, infile, outfile, cmd, sep='\t', file=job_table)

In [4]:
# Read the generated job table back and display it as a quick sanity check
# of the row count and column layout.
pd.read_table('240717-batch-job-array-fish-map.txt')

Unnamed: 0,conditionID,fold,fit_rep,model,lmax,nmixt,ncat,seed,infile,outfile,cmd
0,cond01,1,1,mmm,2,-1,1,56790,datasets/03-fish/cv5/fish-lmax2-train1-v0.json.gz,data/batch/03-fish-map/cond01/cond01-train1.01...,singularity exec -B $(readlink -v data) singul...
1,cond01,1,2,mmm,2,-1,1,46944,datasets/03-fish/cv5/fish-lmax2-train1-v0.json.gz,data/batch/03-fish-map/cond01/cond01-train1.02...,singularity exec -B $(readlink -v data) singul...
2,cond01,1,3,mmm,2,-1,1,35323,datasets/03-fish/cv5/fish-lmax2-train1-v0.json.gz,data/batch/03-fish-map/cond01/cond01-train1.03...,singularity exec -B $(readlink -v data) singul...
3,cond01,1,4,mmm,2,-1,1,63411,datasets/03-fish/cv5/fish-lmax2-train1-v0.json.gz,data/batch/03-fish-map/cond01/cond01-train1.04...,singularity exec -B $(readlink -v data) singul...
4,cond01,1,5,mmm,2,-1,1,21308,datasets/03-fish/cv5/fish-lmax2-train1-v0.json.gz,data/batch/03-fish-map/cond01/cond01-train1.05...,singularity exec -B $(readlink -v data) singul...
...,...,...,...,...,...,...,...,...,...,...,...
1145,cond23,5,6,branch,3,-1,-1,43975,datasets/03-fish/cv5/fish-lmax3-train5-v0.json.gz,data/batch/03-fish-map/cond23/cond23-train5.06...,singularity exec -B $(readlink -v data) singul...
1146,cond23,5,7,branch,3,-1,-1,18542,datasets/03-fish/cv5/fish-lmax3-train5-v0.json.gz,data/batch/03-fish-map/cond23/cond23-train5.07...,singularity exec -B $(readlink -v data) singul...
1147,cond23,5,8,branch,3,-1,-1,35350,datasets/03-fish/cv5/fish-lmax3-train5-v0.json.gz,data/batch/03-fish-map/cond23/cond23-train5.08...,singularity exec -B $(readlink -v data) singul...
1148,cond23,5,9,branch,3,-1,-1,6235,datasets/03-fish/cv5/fish-lmax3-train5-v0.json.gz,data/batch/03-fish-map/cond23/cond23-train5.09...,singularity exec -B $(readlink -v data) singul...


In [5]:
# Show the sbatch scripts that consume the job table.
# `head -n-0` prints all but the last 0 lines, i.e. each file in full, and
# prefixes every file with a `==> name <==` banner when several files match.
!head -n-0 240717-fitting-sbatch*.sh

==> 240717-fitting-sbatch3map.sh <==
#!/bin/bash
#SBATCH --array 1-150%60
#SBATCH --output data/batch/03-fish-map/log/%x.stdxxx-%3a
set -eu -o pipefail
export OPENBLAS_NUM_THREADS=1 MKL_NUM_THREADS=1 OMP_NUM_THREADS=1 VECLIB_NUM_THREADS=1 NUMEXPR_NUM_THREADS=1

cd $SLURM_SUBMIT_DIR
eval `awk -v ARRAYID=$(( $SLURM_ARRAY_TASK_ID+1 )) -F "\t" 'NR==ARRAYID {print $NF}' notebooks/fit-to-real-data/240717-batch-job-array-fish-map.txt`

==> 240717-fitting-sbatch4map.sh <==
#!/bin/bash
#SBATCH --array 1-1000%80
#SBATCH --output data/batch/03-fish-map/log/%x.stdxxx-%3a
set -eu -o pipefail
export OPENBLAS_NUM_THREADS=1 MKL_NUM_THREADS=1 OMP_NUM_THREADS=1 VECLIB_NUM_THREADS=1 NUMEXPR_NUM_THREADS=1

cd $SLURM_SUBMIT_DIR
eval `awk -v ARRAYID=$(( $SLURM_ARRAY_TASK_ID+151 )) -F "\t" 'NR==ARRAYID {print $NF}' notebooks/fit-to-real-data/240717-batch-job-array-fish-map.txt`


Run the following outside this Singularity image, from the project's top-level directory:

`sbatch notebooks/fit-to-real-data/240717-fitting-sbatch3map.sh`  
`sbatch notebooks/fit-to-real-data/240717-fitting-sbatch4map.sh`


In [6]:
# Remove empty log files: delete every zero-byte regular file under the
# batch log directory so that only logs with actual content remain.
# NOTE(review): presumably tasks that finish without printing anything leave
# empty logs behind (the fit commands are run with -q) -- confirm.
!find {DATA_DIR}/batch/03-fish-map/log -type f -empty -delete