# Benchmarking

## Machine info

In [1]:
# Record the CPU model, socket count, and NUMA layout of the benchmark host.
!lscpu | grep -i -e 'Model name' -e 'Socket(s)' -e 'numa'

Model name:                      Intel(R) Xeon(R) CPU E5-4627 v3 @ 2.60GHz
Socket(s):                       4
NUMA node(s):                    4
NUMA node0 CPU(s):               0-9
NUMA node1 CPU(s):               10-19
NUMA node2 CPU(s):               20-29
NUMA node3 CPU(s):               30-39


In [2]:
# Memory snapshot; -w (wide) reports buffers and cache as separate columns.
!free -w

               total        used        free      shared     buffers       cache   available
Mem:       528272404    17958956    86963236       27136       13844   423336368   507286632
Swap:        2097148     1977548      119600


## Run

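Command used to launch the batch script inside the Singularity container (run from the directory that contains `singularity/` and `notebooks/`):

`singularity exec --home $(pwd):/home/jovyan singularity/sif/jupyter+colaml.6c01617 python3 notebooks/exec-time/240820-batch.py`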


In [3]:
# Show the batch script that the singularity command above executes.
!cat 240820-batch.py

import os
import shlex
import subprocess

import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from myconfig import DATASET_DIR, ROOT_DIR
SIM01_DIR = DATASET_DIR/'01-simulation01'
OUT_FILE = ROOT_DIR/'results'/'simulation-exec-time.jsonl'

# Work from the directory that holds this script so the relative log path
# below resolves next to it. abspath() guards against __file__ being a bare
# filename, in which case dirname() would return '' and os.chdir('') raises.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# clear files
# Truncate from Python instead of a shell redirection: the path needs no shell
# quoting, and a failure (e.g. missing directory) raises here instead of being
# swallowed by subprocess.getoutput().
open(OUT_FILE, 'w').close()
open('240820-bench.log', 'w').close()

# machine info
# Append the CPU model / socket / NUMA summary and a memory snapshot to the log.
subprocess.getoutput("lscpu | grep -i -e 'Model name' -e 'Socket(s)' -e 'numa' >> 240820-bench.log")
subprocess.getoutput("free -w >> 240820-bench.log")

# Load the table of simulation conditions (tab-separated).
conditions = pd.read_csv(SIM01_DIR/'conditions.tsv', sep='\t')

# Pair every condition with each replicate number 1..10. Both sides carry a
# constant 'dummy' key, so merging on it yields the full cross product; the
# helper column is dropped afterwards.
conditions_rep = (
    conditions
    .assign(dummy=0)
    .merge(pd.DataFrame({'datarep': range(1, 11), 'dummy': 0}), on='dummy')
    .drop(columns='dummy')
)

# randomize execution order to average out external factors
# such as CPU temperature and competing tasks
perm_seed = 4242424242
run_order = pd.concat([
    conditions_rep.sample(frac=1, random_state=np.arra

In [4]:
# Show benchmark.py (presumably the per-run worker driven by the batch script -- confirm).
!cat benchmark.py

import os

# limit numpy/scipy threads
# Set before the numeric imports below so their thread pools start out
# restricted to a single thread.
os.environ['OPENBLAS_NUM_THREADS'] \
= os.environ['MKL_NUM_THREADS'] \
= os.environ['VECLIB_NUM_THREADS'] \
= os.environ['OMP_NUM_THREADS'] \
= os.environ['NUMEXPR_NUM_THREADS'] \
= '1'

# limit numba threads
# NOTE(review): NUMBA_DISABLE_JIT turns off JIT compilation altogether (jitted
# functions run as plain Python), which is more than a thread limit -- confirm
# this is intended for the timings.
os.environ['NUMBA_DISABLE_JIT'] = '1'
os.environ['NUMBA_NUM_THREADS'] = '1'

# process on a single NUMA node
# CPUs 10-19 are NUMA node1 on the benchmark host (per the lscpu summary in
# the benchmark log); adjust the range on a machine with a different topology.
import psutil
p = psutil.Process(os.getpid())
p.cpu_affinity(cpus=range(10, 20))

from threadpoolctl import threadpool_limits

import argparse
import json
import time
import traceback

import numba
import numpy as np

from colaml import *
from colaml.__main__ import phytbl_from_json
from myconfig import DATASET_DIR, ROOT_DIR

# make sure numpy is working on a single thread
# threadpool_limits() only acts on thread pools of libraries that are already
# loaded in the process, so it is called after the numeric imports above;
# calling it before them would not limit anything.
threadpool_limits(1)

SIM01_DIR = DATASET_DIR/'01-simulation01'

# Command-line interface: positional arguments, consumed in the order listed.
# An empty options dict leaves the argument as a plain string (argparse default).
parser = argparse.ArgumentParser()
for _positional, _options in (
    ('dataID', {'type': int}),
    ('conditionID', {}),
    ('lmax', {'type': int}),
    ('ncat', {'type': int}),
):
    parser.add_argument(_positional, **_options)
parser.add_

In [5]:
# Show the benchmark log, hiding every line that starts with '2024-08-'.
!grep -ve '^2024-08-' 240820-bench.log

Model name:                      Intel(R) Xeon(R) CPU E5-4627 v3 @ 2.60GHz
Socket(s):                       4
NUMA node(s):                    4
NUMA node0 CPU(s):               0-9
NUMA node1 CPU(s):               10-19
NUMA node2 CPU(s):               20-29
NUMA node3 CPU(s):               30-39
               total        used        free      shared     buffers       cache   available
Mem:       528272404    13629148    91705540       27156       13844   422923872   511616488
Swap:        2097148     1977548      119600
