# Genesis4 Benchmark on Perlmutter (NERSC)

HPC systems often have special commands to invoke MPI jobs. NERSC uses Slurm, which provides `srun` in place of `mpirun`.

In [None]:
from genesis.version4 import Genesis4
import os

The `MPI_RUN` class attribute needs to be replaced. For convenience, Genesis4 tries to detect NERSC:

In [None]:
# Display the MPI launch template currently set on the Genesis4 class.
Genesis4.MPI_RUN

In [None]:
# Display the class-level working directory setting.
Genesis4.WORKDIR

In [None]:
# Optional manual overrides: uncomment and edit the lines below as needed.
#Genesis4.COMMAND = '/path/to/genesis4'
#Genesis4.COMMAND_MPI = '/path/to/genesis4'
# NOTE(review): the next line sets a bare WORKDIR name, unlike the Genesis4.* lines
# around it, and no visible cell reads it -- confirm whether Genesis4.WORKDIR was intended.
#WORKDIR = os.path.expandvars('$HOME/Scratch')
#Genesis4.MPI_RUN = 'srun -n {nproc} --ntasks-per-node {nproc} -c 1 {command_mpi}'

Additionally, the working directory (`Genesis4.WORKDIR`, displayed above) must be on the scratch disk.

Instantiate with this workdir:

In [None]:
# Input file for the benchmark case (path is relative to the notebook's directory).
FILE = 'data/basic4/cu_hxr.in'
# Build the Genesis4 object from that input file; no working directory is passed here.
G = Genesis4(FILE)

Set up the benchmark

In [None]:
# Override 'zstop' on the entry at index 6 of the 'main' input list.
# NOTE(review): the index depends on the layout of the input file; presumably
# entry 6 is the tracking step -- confirm if the input file changes.
G.input['main'][6]['zstop'] = 40

Run with MPI

In [None]:
%%time
# Single reference run on 8 processes with verbose output switched off;
# the %%time cell magic reports how long the whole cell took.
G.verbose=False
G.nproc = 8
G.run()

# Timing

In [None]:
# os.cpu_count() returns None when the CPU count cannot be determined; fall back
# to a single core so the core-halving loop further down always gets an integer.
MAX_CPUS = os.cpu_count() or 1
MAX_CPUS

In [None]:
import time
def time1(nproc, genesis=None):
    """
    Time a single run on `nproc` processes.

    Parameters
    ----------
    nproc : int
        Value assigned to the object's `nproc` attribute before running.
    genesis : object, optional
        Object to run. It must accept `verbose` and `nproc` attributes and
        provide a `run()` method. Defaults to the notebook-level `G`.

    Returns
    -------
    float
        Elapsed time of the run, in seconds.
    """
    if genesis is None:
        # Resolved at call time so the helper follows whatever `G` currently is.
        genesis = G
    genesis.verbose = False
    genesis.nproc = nproc
    # perf_counter() is monotonic, so the interval cannot be distorted by
    # system clock adjustments the way time.time() differences can.
    t_start = time.perf_counter()
    genesis.run()
    return time.perf_counter() - t_start

#time1(MAX_CPUS)

In [None]:
%%time
nlist = []
tlist = []
n_cpu = MAX_CPUS
while n_cpu > 0:
    n = n_cpu
    nlist.append(n)
    dt = time1(n)
    tlist.append(dt)
    print(f'{n} cores, {dt:0.1f} s')
    n_cpu = n_cpu //2

In [None]:
# Show the recorded core counts and the matching run times in seconds.
nlist, tlist

# Some saved stats

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%config InlineBackend.figure_format = 'retina'

# Perlmutter
nlist, tlist = ([256, 128, 64, 32, 16, 8, 4, 2, 1],
 [11.364259004592896,
  8.756781578063965,
  15.518342971801758,
  27.61109709739685,
  48.553008794784546,
  95.51237463951111,
  182.06347823143005,
  361.24336671829224,
  556.2742402553558])
nlist = np.array(nlist)[::-1]
tlist = np.array(tlist)[::-1]

tref = tlist[0]

# M1 Max 
nlistmac, tlistmac =([8, 4, 2, 1],
 [38.151074171066284,
  75.41113114356995,
  146.3512842655182,
  285.11587405204773])
nlistmac = np.array(nlistmac)
tlistmac = np.array(tlistmac)



plt.plot(nlist, tlist/tref, marker='.', label='Genesis4 Perlmutter')
plt.plot(nlistmac, tlistmac/tref,  marker='.', label='Genesis4 M1 Max')
plt.plot(nlist,  1/(nlist/nlist[0]), '--', label='ideal')
plt.xscale('log')
plt.yscale('log')
plt.xlabel('n cores')
plt.ylabel('run time (normalized)')
plt.legend()