# Benchmarking the Numba vs. Fortran Bessel backends of TimML, with and without multiprocessing

In [1]:
import multiprocessing as mp

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import timml

In [2]:
mp.cpu_count()  # number of CPUs as seen by Python; hyper-threads are included in the count

12

In [3]:
# Process counts to benchmark:
#   0        -> the original solver, ml.solve()
#   non-zero -> passed on as ml.solve_mp(nproc=X)
nproc_list = [0, 2, 4, 6, 12]

In [4]:
def cbg(val):
    """Return a CSS ``background-color`` rule for one table cell.

    ``val`` is scaled with the notebook-level ``norm`` and looked up in the
    notebook-level ``cmap``; both globals must exist when this is called.
    """
    rgba = cmap(norm(val))
    hex_color = mpl.colors.rgb2hex(rgba)
    return f"background-color: {hex_color}"


def ctxt(val):
    """Return a CSS ``color`` rule that stays readable on the cell background.

    The background is the colour that the notebook-level ``cmap``/``norm``
    assign to ``val``. White text is used when the mean of its RGB channels
    is below 0.4, black text otherwise.
    """
    rgb = cmap(norm(val))[:3]  # drop the alpha channel
    text_color = "Black"
    if np.mean(rgb) < 0.4:
        text_color = "White"
    return f"color: {text_color}"

## Model 1: LineSinks

In [5]:
def model1():
    """Build benchmark model 1: wells plus four head line-sink strings.

    Returns
    -------
    ml : timml.ModelMaq
        The model with all elements added; it is not solved here.
    """
    ml = timml.ModelMaq(
        kaq=[2, 6, 4],
        z=[165, 140, 120, 80, 60, 0],
        c=[2000, 20000],
        npor=0.3,
    )

    # Elements are added in a fixed order: constant, areal sink, wells, line-sinks.
    timml.Constant(ml, xr=20000, yr=20000, hr=175, layer=0)
    timml.CircAreaSink(ml, xc=10000, yc=10000, R=15000, N=0.0002, layer=0)

    wells = [
        dict(xw=10000, yw=8000, Qw=1000, layers=0, label="well 1"),
        dict(xw=12100, yw=10700, Qw=5000, layers=2, label="well 2"),
        dict(xw=10000, yw=4600, Qw=5000, layers=[1, 2], label="maq well"),
    ]
    for well in wells:
        timml.Well(ml, rw=0.3, **well)

    # Each entry is (vertices, heads). The first three strings give a head at
    # the first and last vertex only; the fourth gives one value per vertex,
    # with NaN where no head is specified.
    linesink_strings = [
        (
            [
                (833, 14261),
                (3229, 14843),
                (6094, 15885),
                (8385, 15677),
                (10781, 14895),
                (12753, 14976),
            ],
            [176, 166],
        ),
        (
            [
                (356, 6976),
                (4043, 7153),
                (6176, 8400),
                (9286, 9820),
                (12266, 9686),
                (15066, 9466),
            ],
            [174, 162],
        ),
        (
            [
                (1376, 1910),
                (4176, 2043),
                (6800, 1553),
                (9953, 2086),
                (14043, 2043),
                (17600, 976),
            ],
            [170, 156],
        ),
        (
            [
                (9510, 19466),
                (12620, 17376),
                (12753, 14976),
                (13020, 12176),
                (15066, 9466),
                (16443, 7910),
                (17510, 5286),
                (17600, 976),
            ],
            [170, np.nan, 166, np.nan, 162, np.nan, np.nan, 156],
        ),
    ]
    for vertices, heads in linesink_strings:
        timml.HeadLineSinkString(ml, xy=vertices, hls=heads, layers=0)

    return ml

In [6]:
# Timing table for model 1: one row per process count, one column per Bessel
# backend. Cells are filled with the mean %timeit run time in seconds.
df_ls = pd.DataFrame(index=nproc_list, columns=["numba", "fortran"])

# Numba
# NOTE(review): the first numba timing may include JIT compilation -- confirm
# whether a warm-up solve is needed before timing.
timml.bessel.set_bessel_method(method="numba")
print(timml.bessel.bessel)  # print module name to check if switch works

for nproc in df_ls.index:
    # build a fresh model for every process count
    ml = model1()

    if nproc == 0:
        # nproc == 0 means the original solver
        t_n = %timeit -o ml.solve(silent=True)
    else:
        t_n = %timeit -o ml.solve_mp(nproc=nproc, silent=True)

    # -o makes %timeit return its result object; store the mean run time
    df_ls.loc[nproc, "numba"] = t_n.average

# %%
# Fortran
timml.bessel.set_bessel_method(method="fortran")
print(timml.bessel.bessel)  # print module name to check if switch works

for nproc in df_ls.index:
    # build a fresh model for every process count
    ml = model1()

    if nproc == 0:
        # nproc == 0 means the original solver
        t_f = %timeit -o ml.solve(silent=True)
    else:
        t_f = %timeit -o ml.solve_mp(nproc=nproc, silent=True)

    # store the mean run time
    df_ls.loc[nproc, "fortran"] = t_f.average

<module 'timml.besselaesnumba.besselaesnumba' from '/home/david/Github/timml/timml/besselaesnumba/besselaesnumba.py'>
63.1 ms ± 7.82 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
56.6 ms ± 3.41 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
66.8 ms ± 4.43 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
79.1 ms ± 3.59 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
110 ms ± 4.11 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
<fortran object>
60.8 ms ± 7.88 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
85.8 ms ± 25.7 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
66.1 ms ± 3.59 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
75.1 ms ± 2.46 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
108 ms ± 3.43 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [7]:
# Colour scale for the timing tables: reversed RdYlGn, so short run times are
# green and long run times are red, log-scaled over 50-100 (milliseconds here).
# plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
# plt.get_cmap returns the same colormap and works on old and new versions.
cmap = plt.get_cmap("RdYlGn_r")
norm = mpl.colors.LogNorm(vmin=50, vmax=100)

df_ls.index.name = "nproc"
# Convert seconds to milliseconds, then colour each cell (cbg) and pick a
# readable text colour (ctxt).
# NOTE: Styler.applymap is deprecated since pandas 2.1 in favour of Styler.map;
# it is kept here so the notebook still runs on older pandas.
(df_ls * 1e3).style.applymap(cbg).applymap(ctxt)

Unnamed: 0_level_0,numba,fortran
nproc,Unnamed: 1_level_1,Unnamed: 2_level_1
0,63.098863,60.770613
2,56.611758,85.849108
4,66.751633,66.122373
6,79.098145,75.050343
12,109.522807,108.247371


In [8]:
# Run-time ratio numba / fortran per process count (values > 1: numba is slower).
df_ls["numba"] / df_ls["fortran"]

nproc
0     1.038312
2     0.659433
4     1.009517
6     1.053934
12    1.011783
dtype: object

In [9]:
# Speed-up relative to the first row (nproc=0, i.e. ml.solve()); values > 1 are faster.
# Speed-up with multiprocessing?  ->  no (only numba at nproc=2 is above 1).
1 / (df_ls / df_ls.iloc[0])

Unnamed: 0_level_0,numba,fortran
nproc,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1.0,1.0
2,1.114589,0.707877
4,0.945278,0.919063
6,0.797729,0.809731
12,0.576125,0.561405


## Model 2: Building Pit

In [10]:
def model2():
    """Build benchmark model 2: a rectangular building pit with three line sinks.

    Returns
    -------
    ml : timml.ModelMaq
        The model with all elements added; it is not solved here.
    """
    # Aquifer properties
    kh = 2.0  # conductivity passed as kaq, m/day
    f_ani = 0.05  # anisotropy factor
    kv = f_ani * kh
    ctop = 800.0  # resistance top leaky layer in days

    # Elevations
    ztop = 0.0  # surface elevation
    z_dw = -15.0  # bottom elevation of sheetpile wall
    z_extra = z_dw - 15.0  # extra layer
    zbot = -60.0  # bottom elevation of the model

    # Pit geometry
    length = 40.0  # length building pit in m
    width = 30.0  # width building pit in m
    half_length = length / 2
    half_width = width / 2

    h_bem = -6.21  # m
    offset = 5.0  # distance groundwater extraction element from sheetpiles in m

    # Closed rectangle: the first corner is repeated at the end.
    corners = [
        (-half_length, -half_width),
        (half_length, -half_width),
        (half_length, half_width),
        (-half_length, half_width),
        (-half_length, -half_width),
    ]

    z = np.array([ztop + 1, ztop, z_dw, z_dw, z_extra, z_extra, zbot])
    # Odd entries minus the following even entries give one thickness per layer.
    dz = z[1::2] - z[2::2]
    kh_arr = np.full(dz.shape, kh)
    # Resistance between two layers: half of each layer's thickness / kv.
    half_resistance = dz / (2 * kv)
    c = np.concatenate(([ctop], half_resistance[:-1] + half_resistance[1:]))

    ml = timml.ModelMaq(kaq=kh_arr, z=z, c=c, topboundary="semi", hstar=0.0)

    # Layers whose bottom is at or above the bottom of the sheetpile wall.
    n_wall_layers = np.sum(z_dw <= ml.aq.zaqbot)
    layers = np.arange(n_wall_layers)
    last_lay_dw = layers[-1]

    timml.BuildingPit(
        ml,
        corners,
        kaq=kh_arr,
        z=z[1:],
        topboundary="conf",
        c=c[1:],
        order=4,
        ndeg=3,
        layers=layers,
    )

    # Three parallel line sinks inside the pit, in this order: `offset` from
    # the wall at y = +width/2, the centre line, `offset` from y = -width/2.
    x_start = -half_length + offset
    x_end = half_length - offset
    for y in (half_width - offset, 0, -half_width + offset):
        timml.HeadLineSink(
            ml,
            x1=x_start,
            y1=y,
            x2=x_end,
            y2=y,
            hls=h_bem,
            layers=np.arange(last_lay_dw + 1),
        )

    return ml

In [11]:
# Timing table for model 2: one row per process count, one column per Bessel
# backend. Cells are filled with the mean %timeit run time in seconds.
df_bp = pd.DataFrame(index=nproc_list, columns=["numba", "fortran"])

# Numba
timml.bessel.set_bessel_method(method="numba")
print(timml.bessel.bessel)

for nproc in df_bp.index:
    # build a fresh model for every process count
    ml = model2()

    if nproc == 0:
        # nproc == 0 means the original solver
        t_n = %timeit -o ml.solve(silent=True)
    else:
        t_n = %timeit -o ml.solve_mp(nproc=nproc, silent=True)

    # -o makes %timeit return its result object; store the mean run time
    df_bp.loc[nproc, "numba"] = t_n.average

# Fortran
timml.bessel.set_bessel_method(method="fortran")
print(timml.bessel.bessel)

for nproc in df_bp.index:
    # build a fresh model for every process count
    ml = model2()

    if nproc == 0:
        # nproc == 0 means the original solver
        t_f = %timeit -o ml.solve(silent=True)
    else:
        t_f = %timeit -o ml.solve_mp(nproc=nproc, silent=True)

    # store the mean run time
    df_bp.loc[nproc, "fortran"] = t_f.average

<module 'timml.besselaesnumba.besselaesnumba' from '/home/david/Github/timml/timml/besselaesnumba/besselaesnumba.py'>
683 ms ± 24.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
414 ms ± 13.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
273 ms ± 7.52 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
267 ms ± 27 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
267 ms ± 13.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
<fortran object>
456 ms ± 6.69 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
255 ms ± 10.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
202 ms ± 9.76 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
199 ms ± 6.04 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
224 ms ± 10.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [12]:
# Rebind the global norm read by cbg/ctxt to this model's timing range
# (200-600, in milliseconds after the * 1e3 below).
norm = mpl.colors.LogNorm(vmin=200, vmax=600)

df_bp.index.name = "nproc"
# Convert seconds to milliseconds, then colour each cell and its text.
(df_bp * 1e3).style.applymap(cbg).applymap(ctxt)

Unnamed: 0_level_0,numba,fortran
nproc,Unnamed: 1_level_1,Unnamed: 2_level_1
0,682.813134,455.737164
2,413.56049,255.041644
4,272.796072,202.45203
6,266.875414,198.993623
12,267.26177,223.631861


In [13]:
# Run-time ratio numba / fortran per process count (values > 1: numba is slower).
df_bp["numba"] / df_bp["fortran"]

nproc
0     1.498261
2     1.621541
4      1.34746
6     1.341125
12    1.195097
dtype: object

In [14]:
# Speed-up relative to the first row (nproc=0, i.e. ml.solve()); values > 1 are faster.
# Speed-up with multiprocessing?  ->  yes (up to about 2.5x, levelling off beyond 4 processes).
1 / (df_bp / df_bp.iloc[0])

Unnamed: 0_level_0,numba,fortran
nproc,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1.0,1.0
2,1.65106,1.786913
4,2.503017,2.251087
6,2.558546,2.29021
12,2.554848,2.03789


## Model 3: Two polygonal inhomogeneities

In [15]:
def model3():
    """Build benchmark model 3: two polygonal inhomogeneities, uniform flow, one well.

    Returns
    -------
    ml : timml.ModelMaq
        The model with all elements added; it is not solved here.
    """
    ml = timml.ModelMaq(kaq=[10, 20], z=[20, 0, -10, -30], c=[4000])

    # The two polygons differ only in outline, conductivities and resistance.
    inhomogeneities = [
        dict(
            xy=[
                (0, 600),
                (-100, 400),
                (-100, 200),
                (100, 100),
                (300, 100),
                (500, 100),
                (700, 300),
                (700, 500),
                (600, 700),
                (400, 700),
                (200, 600),
            ],
            kaq=[2, 80],
            c=[500],
        ),
        dict(
            xy=[
                (0, 600),
                (200, 600),
                (400, 700),
                (400, 900),
                (200, 1100),
                (0, 1000),
                (-100, 800),
            ],
            kaq=[2, 8],
            c=[50],
        ),
    ]
    for inhom in inhomogeneities:
        timml.PolygonInhomMaq(
            ml,
            z=[20, 0, -10, -30],  # same elevations as the background model
            topboundary="conf",
            order=4,
            ndeg=2,
            **inhom,
        )

    timml.Constant(ml, xr=1000, yr=0, hr=40)
    timml.Uflow(ml, slope=0.002, angle=-45)
    timml.Well(ml, xw=400, yw=400, Qw=500, rw=0.2, layers=0)
    return ml

In [16]:
# Timing table for model 3: one row per process count, one column per Bessel
# backend. Cells are filled with the mean %timeit run time in seconds.
df = pd.DataFrame(index=nproc_list, columns=["numba", "fortran"])

# Numba
timml.bessel.set_bessel_method(method="numba")
print(timml.bessel.bessel)

for nproc in df.index:
    # build a fresh model for every process count
    ml = model3()

    if nproc == 0:
        # nproc == 0 means the original solver
        t_n = %timeit -o ml.solve(silent=True)
    else:
        t_n = %timeit -o ml.solve_mp(nproc=nproc, silent=True)

    # -o makes %timeit return its result object; store the mean run time
    df.loc[nproc, "numba"] = t_n.average

# Fortran
timml.bessel.set_bessel_method(method="fortran")
print(timml.bessel.bessel)

for nproc in df.index:
    # build a fresh model for every process count
    ml = model3()

    if nproc == 0:
        # nproc == 0 means the original solver
        t_f = %timeit -o ml.solve(silent=True)
    else:
        t_f = %timeit -o ml.solve_mp(nproc=nproc, silent=True)

    # store the mean run time
    df.loc[nproc, "fortran"] = t_f.average

<module 'timml.besselaesnumba.besselaesnumba' from '/home/david/Github/timml/timml/besselaesnumba/besselaesnumba.py'>
1.29 s ± 92.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
789 ms ± 65.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
504 ms ± 14.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
496 ms ± 56.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
455 ms ± 10.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
<fortran object>
964 ms ± 11.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
589 ms ± 14.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
406 ms ± 26.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
456 ms ± 77.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
419 ms ± 19.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [17]:
# Rebind the global norm read by cbg/ctxt to this model's timing range
# (300-1300, in milliseconds after the * 1e3 below).
norm = mpl.colors.LogNorm(vmin=300, vmax=1300)

df.index.name = "nproc"
# Convert seconds to milliseconds, then colour each cell and its text.
(df * 1e3).style.applymap(cbg).applymap(ctxt)

Unnamed: 0_level_0,numba,fortran
nproc,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1292.377238,964.076019
2,789.374683,589.482877
4,503.717363,406.243932
6,495.830997,455.830646
12,454.623364,418.791852


In [18]:
# Run-time ratio numba / fortran per process count (values > 1: numba is slower).
df["numba"] / df["fortran"]

nproc
0     1.340535
2     1.339097
4     1.239938
6     1.087753
12    1.085559
dtype: object

In [19]:
# Speed-up relative to the first row (nproc=0, i.e. ml.solve()); values > 1 are faster.
# Speed-up with multiprocessing?  ->  yes (up to about 2.8x for numba and 2.4x for fortran).
1 / (df / df.iloc[0])

Unnamed: 0_level_0,numba,fortran
nproc,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1.0,1.0
2,1.637216,1.635461
4,2.565679,2.373146
6,2.606487,2.114987
12,2.842743,2.302041
