In [1]:
import numpy as np
import boost_histogram as bh
from concurrent.futures import ThreadPoolExecutor
from functools import reduce
from operator import add

This notebook experiments with parallel filling.

In [2]:
hist_linear = bh.histogram(bh.axis.regular(1000,0,1))
hist_badlin = bh.histogram(bh.axis.regular(1000,0,1))
hist_ltomic = bh.histogram(bh.axis.regular(1000,0,1),
                          storage=bh.storage.AtomicInt())
hist_atomic = bh.histogram(bh.axis.regular(1000,0,1),
                                storage=bh.storage.AtomicInt())

print('hist_linear:', type(hist_linear))
print('hist_atomic:', type(hist_atomic))

vals = np.random.rand(1_000_000)

hist_linear: <class 'histogram.hist.regular_int_1d'>
hist_atomic: <class 'histogram.hist.regular_int_atomic_1d'>


This is a traditional fill.

In [3]:
%%time
hist_linear(vals)

CPU times: user 284 ms, sys: 3.85 ms, total: 288 ms
Wall time: 295 ms


This is a single threaded atomic fill.

In [4]:
%%time
hist_ltomic(vals)

CPU times: user 290 ms, sys: 4.76 ms, total: 295 ms
Wall time: 301 ms


This is a threaded fill (storage not threadsafe, so will get the wrong result; just for comparison)

In [5]:
%%time
with ThreadPoolExecutor(4) as pool:
    for i in range(4):
        v = pool.submit(hist_badlin, vals[i*250_000:(i+1)*250_000])

CPU times: user 505 ms, sys: 4.76 ms, total: 509 ms
Wall time: 148 ms


In [6]:
compare = np.asarray(hist_linear) == np.asarray(hist_badlin)
print(compare, np.all(compare))

[ True  True False ...  True  True  True] False


This is a threaded fill, this time with atomics

In [7]:
%%time
with ThreadPoolExecutor(4) as pool:
    for i in range(4):
        pool.submit(hist_atomic, vals[i*250_000:(i+1)*250_000])

CPU times: user 511 ms, sys: 4.91 ms, total: 516 ms
Wall time: 154 ms


In [8]:
compare = np.asarray(hist_linear) == np.asarray(hist_atomic)
print(compare, np.all(compare))

[ True  True  True ...  True  True  True] True


This makes four seperate histograms, then fills them and adds at the end.

In [9]:
def fun(x):
    hist = bh.histogram(bh.axis.regular(1000,0,1))
    hist(x)
    return hist

In [10]:
%%time
with ThreadPoolExecutor(4) as pool:
    for i in range(4):
        results = pool.map(fun, (vals[i*250_000:(i+1)*250_000] for i in range(4)))

CPU times: user 1.99 s, sys: 15.6 ms, total: 2.01 s
Wall time: 599 ms


In [11]:
hist_quad = reduce(add, results)

In [12]:
compare = np.asarray(hist_linear) == np.asarray(hist_quad)
print(compare, np.all(compare))

[ True  True  True ...  True  True  True] True
