In [1]:
import boost_histogram as bh
import numpy as np
from numpy.testing import assert_allclose

In [2]:
import os
# os.cpu_count() returns None when the number of CPUs cannot be determined;
# fall back to a single worker so the chunking and thread-pool code that
# receives `threads` always gets a positive integer.
threads = os.cpu_count() or 1
print(f"threads: {threads}")

threads: 4


Threaded fill test setup

In [3]:
from concurrent.futures import ThreadPoolExecutor

def chunks(N, vals):
    """Split ``vals`` into exactly ``N`` contiguous slices and yield them in order.

    Every element of ``vals`` ends up in exactly one slice. When ``len(vals)``
    is not a multiple of ``N``, the first ``len(vals) % N`` slices are one
    element longer than the rest, so no trailing elements are lost. When
    ``N`` exceeds ``len(vals)``, the trailing slices are empty.

    Parameters
    ----------
    N : int
        Number of slices to produce (must be non-zero).
    vals : sequence
        Anything supporting ``len()`` and slice indexing.

    Yields
    ------
    Slices of ``vals`` (``vals[start:stop]``), ``N`` in total.
    """
    size, extra = divmod(len(vals), N)
    start = 0
    for n in range(N):
        # The first `extra` slices absorb one leftover element each.
        stop = start + size + (1 if n < extra else 0)
        yield vals[start:stop]
        start = stop

def threaded_fill(hist, threads, *data):
    """Fill ``hist`` from ``data`` using a pool of ``threads`` worker threads.

    Each array in ``data`` is cut into ``threads`` pieces with ``chunks``.
    Every worker fills its own reset copy of ``hist`` with one piece per
    array, and the partial histograms are then added into ``hist`` in place.

    Parameters
    ----------
    hist : histogram object providing ``copy()``, ``reset()``, ``fill()`` and ``+=``
    threads : int
        Number of worker threads and of pieces per array.
    *data : array-likes
        One array per histogram axis, passed on to ``fill`` piecewise.
    """
    def fill_blank_copy(*pieces):
        # Work on a private, reset copy so workers never fill the same object.
        blank = hist.copy().reset()
        return blank.fill(*pieces)

    # One chunk generator per input array; map() zips them piece by piece.
    per_array_pieces = [chunks(threads, arr) for arr in data]

    with ThreadPoolExecutor(threads) as executor:
        partials = executor.map(fill_blank_copy, *per_array_pieces)

    # Leaving the `with` block waits for all workers; now merge their results.
    for partial in partials:
        hist += partial

### Testing setup

This is just a simple 1D and 2D dataset to use for performance runs. The testing setup is the same as "MBP" in [this post](https://iscinumpy.gitlab.io/post/histogram-speeds-in-python/), a dual-core MacBook Pro 2015.

In [4]:
# Number of bins per axis, as int64.
bins = np.asarray((100, 100), dtype=np.int64)
# (low, high) limits per axis, as float64.
ranges = np.asarray(((-1, 1), (-1, 1)), dtype=np.float64)

# Explicit bin edges for each axis: n bins need n + 1 edges.
edges = tuple(np.linspace(lo, hi, n + 1) for (lo, hi), n in zip(ranges, bins))

In [5]:
# Seed NumPy's global generator so the samples below are the same on every run.
np.random.seed(42)
# Two rows of 1,000,000 normal samples, cast to float32; the 2D cells unpack
# the rows with *vals as the two coordinate arrays.
vals = np.random.normal(size=[2, 1_000_000]).astype(np.float32)
# 10,000,000 normal samples for the 1D cells. Drawn from the same global
# generator after `vals`, so the order of these two lines determines the values.
vals1d = np.random.normal(size=[10_000_000]).astype(np.float32)

#### Traditional 1D Numpy Histogram

This is reasonably optimized; it should provide good performance.

In [6]:
# Reference 1D counts: every 1D timing cell checks its result against `answer`.
answer, e = np.histogram(vals1d, bins=bins[0], range=ranges[0])

In [7]:
%%timeit
# NumPy baseline for the 1D case.
h, _ = np.histogram(vals1d, bins=bins[0], range=ranges[0])
# The check runs inside the timed cell body, so its cost is part of the reported time.
assert_allclose(h, answer, atol=1)

191 ms ± 93.2 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


#### Boost histogram 1D


In [8]:
%%timeit
# One regular axis with explicit Int64 storage; construction is part of the timed body.
hist = bh.Histogram(bh.axis.Regular(bins[0], *ranges[0]), storage=bh.storage.Int64())
# Single-threaded fill with the whole array in one call.
hist.fill(vals1d)
# Compared against the NumPy reference counts with an absolute tolerance of 1.
assert_allclose(hist, answer, atol=1)

89.7 ms ± 659 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


#### Boost histogram 1D numpy clone

In [9]:
%%timeit
# Same arguments as the np.histogram baseline, routed through boost_histogram's numpy module.
h, _ = bh.numpy.histogram(vals1d, bins=bins[0], range=ranges[0])
assert_allclose(h, answer, atol=1)

93.3 ms ± 1.15 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


#### Boost histogram in 1D, threaded

In [10]:
%%timeit
# Same histogram definition as the single-threaded 1D cell.
hist = bh.Histogram(bh.axis.Regular(bins[0], *ranges[0]), storage=bh.storage.Int64())

# Splits vals1d into `threads` chunks, fills per-chunk copies in a thread pool,
# then adds the partial histograms into `hist`.
threaded_fill(hist, threads, vals1d)
assert_allclose(hist, answer, atol=1)

39.9 ms ± 297 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


---

#### Traditional 2D Numpy histogram

Not as well optimized for regular filling.

In [11]:
# Reference 2D counts for the 2D timing cells; the star-unpacking collects the
# remaining return values of histogram2d (the edge arrays) into `ledges`.
answer2, *ledges = np.histogram2d(*vals, bins=bins, range=ranges)

In [12]:
%%timeit
# NumPy baseline for the 2D case; *vals passes the two rows as x and y.
H, *ledges = np.histogram2d(*vals, bins=bins, range=ranges)
# The check is inside the timed body, so the reported time includes it.
assert_allclose(H, answer2, atol=1)

103 ms ± 807 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


#### Boost histogram in 2D

In [13]:
%%timeit
# Two regular axes. No storage= argument is given here (unlike the 1D cells),
# so the histogram uses the library's default storage.
hist = bh.Histogram(bh.axis.Regular(bins[0], *ranges[0]),
                    bh.axis.Regular(bins[1], *ranges[1]))
# vals has two rows; unpacking passes them as the two coordinate arrays.
hist.fill(*vals)
assert_allclose(hist, answer2, atol=1)

18.9 ms ± 417 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


#### Boost histogram 2D numpy clone

In [14]:
%%timeit
# Same arguments as the np.histogram2d baseline, routed through boost_histogram's numpy module.
H, *ledges = bh.numpy.histogram2d(*vals, bins=bins, range=ranges)
assert_allclose(H, answer2, atol=1)

19.7 ms ± 548 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


#### Boost histogram in 2D, threaded

In [15]:
%%timeit
# Same histogram definition as the single-threaded 2D cell.
hist = bh.Histogram(bh.axis.Regular(bins[0], *ranges[0]),
                    bh.axis.Regular(bins[1], *ranges[1]))

# Each row of vals is chunked separately; matching chunks are filled together
# in worker threads and the partial histograms are added into `hist`.
threaded_fill(hist, threads, *vals)
assert_allclose(hist, answer2, atol=1)

9.4 ms ± 86.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
