In [None]:
import histogram as bh
import numpy as np

### Testing setup

This is just a simple 1D and 2D dataset to use for performance runs

In [3]:
# Number of bins per axis and the (low, high) range of each axis.
bins = np.array([100, 100], dtype=np.int64)
ranges = np.array([[-1, 1], [-1, 1]], dtype=np.float64)

# Evenly spaced bin edges for each axis: n bins are delimited by n + 1 edges.
edges = tuple(
    np.linspace(low, high, count + 1)
    for (low, high), count in zip(ranges, bins)
)

In [4]:
# Seed NumPy's global RNG so the benchmark samples are the same on every run.
np.random.seed(42)

# Two rows of one million normal samples (used by the 2D fills), drawn first,
# followed by ten million normal samples for the 1D fills; both stored as float32.
vals = np.random.normal(size=(2, 1_000_000)).astype(np.float32)
vals1d = np.random.normal(size=10_000_000).astype(np.float32)

#### Traditional 1D Numpy Histogram

This is reasonably optimized; it should provide good performance.

In [6]:
%%timeit
# Timed statement: NumPy 1D histogram of `vals1d`, using the bin count and
# range configured for the first axis. The returned edges are discarded.
h, _ = np.histogram(vals1d, bins=bins[0], range=ranges[0])

152 ms ± 4.2 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [22]:
# Reference result: compute the NumPy 1D histogram once more outside of the
# timing cell, keeping both the counts (h) and the bin edges (e).
h, e = np.histogram(vals1d, bins[0], ranges[0])
# Display the per-bin counts for comparison with the results below.
h

array([49139, 49761, 50805, 51651, 52915, 53467, 54582, 55463, 56731,
       57515, 58204, 59331, 60454, 61279, 61484, 63097, 63870, 64696,
       65378, 66135, 66613, 68022, 68356, 69019, 70288, 70713, 71097,
       72622, 72585, 73233, 73779, 74213, 74787, 75335, 75858, 76667,
       77048, 76647, 77833, 78253, 78345, 78266, 78985, 79224, 79101,
       79834, 79703, 80245, 79816, 79830, 79760, 79438, 79972, 79972,
       79466, 79343, 78753, 78487, 78659, 78400, 78090, 77723, 76940,
       76995, 76693, 75812, 75232, 75182, 75009, 74176, 73549, 72582,
       72038, 71691, 70884, 70175, 69502, 68426, 67972, 67464, 66436,
       65355, 64569, 63118, 63364, 62000, 61065, 59846, 59184, 58408,
       57381, 56651, 55795, 54707, 54081, 53015, 51995, 51000, 49697,
       48852])

#### Boost histogram in 1D: Axis vector

In [24]:
%%timeit
# Timed statements: build a `dense_int_histogram` from a one-element list of
# regular axes (wrapped in `regular_axes`), then fill it with the 1D sample.
hist = bh.dense_int_histogram(bh.axis.regular_axes([bh.axis.regular(bins[0], *ranges[0])]))
# Construction sits inside the timed body, so the measurement covers setup + fill.
hist.fill(vals1d)

119 ms ± 1.79 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [25]:
# Rebuild and fill the axis-vector histogram outside of the timing cell so its
# contents can be inspected.
hist = bh.dense_int_histogram(bh.axis.regular_axes([bh.axis.regular(bins[0], *ranges[0])]))
hist.fill(vals1d)
# Read back every bin. Iterate over the configured bin count instead of a
# hard-coded 100 so this cell stays correct if `bins` is changed in the setup.
np.array([hist.at(i) for i in range(bins[0])])

array([49139, 49761, 50805, 51651, 52915, 53467, 54582, 55463, 56731,
       57515, 58204, 59331, 60454, 61279, 61484, 63097, 63870, 64696,
       65378, 66135, 66613, 68022, 68356, 69019, 70288, 70713, 71097,
       72622, 72585, 73233, 73779, 74213, 74787, 75335, 75858, 76667,
       77048, 76647, 77833, 78253, 78345, 78266, 78985, 79224, 79101,
       79834, 79703, 80245, 79816, 79830, 79760, 79438, 79972, 79972,
       79466, 79343, 78753, 78487, 78659, 78400, 78090, 77723, 76940,
       76995, 76693, 75812, 75232, 75182, 75009, 74176, 73549, 72582,
       72038, 71691, 70884, 70175, 69502, 68426, 67972, 67464, 66436,
       65355, 64569, 63118, 63364, 62000, 61065, 59846, 59184, 58408,
       57381, 56651, 55795, 54707, 54081, 53015, 51995, 51000, 49697,
       48852])

#### Boost histogram in 1D: Dense Tuple

In [26]:
%%timeit
# Timed statements: build an `int_1d_histogram` from a one-element tuple
# holding a single regular axis, then fill it with the 1D sample.
hist = bh.int_1d_histogram((bh.axis.regular(bins[0], *ranges[0]),))
# Construction sits inside the timed body, so the measurement covers setup + fill.
hist.fill(vals1d)

89.3 ms ± 4.09 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [27]:
# Rebuild and fill the axis-tuple histogram outside of the timing cell so its
# contents can be inspected.
hist = bh.int_1d_histogram((bh.axis.regular(bins[0], *ranges[0]),))
hist.fill(vals1d)
# Read back every bin. Iterate over the configured bin count instead of a
# hard-coded 100 so this cell stays correct if `bins` is changed in the setup.
np.array([hist.at(i) for i in range(bins[0])])

array([49139, 49761, 50805, 51651, 52915, 53467, 54582, 55463, 56731,
       57515, 58204, 59331, 60454, 61279, 61484, 63097, 63870, 64696,
       65378, 66135, 66613, 68022, 68356, 69019, 70288, 70713, 71097,
       72622, 72585, 73233, 73779, 74213, 74787, 75335, 75858, 76667,
       77048, 76647, 77833, 78253, 78345, 78266, 78985, 79224, 79101,
       79834, 79703, 80245, 79816, 79830, 79760, 79438, 79972, 79972,
       79466, 79343, 78753, 78487, 78659, 78400, 78090, 77723, 76940,
       76995, 76693, 75812, 75232, 75182, 75009, 74176, 73549, 72582,
       72038, 71691, 70884, 70175, 69502, 68426, 67972, 67464, 66436,
       65355, 64569, 63118, 63364, 62000, 61065, 59846, 59184, 58408,
       57381, 56651, 55795, 54707, 54081, 53015, 51995, 51000, 49697,
       48852])

---

#### Traditional 2D Numpy histogram

Not as well optimized for regular filling.

In [37]:
%%timeit
# Timed statement: NumPy 2D histogram of the two rows of `vals`. The starred
# target collects everything returned after the counts (the edge arrays).
H, *ledges = np.histogram2d(*vals, bins=bins, range=ranges)

115 ms ± 458 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [29]:
# Count in the corner bin (0, 0) of the NumPy 2D histogram, shown as a
# reference value for the comparisons below.
np.histogram2d(vals[0], vals[1], bins=bins, range=ranges)[0][0, 0]

23.0

#### Boost histogram in 2D: Axis vector

In [32]:
%%timeit
# Timed statements: build a `dense_int_histogram` from a list of two regular
# axes (one per entry of `bins` / `ranges`), then fill it.
hist = bh.dense_int_histogram(bh.axis.regular_axes([bh.axis.regular(bins[0], *ranges[0]),
                                                    bh.axis.regular(bins[1], *ranges[1])]))
# `vals` has one row per axis and is handed to fill() in a single call.
hist.fill(vals)

22.3 ms ± 660 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [35]:
# Rebuild the 2D axis-vector histogram outside of the timing cell: one regular
# axis per (bin count, range) pair.
hist = bh.dense_int_histogram(
    bh.axis.regular_axes([bh.axis.regular(n, *lim) for n, lim in zip(bins, ranges)])
)
hist.fill(vals)
# Corner bin (0, 0), to compare against the NumPy result above.
hist.at(0, 0)

23

#### Boost histogram in 2D: Axis Tuple

In [36]:
%%timeit
# Timed statements: build an `int_2d_histogram` from a tuple of two regular
# axes (one per entry of `bins` / `ranges`), then fill it.
hist = bh.int_2d_histogram((bh.axis.regular(bins[0], *ranges[0]),
                            bh.axis.regular(bins[1], *ranges[1])))
# `vals` has one row per axis and is handed to fill() in a single call.
hist.fill(vals)

17.3 ms ± 82.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [38]:
# Rebuild the 2D axis-tuple histogram outside of the timing cell: one regular
# axis per (bin count, range) pair, packed into a tuple.
hist = bh.int_2d_histogram(
    tuple(bh.axis.regular(n, *lim) for n, lim in zip(bins, ranges))
)
hist.fill(vals)
# Corner bin (0, 0), to compare against the NumPy result above.
hist.at(0, 0)

23