In [1]:
import histogram as bh
import numpy as np

### Testing setup

This is just a simple 1D and 2D dataset to use for performance runs. The testing setup is the same as "MBP" in [this post](https://iscinumpy.gitlab.io/post/histogram-speeds-in-python/).

In [2]:
# Binning shared by every benchmark below: 100 bins per axis over [-1, 1].
bins = np.array((100, 100), dtype=np.int64)
ranges = np.array(((-1, 1), (-1, 1)), dtype=np.float64)

# Explicit bin edges for each axis: (number of bins + 1) evenly spaced points
# running from the lower to the upper bound of that axis.
edges = tuple(
    np.linspace(lower, upper, count + 1)
    for (lower, upper), count in zip(ranges, bins)
)

In [3]:
# Seed NumPy's global generator so the benchmark inputs are reproducible.
np.random.seed(42)

# Samples are drawn in double precision and then stored as float32.
# The 2D sample (two rows of one million values) is drawn first, followed by
# the ten-million-value 1D sample, so the random stream is consumed in a
# fixed order.
vals = np.random.normal(size=(2, 1_000_000)).astype("float32")
vals1d = np.random.normal(size=(10_000_000,)).astype("float32")

#### Traditional 1D Numpy Histogram

This is reasonably optimized; it should provide good performance.

In [4]:
%%timeit
# Baseline: time NumPy's 1D histogram over `vals1d`, using the first axis of
# the shared `bins` / `ranges` configuration. The returned edges are ignored.
h, _ = np.histogram(vals1d, bins=bins[0], range=ranges[0])

148 ms ± 1.55 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [5]:
# Run the NumPy histogram once, outside of %%timeit, so the counts (h) and
# the bin edges (e) are available; the counts are displayed as the cell output.
h, e = np.histogram(vals1d, range=ranges[0], bins=bins[0])
h

array([49139, 49761, 50805, 51651, 52915, 53467, 54582, 55463, 56731,
       57515, 58204, 59331, 60454, 61279, 61484, 63097, 63870, 64696,
       65378, 66135, 66613, 68022, 68356, 69019, 70288, 70713, 71097,
       72622, 72585, 73233, 73779, 74213, 74787, 75335, 75858, 76667,
       77048, 76647, 77833, 78253, 78345, 78266, 78985, 79224, 79101,
       79834, 79703, 80245, 79816, 79830, 79760, 79438, 79972, 79972,
       79466, 79343, 78753, 78487, 78659, 78400, 78090, 77723, 76940,
       76995, 76693, 75812, 75232, 75182, 75009, 74176, 73549, 72582,
       72038, 71691, 70884, 70175, 69502, 68426, 67972, 67464, 66436,
       65355, 64569, 63118, 63364, 62000, 61065, 59846, 59184, 58408,
       57381, 56651, 55795, 54707, 54081, 53015, 51995, 51000, 49697,
       48852])

#### Boost histogram: General vector

This histogram type can take any axis type.

In [6]:
%%timeit
hist = bh.hist.any_int([bh.axis.regular(bins[0], *ranges[0])])
hist(vals1d)

164 ms ± 4.63 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [7]:
# Fill the general (any-axis) histogram once, outside of %%timeit, so its
# counts can be inspected. This uses the same `any_int` histogram type that
# the timing cell of this section benchmarks; the specialized `regular_int`
# type belongs to the "Axis vector" section further down.
hist = bh.hist.any_int([bh.axis.regular(bins[0], *ranges[0])])
hist(vals1d)
np.asarray(hist)

array([1587613,   49139,   49761,   50805,   51651,   52915,   53467,
         54582,   55463,   56731,   57515,   58204,   59331,   60454,
         61279,   61484,   63097,   63870,   64696,   65378,   66135,
         66613,   68022,   68356,   69019,   70288,   70713,   71097,
         72622,   72585,   73233,   73779,   74213,   74787,   75335,
         75858,   76667,   77048,   76647,   77833,   78253,   78345,
         78266,   78985,   79224,   79101,   79834,   79703,   80245,
         79816,   79830,   79760,   79438,   79972,   79972,   79466,
         79343,   78753,   78487,   78659,   78400,   78090,   77723,
         76940,   76995,   76693,   75812,   75232,   75182,   75009,
         74176,   73549,   72582,   72038,   71691,   70884,   70175,
         69502,   68426,   67972,   67464,   66436,   65355,   64569,
         63118,   63364,   62000,   61065,   59846,   59184,   58408,
         57381,   56651,   55795,   54707,   54081,   53015,   51995,
         51000,   49

In [8]:
# Display the counts of the `hist` object filled in the previous cell again.
np.asarray(hist)

array([1587613,   49139,   49761,   50805,   51651,   52915,   53467,
         54582,   55463,   56731,   57515,   58204,   59331,   60454,
         61279,   61484,   63097,   63870,   64696,   65378,   66135,
         66613,   68022,   68356,   69019,   70288,   70713,   71097,
         72622,   72585,   73233,   73779,   74213,   74787,   75335,
         75858,   76667,   77048,   76647,   77833,   78253,   78345,
         78266,   78985,   79224,   79101,   79834,   79703,   80245,
         79816,   79830,   79760,   79438,   79972,   79972,   79466,
         79343,   78753,   78487,   78659,   78400,   78090,   77723,
         76940,   76995,   76693,   75812,   75232,   75182,   75009,
         74176,   73549,   72582,   72038,   71691,   70884,   70175,
         69502,   68426,   67972,   67464,   66436,   65355,   64569,
         63118,   63364,   62000,   61065,   59846,   59184,   58408,
         57381,   56651,   55795,   54707,   54081,   53015,   51995,
         51000,   49

#### Boost histogram in 1D: Axis vector

In [9]:
%%timeit
hist = bh.hist.regular_int([bh.axis.regular(bins[0], *ranges[0])])
hist(vals1d)

97.6 ms ± 9.47 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [10]:
# Same construction and fill as the timed cell of this section, run once so
# the counts can be displayed. The lower/upper bounds of the first axis are
# passed explicitly.
hist = bh.hist.regular_int(
    [bh.axis.regular(bins[0], ranges[0, 0], ranges[0, 1])]
)
hist(vals1d)
np.asarray(hist)

array([1587613,   49139,   49761,   50805,   51651,   52915,   53467,
         54582,   55463,   56731,   57515,   58204,   59331,   60454,
         61279,   61484,   63097,   63870,   64696,   65378,   66135,
         66613,   68022,   68356,   69019,   70288,   70713,   71097,
         72622,   72585,   73233,   73779,   74213,   74787,   75335,
         75858,   76667,   77048,   76647,   77833,   78253,   78345,
         78266,   78985,   79224,   79101,   79834,   79703,   80245,
         79816,   79830,   79760,   79438,   79972,   79972,   79466,
         79343,   78753,   78487,   78659,   78400,   78090,   77723,
         76940,   76995,   76693,   75812,   75232,   75182,   75009,
         74176,   73549,   72582,   72038,   71691,   70884,   70175,
         69502,   68426,   67972,   67464,   66436,   65355,   64569,
         63118,   63364,   62000,   61065,   59846,   59184,   58408,
         57381,   56651,   55795,   54707,   54081,   53015,   51995,
         51000,   49

#### Boost histogram in 1D: Dense Tuple

In [11]:
%%timeit
hist = bh.hist.regular_int_1d((bh.axis.regular(bins[0], *ranges[0]),))
hist(vals1d)

94.6 ms ± 4.27 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [12]:
# Same construction and fill as the timed cell of this section, run once so
# the counts can be displayed. The axes are given as a one-element tuple.
hist = bh.hist.regular_int_1d(
    (bh.axis.regular(bins[0], ranges[0, 0], ranges[0, 1]),)
)
hist(vals1d)
np.asarray(hist)

array([1587613,   49139,   49761,   50805,   51651,   52915,   53467,
         54582,   55463,   56731,   57515,   58204,   59331,   60454,
         61279,   61484,   63097,   63870,   64696,   65378,   66135,
         66613,   68022,   68356,   69019,   70288,   70713,   71097,
         72622,   72585,   73233,   73779,   74213,   74787,   75335,
         75858,   76667,   77048,   76647,   77833,   78253,   78345,
         78266,   78985,   79224,   79101,   79834,   79703,   80245,
         79816,   79830,   79760,   79438,   79972,   79972,   79466,
         79343,   78753,   78487,   78659,   78400,   78090,   77723,
         76940,   76995,   76693,   75812,   75232,   75182,   75009,
         74176,   73549,   72582,   72038,   71691,   70884,   70175,
         69502,   68426,   67972,   67464,   66436,   65355,   64569,
         63118,   63364,   62000,   61065,   59846,   59184,   58408,
         57381,   56651,   55795,   54707,   54081,   53015,   51995,
         51000,   49

#### Boost histogram in 1D: Dense Tuple (no overflow/underflow)

In [13]:
%%timeit
hist = bh.hist.regular_int_noflow_1d((bh.axis.regular_noflow(bins[0], *ranges[0]),))
hist(vals1d)

87 ms ± 4.5 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [14]:
# Same construction and fill as the timed cell of this section, run once so
# the counts can be displayed. The displayed counts line up with the NumPy
# result shown earlier in the notebook.
hist = bh.hist.regular_int_noflow_1d(
    (bh.axis.regular_noflow(bins[0], ranges[0, 0], ranges[0, 1]),)
)
hist(vals1d)
np.asarray(hist)

array([49139, 49761, 50805, 51651, 52915, 53467, 54582, 55463, 56731,
       57515, 58204, 59331, 60454, 61279, 61484, 63097, 63870, 64696,
       65378, 66135, 66613, 68022, 68356, 69019, 70288, 70713, 71097,
       72622, 72585, 73233, 73779, 74213, 74787, 75335, 75858, 76667,
       77048, 76647, 77833, 78253, 78345, 78266, 78985, 79224, 79101,
       79834, 79703, 80245, 79816, 79830, 79760, 79438, 79972, 79972,
       79466, 79343, 78753, 78487, 78659, 78400, 78090, 77723, 76940,
       76995, 76693, 75812, 75232, 75182, 75009, 74176, 73549, 72582,
       72038, 71691, 70884, 70175, 69502, 68426, 67972, 67464, 66436,
       65355, 64569, 63118, 63364, 62000, 61065, 59846, 59184, 58408,
       57381, 56651, 55795, 54707, 54081, 53015, 51995, 51000, 49697,
       48852], dtype=uint64)

---

#### Traditional 2D Numpy histogram

Not as well optimized for regular filling.

In [15]:
%%timeit
H, *ledges = np.histogram2d(*vals, bins=bins, range=ranges)

134 ms ± 9.86 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [16]:
# Run the NumPy 2D histogram once and display only the counts (element 0 of
# the returned tuple); the edge arrays are discarded.
np.histogram2d(vals[0], vals[1], range=ranges, bins=bins)[0]

array([[23., 21., 26., ..., 28., 36., 26.],
       [17., 33., 30., ..., 23., 16., 22.],
       [21., 22., 29., ..., 22., 16., 26.],
       ...,
       [23., 27., 22., ..., 30., 22., 23.],
       [17., 32., 22., ..., 19., 29., 25.],
       [20., 38., 25., ..., 24., 13., 20.]])

#### Boost histogram in 2D: Axis vector

In [17]:
%%timeit
hist = bh.hist.regular_int([bh.axis.regular(bins[0], *ranges[0]),
                            bh.axis.regular(bins[1], *ranges[1])])
hist(*vals)

18.6 ms ± 732 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [18]:
# Same construction and fill as the timed cell of this section, run once so
# the counts can be displayed. One regular axis per dimension, given as a list.
hist = bh.hist.regular_int(
    [
        bh.axis.regular(bins[0], ranges[0, 0], ranges[0, 1]),
        bh.axis.regular(bins[1], ranges[1, 0], ranges[1, 1]),
    ]
)
hist(vals[0], vals[1])
np.asarray(hist)

array([[25628,   767,   817, ...,   816,   727, 25266],
       [  779,    23,    21, ...,    36,    26,   810],
       [  794,    17,    33, ...,    16,    22,   799],
       ...,
       [  793,    17,    32, ...,    29,    25,   832],
       [  749,    20,    38, ...,    13,    20,   735],
       [25253,   748,   788, ...,   796,   805, 25147]], dtype=uint64)

#### Boost histogram in 2D: Axis Tuple

In [19]:
%%timeit
hist = bh.hist.regular_int_2d((bh.axis.regular(bins[0], *ranges[0]),
                               bh.axis.regular(bins[1], *ranges[1])))
hist(*vals)

19 ms ± 809 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [20]:
# Same construction and fill as the timed cell of this section, run once so
# the counts can be displayed. One regular axis per dimension, given as a tuple.
hist = bh.hist.regular_int_2d(
    (
        bh.axis.regular(bins[0], ranges[0, 0], ranges[0, 1]),
        bh.axis.regular(bins[1], ranges[1, 0], ranges[1, 1]),
    )
)
hist(vals[0], vals[1])
np.asarray(hist)

array([[25628,   767,   817, ...,   816,   727, 25266],
       [  779,    23,    21, ...,    36,    26,   810],
       [  794,    17,    33, ...,    16,    22,   799],
       ...,
       [  793,    17,    32, ...,    29,    25,   832],
       [  749,    20,    38, ...,    13,    20,   735],
       [25253,   748,   788, ...,   796,   805, 25147]], dtype=uint64)

#### Boost histogram in 2D: Axis Tuple noflow

In [21]:
%%timeit
hist = bh.hist.regular_int_noflow_2d((bh.axis.regular_noflow(bins[0], *ranges[0]),
                               bh.axis.regular_noflow(bins[1], *ranges[1])))
hist(*vals)

18.4 ms ± 1.33 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [22]:
# Same construction and fill as the timed cell of this section, run once so
# the counts can be displayed. The displayed counts line up with the
# np.histogram2d result shown earlier in the notebook.
hist = bh.hist.regular_int_noflow_2d(
    (
        bh.axis.regular_noflow(bins[0], ranges[0, 0], ranges[0, 1]),
        bh.axis.regular_noflow(bins[1], ranges[1, 0], ranges[1, 1]),
    )
)
hist(vals[0], vals[1])
np.asarray(hist)

array([[23, 21, 26, ..., 28, 36, 26],
       [17, 33, 30, ..., 23, 16, 22],
       [21, 22, 29, ..., 22, 16, 26],
       ...,
       [23, 27, 22, ..., 30, 22, 23],
       [17, 32, 22, ..., 19, 29, 25],
       [20, 38, 25, ..., 24, 13, 20]], dtype=uint64)