/
TH1.py
357 lines (295 loc) · 11.8 KB
/
TH1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE
"""
This module defines the behaviors of ``TH1`` and its subclasses (not including ``TH2``,
``TH3``, or ``TProfile``).
"""
from __future__ import annotations
import numpy
import uproot
# Default histogram-level metadata used by ``Histogram.to_boost`` and
# ``Histogram.to_hist``: attribute name to set on the output object (key)
# mapped to the C++ class member it is read from (value).
boost_metadata = {
    "name": "fName",
    "label": "fTitle",
}

# Default per-axis metadata, same key/value convention as above. Kept as a
# separate dict object so the two defaults can be customized independently.
boost_axis_metadata = {
    "name": "fName",
    "label": "fTitle",
}
def _boost_axis(axis, metadata):
    """
    Args:
        axis: Object exposing ``member(name, none_if_missing=...)``; its
            ``fNbins``, ``fXbins``, ``fLabels``, ``fXmin`` and ``fXmax``
            members are consulted.
        metadata (dict of str \u2192 str): Attribute names to set on the
            resulting axis (keys) and the members of ``axis`` they are read
            from (values).

    Builds a ``boost-histogram`` axis from ``axis``:

    * if ``fLabels`` is present, an ``IntCategory`` when every label converts
      with ``int``, otherwise a ``StrCategory``;
    * else, if ``fXbins`` holds exactly ``fNbins + 1`` entries, a ``Variable``
      axis over those entries;
    * else a ``Regular`` axis from ``fXmin`` to ``fXmax`` with ``fNbins`` bins.

    Non-category axes are created with underflow and overflow enabled.
    """
    bh = uproot.extras.boost_histogram()

    nbins = axis.member("fNbins")
    edges = axis.member("fXbins", none_if_missing=True)
    raw_labels = axis.member("fLabels", none_if_missing=True)

    if raw_labels is not None:
        try:
            categories = [int(x) for x in raw_labels]
        except ValueError:
            # At least one label is not an integer literal: keep them as text.
            categories = [str(x) for x in raw_labels]
            make_axis = bh.axis.StrCategory
        else:
            make_axis = bh.axis.IntCategory
        result = make_axis(categories)
    elif edges is not None and len(edges) == nbins + 1:
        # A complete edge list is available: use it verbatim.
        result = bh.axis.Variable(edges, underflow=True, overflow=True)
    else:
        # No usable edge list: fall back to evenly spaced bins.
        result = bh.axis.Regular(
            nbins,
            axis.member("fXmin"),
            axis.member("fXmax"),
            underflow=True,
            overflow=True,
        )

    for attr_name, member_name in metadata.items():
        setattr(result, attr_name, axis.member(member_name))

    return result
class Histogram:
    """
    Abstract class for histograms.

    This class does not define ``member`` or ``_values_variances`` itself but
    calls both, and it leaves ``axes``, ``axis``, ``kind`` and ``values``
    raising ``NotImplementedError``; concrete histogram classes must provide
    all of them.
    """

    @property
    def name(self):
        """
        The name of the histogram.
        """
        return self.member("fName")

    @property
    def title(self):
        """
        The title of the histogram.
        """
        return self.member("fTitle")

    def __eq__(self, other):
        """
        Two histograms are equal if their axes are equal, their values are equal,
        and their variances are equal.

        Objects of a different exact type are never equal. Values and
        variances are compared including the flow bins.
        """
        # Exact type comparison (not isinstance): a subclass instance does not
        # compare equal to an instance of its parent class.
        if type(self) is not type(other):
            return False

        if self.axes != other.axes:
            return False

        self_values, self_variances = self._values_variances(True)
        other_values, other_variances = other._values_variances(True)

        values_equal = numpy.array_equal(self_values, other_values)
        variances_equal = numpy.array_equal(self_variances, other_variances)
        return values_equal and variances_equal

    def __ne__(self, other):
        """
        Some versions of Python don't automatically negate __eq__.
        """
        return not self.__eq__(other)

    @property
    def axes(self):
        """
        A tuple of all :doc:`uproot.behaviors.TAxis.TAxis` objects.
        """
        raise NotImplementedError(repr(self))

    def axis(self, axis):
        """
        Returns a specified :doc:`uproot.behaviors.TAxis.TAxis` object.

        The ``axis`` can be specified as

        * a non-negative integer: ``0`` is the first axis, ``1`` is the second,
          and ``2`` is the third.
        * a negative integer: ``-1`` is the last axis, ``-2`` is the
          second-to-last, and ``-3`` is the third-to-last.
        * a string: ``"x"`` is the first axis, ``"y"`` is the second, and ``"z"``
          is the third

        (assuming that the histogram dimension supports a given ``axis``).
        """
        raise NotImplementedError(repr(self))

    @property
    def weighted(self):
        """
        True if the histogram has weights (``fSumw2``); False otherwise.

        The histogram counts as weighted only when ``fSumw2`` is present,
        non-empty, and has exactly ``fNcells`` entries. A missing ``fSumw2``
        member means "not weighted".
        """
        # none_if_missing=True so that an absent member comes back as None and
        # is handled by the ``is not None`` check below.
        sumw2 = self.member("fSumw2", none_if_missing=True)
        return (
            sumw2 is not None
            and len(sumw2) > 0
            and len(sumw2) == self.member("fNcells")
        )

    @property
    def kind(self):
        """
        The meaning of this object: ``"COUNT"`` for true histograms (TH*) and
        ``"MEAN"`` for profile plots (TProfile*).
        """
        raise NotImplementedError(repr(self))

    def values(self, flow=False):
        """
        Args:
            flow (bool): If True, include underflow and overflow bins before and
                after the normal (finite-width) bins.

        Bin contents as a 1, 2, or 3 dimensional ``numpy.ndarray``. The
        ``numpy.dtype`` of this array depends on the histogram type.

        Setting ``flow=True`` increases the length of each dimension by two.
        """
        raise NotImplementedError(repr(self))

    def errors(self, flow=False):
        """
        Args:
            flow (bool): If True, include underflow and overflow bins before and
                after the normal (finite-width) bins.

        Errors (uncertainties) in the :ref:`uproot.behaviors.TH1.Histogram.values`
        as a 1, 2, or 3 dimensional ``numpy.ndarray`` of ``numpy.float64``.

        If ``fSumw2`` (weights) are available, they will be used in the
        calculation of the errors. If not, errors are assumed to be the square
        root of the values.

        Setting ``flow=True`` increases the length of each dimension by two.
        """
        _, variances = self._values_variances(flow)
        return numpy.sqrt(variances)

    def variances(self, flow=False):
        """
        Args:
            flow (bool): If True, include underflow and overflow bins before and
                after the normal (finite-width) bins.

        Variances (uncertainties squared) in the
        :ref:`uproot.behaviors.TH1.Histogram.values` as a 1, 2, or 3
        dimensional ``numpy.ndarray`` of ``numpy.float64``.

        If ``fSumw2`` (weights) are available, they will be used in the
        calculation of the variances. If not, variances are assumed to be equal
        to the values.

        Setting ``flow=True`` increases the length of each dimension by two.
        """
        _, variances = self._values_variances(flow)
        return variances

    def counts(self, flow=False):
        """
        Args:
            flow (bool): If True, include underflow and overflow bins before and
                after the normal (finite-width) bins.

        Returns the (possibly weighted) number of entries in each bin. For
        histograms, this is equal to :ref:`uproot.behaviors.TH1.Histogram.values`.
        """
        return self.values(flow=flow)

    def to_boost(self, metadata=None, axis_metadata=None):
        """
        Args:
            metadata (dict of str \u2192 str): Metadata to collect (keys) and
                their C++ class member names (values).
            axis_metadata (dict of str \u2192 str): Metadata to collect from
                each axis.

        Converts the histogram into a ``boost-histogram`` object.

        When ``metadata`` or ``axis_metadata`` is None, the module-level
        ``boost_metadata`` / ``boost_axis_metadata`` defaults are used.

        Storage is ``Weight`` for weighted histograms; otherwise ``Int64`` when
        the values have an integer dtype and ``Double`` in all other cases.
        """
        assert len(self.axes) <= 3, "Only 1D, 2D, and 3D histograms are supported"

        if axis_metadata is None:
            axis_metadata = boost_axis_metadata
        if metadata is None:
            metadata = boost_metadata

        boost_histogram = uproot.extras.boost_histogram()

        sumw2 = None
        if self.weighted:  # ensures self.member("fSumw2") exists
            values, sumw2 = self._values_variances(flow=True)
            storage = boost_histogram.storage.Weight()
        else:
            values = self.values(flow=True)
            if issubclass(values.dtype.type, numpy.integer):
                storage = boost_histogram.storage.Int64()
            else:
                storage = boost_histogram.storage.Double()

        # One boost axis per histogram dimension, in x, y, z order.
        axes = [
            _boost_axis(self.member(name), axis_metadata)
            for name in ["fXaxis", "fYaxis", "fZaxis"][0 : len(self.axes)]
        ]
        out = boost_histogram.Histogram(*axes, storage=storage)
        for k, v in metadata.items():
            setattr(out, k, self.member(v))

        assert len(values.shape) == len(
            axes
        ), "Number of dimensions must match number of axes"

        for i, axis in enumerate(axes):
            if not isinstance(
                axis,
                (boost_histogram.axis.IntCategory, boost_histogram.axis.StrCategory),
            ):
                continue
            # For a category axis, drop index 0 (the underflow entry of the
            # flow=True arrays) along dimension ``i``.
            # NOTE(review): presumably because boost-histogram category axes
            # have no underflow bin -- confirm against boost-histogram docs.
            slicer = (slice(None),) * i + (slice(1, None),)
            values = values[slicer]
            if sumw2 is not None:
                sumw2 = sumw2[slicer]

        view = out.view(flow=True)
        if sumw2 is not None:
            assert (
                sumw2.shape == values.shape
            ), "weights (fSumw2) and values should have same shape"
            view.value = values
            view.variance = sumw2
        else:
            view[...] = values

        return out

    def to_hist(self, metadata=None, axis_metadata=None):
        """
        Args:
            metadata (dict of str \u2192 str): Metadata to collect (keys) and
                their C++ class member names (values).
            axis_metadata (dict of str \u2192 str): Metadata to collect from
                each axis.

        Converts the histogram into a ``hist`` object.

        The conversion goes through
        :ref:`uproot.behaviors.TH1.Histogram.to_boost` and wraps the result
        in ``hist.Hist``.
        """
        if axis_metadata is None:
            axis_metadata = boost_axis_metadata
        if metadata is None:
            metadata = boost_metadata

        return uproot.extras.hist().Hist(
            self.to_boost(metadata=metadata, axis_metadata=axis_metadata)
        )

    # Support direct conversion to histograms, such as bh.Histogram(self) or hist.Hist(self)
    def _to_boost_histogram_(self):
        return self.to_boost()
class TH1(Histogram):
    """
    Behaviors for one-dimensional histograms: descendants of ROOT's
    ``TH1``, not including ``TProfile``, ``TH2``, ``TH3``, or their descendants.
    """

    @property
    def axes(self):
        """
        A one-element tuple holding the ``fXaxis`` member.
        """
        return (self.member("fXaxis"),)

    def axis(self, axis=0):  # default axis for one-dimensional is intentional
        """
        Returns the ``fXaxis`` member when ``axis`` is ``0``, ``-1`` or
        ``"x"``; any other value raises ``ValueError``.
        """
        if not (axis == 0 or axis == -1 or axis == "x"):
            raise ValueError("axis must be 0 (-1) or 'x' for a TH1")
        return self.member("fXaxis")

    @property
    def kind(self):
        """
        Always ``"COUNT"`` for this class.
        """
        return "COUNT"

    def values(self, flow=False):
        """
        Args:
            flow (bool): If True, return the whole array; otherwise the first
                and last entries (underflow and overflow) are sliced off.

        Bin contents as a ``numpy.ndarray`` in native byte order. The array is
        built once from the ``Model_TArray`` base and cached in
        ``self._values``.
        """
        if not hasattr(self, "_values"):
            (raw,) = self.base(uproot.models.TArray.Model_TArray)
            native_dtype = raw.dtype.newbyteorder("=")
            self._values = numpy.asarray(raw, dtype=native_dtype)

        contents = self._values
        return contents if flow else contents[1:-1]

    def _values_variances(self, flow):
        """
        Args:
            flow (bool): If True, return whole arrays; otherwise the first and
                last entries of both arrays are sliced off.

        Returns a ``(values, variances)`` pair. The variances are a
        ``numpy.float64`` array, zero by default and cached in
        ``self._variances``. They are filled from ``fSumw2`` where it is
        positive, provided ``fSumw2`` is present with ``fNcells`` entries;
        otherwise from the values where those are positive.
        """
        contents = self.values(flow=True)

        if not hasattr(self, "_variances"):
            computed = numpy.zeros(contents.shape, dtype=numpy.float64)
            sumw2 = self.member("fSumw2", none_if_missing=True)

            if sumw2 is None or len(sumw2) != self.member("fNcells"):
                # No usable weights: the values themselves act as variances.
                source = contents
            else:
                sumw2 = numpy.asarray(sumw2, dtype=sumw2.dtype.newbyteorder("="))
                source = numpy.reshape(sumw2, contents.shape)

            # Only strictly positive entries are copied; the rest stay zero.
            mask = source > 0
            computed[mask] = source[mask]
            self._variances = computed

        spread = self._variances
        if flow:
            return contents, spread
        return contents[1:-1], spread[1:-1]

    def to_numpy(self, flow=False, dd=False):
        """
        Args:
            flow (bool): If True, include underflow and overflow bins; otherwise,
                only normal (finite-width) bins are included.
            dd (bool): If True, the return type follows
                `numpy.histogramdd <https://numpy.org/doc/stable/reference/generated/numpy.histogramdd.html>`__;
                otherwise, it follows `numpy.histogram <https://numpy.org/doc/stable/reference/generated/numpy.histogram.html>`__
                and `numpy.histogram2d <https://numpy.org/doc/stable/reference/generated/numpy.histogram2d.html>`__.

        Converts the histogram into a form like the ones produced by the NumPy
        histogram functions: ``(values, (xedges,))`` when ``dd`` is True,
        ``(values, xedges)`` otherwise.
        """
        contents = self.values(flow=flow)
        x_edges = self.axis(0).edges(flow=flow)
        return (contents, (x_edges,)) if dd else (contents, x_edges)