/
TProfile.py
336 lines (267 loc) · 12.2 KB
/
TProfile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE
"""
This module defines the behavior of ``TProfile``.
"""
from __future__ import annotations
import numpy
import uproot
from uproot.behaviors.TH1 import boost_axis_metadata, boost_metadata
_kERRORMEAN = 0
_kERRORSPREAD = 1
_kERRORSPREADI = 2
_kERRORSPREADG = 3
# closely follows the ROOT function, using the same names (with 'root_' prepended)
# https://github.com/root-project/root/blob/ffc7c588ac91aca30e75d356ea971129ee6a836a/hist/hist/src/TProfileHelper.h#L141-L163
def _effective_counts_1d(fBinEntries, fBinSumw2, fNcells):
root_sumOfWeights = fBinEntries
root_sumOfWeights = numpy.asarray(root_sumOfWeights, dtype=numpy.float64)
root_sumOfWeightSquare = fBinSumw2
root_sumOfWeightSquare = numpy.asarray(root_sumOfWeightSquare, dtype=numpy.float64)
if len(root_sumOfWeightSquare) == 0 or len(root_sumOfWeightSquare) != fNcells:
return root_sumOfWeights
positive = root_sumOfWeightSquare > 0
out = numpy.zeros(len(root_sumOfWeights), dtype=numpy.float64)
out[positive] = root_sumOfWeights[positive] ** 2 / root_sumOfWeightSquare[positive]
return out
# duplicates the first part of '_values_errors_1d'
def _values_1d(fBinEntries, root_cont):
root_sum = fBinEntries
root_sum = numpy.asarray(root_sum, dtype=numpy.float64)
nonzero = root_sum != 0
root_contsum = numpy.zeros(len(root_cont), dtype=numpy.float64)
root_contsum[nonzero] = root_cont[nonzero] / root_sum[nonzero]
return root_contsum
def _error_mode_str(error_mode):
if error_mode is None or error_mode == _kERRORMEAN or error_mode == "":
return ""
elif error_mode == _kERRORSPREAD or error_mode == "s":
return "S"
elif error_mode == _kERRORSPREADI or error_mode == "i":
return "I"
elif error_mode == _kERRORSPREADG or error_mode == "g":
return "G"
else:
return "_"
# closely follows the ROOT function, using the same names (with 'root_' prepended)
# https://github.com/root-project/root/blob/ffc7c588ac91aca30e75d356ea971129ee6a836a/hist/hist/src/TProfileHelper.h#L660-L721
def _values_errors_1d(error_mode, fBinEntries, root_cont, fSumw2, fNcells, fBinSumw2):
if error_mode is None or error_mode == _kERRORMEAN or error_mode == "":
error_mode = _kERRORMEAN
elif error_mode == _kERRORSPREAD or error_mode == "s":
error_mode = _kERRORSPREAD
elif error_mode == _kERRORSPREADI or error_mode == "i":
error_mode = _kERRORSPREADI
elif error_mode == _kERRORSPREADG or error_mode == "g":
error_mode = _kERRORSPREADG
else:
raise ValueError(
"error_mode must be None/0/'' for error-on-mean,\n"
" 1/'s' for spread (variance),\n"
" 2/'i' for integer spread (using sqrt(12)),\n"
" or 3/'g' for Gaussian spread\n"
" not "
+ repr(error_mode)
+ "see https://root.cern.ch/doc/master/classTProfile.html"
)
root_sum = fBinEntries
root_sum = numpy.asarray(root_sum, dtype=numpy.float64)
nonzero = root_sum != 0
root_contsum = numpy.zeros(len(root_cont), dtype=numpy.float64)
root_contsum[nonzero] = root_cont[nonzero] / root_sum[nonzero]
if error_mode == _kERRORSPREADG:
out = numpy.zeros(len(root_cont), dtype=numpy.float64)
out[nonzero] = 1.0 / numpy.sqrt(root_sum[nonzero])
return root_contsum, out
root_err2 = fSumw2
if root_err2 is None or len(root_err2) != fNcells:
root_err2 = numpy.zeros(len(root_cont), dtype=numpy.float64)
else:
root_err2 = numpy.asarray(root_err2, dtype=numpy.float64)
root_neff = _effective_counts_1d(fBinEntries, fBinSumw2, fNcells)
root_eprim2 = numpy.zeros(len(root_cont), dtype=numpy.float64)
root_eprim2[nonzero] = abs(
root_err2[nonzero] / root_sum[nonzero] - root_contsum[nonzero] ** 2
)
root_eprim = numpy.sqrt(root_eprim2)
if error_mode == _kERRORSPREADI:
numer = numpy.ones(len(root_cont), dtype=numpy.float64)
denom = numpy.full(len(root_cont), numpy.sqrt(12 * root_neff))
eprim_nonzero = root_eprim != 0
numer[eprim_nonzero] = root_eprim[eprim_nonzero]
denom[eprim_nonzero] = numpy.sqrt(root_neff[eprim_nonzero])
out = numpy.zeros(len(root_cont), dtype=numpy.float64)
out[nonzero] = numer[nonzero] / denom[nonzero]
return root_contsum, out
if error_mode == _kERRORSPREAD:
root_eprim[~nonzero] = 0.0
return root_contsum, root_eprim
out = numpy.zeros(len(root_cont), dtype=numpy.float64)
out[nonzero] = root_eprim[nonzero] / numpy.sqrt(root_neff[nonzero])
return root_contsum, out
class Profile(uproot.behaviors.TH1.Histogram):
"""
Abstract class for profile plots.
"""
@property
def kind(self):
return "MEAN"
def counts(self, flow=False):
"""
Args:
flow (bool): If True, include underflow and overflow bins before and
after the normal (finite-width) bins.
The effective number of entries, which is a step in the calculation of
:ref:`uproot.behaviors.TProfile.Profile.values`. The calculation
of profile errors exactly follows ROOT's "effective entries", but in a
vectorized NumPy form.
"""
raise NotImplementedError(repr(self))
def values(self, flow=False):
"""
Args:
flow (bool): If True, include underflow and overflow bins before and
after the normal (finite-width) bins.
Mean value of each bin as a 1, 2, or 3 dimensional ``numpy.ndarray`` of
``numpy.float64``.
Setting ``flow=True`` increases the length of each dimension by two.
"""
raise NotImplementedError(repr(self))
def errors(self, flow=False, error_mode=""):
"""
Args:
flow (bool): If True, include underflow and overflow bins before and
after the normal (finite-width) bins.
error_mode (str): Choose a method for calculating the errors (see below).
Errors (uncertainties) in the :ref:`uproot.behaviors.TH1.Histogram.values`
as a 1, 2, or 3 dimensional ``numpy.ndarray`` of ``numpy.float64``.
The calculation of profile errors exactly follows ROOT's function, but
in a vectorized NumPy form. The ``error_mode`` may be
* ``""`` for standard error on the mean
* ``"s"`` for spread
* ``"i"`` for integer data
* ``"g"`` for Gaussian
following `ROOT's profile documentation <https://root.cern.ch/doc/master/classTProfile.html>`__.
Setting ``flow=True`` increases the length of each dimension by two.
"""
values, errors = self._values_errors(flow, error_mode)
return errors
def variances(self, flow=False, error_mode=""):
"""
Args:
flow (bool): If True, include underflow and overflow bins before and
after the normal (finite-width) bins.
error_mode (str): Choose a method for calculating the errors (see below).
Variances (uncertainties squared) in the
:ref:`uproot.behaviors.TH1.Histogram.values` as a 1, 2, or 3
dimensional ``numpy.ndarray`` of ``numpy.float64``.
The calculation of profile variances exactly follows ROOT's function, but
in a vectorized NumPy form. The ``error_mode`` may be
* ``""`` for standard error on the mean (squared)
* ``"s"`` for spread (squared)
* ``"i"`` for integer data (squared)
* ``"g"`` for Gaussian (squared)
following `ROOT's profile documentation <https://root.cern.ch/doc/master/classTProfile.html>`__.
Setting ``flow=True`` increases the length of each dimension by two.
"""
values, errors = self._values_errors(flow, error_mode)
return numpy.square(errors)
class TProfile(Profile):
"""
Behaviors for one-dimensional profiles: ROOT's ``TProfile``.
"""
no_inherit = (uproot.behaviors.TH1.TH1,)
@property
def axes(self):
return (self.member("fXaxis"),)
def axis(self, axis=0): # default axis for one-dimensional is intentional
if axis == 0 or axis == -1 or axis == "x":
return self.member("fXaxis")
else:
raise ValueError("axis must be 0 (-1) or 'x' for a TProfile")
@property
def weighted(self):
fBinSumw2 = self.member("fBinSumw2", none_if_missing=True)
return fBinSumw2 is None or len(fBinSumw2) != len(self.member("fNcells"))
def counts(self, flow=True):
out = _effective_counts_1d(
self.member("fBinEntries"),
self.member("fBinSumw2"),
self.member("fNcells"),
)
if flow:
return out
else:
return out[1:-1]
def values(self, flow=False):
if hasattr(self, "_values"):
values = self._values
else:
(root_cont,) = self.base(uproot.models.TArray.Model_TArray)
root_cont = numpy.asarray(root_cont, dtype=numpy.float64)
values = _values_1d(
self.member("fBinEntries"),
root_cont,
)
self._values = values
if flow:
return values
else:
return values[1:-1]
def _values_errors(self, flow, error_mode):
attr = "_errors" + _error_mode_str(error_mode)
if hasattr(self, attr):
values = self._values
errors = getattr(self, attr)
else:
(root_cont,) = self.base(uproot.models.TArray.Model_TArray)
root_cont = numpy.asarray(root_cont, dtype=numpy.float64)
fSumw2 = self.member("fSumw2", none_if_missing=True)
if fSumw2 is not None:
fSumw2 = numpy.asarray(fSumw2)
values, errors = _values_errors_1d(
error_mode,
numpy.asarray(self.member("fBinEntries")),
root_cont,
fSumw2,
self.member("fNcells"),
numpy.asarray(self.member("fBinSumw2")),
)
self._values = values
setattr(self, attr, errors)
if flow:
return values, errors
else:
return values[1:-1], errors[1:-1]
def to_boost(self, metadata=boost_metadata, axis_metadata=boost_axis_metadata):
boost_histogram = uproot.extras.boost_histogram()
effective_counts = self.counts(flow=True)
_, errors = self._values_errors(True, self.member("fErrorMode"))
values = self._bases[0]._bases[-1]
variances = numpy.square(errors)
sum_of_bin_weights = numpy.asarray(self.member("fBinEntries"))
storage = boost_histogram.storage.WeightedMean()
xaxis = uproot.behaviors.TH1._boost_axis(self.member("fXaxis"), axis_metadata)
out = boost_histogram.Histogram(xaxis, storage=storage)
for k, v in metadata.items():
setattr(out, k, self.member(v))
if isinstance(xaxis, boost_histogram.axis.StrCategory):
effective_counts = effective_counts[1:]
values = values[1:]
variances = variances[1:]
sum_of_bin_weights = sum_of_bin_weights[1:]
out.metadata = {"fSumw2": self.member("fSumw2")}
view = out.view(flow=True)
# https://github.com/root-project/root/blob/ffc7c588ac91aca30e75d356ea971129ee6a836a/hist/hist/src/TProfileHelper.h#L668-L671
with numpy.errstate(divide="ignore", invalid="ignore"):
sum_of_bin_weights_squared = (sum_of_bin_weights**2) / effective_counts
# TODO: Drop this when boost-histogram has a way to set using the constructor.
# New version should look something like this:
# view[...] = np.stack(sum_of_bin_weights, sum_of_bin_weights_squared, values, variances)
# Current / classic version:
view["sum_of_weights"] = sum_of_bin_weights
view["sum_of_weights_squared"] = sum_of_bin_weights_squared
view["value"] = values
view["_sum_of_weighted_deltas_squared"] = variances * (
sum_of_bin_weights - sum_of_bin_weights_squared / sum_of_bin_weights
)
return out