-
Notifications
You must be signed in to change notification settings - Fork 81
/
writexml.py
275 lines (243 loc) · 9.55 KB
/
writexml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
import logging
import os
import shutil
import pkg_resources
import xml.etree.cElementTree as ET
import numpy as np
import uproot
from uproot_methods.classes import TH1
# Handle to the ROOT output file. writexml() rebinds this global when it opens
# the file; the build_* helpers and _export_root_histogram() read it.
_ROOT_DATA_FILE = None
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# 'spec' gets passed through all functions as NormFactor is a unique case of having
# parameter configurations stored at the modifier-definition-spec level. This means
# that build_modifier() needs access to the measurements. The call stack is:
#
# writexml
#   -> build_channel
#     -> build_sample
#       -> build_modifier
#
# Therefore, 'spec' needs to be threaded through all these calls.
def _make_hist_name(channel, sample, modifier='', prefix='hist', suffix=''):
    """Compose the name under which a histogram is stored.

    The non-empty entries among ``channel``, ``sample`` and ``modifier`` are
    joined with underscores; ``prefix`` is put directly in front of the result
    and ``suffix`` directly behind it (no separator is inserted for either).
    """
    # Falsy parts are dropped so that no dangling underscore is produced.
    parts = [part for part in (channel, sample, modifier) if part]
    return "{0}{1}{2}".format(prefix, '_'.join(parts), suffix)
def _export_root_histogram(histname, data):
    """Store ``data`` as a histogram named ``histname`` in the ROOT file.

    The bin contents are ``data`` and the bin edges are the integers
    ``0, 1, ..., len(data)``. The histogram is assigned into the module-level
    ``_ROOT_DATA_FILE`` under the key ``histname``.
    """
    contents = np.asarray(data)
    edges = np.arange(len(data) + 1)
    histogram = TH1.from_numpy((contents, edges))
    histogram._fName = histname
    # NB: a guard against writing a duplicate key was tried here, but uproot
    # crashed on the membership test for an unknown reason. Until that is
    # understood, an existing key is not checked for before writing.
    _ROOT_DATA_FILE[histname] = histogram
# https://stackoverflow.com/a/4590052
def indent(elem, level=0):
    """Pretty-print an element tree in place by filling in whitespace.

    Blank ``text``/``tail`` attributes are replaced with a newline followed by
    one space per nesting level; text or tails that already contain
    non-whitespace characters are left untouched.

    Args:
        elem: The ``Element`` to indent; its descendants are handled
            recursively.
        level: Nesting depth of ``elem`` (0 for the root).
    """
    pad = "\n" + level * " "
    # Use len() rather than the element's truth value: truth-testing an
    # Element is deprecated, len() is the documented way to ask for children.
    if len(elem):
        if not elem.text or not elem.text.strip():
            # Whitespace before the first child: one level deeper than elem.
            elem.text = pad + " "
        if not elem.tail or not elem.tail.strip():
            elem.tail = pad
        for child in elem:
            indent(child, level + 1)
        # After the loop `child` is the last sub-element. Its tail precedes
        # the closing tag of `elem`, so it must drop back to elem's own level;
        # otherwise the closing tag ends up indented one level too deep.
        if not child.tail or not child.tail.strip():
            child.tail = pad
    elif level and (not elem.tail or not elem.tail.strip()):
        # Leaf below the root: only the whitespace after it needs setting.
        elem.tail = pad
def build_measurement(measurementspec):
    """Build the ``Measurement`` XML element for one measurement spec.

    Reads ``measurementspec['name']`` and, from ``measurementspec['config']``,
    the ``poi`` and the ``parameters`` list. The returned element carries the
    luminosity value and its uncertainty as attributes, a ``POI`` child, and,
    if any parameter is flagged ``fixed``, a ``ParamSetting`` child listing the
    fixed parameter names separated by spaces.
    """
    config = measurementspec['config']
    name = measurementspec['name']
    poi = config['poi']

    # Defaults used when no parameter named 'lumi' is configured.
    lumi_value = 1.0
    lumi_error = 0.0
    constant_names = []
    for par in config['parameters']:
        par_name = par['name']
        is_lumi = par_name == 'lumi'
        if par.get('fixed', False):
            # The luminosity parameter is spelled 'Lumi' in the XML.
            constant_names.append('Lumi' if is_lumi else par_name)
        if is_lumi:
            lumi_value, lumi_error = par['auxdata'][0], par['sigmas'][0]

    meas = ET.Element(
        "Measurement",
        Name=name,
        Lumi=str(lumi_value),
        LumiRelErr=str(lumi_error),
        ExportOnly=str(True),
    )
    ET.SubElement(meas, 'POI').text = poi

    # Fixed (constant) parameters are only emitted when there are any.
    if constant_names:
        ET.SubElement(meas, 'ParamSetting', Const='True').text = ' '.join(
            constant_names
        )
    return meas
def _relative_uncertainty(uncertainty, nominal):
    """Return the per-bin ratio ``uncertainty / nominal`` as a list of floats.

    Bins whose nominal value is zero get 0.0. Both inputs are converted to
    float arrays first, so integer-valued inputs are handled as well.
    """
    numerator = np.asarray(uncertainty, dtype='float')
    denominator = np.asarray(nominal, dtype='float')
    ratio = np.divide(
        numerator,
        denominator,
        out=np.zeros_like(denominator),
        where=denominator != 0,
    )
    return ratio.tolist()


def build_modifier(spec, modifierspec, channelname, samplename, sampledata):
    """Build the XML element describing one modifier of a sample.

    Args:
        spec: The full specification. Only read for ``normfactor`` modifiers,
            whose initial value and bounds are looked up in the parameter
            configuration of the first measurement.
        modifierspec: Mapping with the modifier's ``name``, ``type`` and
            ``data``.
        channelname: Name of the channel, used to name exported histograms.
        samplename: Name of the sample, used to name exported histograms.
        sampledata: Nominal per-bin values of the sample; ``staterror`` and
            ``shapesys`` data are divided by it to obtain relative
            uncertainties.

    Returns:
        The modifier element, or ``None`` when the modifier is named ``lumi``
        or its type has no XML counterpart (a warning is logged in the latter
        case).

    Side effects:
        ``histosys``, ``staterror`` and ``shapesys`` modifiers additionally
        write their histograms through ``_export_root_histogram``.
    """
    name = modifierspec['name']
    if name == 'lumi':
        return None

    mod_map = {
        'histosys': 'HistoSys',
        'staterror': 'StatError',
        'normsys': 'OverallSys',
        'shapesys': 'ShapeSys',
        'normfactor': 'NormFactor',
        'shapefactor': 'ShapeFactor',
    }
    modtype = modifierspec['type']
    if modtype not in mod_map:
        # Really skip the modifier: falling through to the mod_map lookup
        # would raise KeyError right after announcing that it is skipped.
        log.warning('Skipping {0}({1}) for now'.format(name, modtype))
        return None

    attrs = {'Name': name}
    if modtype == 'histosys':
        attrs['HistoNameLow'] = _make_hist_name(
            channelname, samplename, name, suffix='Low'
        )
        attrs['HistoNameHigh'] = _make_hist_name(
            channelname, samplename, name, suffix='High'
        )
        _export_root_histogram(attrs['HistoNameLow'], modifierspec['data']['lo_data'])
        _export_root_histogram(attrs['HistoNameHigh'], modifierspec['data']['hi_data'])
    elif modtype == 'normsys':
        attrs['High'] = str(modifierspec['data']['hi'])
        attrs['Low'] = str(modifierspec['data']['lo'])
    elif modtype == 'normfactor':
        # NB: only the first measurement is consulted for normfactor configs.
        # To dump as HistFactory XML, the configuration has to be the same for
        # all measurements or the result will not be correct. Why?
        #
        # Unlike other modifiers, NormFactor defines its parameter
        # configuration at the modifier level inside the channel
        # specification instead of at the measurement level.
        #
        # The "Const" attribute is never set here; it is set in the
        # per-measurement configuration like for all other parameters.
        #
        # If the first measurement has no configuration for this parameter,
        # or the configuration omits 'inits' or 'bounds', the defaults below
        # are used for whatever is missing.
        val, low, high = 1, 0, 10
        for par in spec['measurements'][0]['config']['parameters']:
            if par['name'] == name:
                val = par.get('inits', [val])[0]
                low, high = par.get('bounds', [[low, high]])[0]
        attrs['Val'] = str(val)
        attrs['Low'] = str(low)
        attrs['High'] = str(high)
    elif modtype == 'staterror':
        attrs['Activate'] = 'True'
        attrs['HistoName'] = _make_hist_name(channelname, samplename, name)
        # StatError elements are identified by their histogram, not by Name.
        del attrs['Name']
        # The ROOT file stores the uncertainty relative to the nominal yield.
        _export_root_histogram(
            attrs['HistoName'],
            _relative_uncertainty(modifierspec['data'], sampledata),
        )
    elif modtype == 'shapesys':
        attrs['ConstraintType'] = 'Poisson'
        attrs['HistoName'] = _make_hist_name(channelname, samplename, name)
        # The ROOT file stores the uncertainty relative to the nominal yield.
        _export_root_histogram(
            attrs['HistoName'],
            _relative_uncertainty(modifierspec['data'], sampledata),
        )
    # 'shapefactor' needs no attribute besides its Name.

    return ET.Element(mod_map[modtype], **attrs)
def build_sample(spec, samplespec, channelname):
    """Build the ``Sample`` XML element for one sample of a channel.

    Every entry of ``samplespec['modifiers']`` is passed to
    ``build_modifier``; the elements it returns are appended as children
    (``None`` results are dropped). The sample's nominal histogram,
    ``samplespec['data']``, is written through ``_export_root_histogram``.
    """
    samplename = samplespec['name']
    histname = _make_hist_name(channelname, samplename)
    sample = ET.Element(
        'Sample',
        Name=samplename,
        HistoName=histname,
        InputFile=_ROOT_DATA_FILE._path,
        NormalizeByTheory='False',
    )

    for modspec in samplespec['modifiers']:
        # A lumi modifier on this sample switches NormalizeByTheory on.
        if modspec['type'] == 'lumi':
            sample.set('NormalizeByTheory', 'True')
        modifier = build_modifier(
            spec, modspec, channelname, samplename, samplespec['data']
        )
        if modifier is None:
            continue
        sample.append(modifier)

    _export_root_histogram(histname, samplespec['data'])
    return sample
def build_data(obsspec, channelname):
    """Build the ``Data`` XML element for the observation of a channel.

    Args:
        obsspec: Iterable of observation mappings, each with a ``name`` and
            ``data`` entry.
        channelname: Name of the channel whose observation is exported.

    Returns:
        The ``Data`` element referencing the exported histogram.

    Raises:
        ValueError: If ``obsspec`` has no entry named ``channelname``.
    """
    observation = next((obs for obs in obsspec if obs['name'] == channelname), None)
    if observation is None:
        # Fail with a message that names the channel instead of the opaque
        # "'NoneType' object is not subscriptable".
        raise ValueError(
            "No observation found for channel '{0}'".format(channelname)
        )

    histname = _make_hist_name(channelname, 'data')
    data = ET.Element('Data', HistoName=histname, InputFile=_ROOT_DATA_FILE._path)
    _export_root_histogram(histname, observation['data'])
    return data
def build_channel(spec, channelspec, obsspec):
    """Build the ``Channel`` XML element for one channel.

    When ``obsspec`` is non-empty, the element returned by ``build_data``
    becomes the first child. One child from ``build_sample`` then follows for
    each entry of ``channelspec['samples']``, in order.
    """
    channelname = channelspec['name']
    channel = ET.Element(
        'Channel', Name=channelname, InputFile=_ROOT_DATA_FILE._path
    )

    # Observed data is optional.
    if obsspec:
        channel.append(build_data(obsspec, channelname))

    for samplespec in channelspec['samples']:
        sample = build_sample(spec, samplespec, channelname)
        channel.append(sample)
    return channel
def writexml(spec, specdir, data_rootdir, resultprefix):
    """Export ``spec`` as HistFactory XML files plus a ROOT data file.

    Args:
        spec: Specification with ``channels`` and ``measurements`` entries
            and, optionally, ``observations``.
        specdir: Directory that receives one XML file per channel. The
            schema DTD is copied into its parent directory.
        data_rootdir: Directory in which ``data.root`` is (re)created.
        resultprefix: Prefix for the channel file names and for the
            ``OutputFilePrefix`` attribute of the combination element.

    Returns:
        The top-level combination document as UTF-8 encoded bytes. It is
        returned, not written to disk.

    Side effects:
        Rebinds the module-level ``_ROOT_DATA_FILE`` to the opened ROOT file.
    """
    global _ROOT_DATA_FILE

    shutil.copyfile(
        pkg_resources.resource_filename(__name__, 'schemas/HistFactorySchema.dtd'),
        os.path.join(os.path.dirname(specdir), 'HistFactorySchema.dtd'),
    )
    combination = ET.Element(
        "Combination", OutputFilePrefix=os.path.join('.', specdir, resultprefix)
    )
    observations = spec.get('observations')

    with uproot.recreate(os.path.join(data_rootdir, 'data.root')) as _ROOT_DATA_FILE:
        for channelspec in spec['channels']:
            channelfilename = os.path.join(
                specdir, '{0:s}_{1:s}.xml'.format(resultprefix, channelspec['name'])
            )
            # Build the document before opening the file, so that a failure
            # while building does not leave an empty file behind.
            channel = build_channel(spec, channelspec, observations)
            indent(channel)
            # Write the UTF-8 bytes as they are. Text mode would re-encode
            # them with the locale's encoding, which need not be UTF-8.
            with open(channelfilename, 'wb') as channelfile:
                channelfile.write(
                    b"<!DOCTYPE Channel SYSTEM '../HistFactorySchema.dtd'>\n\n"
                )
                channelfile.write(ET.tostring(channel, encoding='utf-8'))

            inp = ET.SubElement(combination, "Input")
            inp.text = channelfilename

    for measurement in spec['measurements']:
        combination.append(build_measurement(measurement))
    indent(combination)
    header = b"<!DOCTYPE Combination SYSTEM 'HistFactorySchema.dtd'>\n\n"
    return header + ET.tostring(combination, encoding='utf-8')