-
Notifications
You must be signed in to change notification settings - Fork 1.3k
/
df106_HiggsToFourLeptons.C
292 lines (263 loc) · 13.9 KB
/
df106_HiggsToFourLeptons.C
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
/// \file
/// \ingroup tutorial_dataframe
/// \notebook -draw
/// The Higgs to four lepton analysis from the ATLAS Open Data release of 2020, with RDataFrame.
///
/// This tutorial is the Higgs to four lepton analysis from the ATLAS Open Data release in 2020
/// (http://opendata.atlas.cern/release/2020/documentation/). The data was taken with the ATLAS detector
/// during 2016 at a center-of-mass energy of 13 TeV. The decay of the Standard Model Higgs boson
/// to two Z bosons and subsequently to four leptons is called the "golden channel". The selection leads
/// to a narrow invariant mass peak on top a relatively smooth and small background, revealing
/// the Higgs at 125 GeV.
/// The analysis is translated to an RDataFrame workflow processing about 300 MB of simulated events and data.
///
/// Lepton selection efficiency corrections ("scale factors") are applied to simulated samples to correct for the
/// differences in the trigger, reconstruction, and identification efficiencies in simulation compared to real data.
/// Systematic uncertainties for those scale factors are evaluated and the Vary function of RDataFrame is used to
/// propagate the variations to the final four leptons mass distribution.
///
/// See the [corresponding spec json file](https://github.com/root-project/root/blob/master/tutorials/dataframe/df106_HiggsToFourLeptons_spec.json).
///
/// \macro_code
/// \macro_image
///
/// \date March 2020, August 2022, August 2023
/// \authors Stefan Wunsch (KIT, CERN), Julia Mathe (CERN), Marta Czurylo (CERN)
#include "TInterpreter.h"
#include <Math/Vector4D.h>
#include <ROOT/RDFHelpers.hxx>
#include <ROOT/RDataFrame.hxx>
#include <ROOT/RVec.hxx>
#include <TCanvas.h>
#include <TGraph.h>
#include <TH1D.h>
#include <THStack.h>
#include <TLatex.h>
#include <TLegend.h>
#include <TProfile.h>
#include <TStyle.h>
using namespace ROOT::VecOps;
using PtEtaPhiEVectorF = ROOT::Math::LorentzVector<ROOT::Math::PtEtaPhiE4D<float>>;
using ROOT::RVecF;
using ROOT::RDF::RSampleInfo;
using namespace ROOT::RDF::Experimental;
// Define functions needed in the analysis
// Select events for the analysis
bool GoodElectronsAndMuons(const ROOT::RVecI &type, const RVecF &pt, const RVecF &eta, const RVecF &phi, const RVecF &e,
const RVecF &trackd0pv, const RVecF &tracksigd0pv, const RVecF &z0)
{
for (size_t i = 0; i < type.size(); i++) {
PtEtaPhiEVectorF p(0.001 * pt[i], eta[i], phi[i], 0.001 * e[i]);
if (type[i] == 11) {
if (pt[i] < 7000 || abs(eta[i]) > 2.47 || abs(trackd0pv[i] / tracksigd0pv[i]) > 5 ||
abs(z0[i] * sin(p.Theta())) > 0.5)
return false;
} else {
if (abs(trackd0pv[i] / tracksigd0pv[i]) > 5 || abs(z0[i] * sin(p.Theta())) > 0.5)
return false;
}
}
return true;
}
// Compute the invariant mass of a four-lepton-system.
float ComputeInvariantMass(const RVecF &pt, const RVecF &eta, const RVecF &phi, const RVecF &e)
{
PtEtaPhiEVectorF p1(pt[0], eta[0], phi[0], e[0]);
PtEtaPhiEVectorF p2(pt[1], eta[1], phi[1], e[1]);
PtEtaPhiEVectorF p3(pt[2], eta[2], phi[2], e[2]);
PtEtaPhiEVectorF p4(pt[3], eta[3], phi[3], e[3]);
return 0.001 * (p1 + p2 + p3 + p4).M();
}
void df106_HiggsToFourLeptons()
{
// Enable Multithreading
ROOT::EnableImplicitMT();
// Create the RDataFrame from the spec json file. The df106_HiggsToFourLeptons_spec.json is provided in the same
// folder as this tutorial
std::string dataset_spec = gROOT->GetTutorialsDir() + std::string("/dataframe/df106_HiggsToFourLeptons_spec.json");
ROOT::RDataFrame df = ROOT::RDF::Experimental::FromSpec(dataset_spec);
// Add the ProgressBar feature
ROOT::RDF::Experimental::AddProgressBar(df);
#ifndef __CLING__
// If this tutorial is compiled, rather than run as a ROOT macro, the interpreter needs to be fed the signatures
// of all the functions we want to JIT in our analysis, as well as any type used in those signatures.
// clang-format off
gInterpreter->Declare(
"using ROOT::RVecF;"
"bool GoodElectronsAndMuons(const ROOT::RVecI &type, const RVecF &pt, const RVecF &eta, const RVecF &phi, const RVecF &e,"
"const RVecF &trackd0pv, const RVecF &tracksigd0pv, const RVecF &z0);"
"float ComputeInvariantMass(const RVecF &pt, const RVecF &eta, const RVecF &phi, const RVecF &e);"
);
// clang-format on
#endif
// Perform the analysis
// Access metadata information that is stored in the JSON config file of the RDataFrame
// The metadata contained in the JSON file is accessible within a `DefinePerSample` call, through the `RSampleInfo`
// class
auto df_analysis =
df.DefinePerSample("xsecs", [](unsigned int slot, const RSampleInfo &id) { return id.GetD("xsecs"); })
.DefinePerSample("lumi", [](unsigned int slot, const RSampleInfo &id) { return id.GetD("lumi"); })
.DefinePerSample("sumws", [](unsigned int slot, const RSampleInfo &id) { return id.GetD("sumws"); })
.DefinePerSample("sample_category",
[](unsigned int slot, const RSampleInfo &id) { return id.GetS("sample_category"); })
// Apply an MC correction for the ZZ decay due to missing gg->ZZ process
.DefinePerSample("scale",
[](unsigned int slot, const ROOT::RDF::RSampleInfo &id) {
return id.Contains("mc_363490.llll.4lep.root") ? 1.3f : 1.0f;
})
// Select electron or muon trigger
.Filter("trigE || trigM")
// Select events with exactly four good leptons conserving charge and lepton numbers
// Note that all collections are RVecs and good_lep is the mask for the good leptons.
// The lepton types are PDG numbers and set to 11 or 13 for an electron or muon
// irrespective of the charge.
.Define("good_lep",
"abs(lep_eta) < 2.5 && lep_pt > 5000 && lep_ptcone30 / lep_pt < 0.3 && lep_etcone20 / lep_pt < 0.3")
.Filter("Sum(good_lep) == 4")
.Filter("Sum(lep_charge[good_lep]) == 0")
.Define("goodlep_sumtypes", "Sum(lep_type[good_lep])")
.Filter("goodlep_sumtypes == 44 || goodlep_sumtypes == 52 || goodlep_sumtypes == 48")
// Apply additional cuts depending on lepton flavour
.Filter(
"GoodElectronsAndMuons(lep_type[good_lep], lep_pt[good_lep], lep_eta[good_lep], lep_phi[good_lep], "
"lep_E[good_lep], lep_trackd0pvunbiased[good_lep], lep_tracksigd0pvunbiased[good_lep], lep_z0[good_lep])")
// Create new columns with the kinematics of good leptons
.Define("goodlep_pt", "lep_pt[good_lep]")
.Define("goodlep_eta", "lep_eta[good_lep]")
.Define("goodlep_phi", "lep_phi[good_lep]")
.Define("goodlep_E", "lep_E[good_lep]")
.Define("goodlep_type", "lep_type[good_lep]")
// Select leptons with high transverse momentum
.Filter("goodlep_pt[0] > 25000 && goodlep_pt[1] > 15000 && goodlep_pt[2] > 10000")
// Compute invariant mass
.Define("m4l", "ComputeInvariantMass(goodlep_pt, goodlep_eta, goodlep_phi, goodlep_E)")
// Reweighting of the samples is different for "data" and "MC"
.DefinePerSample("reweighting", [](unsigned int slot, const RSampleInfo &id) { return id.Contains("mc"); });
// Define the weight column (scale factor) for the MC samples
auto df_mc = df_analysis.Filter("reweighting == true")
.Define("weight", ("scaleFactor_ELE * scaleFactor_MUON * scaleFactor_LepTRIGGER * "
"scaleFactor_PILEUP * mcWeight * scale * xsecs / sumws * lumi"));
// Book histograms for individual MC samples
auto df_higgs = df_mc.Filter(R"(sample_category == "higgs")")
.Histo1D<float>(ROOT::RDF::TH1DModel("higgs", "m4l", 24, 80, 170), "m4l", "weight");
auto df_zz = df_mc.Filter("sample_category == \"zz\"")
.Histo1D<float>(ROOT::RDF::TH1DModel("zz", "m4l", 24, 80, 170), "m4l", "weight");
auto df_other = df_mc.Filter("sample_category == \"other\"")
.Histo1D<float>(ROOT::RDF::TH1DModel("other", "m4l", 24, 80, 170), "m4l", "weight");
// Book the invariant mass histogram for the data
auto df_h_mass_data = df_analysis.Filter("reweighting == false")
.Filter("sample_category == \"data\"")
.Define("weight_", []() { return 1; })
.Histo1D<float>(ROOT::RDF::TH1DModel("data", "m4l", 24, 80, 170), "m4l", "weight_");
// Evaluate the systematic uncertainty
// The systematic uncertainty in this analysis is the MC scale factor uncertainty that depends on lepton
// kinematics such as pT or pseudorapidity.
// Muons uncertainties are negligible, as stated in https://atlas.web.cern.ch/Atlas/GROUPS/PHYSICS/PAPERS/MUON
// Electrons uncertainties are evaluated based on the plots available in https://doi.org/10.48550/arXiv.1908.0
// The uncertainties are linearly interpolated, using the `TGraph::Eval()` method, to cover a range of pT values
// covered by the analysis.
const std::vector<double> x{5.50e3, 5.52e3, 12.54e3, 17.43e3, 22.40e3, 27.48e3, 30e3, 10000e3};
const std::vector<double> y{0.06628, 0.06395, 0.06396, 0.03372, 0.02441, 0.01403, 0, 0};
TGraph graph(x.size(), x.data(), y.data());
// Use the Vary method to add the systematic variations to the total MC scale factor ("weight") of the analysis
// The input consists of the input column to be varied and the lambda function to compute the systematic variations.
// The new output columns contain the varied values of the input column.
auto df_with_variations_mc =
df_mc
.Vary("weight",
[&graph](double x, const RVecF &pt, const RVec<unsigned int> &type) {
const auto v = Mean(Map(pt[type == 11], [&graph](auto p) { return graph.Eval(p); }));
return RVec<double>{(1 + v) * x, (1 - v) * x};
},
{"weight", "goodlep_pt", "goodlep_type"}, {"up", "down"})
.Histo1D<float>(ROOT::RDF::TH1DModel("Invariant Mass", "m4l", 24, 80, 170), "m4l", "weight");
// Create the total MC scale factor histograms: "nominal", "weight:up" and "weight:down".
auto histos_mc = VariationsFor(df_with_variations_mc);
// Evaluate the total MC uncertainty based on the variations. Note, in this case the uncertainties are symmetric.
for (unsigned int i = 0; i < histos_mc["nominal"].GetXaxis()->GetNbins(); i++) {
histos_mc["nominal"].SetBinError(
i, (histos_mc["weight:up"].GetBinContent(i) - histos_mc["nominal"].GetBinContent(i)));
}
// Make the plot of the data, individual MC contributions and the total MC scale factor systematic variations.
gROOT->SetStyle("ATLAS");
// Create canvas with pad
auto c = new TCanvas("c", " ", 600, 600);
auto pad = new TPad("upper_pad", "", 0, 0, 1, 1);
pad->SetTickx(0);
pad->SetTicky(0);
pad->Draw();
pad->cd();
// Draw stack with MC contributions
// Draw cloned histograms to preserve graphics when original objects goes out of scope
df_other->SetFillColor(kViolet - 9);
df_zz->SetFillColor(kAzure - 9);
df_higgs->SetFillColor(kRed + 2);
auto stack = new THStack("stack", "");
auto h_other = static_cast<TH1 *>(df_other->Clone());
stack->Add(h_other);
auto h_zz = static_cast<TH1 *>(df_zz->Clone());
stack->Add(h_zz);
auto h_higgs = static_cast<TH1 *>(df_higgs->Clone());
stack->Add(h_higgs);
stack->Draw("HIST");
// stack histogram can be accessed only after drawing
stack->GetHistogram()->SetTitle("");
stack->GetHistogram()->GetXaxis()->SetLabelSize(0.035);
stack->GetHistogram()->GetXaxis()->SetTitleSize(0.045);
stack->GetHistogram()->GetXaxis()->SetTitleOffset(1.3);
stack->GetHistogram()->GetXaxis()->SetTitle("m_{4l}^{H#rightarrow ZZ} [GeV]");
stack->GetHistogram()->GetYaxis()->SetLabelSize(0.035);
stack->GetHistogram()->GetYaxis()->SetTitleSize(0.045);
stack->GetHistogram()->GetYaxis()->SetTitle("Events");
stack->SetMaximum(35);
stack->GetHistogram()->GetYaxis()->ChangeLabel(1, -1, 0);
// Draw MC scale factor and variations
histos_mc["nominal"].SetFillColor(kBlack);
histos_mc["nominal"].SetFillStyle(3254);
auto h_nominal = histos_mc["nominal"].DrawClone("E2 same");
histos_mc["weight:up"].SetLineColor(kGreen + 2);
auto h_weight_up = histos_mc["weight:up"].DrawClone("HIST same");
histos_mc["weight:down"].SetLineColor(kBlue + 2);
auto h_weight_down = histos_mc["weight:down"].DrawClone("HIST same");
// Draw data histogram
df_h_mass_data->SetMarkerStyle(20);
df_h_mass_data->SetMarkerSize(1.);
df_h_mass_data->SetLineWidth(2);
df_h_mass_data->SetLineColor(kBlack);
df_h_mass_data->SetStats(false);
auto h_mass_data = df_h_mass_data->DrawClone("E sames");
// Add legend
auto legend = new TLegend(0.57, 0.65, 0.94, 0.94);
legend->SetTextFont(42);
legend->SetFillStyle(0);
legend->SetBorderSize(0);
legend->SetTextSize(0.025);
legend->SetTextAlign(32);
legend->AddEntry(h_mass_data, "Data", "lep");
legend->AddEntry(h_higgs, "Higgs MC", "f");
legend->AddEntry(h_zz, "ZZ MC", "f");
legend->AddEntry(h_other, "Other MC", "f");
legend->AddEntry(h_weight_down, "Total MC Variations Down", "l");
legend->AddEntry(h_weight_up, "Total MC Variations Up", "l");
legend->AddEntry(h_nominal, "Total MC Uncertainty", "f");
legend->Draw();
// Add ATLAS label
TLatex atlas_label;
atlas_label.SetTextFont(70);
atlas_label.SetTextSize(0.04);
atlas_label.DrawLatexNDC(0.19, 0.85, "ATLAS");
TLatex data_label;
data_label.SetTextFont(42);
data_label.DrawLatexNDC(0.19 + 0.13, 0.85, "Open Data");
TLatex header;
data_label.SetTextFont(42);
header.SetTextSize(0.035);
header.DrawLatexNDC(0.21, 0.8, "#sqrt{s} = 13 TeV, 10 fb^{-1}");
// Save the plot.
c->SaveAs("df106_HiggsToFourLeptons_cpp.png");
std::cout << "Saved figure to df106_HiggsToFourLeptons_cpp.png" << std::endl;
}
int main()
{
df106_HiggsToFourLeptons();
}