From 98a29d45177775822dbfa8f2177ab262dafb93eb Mon Sep 17 00:00:00 2001 From: Carsten Burgard Date: Wed, 22 Oct 2025 18:02:18 +0200 Subject: [PATCH 1/3] allowing control of binned data via protodataset in hypotest calculator --- roofit/roofitcore/inc/RooGlobalFunc.h | 2 +- roofit/roofitcore/src/RooAbsPdf.cxx | 2 +- roofit/roofitcore/src/RooBinnedGenContext.cxx | 2 +- roofit/roofitcore/src/RooGenContext.cxx | 2 +- roofit/roofitcore/src/RooGlobalFunc.cxx | 2 +- roofit/roostats/inc/RooStats/ToyMCSampler.h | 6 +++--- roofit/roostats/src/HypoTestCalculatorGeneric.cxx | 10 ++++++++-- roofit/roostats/src/ToyMCSampler.cxx | 2 +- 8 files changed, 17 insertions(+), 11 deletions(-) diff --git a/roofit/roofitcore/inc/RooGlobalFunc.h b/roofit/roofitcore/inc/RooGlobalFunc.h index 75f7dda24932c..60bbfb6efb94c 100644 --- a/roofit/roofitcore/inc/RooGlobalFunc.h +++ b/roofit/roofitcore/inc/RooGlobalFunc.h @@ -358,7 +358,7 @@ RooCmdArg Conditional(const RooArgSet& pdfSet, const RooArgSet& depSet, bool dep * @{ */ // RooAbsPdf::generate arguments -RooCmdArg ProtoData(const RooDataSet& protoData, bool randomizeOrder=false, bool resample=false) ; +RooCmdArg ProtoData(const RooAbsData& protoData, bool randomizeOrder=false, bool resample=false) ; RooCmdArg NumEvents(Int_t numEvents) ; RooCmdArg NumEvents(double numEvents) ; RooCmdArg AutoBinned(bool flag=true) ; diff --git a/roofit/roofitcore/src/RooAbsPdf.cxx b/roofit/roofitcore/src/RooAbsPdf.cxx index 4e4b050f17769..ffec802c53d32 100644 --- a/roofit/roofitcore/src/RooAbsPdf.cxx +++ b/roofit/roofitcore/src/RooAbsPdf.cxx @@ -1191,7 +1191,7 @@ RooAbsGenContext* RooAbsPdf::autoGenContext(const RooArgSet &vars, const RooData /// as binned generation is always executed at the top-level node for a regular /// PDF, so for those it only mattes that the top-level node is tagged. /// -/// ProtoData(const RooDataSet& data, bool randOrder) +/// ProtoData(const RooAbsData& data, bool randOrder) /// Use specified dataset as prototype dataset. 
If randOrder in ProtoData() is set to true, /// the order of the events in the dataset will be read in a random order if the requested /// number of events to be generated does not match the number of events in the prototype dataset. diff --git a/roofit/roofitcore/src/RooBinnedGenContext.cxx b/roofit/roofitcore/src/RooBinnedGenContext.cxx index 3a53b28e30eb9..dbcc5652e1bf3 100644 --- a/roofit/roofitcore/src/RooBinnedGenContext.cxx +++ b/roofit/roofitcore/src/RooBinnedGenContext.cxx @@ -60,7 +60,7 @@ RooBinnedGenContext::RooBinnedGenContext(const RooAbsPdf &model, const RooArgSet if (prototype) { RooArgSet coefNSet(vars) ; - coefNSet.add(*prototype->get()) ; + coefNSet.add(*prototype->get(),true) ; _pdf->fixAddCoefNormalization(coefNSet) ; } diff --git a/roofit/roofitcore/src/RooGenContext.cxx b/roofit/roofitcore/src/RooGenContext.cxx index ed19687122f9d..5dfb2ea416395 100644 --- a/roofit/roofitcore/src/RooGenContext.cxx +++ b/roofit/roofitcore/src/RooGenContext.cxx @@ -87,7 +87,7 @@ RooGenContext::RooGenContext(const RooAbsPdf &model, const RooArgSet &vars, // Optionally fix RooAddPdf normalizations if (prototype&&_pdfClone->dependsOn(*prototype->get())) { RooArgSet fullNormSet(vars) ; - fullNormSet.add(*prototype->get()) ; + fullNormSet.add(*prototype->get(),true) ; _pdfClone->fixAddCoefNormalization(fullNormSet) ; } diff --git a/roofit/roofitcore/src/RooGlobalFunc.cxx b/roofit/roofitcore/src/RooGlobalFunc.cxx index a9bd2713f6002..c4e0bfa49defb 100644 --- a/roofit/roofitcore/src/RooGlobalFunc.cxx +++ b/roofit/roofitcore/src/RooGlobalFunc.cxx @@ -807,7 +807,7 @@ RooCmdArg Conditional(const RooArgSet &pdfSet, const RooArgSet &depSet, bool dep }; // RooAbsPdf::generate arguments -RooCmdArg ProtoData(const RooDataSet &protoData, bool randomizeOrder, bool resample) +RooCmdArg ProtoData(const RooAbsData &protoData, bool randomizeOrder, bool resample) { return RooCmdArg("PrototypeData", randomizeOrder, resample, 0, 0, nullptr, nullptr, &protoData, nullptr); } diff --git 
a/roofit/roostats/inc/RooStats/ToyMCSampler.h b/roofit/roostats/inc/RooStats/ToyMCSampler.h index 4da726a460a53..fea96386e8907 100644 --- a/roofit/roostats/inc/RooStats/ToyMCSampler.h +++ b/roofit/roostats/inc/RooStats/ToyMCSampler.h @@ -229,14 +229,14 @@ class ToyMCSampler: public TestStatSampler { fAdaptiveLowLimit = low_threshold; } - void SetProtoData(const RooDataSet* d) { fProtoData = d; } + void SetProtoData(const RooAbsData* d) { fProtoData = d; } protected: const RooArgList* EvaluateAllTestStatistics(RooAbsData& data, const RooArgSet& poi, DetailedOutputAggregator& detOutAgg); /// helper for GenerateToyData - std::unique_ptr Generate(RooAbsPdf &pdf, RooArgSet &observables, const RooDataSet *protoData=nullptr, int forceEvents=0) const; + std::unique_ptr Generate(RooAbsPdf &pdf, RooArgSet &observables, const RooAbsData *protoData=nullptr, int forceEvents=0) const; /// helper method for clearing the cache virtual void ClearCache(); @@ -272,7 +272,7 @@ class ToyMCSampler: public TestStatSampler { double fAdaptiveLowLimit; double fAdaptiveHighLimit; - const RooDataSet *fProtoData = nullptr; ///< in dev + const RooAbsData *fProtoData = nullptr; ///< in dev mutable NuisanceParametersSampler *fNuisanceParametersSampler = nullptr; ///SetProtoData(&data); + const bool dataIsBinned = dynamic_cast(fData) != nullptr; + toymcs->SetGenerateBinned(dataIsBinned); // if observed is RooDataHist -> generate RooDataHist toys + + fDefaultSampler = toymcs; + fTestStatSampler = toymcs; } diff --git a/roofit/roostats/src/ToyMCSampler.cxx b/roofit/roostats/src/ToyMCSampler.cxx index 4249038bf13cc..a0cd86392a808 100644 --- a/roofit/roostats/src/ToyMCSampler.cxx +++ b/roofit/roostats/src/ToyMCSampler.cxx @@ -476,7 +476,7 @@ RooAbsData* ToyMCSampler::GenerateToyData(RooArgSet& paramPoint, double& weight, /// or whether it should use the expected number of events. It also takes /// into account the option to generate a binned data set (*i.e.* RooDataHist). 
-std::unique_ptr ToyMCSampler::Generate(RooAbsPdf &pdf, RooArgSet &observables, const RooDataSet* protoData, int forceEvents) const { +std::unique_ptr ToyMCSampler::Generate(RooAbsPdf &pdf, RooArgSet &observables, const RooAbsData* protoData, int forceEvents) const { if(fProtoData) { protoData = fProtoData; From 6dbedd7eb8fb8d0b3252ce9e4858695811d425c3 Mon Sep 17 00:00:00 2001 From: Carsten Burgard Date: Thu, 23 Oct 2025 11:23:25 +0200 Subject: [PATCH 2/3] added contributors to README --- README/ReleaseNotes/v638/index.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README/ReleaseNotes/v638/index.md b/README/ReleaseNotes/v638/index.md index 328ef46d9ff86..bee35287e8efc 100644 --- a/README/ReleaseNotes/v638/index.md +++ b/README/ReleaseNotes/v638/index.md @@ -13,7 +13,9 @@ The following people have contributed to this new version: Bertrand Bellenot, CERN/EP-SFT,\ Jakob Blomer, CERN/EP-SFT,\ Lukas Breitwieser, CERN/EP-SFT,\ + Carsten Burgard, University of Hamburg and TU Dortmund,\ Philippe Canal, FNAL,\ + Simon Cello, TU Dortmund,\ Olivier Couet, CERN/EP-SFT,\ Marta Czurylo, CERN/EP-SFT,\ Florine de Geus, CERN/EP-SFT and University of Twente,\ From b2f731f344c13de08f9ab65cde82dc0b4324365f Mon Sep 17 00:00:00 2001 From: Carsten Burgard Date: Fri, 24 Oct 2025 10:52:00 +0200 Subject: [PATCH 3/3] clang format applied --- roofit/roofitcore/inc/RooGlobalFunc.h | 2 +- roofit/roofitcore/src/RooAbsPdf.cxx | 23 ++++++------ roofit/roofitcore/src/RooBinnedGenContext.cxx | 2 +- roofit/roofitcore/src/RooGenContext.cxx | 2 +- roofit/roostats/inc/RooStats/ToyMCSampler.h | 5 ++- .../src/HypoTestCalculatorGeneric.cxx | 4 +- roofit/roostats/src/ToyMCSampler.cxx | 37 ++++++++++--------- 7 files changed, 40 insertions(+), 35 deletions(-) diff --git a/roofit/roofitcore/inc/RooGlobalFunc.h b/roofit/roofitcore/inc/RooGlobalFunc.h index 60bbfb6efb94c..5f4268256737c 100644 --- a/roofit/roofitcore/inc/RooGlobalFunc.h +++ b/roofit/roofitcore/inc/RooGlobalFunc.h @@ -358,7 +358,7 @@
RooCmdArg Conditional(const RooArgSet& pdfSet, const RooArgSet& depSet, bool dep * @{ */ // RooAbsPdf::generate arguments -RooCmdArg ProtoData(const RooAbsData& protoData, bool randomizeOrder=false, bool resample=false) ; +RooCmdArg ProtoData(const RooAbsData &protoData, bool randomizeOrder = false, bool resample = false); RooCmdArg NumEvents(Int_t numEvents) ; RooCmdArg NumEvents(double numEvents) ; RooCmdArg AutoBinned(bool flag=true) ; diff --git a/roofit/roofitcore/src/RooAbsPdf.cxx b/roofit/roofitcore/src/RooAbsPdf.cxx index ffec802c53d32..983c157eeae35 100644 --- a/roofit/roofitcore/src/RooAbsPdf.cxx +++ b/roofit/roofitcore/src/RooAbsPdf.cxx @@ -1157,8 +1157,6 @@ RooAbsGenContext* RooAbsPdf::autoGenContext(const RooArgSet &vars, const RooData return context ; } - - //////////////////////////////////////////////////////////////////////////////// /// Generate a new dataset containing the specified variables with events sampled from our distribution. /// Generate the specified number of events or expectedEvents() if not specified. @@ -1179,11 +1177,13 @@ RooAbsGenContext* RooAbsPdf::autoGenContext(const RooArgSet &vars, const RooData /// `Extended()` If no number of events to be generated is given, /// use expected number of events from extended likelihood term. /// This evidently only works for extended PDFs. -/// `GenBinned(const char* tag)` Use binned generation for all component pdfs that have 'setAttribute(tag)' set -/// `AutoBinned(bool flag)` Automatically deploy binned generation for binned distributions (e.g. RooHistPdf, sums and products of +/// `GenBinned(const char* tag)` Use binned generation for all component pdfs that have +/// 'setAttribute(tag)' set `AutoBinned(bool flag)` Automatically deploy binned generation for +/// binned distributions (e.g. RooHistPdf, sums and products of /// RooHistPdfs etc) /// \note Datasets that are generated in binned mode are returned as weighted unbinned datasets. 
This means that -/// for each bin, there will be one event in the dataset with a weight corresponding to the (possibly randomised) bin content. +/// for each bin, there will be one event in the dataset with a weight corresponding to the (possibly randomised) bin +/// content. /// /// /// `AllBinned()` As above, but for all components. @@ -1195,12 +1195,13 @@ RooAbsGenContext* RooAbsPdf::autoGenContext(const RooArgSet &vars, const RooData /// Use specified dataset as prototype dataset. If randOrder in ProtoData() is set to true, /// the order of the events in the dataset will be read in a random order if the requested /// number of events to be generated does not match the number of events in the prototype dataset. -/// \note If ProtoData() is used, the specified existing dataset as a prototype: the new dataset will contain -/// the same number of events as the prototype (unless otherwise specified), and any prototype variables not in -/// whatVars will be copied into the new dataset for each generated event and also used to set our PDF parameters. -/// The user can specify a number of events to generate that will override the default. The result is a -/// copy of the prototype dataset with only variables in whatVars randomized. Variables in whatVars that -/// are not in the prototype will be added as new columns to the generated dataset. +/// \note If ProtoData() is used, the specified existing dataset as a prototype: the new dataset will +/// contain the same number of events as the prototype (unless otherwise specified), and any prototype +/// variables not in whatVars will be copied into the new dataset for each generated event and also used +/// to set our PDF parameters. The user can specify a number of events to generate that will override the +/// default. The result is a copy of the prototype dataset with only variables in whatVars randomized. 
+/// Variables in whatVars that are not in the prototype will be added as new columns to the generated +/// dataset. /// /// /// diff --git a/roofit/roofitcore/src/RooBinnedGenContext.cxx b/roofit/roofitcore/src/RooBinnedGenContext.cxx index dbcc5652e1bf3..7da5af80d7820 100644 --- a/roofit/roofitcore/src/RooBinnedGenContext.cxx +++ b/roofit/roofitcore/src/RooBinnedGenContext.cxx @@ -60,7 +60,7 @@ RooBinnedGenContext::RooBinnedGenContext(const RooAbsPdf &model, const RooArgSet if (prototype) { RooArgSet coefNSet(vars) ; - coefNSet.add(*prototype->get(),true) ; + coefNSet.add(*prototype->get(), true); _pdf->fixAddCoefNormalization(coefNSet) ; } diff --git a/roofit/roofitcore/src/RooGenContext.cxx b/roofit/roofitcore/src/RooGenContext.cxx index 5dfb2ea416395..a08f481d45c81 100644 --- a/roofit/roofitcore/src/RooGenContext.cxx +++ b/roofit/roofitcore/src/RooGenContext.cxx @@ -87,7 +87,7 @@ RooGenContext::RooGenContext(const RooAbsPdf &model, const RooArgSet &vars, // Optionally fix RooAddPdf normalizations if (prototype&&_pdfClone->dependsOn(*prototype->get())) { RooArgSet fullNormSet(vars) ; - fullNormSet.add(*prototype->get(),true) ; + fullNormSet.add(*prototype->get(), true); _pdfClone->fixAddCoefNormalization(fullNormSet) ; } diff --git a/roofit/roostats/inc/RooStats/ToyMCSampler.h b/roofit/roostats/inc/RooStats/ToyMCSampler.h index fea96386e8907..7967fec7705c7 100644 --- a/roofit/roostats/inc/RooStats/ToyMCSampler.h +++ b/roofit/roostats/inc/RooStats/ToyMCSampler.h @@ -229,14 +229,15 @@ class ToyMCSampler: public TestStatSampler { fAdaptiveLowLimit = low_threshold; } - void SetProtoData(const RooAbsData* d) { fProtoData = d; } + void SetProtoData(const RooAbsData *d) { fProtoData = d; } protected: const RooArgList* EvaluateAllTestStatistics(RooAbsData& data, const RooArgSet& poi, DetailedOutputAggregator& detOutAgg); /// helper for GenerateToyData - std::unique_ptr Generate(RooAbsPdf &pdf, RooArgSet &observables, const RooAbsData *protoData=nullptr, int 
forceEvents=0) const; + std::unique_ptr Generate(RooAbsPdf &pdf, RooArgSet &observables, + const RooAbsData *protoData = nullptr, int forceEvents = 0) const; /// helper method for clearing the cache virtual void ClearCache(); diff --git a/roofit/roostats/src/HypoTestCalculatorGeneric.cxx b/roofit/roostats/src/HypoTestCalculatorGeneric.cxx index e9923cabe685c..1b5713c8c6b67 100644 --- a/roofit/roostats/src/HypoTestCalculatorGeneric.cxx +++ b/roofit/roostats/src/HypoTestCalculatorGeneric.cxx @@ -66,8 +66,8 @@ HypoTestCalculatorGeneric::HypoTestCalculatorGeneric( auto toymcs = new ToyMCSampler(*fDefaultTestStat, 1000); // --- Ensure the ToyMCSampler generates toys with the same structure as the observed data toymcs->SetProtoData(&data); - const bool dataIsBinned = dynamic_cast(fData) != nullptr; - toymcs->SetGenerateBinned(dataIsBinned); // if observed is RooDataHist -> generate RooDataHist toys + const bool dataIsBinned = dynamic_cast(fData) != nullptr; + toymcs->SetGenerateBinned(dataIsBinned); // if observed is RooDataHist -> generate RooDataHist toys fDefaultSampler = toymcs; fTestStatSampler = toymcs; diff --git a/roofit/roostats/src/ToyMCSampler.cxx b/roofit/roostats/src/ToyMCSampler.cxx index a0cd86392a808..582c8b93d18a4 100644 --- a/roofit/roostats/src/ToyMCSampler.cxx +++ b/roofit/roostats/src/ToyMCSampler.cxx @@ -476,27 +476,30 @@ RooAbsData* ToyMCSampler::GenerateToyData(RooArgSet& paramPoint, double& weight, /// or whether it should use the expected number of events. It also takes /// into account the option to generate a binned data set (*i.e.* RooDataHist). 
-std::unique_ptr ToyMCSampler::Generate(RooAbsPdf &pdf, RooArgSet &observables, const RooAbsData* protoData, int forceEvents) const { +std::unique_ptr +ToyMCSampler::Generate(RooAbsPdf &pdf, RooArgSet &observables, const RooAbsData *protoData, int forceEvents) const +{ - if(fProtoData) { - protoData = fProtoData; - forceEvents = protoData->numEntries(); - } + if (fProtoData) { + protoData = fProtoData; + forceEvents = protoData->numEntries(); + } - std::unique_ptr data; - int events = forceEvents; - if(events == 0) events = fNEvents; + std::unique_ptr data; + int events = forceEvents; + if (events == 0) + events = fNEvents; - // cannot use multigen when the nuisance parameters change for every toy - bool useMultiGen = (fUseMultiGen || fgAlwaysUseMultiGen) && !fNuisanceParametersSampler; + // cannot use multigen when the nuisance parameters change for every toy + bool useMultiGen = (fUseMultiGen || fgAlwaysUseMultiGen) && !fNuisanceParametersSampler; - if (events == 0) { - if (!pdf.canBeExtended() || pdf.expectedEvents(observables) <= 0) { - std::stringstream ss; - ss << "ToyMCSampler: Error : pdf is not extended and number of events per toy is zero"; - oocoutE(nullptr,InputArguments) << ss.str() << std::endl; - throw std::runtime_error(ss.str()); - } + if (events == 0) { + if (!pdf.canBeExtended() || pdf.expectedEvents(observables) <= 0) { + std::stringstream ss; + ss << "ToyMCSampler: Error : pdf is not extended and number of events per toy is zero"; + oocoutE(nullptr, InputArguments) << ss.str() << std::endl; + throw std::runtime_error(ss.str()); + } if(fGenerateBinned) { if(protoData) data = std::unique_ptr{pdf.generate(observables, AllBinned(), Extended(), ProtoData(*protoData, true, true))}; else data = std::unique_ptr{pdf.generate(observables, AllBinned(), Extended())};