From a780e91b641526f170b8197abacbcfd9b69121f9 Mon Sep 17 00:00:00 2001 From: Kim Albertsson Date: Tue, 18 Jul 2017 18:55:34 +0200 Subject: [PATCH] TMVA BDT grad boost (#706) * MethodBDT annotate gradboost * MethodBDT remove binary reweighting for multiclass * MethodBDT increase response scaling to match lit. for multiclass See Jerome H. Friedman "Greedy Function Approximation: A Gradient Boosting Machine" for details (p.1201). * Fix gradboost response event weight The response of the gradient boosting was calculated using `sum(|y_ik|(1-|y_ik|)) * w^2` for each decision node region where w is the event weight and y_ik is the pseudoresidual. This fix changes the weighting to `sum(|y_ik|(1-|y_ik|)) * w`, that is the first expression is weighted linearly per event. Symptoms include heavy bias towards events with w >> 1 and elimination of events with w << 1. * clang-format --- tmva/tmva/src/DecisionTree.cxx | 8 +++++--- tmva/tmva/src/MethodBDT.cxx | 16 +++++++++++++--- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/tmva/tmva/src/DecisionTree.cxx b/tmva/tmva/src/DecisionTree.cxx index 05063dcb6515f..052a791feee30 100644 --- a/tmva/tmva/src/DecisionTree.cxx +++ b/tmva/tmva/src/DecisionTree.cxx @@ -1714,10 +1714,12 @@ Double_t TMVA::DecisionTree::CheckEvent( const TMVA::Event * e, Bool_t UseYesNoL } - if ( DoRegression() ){ + if (DoRegression()) { + // Note: This path is also taken for MethodBDT with analysis type + // kClassification and kMulticlass when using GradBoost. 
+ // See TMVA::MethodBDT::InitGradBoost return current->GetResponse(); - } - else { + } else { if (UseYesNoLeaf) return Double_t ( current->GetNodeType() ); else return current->GetPurity(); } diff --git a/tmva/tmva/src/MethodBDT.cxx b/tmva/tmva/src/MethodBDT.cxx index 1dd547c9be08a..d0deefae4b564 100644 --- a/tmva/tmva/src/MethodBDT.cxx +++ b/tmva/tmva/src/MethodBDT.cxx @@ -850,7 +850,12 @@ void TMVA::MethodBDT::InitEventSample( void ) if (fPairNegWeightsGlobal) PreProcessNegativeEventWeights(); } - if (!DoRegression() && !fSkipNormalization){ + if (DoRegression()) { + // Regression, no reweighting to do + } else if (DoMulticlass()) { + // Multiclass, only gradboost is supported. No reweighting. + } else if (!fSkipNormalization) { + // Binary classification. Log() << kDEBUG << "\t For classification trees, "<< Endl; Log() << kDEBUG << " \tthe effective number of backgrounds is scaled to match "<& eventSample auto &v = leaves[node]; auto target = e->GetTarget(cls); v.sumWeightTarget += target * weight; - v.sum2 += fabs(target) * (1.0-fabs(target)) * weight * weight; + v.sum2 += fabs(target) * (1.0 - fabs(target)) * weight; } for (auto &iLeave : leaves) { constexpr auto minValue = 1e-30; if (iLeave.second.sum2 < minValue) { iLeave.second.sum2 = minValue; } - iLeave.first->SetResponse(fShrinkage/DataInfo().GetNClasses() * iLeave.second.sumWeightTarget/iLeave.second.sum2); + const Double_t K = DataInfo().GetNClasses(); + iLeave.first->SetResponse(fShrinkage * (K - 1) / K * iLeave.second.sumWeightTarget / iLeave.second.sum2); } //call UpdateTargets before next tree is grown