From a780e91b641526f170b8197abacbcfd9b69121f9 Mon Sep 17 00:00:00 2001 From: Kim Albertsson Date: Tue, 18 Jul 2017 18:55:34 +0200 Subject: [PATCH] TMVA BDT grad boost (#706) * MethodBDT annotate gradboost * MethodBDT remove binary reweighting for multiclass * MethodBDT increase response scaling to match lit. for multiclass See Jerome H. Friedman "Greedy Function Approximation: A Gradient Boosting Machine" for details (p.1201). * Fix gradboost response event weight The response of the gradient boosting was calculated using `sum(|y_ik|(1-|y_ik|)) * w^2` for each decision node region where w is the event weight and y_ik is the pseudoresidual. This fix changes the weighting to `sum(|y_ik|(1-|y_ik|)) * w`, that is the first expression is weighted linearly per event. Symptoms include heavy bias towards events with w >> 1 and elimination of events with w << 1. * clang-format --- tmva/tmva/src/DecisionTree.cxx | 8 +++++--- tmva/tmva/src/MethodBDT.cxx | 16 +++++++++++++--- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/tmva/tmva/src/DecisionTree.cxx b/tmva/tmva/src/DecisionTree.cxx index 05063dcb6515f..052a791feee30 100644 --- a/tmva/tmva/src/DecisionTree.cxx +++ b/tmva/tmva/src/DecisionTree.cxx @@ -1714,10 +1714,12 @@ Double_t TMVA::DecisionTree::CheckEvent( const TMVA::Event * e, Bool_t UseYesNoL } - if ( DoRegression() ){ + if (DoRegression()) { + // Note: This path is also taken for MethodBDT with analysis type + // kClassification and kMulticlass when using GradBoost. 
+ // See TMVA::MethodBDT::InitGradBoost return current->GetResponse(); - } - else { + } else { if (UseYesNoLeaf) return Double_t ( current->GetNodeType() ); else return current->GetPurity(); } diff --git a/tmva/tmva/src/MethodBDT.cxx b/tmva/tmva/src/MethodBDT.cxx index 1dd547c9be08a..d0deefae4b564 100644 --- a/tmva/tmva/src/MethodBDT.cxx +++ b/tmva/tmva/src/MethodBDT.cxx @@ -850,7 +850,12 @@ void TMVA::MethodBDT::InitEventSample( void ) if (fPairNegWeightsGlobal) PreProcessNegativeEventWeights(); } - if (!DoRegression() && !fSkipNormalization){ + if (DoRegression()) { + // Regression, no reweighting to do + } else if (DoMulticlass()) { + // Multiclass, only gradboost is supported. No reweighting. + } else if (!fSkipNormalization) { + // Binary classification. Log() << kDEBUG << "\t For classification trees, "<< Endl; Log() << kDEBUG << " \tthe effective number of backgrounds is scaled to match "<& eventSample auto &v = leaves[node]; auto target = e->GetTarget(cls); v.sumWeightTarget += target * weight; - v.sum2 += fabs(target) * (1.0-fabs(target)) * weight * weight; + v.sum2 += fabs(target) * (1.0 - fabs(target)) * weight; } for (auto &iLeave : leaves) { constexpr auto minValue = 1e-30; if (iLeave.second.sum2 < minValue) { iLeave.second.sum2 = minValue; } - iLeave.first->SetResponse(fShrinkage/DataInfo().GetNClasses() * iLeave.second.sumWeightTarget/iLeave.second.sum2); + const Double_t K = DataInfo().GetNClasses(); + iLeave.first->SetResponse(fShrinkage * (K - 1) / K * iLeave.second.sumWeightTarget / iLeave.second.sum2); } //call UpdateTargets before next tree is grown