TMVA BDT grad boost (#706)
* MethodBDT annotate gradboost

* MethodBDT remove binary reweighting for multiclass

* MethodBDT increase response scaling to match lit. for multiclass

See Jerome H. Friedman, "Greedy Function Approximation: A Gradient
Boosting Machine" for details (p. 1201).

* Fix gradboost response event weight

The response of the gradient boosting was calculated using
`sum(|y_ik|(1-|y_ik|)) * w^2` for each decision node region
where w is the event weight and y_ik is the pseudoresidual.

This fix changes the weighting to `sum(|y_ik|(1-|y_ik|)) * w`,
that is, each event's contribution is weighted linearly by w.

Symptoms include heavy bias towards events with w >> 1 and
elimination of events with w << 1.

* clang-format
ashlaban authored and lmoneta committed Jul 18, 2017
1 parent 8b0280b commit a780e91
Showing 2 changed files with 18 additions and 6 deletions.
8 changes: 5 additions & 3 deletions tmva/tmva/src/DecisionTree.cxx
@@ -1714,10 +1714,12 @@ Double_t TMVA::DecisionTree::CheckEvent( const TMVA::Event * e, Bool_t UseYesNoL

}

if ( DoRegression() ){
if (DoRegression()) {
// Note: This path is also taken for MethodBDT with analysis type
// kClassification and kMulticlass when using GradBoost.
// See TMVA::MethodBDT::InitGradBoost
return current->GetResponse();
}
else {
} else {
if (UseYesNoLeaf) return Double_t ( current->GetNodeType() );
else return current->GetPurity();
}
16 changes: 13 additions & 3 deletions tmva/tmva/src/MethodBDT.cxx
@@ -850,7 +850,12 @@ void TMVA::MethodBDT::InitEventSample( void )
if (fPairNegWeightsGlobal) PreProcessNegativeEventWeights();
}

if (!DoRegression() && !fSkipNormalization){
if (DoRegression()) {
// Regression, no reweighting to do
} else if (DoMulticlass()) {
// Multiclass, only gradboost is supported. No reweighting.
} else if (!fSkipNormalization) {
// Binary classification.
Log() << kDEBUG << "\t<InitEventSample> For classification trees, "<< Endl;
Log() << kDEBUG << " \tthe effective number of backgrounds is scaled to match "<<Endl;
Log() << kDEBUG << " \tthe signal. Otherwise the first boosting step would do 'just that'!"<<Endl;
@@ -1281,8 +1286,12 @@ void TMVA::MethodBDT::Train()
<< "Please change boost option accordingly (GradBoost)."
<< Endl;
}

UInt_t nClasses = DataInfo().GetNClasses();
for (UInt_t i=0;i<nClasses;i++){
// Careful: If fSepType is nullptr, the tree will be considered a regression tree and
// use the correct output for gradboost (response rather than yesnoleaf) in checkEvent.
// See TMVA::MethodBDT::InitGradBoost.
fForest.push_back( new DecisionTree( fSepType, fMinNodeSize, fNCuts, &(DataInfo()), i,
fRandomisedTrees, fUseNvars, fUsePoissonNvars, fMaxDepth,
itree*nClasses+i, fNodePurityLimit, itree*nClasses+1));
@@ -1492,14 +1501,15 @@ Double_t TMVA::MethodBDT::GradBoost(std::vector<const TMVA::Event*>& eventSample
auto &v = leaves[node];
auto target = e->GetTarget(cls);
v.sumWeightTarget += target * weight;
v.sum2 += fabs(target) * (1.0-fabs(target)) * weight * weight;
v.sum2 += fabs(target) * (1.0 - fabs(target)) * weight;
}
for (auto &iLeave : leaves) {
constexpr auto minValue = 1e-30;
if (iLeave.second.sum2 < minValue) {
iLeave.second.sum2 = minValue;
}
iLeave.first->SetResponse(fShrinkage/DataInfo().GetNClasses() * iLeave.second.sumWeightTarget/iLeave.second.sum2);
const Double_t K = DataInfo().GetNClasses();
iLeave.first->SetResponse(fShrinkage * (K - 1) / K * iLeave.second.sumWeightTarget / iLeave.second.sum2);
}

//call UpdateTargets before next tree is grown
