In [1]:
import os

import ROOT
from ROOT import TMVA

In [2]:
# Create the output file, the TMVA factory and data loader, then open the
# input files. Every file is checked right after it is opened so that a bad
# path stops the notebook here instead of after the long training step.
OUTPUT_PATH = "BDTresultfiles/TMVAfinal10.root"
TRAIN_PATH = "trainData.root"
TEST_PATH = "testData.root"

# Make sure the output directory exists before opening the file for writing.
outputDir = os.path.dirname(OUTPUT_PATH)
if outputDir:
    os.makedirs(outputDir, exist_ok=True)

outputFile = ROOT.TFile(OUTPUT_PATH, "RECREATE")
if outputFile.IsZombie():
    raise OSError(f"could not create ROOT output file '{OUTPUT_PATH}'")

factory = TMVA.Factory("tmvaTest", outputFile, "")
dataLoader = TMVA.DataLoader("dataset")

trainFile = ROOT.TFile(TRAIN_PATH)
testFile = ROOT.TFile(TEST_PATH)
for inputPath, inputFile in ((TRAIN_PATH, trainFile), (TEST_PATH, testFile)):
    if inputFile.IsZombie():
        raise OSError(f"could not open ROOT input file '{inputPath}'")

In [3]:
# Fetch the "sig" and "bkg" trees from the training and testing files and
# record how many entries each one holds.
def _getTree(rootFile, treeName):
    """Return the object stored under `treeName` in `rootFile`.

    Raises:
        KeyError: if `rootFile.Get(treeName)` yields a null object, so a
            wrong key is reported by name instead of surfacing later as a
            null-pointer error on the first method call.
    """
    tree = rootFile.Get(treeName)
    if not tree:
        raise KeyError(f"no object named '{treeName}' in '{rootFile.GetName()}'")
    return tree


sigTrain = _getTree(trainFile, "sig")
bkgTrain = _getTree(trainFile, "bkg")
nSigTrain = sigTrain.GetEntries()
nBkgTrain = bkgTrain.GetEntries()

sigTest = _getTree(testFile, "sig")
bkgTest = _getTree(testFile, "bkg")
nSigTest = sigTest.GetEntries()
nBkgTest = bkgTest.GetEntries()

In [4]:
# Report the entry counts as "signal, background": training first, then testing.
print(nSigTrain, nBkgTrain, sep=", ")
print(nSigTest, nBkgTest, sep=", ")

# Global per-tree weights. Signal keeps unit weight; background is scaled by
# the training signal/background ratio, so that the weighted number of
# background training entries equals the number of signal training entries.
sigWeight, bkgWeight = 1.0, float(nSigTrain) / float(nBkgTrain)
print(sigWeight, bkgWeight, sep=", ")

1049610, 15649959
1048532, 15652710
1.0, 0.06706790733445372


In [5]:
# Register the signal and background trees with the data loader, training
# sample first and testing sample second. The same global weights are used
# for both samples.
for treeType, sigTree, bkgTree in (
    (TMVA.Types.kTraining, sigTrain, bkgTrain),
    (TMVA.Types.kTesting, sigTest, bkgTest),
):
    dataLoader.AddSignalTree(sigTree, sigWeight, treeType)
    dataLoader.AddBackgroundTree(bkgTree, bkgWeight, treeType)

<HEADER> DataSetInfo              : [dataset] : Added class "Signal"
                         : Add Tree sig of type Signal with 1049610 events
<HEADER> DataSetInfo              : [dataset] : Added class "Background"
                         : Add Tree bkg of type Background with 15649959 events
                         : Add Tree sig of type Signal with 1048532 events
                         : Add Tree bkg of type Background with 15652710 events


In [6]:
# Input variables for the MVA training, registered in this order with type
# code 'F'. Each name must exist in the original TTree.
inputVariables = (
    "ADC_mean",
    "nhits_min",
    "entry_dist",
    "exit_dist",
    "docasqrx_max",
    "docasqry_max",
)
for variableName in inputVariables:
    dataLoader.AddVariable(variableName, 'F')

In [7]:
# Book a kBDT method named "BDTfinal10". The option string is written one
# setting per line; the adjacent literals concatenate into a single
# colon-separated string.
bdtOptions = (
    "NTrees=800"
    ":BoostType=AdaBoost"
    ":MaxDepth=5"
    ":MinNodeSize=1.0"
    ":AdaBoostBeta=0.5"
    ":nCuts=50"
    ":UseBaggedBoost=True"
    ":BaggedSampleFraction=0.6"
)
# Kept as the last expression so the cell still displays the returned method object.
factory.BookMethod(dataLoader, TMVA.Types.kBDT, "BDTfinal10", bdtOptions)


<cppyy.gbl.TMVA.MethodBDT object at 0x63bb359f8f50>

<HEADER> Factory                  : Booking method: BDTfinal10
                         : 
                         : Rebuilding Dataset dataset
                         : Building event vectors for type 0 Signal
                         : Dataset[dataset] :  create input formulas for tree sig
                         : Building event vectors for type 1 Signal
                         : Dataset[dataset] :  create input formulas for tree sig
                         : Building event vectors for type 0 Background
                         : Dataset[dataset] :  create input formulas for tree bkg
                         : Building event vectors for type 1 Background
                         : Dataset[dataset] :  create input formulas for tree bkg
<HEADER> DataSetFactory           : [dataset] : Number of events in input trees
                         : 
                         : 
                         : Dataset[dataset] : Weight renormalisation mode: "EqualNumEvents": renormalises all e

In [8]:
# Run the three factory stages for every booked method, strictly in this
# order: training, then testing, then evaluation.
factoryStages = (
    factory.TrainAllMethods,
    factory.TestAllMethods,
    factory.EvaluateAllMethods,
)
for runStage in factoryStages:
    runStage()

<HEADER> Factory                  : Train all methods
<HEADER> Factory                  : [dataset] : Create Transformation "I" with events from all classes.
                         : 
<HEADER>                          : Transformation, Variable selection : 
                         : Input : variable 'ADC_mean' <---> Output : variable 'ADC_mean'
                         : Input : variable 'nhits_min' <---> Output : variable 'nhits_min'
                         : Input : variable 'entry_dist' <---> Output : variable 'entry_dist'
                         : Input : variable 'exit_dist' <---> Output : variable 'exit_dist'
                         : Input : variable 'docasqrx_max' <---> Output : variable 'docasqrx_max'
                         : Input : variable 'docasqry_max' <---> Output : variable 'docasqry_max'
<HEADER> TFHandler_Factory        :     Variable            Mean            RMS    [        Min            Max ]
                         : ------------------------------------

0%, time left: unknown
6%, time left: 2 hrs : 37 mins
12%, time left: 2 hrs : 25 mins
18%, time left: 2 hrs : 43 mins
25%, time left: 2 hrs : 29 mins
31%, time left: 2 hrs : 12 mins
37%, time left: 2 hrs : 2 mins
43%, time left: 1 hr : 51 mins
50%, time left: 1 hr : 38 mins
56%, time left: 1 hr : 25 mins
62%, time left: 1 hr : 14 mins
68%, time left: 1 hr : 2 mins
75%, time left: 50 mins
81%, time left: 38 mins
87%, time left: 26 mins
93%, time left: 13 mins
0%, time left: unknown
6%, time left: 1 hr : 20 mins
12%, time left: 1 hr : 6 mins
18%, time left: 53 mins
25%, time left: 47 mins
31%, time left: 41 mins
37%, time left: 35 mins
43%, time left: 31 mins
50%, time left: 25 mins
56%, time left: 22 mins
62%, time left: 19 mins
68%, time left: 16 mins
75%, time left: 12 mins
81%, time left: 9 mins
87%, time left: 6 mins
93%, time left: 177 sec
0%, time left: unknown
6%, time left: 48 mins
12%, time left: 32 mins
18%, time left: 26 mins
25%, time left: 22 mins
31%, time left: 20 mins
37

In [9]:
# Save output and finish up.
# Take the path from the file object so the message names the file that was
# actually opened for writing instead of a hard-coded name.
outputPath = outputFile.GetName()
outputFile.Close()
print(f"wrote root file {outputPath}")
print("TMVA analysis is done")

wrote root file TMVA.root
TMVA analysis is done
