In [11]:
import ROOT
from ROOT import TMVA

In [12]:
#Creating output file, factory object and opening input files
# "RECREATE" creates TMVAcheck.root, replacing any existing file of that name.
outputFile = ROOT.TFile("TMVAcheck.root", "RECREATE")
# The Factory gets the job name "tmvaTest", the output file, and an empty option
# string (no Factory options are set explicitly here).
factory = TMVA.Factory("tmvaTest", outputFile, "")
# The DataLoader is named "dataset"; this name shows up as "[dataset]" in the TMVA log.
dataLoader = TMVA.DataLoader("dataset")

# Input files are opened without a mode argument, i.e. with TFile's default (read) mode.
# NOTE(review): the file names are relative paths, so the notebook presumably must be
# run from the directory that contains them — confirm.
trainFile = ROOT.TFile("smalltrainData.root")
testFile = ROOT.TFile("smalltestData.root")

In [13]:
#Getting the TTree objects from input files
def _get_tree(rootFile, treeName):
    """Fetch the object stored under ``treeName`` in ``rootFile``.

    ``TFile.Get`` does not raise when the key is absent; it hands back a null
    pointer, which would only fail later with an unhelpful null-pointer error.
    Checking here gives an immediate message naming the missing tree and file.

    Raises:
        RuntimeError: if ``rootFile`` holds no object called ``treeName``.
    """
    tree = rootFile.Get(treeName)
    # A null PyROOT proxy evaluates to False.
    if not tree:
        raise RuntimeError(f'no object named "{treeName}" in file {rootFile.GetName()}')
    return tree

# Training sample: signal ("sig") and background ("bkg") trees plus their entry counts.
sigTrain = _get_tree(trainFile, "sig")
bkgTrain = _get_tree(trainFile, "bkg")
nSigTrain = sigTrain.GetEntries()
nBkgTrain = bkgTrain.GetEntries()

# Testing sample: same tree names, read from the separate test file.
sigTest = _get_tree(testFile, "sig")
bkgTest = _get_tree(testFile, "bkg")
nSigTest = sigTest.GetEntries()
nBkgTest = bkgTest.GetEntries()

In [14]:
#Print num events: training counts on the first line, testing counts on the second
print(f"{nSigTrain}, {nBkgTrain}")
print(f"{nSigTest}, {nBkgTest}")

#GlobalEventWeights
#Signal keeps unit weight; every background event is scaled by nSigTrain/nBkgTrain,
#so the weighted background training yield equals the signal training yield.
if nBkgTrain == 0:
    # Fail with a clear message instead of a bare ZeroDivisionError below.
    raise ValueError("background training tree is empty; cannot compute bkgWeight")
sigWeight = 1.0
bkgWeight = float(nSigTrain)/float(nBkgTrain)
print(f"{sigWeight}, {bkgWeight}")

104918, 1565055
105370, 1563066
1.0, 0.0670378996265307


In [15]:
# Register the four input trees with the DataLoader. Each call passes the tree, its
# global weight, and an explicit tree type (kTraining or kTesting), so the training
# and testing trees are tagged separately rather than added as one undivided sample.
# Note that bkgWeight was derived from the *training* counts and is reused unchanged
# for the testing background tree.
dataLoader.AddSignalTree(sigTrain, sigWeight, TMVA.Types.kTraining)
dataLoader.AddBackgroundTree(bkgTrain, bkgWeight, TMVA.Types.kTraining)
dataLoader.AddSignalTree(sigTest, sigWeight, TMVA.Types.kTesting)
dataLoader.AddBackgroundTree(bkgTest, bkgWeight, TMVA.Types.kTesting)

<HEADER> DataSetInfo              : [dataset] : Added class "Signal"
                         : Add Tree sig of type Signal with 104918 events
<HEADER> DataSetInfo              : [dataset] : Added class "Background"
                         : Add Tree bkg of type Background with 1565055 events
                         : Add Tree sig of type Signal with 105370 events
                         : Add Tree bkg of type Background with 1563066 events


In [16]:
#Define the input variables that shall be used for the MVA training
#(the variables used in the expression must exist in the original TTree).
#The second argument is the TMVA variable type code; 'F' declares a floating-point variable.
#Four variables are active. The two commented-out lines (ADC_mean, exit_dist) are
#currently excluded from the training.
# dataLoader.AddVariable("ADC_mean", 'F')
dataLoader.AddVariable("nhits_min", 'F')
dataLoader.AddVariable("entry_dist", 'F')
# dataLoader.AddVariable("exit_dist", 'F')
dataLoader.AddVariable("docasqrx_max", 'F')
dataLoader.AddVariable("docasqry_max", 'F')

In [17]:
# Book a boosted decision tree (TMVA.Types.kBDT) under the method title "BDTcheck".
# The last argument is a colon-separated list of key=value options handed to TMVA.
# NOTE(review): the TMVA Users Guide describes Shrinkage as the learning rate for
# GradBoost; with BoostType=AdaBoost it presumably has no effect (AdaBoostBeta is the
# AdaBoost counterpart) — confirm and drop or replace the option.
# NOTE(review): MinNodeSize=2.5 is presumably a percentage of training events per
# leaf node — confirm against the TMVA option reference.
# As the last expression of the cell, the returned method object is echoed as cell output.
factory.BookMethod(dataLoader, TMVA.Types.kBDT, "BDTcheck","NTrees=300:BoostType=AdaBoost:MaxDepth=3:MinNodeSize=2.5:Shrinkage=0.1:nCuts=20:UseBaggedBoost=True:BaggedSampleFraction=0.5")


<cppyy.gbl.TMVA.MethodBDT object at 0x62ded7a9ced0>

<HEADER> Factory                  : Booking method: BDTcheck
                         : 
                         : Rebuilding Dataset dataset
                         : Building event vectors for type 0 Signal
                         : Dataset[dataset] :  create input formulas for tree sig
                         : Building event vectors for type 1 Signal
                         : Dataset[dataset] :  create input formulas for tree sig
                         : Building event vectors for type 0 Background
                         : Dataset[dataset] :  create input formulas for tree bkg
                         : Building event vectors for type 1 Background
                         : Dataset[dataset] :  create input formulas for tree bkg
<HEADER> DataSetFactory           : [dataset] : Number of events in input trees
                         : 
                         : 
                         : Dataset[dataset] : Weight renormalisation mode: "EqualNumEvents": renormalises all eve

In [18]:
#Train, Test and Evaluate all methods
# The three Factory steps must run in this order: training first, then applying the
# trained methods to the test sample, then the evaluation/comparison step.
# "All methods" here means everything booked on the factory; in the visible cells that
# is only "BDTcheck".
# NOTE(review): results are presumably written into outputFile (TMVAcheck.root), which
# was passed to the Factory constructor — confirm.
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()

<HEADER> Factory                  : Train all methods
<HEADER> Factory                  : [dataset] : Create Transformation "I" with events from all classes.
                         : 
<HEADER>                          : Transformation, Variable selection : 
                         : Input : variable 'nhits_min' <---> Output : variable 'nhits_min'
                         : Input : variable 'entry_dist' <---> Output : variable 'entry_dist'
                         : Input : variable 'docasqrx_max' <---> Output : variable 'docasqrx_max'
                         : Input : variable 'docasqry_max' <---> Output : variable 'docasqry_max'
<HEADER> TFHandler_Factory        :     Variable            Mean            RMS    [        Min            Max ]
                         : -------------------------------------------------------------------------------
                         :    nhits_min:        62.386        49.067   [        0.0000        713.00 ]
                         :   entry_

0%, time left: unknown
6%, time left: 149 sec
13%, time left: 141 sec
19%, time left: 129 sec
25%, time left: 119 sec
31%, time left: 111 sec
38%, time left: 106 sec
44%, time left: 94 sec
50%, time left: 81 sec
56%, time left: 69 sec
63%, time left: 58 sec
69%, time left: 48 sec
75%, time left: 38 sec
81%, time left: 28 sec
88%, time left: 18 sec
94%, time left: 9 sec
0%, time left: unknown
6%, time left: 18 sec
12%, time left: 17 sec
18%, time left: 15 sec
25%, time left: 13 sec
31%, time left: 12 sec
37%, time left: 11 sec
43%, time left: 10 sec
50%, time left: 8 sec
56%, time left: 7 sec
62%, time left: 6 sec
68%, time left: 5 sec
75%, time left: 4 sec
81%, time left: 3 sec
87%, time left: 2 sec
93%, time left: 1 sec
0%, time left: unknown
6%, time left: 13 sec
12%, time left: 12 sec
18%, time left: 12 sec
25%, time left: 10 sec
31%, time left: 10 sec
37%, time left: 9 sec
43%, time left: 8 sec
50%, time left: 7 sec
56%, time left: 6 sec
62%, time left: 5 sec
68%, time left: 4 sec


In [19]:
#Save output and finish up
# Take the name from the TFile itself so the message always matches the file that was
# actually written (the previous hard-coded "TMVA.root" did not match "TMVAcheck.root").
outputFileName = outputFile.GetName()
outputFile.Close()
print(f"wrote root file {outputFileName}")
print("TMVA analysis is done")

wrote root file TMVA.root
TMVA analysis is done
