In [1]:
import ROOT
from ROOT import TMVA

In [4]:
#Creating output file, factory object and opening input files
#"TMVA.root" is opened with the "RECREATE" option and handed to the Factory
#(job name "tmvaTest", empty option string). The DataLoader is named "dataset".
#NOTE(review): ROOT keeps a global "current directory" that presumably changes
#whenever a TFile is opened, so the order of these statements may matter --
#confirm before reordering.
outputFile = ROOT.TFile("TMVA.root", "RECREATE")
factory = TMVA.Factory("tmvaTest", outputFile, "")
dataLoader = TMVA.DataLoader("dataset")

#Input samples: one file for training, one for validation/testing.
#No open mode is passed -- presumably ROOT's default (read-only); TODO confirm.
trainFile = ROOT.TFile("../modelInputData/trainData.root")
validFile = ROOT.TFile("../modelInputData/validData.root")

In [None]:
#Getting the TTree objects from input files
#Each input file is read for a signal tree ("sig") and a background tree ("bkg").
sigTrain = trainFile.Get("sig")
bkgTrain = trainFile.Get("bkg")
sigTest = validFile.Get("sig")
bkgTest = validFile.Get("bkg")

#Fail early with a readable message if a tree is missing: in PyROOT a failed
#Get() hands back a null (falsy) object instead of raising, so without this
#check the problem would only show up later as an opaque null-pointer error.
for _file, _key, _tree in (
    (trainFile, "sig", sigTrain),
    (trainFile, "bkg", bkgTrain),
    (validFile, "sig", sigTest),
    (validFile, "bkg", bkgTest),
):
    if not _tree:
        raise RuntimeError(f'tree "{_key}" not found in {_file.GetName()}')

#Event counts per sample; used below for reporting and for the class weights
nSigTrain = sigTrain.GetEntries()
nBkgTrain = bkgTrain.GetEntries()
nSigTest = sigTest.GetEntries()
nBkgTest = bkgTest.GetEntries()

In [None]:
#Show the event counts as "signal, background":
#training sample on the first line, validation sample on the second
print(f"{nSigTrain}, {nBkgTrain}")
print(f"{nSigTest}, {nBkgTest}")

#Global per-class event weights.
#Background is scaled by nSigTrain/nBkgTrain so that its weighted yield in the
#training sample equals the signal yield; signal keeps unit weight.
#(True division of the two counts gives the same float as casting both first.)
bkgWeight = nSigTrain / nBkgTrain
sigWeight = 1.0
print(f"{sigWeight}, {bkgWeight}")

1259600, 14411455
420370, 4804556
1.0, 0.08740269459260012


In [8]:
#Register the input trees with the data loader, one split at a time
#(training first, then testing); within a split signal is added before
#background. Both splits use the same global class weights.
for _sigTree, _bkgTree, _treeType in (
    (sigTrain, bkgTrain, TMVA.Types.kTraining),
    (sigTest, bkgTest, TMVA.Types.kTesting),
):
    dataLoader.AddSignalTree(_sigTree, sigWeight, _treeType)
    dataLoader.AddBackgroundTree(_bkgTree, bkgWeight, _treeType)

<HEADER> DataSetInfo              : [dataset] : Added class "Signal"
                         : Add Tree sig of type Signal with 1259600 events
<HEADER> DataSetInfo              : [dataset] : Added class "Background"
                         : Add Tree bkg of type Background with 14411455 events
                         : Add Tree sig of type Signal with 420370 events
                         : Add Tree bkg of type Background with 4804556 events


In [9]:
#Input variables for the MVA training, registered in the order listed.
#Every name must be a branch (or expression of branches) of the input TTrees.
#All are declared with type code 'F'.
for _varName in (
    "ADC_mean",
    "nhits_min",
    "entry_dist",
    "exit_dist",
    "docasqrx_max",
    "docasqry_max",
):
    dataLoader.AddVariable(_varName, 'F')

In [10]:
#Booking Method
#A single BDT is booked under the title "BDT200"; the option string is passed
#to TMVA unchanged (200 trees, AdaBoost boosting).
bdtName = "BDT200"
bdtOptions = "NTrees=200:BoostType=AdaBoost"
factory.BookMethod(dataLoader, TMVA.Types.kBDT, bdtName, bdtOptions)

<cppyy.gbl.TMVA.MethodBDT object at 0x60094a588af0>

<HEADER> Factory                  : Booking method: BDT200
                         : 
                         : Rebuilding Dataset dataset
                         : Building event vectors for type 0 Signal
                         : Dataset[dataset] :  create input formulas for tree sig
                         : Building event vectors for type 1 Signal
                         : Dataset[dataset] :  create input formulas for tree sig
                         : Building event vectors for type 0 Background
                         : Dataset[dataset] :  create input formulas for tree bkg
                         : Building event vectors for type 1 Background
                         : Dataset[dataset] :  create input formulas for tree bkg
<HEADER> DataSetFactory           : [dataset] : Number of events in input trees
                         : 
                         : 
                         : Dataset[dataset] : Weight renormalisation mode: "EqualNumEvents": renormalises all event

In [11]:
#Train, Test and Evaluate all methods
#The three stages run strictly in this order for every method booked on the
#factory. Training is the long-running step (the progress log reports
#estimates of tens of minutes on the full sample).
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()

<HEADER> Factory                  : Train all methods
<HEADER> Factory                  : [dataset] : Create Transformation "I" with events from all classes.
                         : 
<HEADER>                          : Transformation, Variable selection : 
                         : Input : variable 'ADC_mean' <---> Output : variable 'ADC_mean'
                         : Input : variable 'nhits_min' <---> Output : variable 'nhits_min'
                         : Input : variable 'entry_dist' <---> Output : variable 'entry_dist'
                         : Input : variable 'exit_dist' <---> Output : variable 'exit_dist'
                         : Input : variable 'docasqrx_max' <---> Output : variable 'docasqrx_max'
                         : Input : variable 'docasqry_max' <---> Output : variable 'docasqry_max'
<HEADER> TFHandler_Factory        :     Variable            Mean            RMS    [        Min            Max ]
                         : ------------------------------------

0%, time left: unknown
7%, time left: 21 mins
13%, time left: 17 mins
19%, time left: 17 mins
25%, time left: 16 mins
32%, time left: 14 mins
38%, time left: 13 mins
44%, time left: 11 mins
50%, time left: 11 mins
57%, time left: 10 mins
63%, time left: 8 mins
69%, time left: 6 mins
75%, time left: 5 mins
82%, time left: 243 sec
88%, time left: 165 sec
94%, time left: 79 sec
0%, time left: unknown
6%, time left: 6 mins
12%, time left: 5 mins
18%, time left: 240 sec
25%, time left: 183 sec
31%, time left: 148 sec
37%, time left: 123 sec
43%, time left: 102 sec
50%, time left: 83 sec
56%, time left: 69 sec
62%, time left: 58 sec
68%, time left: 47 sec
75%, time left: 35 sec
81%, time left: 25 sec
87%, time left: 16 sec
93%, time left: 8 sec
0%, time left: unknown
6%, time left: 5 mins
12%, time left: 207 sec
18%, time left: 152 sec
25%, time left: 106 sec
31%, time left: 82 sec
37%, time left: 65 sec
43%, time left: 52 sec
50%, time left: 41 sec
56%, time left: 33 sec
62%, time left: 26 

In [12]:
#Save output and finish up
#Closing the output file comes first; the confirmation messages follow.
outputFile.Close()
#One print call; sep="\n" keeps the two messages on separate lines
print("wrote root file TMVA.root", "TMVA analysis is done", sep="\n")

wrote root file TMVA.root
TMVA analysis is done
