In [2]:
import ROOT
from ROOT import TMVA

In [3]:
#Creating output file, factory object and opening input files
outputFile = ROOT.TFile("TMVAcheck.root", "RECREATE")
factory = TMVA.Factory("tmvaTest", outputFile, "")
dataLoader = TMVA.DataLoader("dataset")

trainFile = ROOT.TFile("smalltrainData.root")
testFile = ROOT.TFile("smalltestData.root")

In [4]:
#Getting the TTree objects from input files
sigTrain = trainFile.Get("sig")
bkgTrain = trainFile.Get("bkg")
nSigTrain = sigTrain.GetEntries()
nBkgTrain = bkgTrain.GetEntries()

sigTest = testFile.Get("sig")
bkgTest = testFile.Get("bkg")
nSigTest = sigTest.GetEntries()
nBkgTest = bkgTest.GetEntries()

In [5]:
#Print num events
print(f"{nSigTrain}, {nBkgTrain}")
print(f"{nSigTest}, {nBkgTest}")

#GlobalEventWeights
#Thus for equivalent weights
sigWeight = 1.0
bkgWeight = float(nSigTrain)/float(nBkgTrain)
print(f"{sigWeight}, {bkgWeight}")

104918, 1565055
105370, 1563066
1.0, 0.0670378996265307


In [6]:
dataLoader.AddSignalTree(sigTrain, sigWeight, TMVA.Types.kTraining)
dataLoader.AddBackgroundTree(bkgTrain, bkgWeight, TMVA.Types.kTraining)
dataLoader.AddSignalTree(sigTest, sigWeight, TMVA.Types.kTesting)
dataLoader.AddBackgroundTree(bkgTest, bkgWeight, TMVA.Types.kTesting)

<HEADER> DataSetInfo              : [dataset] : Added class "Signal"
                         : Add Tree sig of type Signal with 104918 events
<HEADER> DataSetInfo              : [dataset] : Added class "Background"
                         : Add Tree bkg of type Background with 1565055 events
                         : Add Tree sig of type Signal with 105370 events
                         : Add Tree bkg of type Background with 1563066 events


In [7]:
#Define the input variables that shall be used for the MVA training
#(the variables used in the expression must exist in the original TTree).
dataLoader.AddVariable("ADC_mean", 'F')
dataLoader.AddVariable("nhits_min", 'F')
dataLoader.AddVariable("entry_dist", 'F')
dataLoader.AddVariable("exit_dist", 'F')
dataLoader.AddVariable("docasqrx_max", 'F')
dataLoader.AddVariable("docasqry_max", 'F')

In [10]:
factory.BookMethod(dataLoader, TMVA.Types.kBDT, "BDTcheck","NTrees=800:BoostType=Grad:MaxDepth=3:MinNodeSize=1.0:Shrinkage=0.05:nCuts=200:UseBaggedBoost=True:BaggedSampleFraction=0.6")


TypeError: none of the 3 overloaded methods succeeded. Full details:
  TMVA::MethodBase* TMVA::Factory::BookMethod(TMVA::DataLoader* loader, TString theMethodName, TString methodTitle, TString theOption = "") =>
    TypeError: could not convert argument 2
  TMVA::MethodBase* TMVA::Factory::BookMethod(TMVA::DataLoader* loader, TMVA::Types::EMVA theMethod, TString methodTitle, TString theOption = "") =>
    runtime_error: FATAL error
  TMVA::MethodBase* TMVA::Factory::BookMethod(TMVA::DataLoader*, TMVA::Types::EMVA, TString, TString, TMVA::Types::EMVA, TString) =>
    TypeError: takes at least 6 arguments (4 given)

<FATAL>                          : Booking failed since method with title <BDTcheck> already exists in with DataSet Name <dataset>  
***> abort program execution
<FATAL>                          : Booking failed since method with title <BDTcheck> already exists in with DataSet Name <dataset>  Booking failed since method with title <BDTcheck> already exists in with DataSet Name <dataset>  
***> abort program execution


In [11]:
#Train, Test and Evaluate all methods
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()

<HEADER> Factory                  : Booking failed since method with title <BDTcheck> already exists in with DataSet Name <dataset>  Booking failed since method with title <BDTcheck> already exists in with DataSet Name <dataset>  Train all methods
<HEADER> Factory                  : [dataset] : Create Transformation "I" with events from all classes.
                         : 
<HEADER>                          : Transformation, Variable selection : 
                         : Input : variable 'ADC_mean' <---> Output : variable 'ADC_mean'
                         : Input : variable 'nhits_min' <---> Output : variable 'nhits_min'
                         : Input : variable 'entry_dist' <---> Output : variable 'entry_dist'
                         : Input : variable 'exit_dist' <---> Output : variable 'exit_dist'
                         : Input : variable 'docasqrx_max' <---> Output : variable 'docasqrx_max'
                         : Input : variable 'docasqry_max' <---> Output : variab

0%, time left: unknown
6%, time left: 12 mins
12%, time left: 12 mins
18%, time left: 11 mins
25%, time left: 10 mins
31%, time left: 9 mins
37%, time left: 8 mins
43%, time left: 7 mins
50%, time left: 6 mins
56%, time left: 5 mins
62%, time left: 5 mins
68%, time left: 260 sec
75%, time left: 206 sec
81%, time left: 153 sec
87%, time left: 102 sec
93%, time left: 50 sec
0%, time left: unknown
6%, time left: 46 sec
12%, time left: 42 sec
18%, time left: 38 sec
25%, time left: 33 sec
31%, time left: 29 sec
37%, time left: 26 sec
43%, time left: 23 sec
50%, time left: 20 sec
56%, time left: 17 sec
62%, time left: 15 sec
68%, time left: 13 sec
75%, time left: 10 sec
81%, time left: 7 sec
87%, time left: 5 sec
93%, time left: 2 sec
0%, time left: unknown
6%, time left: 39 sec
12%, time left: 33 sec
18%, time left: 29 sec
25%, time left: 25 sec
31%, time left: 23 sec
37%, time left: 20 sec
43%, time left: 18 sec
50%, time left: 16 sec
56%, time left: 14 sec
62%, time left: 12 sec
68%, time

In [12]:
#Save output and finish up
outputFile.Close()
print("wrote root file TMVA.root")
print("TMVA analysis is done")

wrote root file TMVA.root
TMVA analysis is done
