In [3]:
import ROOT
from get_dataset import get_dataset

In [4]:
# create the 'moons' dataset with 10k points
# saves signal and background in ROOT file 'moons.root' with trees 'signal' and 'background'
get_dataset('moons', 10000)

Info in <TCanvas::Print>: png file moons.png has been created


In [5]:
input_file = ROOT.TFile('moons.root', 'READ')  # read in the data file
signal_tree = input_file.Get('signal')  # identify the signal tree
background_tree = input_file.Get('background')  # identify the background tree
output_file = ROOT.TFile('training-output.root', "RECREATE")  # create an output file for the training results
factory = ROOT.TMVA.Factory('TMVAClassification', output_file)  # setup a TMVA factory

In [6]:
dataloader = ROOT.TMVA.DataLoader('dataset')  # name our dataset 'dataset' because I'm uncreative
# add floating point x and y variables
dataloader.AddVariable('x', 'F')
dataloader.AddVariable('y', 'F')

In [7]:
# add signal and background trees
dataloader.AddSignalTree(signal_tree)
dataloader.AddBackgroundTree(background_tree)

<HEADER> DataSetInfo              : [dataset] : Added class "Signal"
                         : Add Tree signal of type Signal with 5000 events
<HEADER> DataSetInfo              : [dataset] : Added class "Background"
                         : Add Tree background of type Background with 5000 events


In [8]:
# set up the training and testing datapoints
cut = ROOT.TCut('')
dataloader.PrepareTrainingAndTestTree(cut, 'SplitMode=Random:NormMode=NumEvents')

                         : Dataset[dataset] : Class index : 0  name : Signal
                         : Dataset[dataset] : Class index : 1  name : Background


In [9]:
# book the BDT as a method to use
# apply training options here
factory.BookMethod(dataloader,
                   ROOT.TMVA.Types.kBDT,
                   'BDT',
                   'nTrees=100:maxDepth=4:BoostType=AdaBoost')

<cppyy.gbl.TMVA.MethodBDT object at 0x7feeeb7a3c00>

<HEADER> Factory                  : Booking method: BDT
                         : 
                         : Rebuilding Dataset dataset
                         : Building event vectors for type 2 Signal
                         : Dataset[dataset] :  create input formulas for tree signal
                         : Building event vectors for type 2 Background
                         : Dataset[dataset] :  create input formulas for tree background
<HEADER> DataSetFactory           : [dataset] : Number of events in input trees
                         : 
                         : 
                         : Number of training and testing events
                         : ---------------------------------------------------------------------------
                         : Signal     -- training events            : 2500
                         : Signal     -- testing events             : 2500
                         : Signal     -- training and testing events: 5000
                   

In [10]:
# let TMVA do its magic
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()
output_file.Close()

<HEADER> Factory                  : Train all methods
<HEADER> Factory                  : [dataset] : Create Transformation "I" with events from all classes.
                         : 
<HEADER>                          : Transformation, Variable selection : 
                         : Input : variable 'x' <---> Output : variable 'x'
                         : Input : variable 'y' <---> Output : variable 'y'
<HEADER> TFHandler_Factory        : Variable        Mean        RMS   [        Min        Max ]
                         : -----------------------------------------------------------
                         :        x:    0.48582    0.89197   [    -1.5168     2.4488 ]
                         :        y:    0.49447    0.35098   [   -0.32595     1.2310 ]
                         : -----------------------------------------------------------
                         : Ranking input variables (method unspecific)...
<HEADER> IdTransformation         : Ranking result (top variable is be

1%, time left: unknown
8%, time left: 0 sec
14%, time left: 0 sec
20%, time left: 0 sec
26%, time left: 0 sec
33%, time left: 0 sec
39%, time left: 0 sec
45%, time left: 0 sec
51%, time left: 0 sec
58%, time left: 0 sec
64%, time left: 0 sec
70%, time left: 0 sec
76%, time left: 0 sec
83%, time left: 0 sec
89%, time left: 0 sec
95%, time left: 0 sec
0%, time left: unknown
7%, time left: 0 sec
13%, time left: 0 sec
19%, time left: 0 sec
25%, time left: 0 sec
32%, time left: 0 sec
38%, time left: 0 sec
44%, time left: 0 sec
50%, time left: 0 sec
57%, time left: 0 sec
63%, time left: 0 sec
69%, time left: 0 sec
75%, time left: 0 sec
82%, time left: 0 sec
88%, time left: 0 sec
94%, time left: 0 sec
0%, time left: unknown
7%, time left: 0 sec
13%, time left: 0 sec
19%, time left: 0 sec
25%, time left: 0 sec
32%, time left: 0 sec
38%, time left: 0 sec
44%, time left: 0 sec
50%, time left: 0 sec
57%, time left: 0 sec
63%, time left: 0 sec
69%, time left: 0 sec
75%, time left: 0 sec
82%, time 