In [1]:
import ROOT

# Initialise TMVA before any Factory/DataLoader is created by requesting the
# TMVA::Tools instance. The returned object is not stored; as the last
# expression of the cell its repr is what the notebook displays.
ROOT.TMVA.Tools.Instance()

<cppyy.gbl.TMVA.Tools object at 0x3f1a5cd0>

In [2]:
# Location of the input ntuple.
# NOTE(review): this is an absolute, machine-specific path, so the notebook
# only runs where this directory exists -- consider moving it to a config cell.
path = '/media/leoperes/Acer/Users/le0_p/ntuples_grid_newVertex/00a_newVertex/'
input_name = path + 'atm_hA_BR.root'

# Open the input file and fail immediately if it is missing or unreadable,
# rather than hitting a null handle much later inside TMVA.
stree = ROOT.TFile.Open(input_name)
if not stree or stree.IsZombie():
    raise OSError("Could not open input file: " + input_name)

# Signal and background are both read from the same "ana/Atm" tree; the two
# handles are kept separate so each can be registered under its own class.
tree_s = stree.Get("ana/Atm")
tree_b = stree.Get("ana/Atm")
if not tree_s or not tree_b:
    raise KeyError('Tree "ana/Atm" not found in ' + input_name)

# Output file handed to the TMVA factory; "RECREATE" overwrites any
# existing BDT_Atm.root in the working directory.
fout = ROOT.TFile("BDT_Atm.root","RECREATE")
if not fout or fout.IsZombie():
    raise OSError("Could not create output file: BDT_Atm.root")

In [3]:

# Factory-level options, one entry per flag; they are joined with ':' into the
# single option string expected by TMVA::Factory("<JobName>", outputFile, "<options>").
factory_options = [
    "!V",                           # verbose output off
    "!Silent",                      # do not suppress TMVA output after the factory is created
    "Color",                        # coloured screen output
    "DrawProgressBar",              # show progress bars for training/testing/evaluation
    "Transformations=I;D;P;U;G,D",  # identity, decorrelation, PCA, uniform, Gaussianisation followed by decorrelation
    "AnalysisType=Classification",  # analysis type (alternatives: Regression, Multiclass, Auto)
]

# The factory writes its results into the already opened output file `fout`.
factory = ROOT.TMVA.Factory("TMVAClassification", fout, ":".join(factory_options))

In [None]:
# Container that collects the variable definitions and trees under the
# dataset name "dataset".
dataloader = ROOT.TMVA.DataLoader("dataset")

# "event" is carried along as a spectator, not used as a training input.
# NOTE(review): for AddSpectator the second positional argument may be
# interpreted as a title rather than a type code -- confirm against the
# TMVA::DataLoader API.
dataloader.AddSpectator("event", "I")

# Discriminating variables, in registration order, as (expression, type code).
# Disabled candidates are kept as comments at their original position.
# NOTE(review): the training log stored in this notebook still lists
# 'log(HighestTrackSummedADC)' as an input, so that output was presumably
# produced before the variable was disabled -- re-run the notebook to refresh it.
training_variables = [
    ("LongestTrack", "F"),                 # longest track in the event [cm]
    # ("log(HighestTrackSummedADC)", "F"), # highest track ADC deposition in the event [ADC units]
    ("PIDALongestTrack", "F"),             # PIDA of the longest track
    ("nTracks", "I"),                      # number of tracks in the event
    ("nShowers", "I"),                     # number of showers in the event
    # ("TotalMomentumP", "F"),             # total momentum magnitude of the event, tracks only
    ("NHits", "I"),                        # number of reconstructed 3D space points
    # ("NPrimaryDaughters", "I"),          # nShowers + nTracks in the event (redundant, can be dropped)
    ("log(HighestShowerSummedADC)", "F"),  # highest shower ADC deposition in the event [ADC units]
    # ("PrimaryPDGReco", "I"),             # Pandora result for nue or numu
    ("LargeShowerOpenAngle", "F"),         # opening angle of the large shower [rad]
    ("LongestShower", "F"),                # longest shower in the event [cm]
    # ("CosThetaDetTotalMom", "F"),        # cosine theta of the total momentum, detector coordinates
    # ("CosPhiDetTotalMom", "F"),          # cosine phi of the total momentum, detector coordinates
    ("FracTotalChargeLongTrack", "F"),     # fraction of the event's total charge carried by the longest track
    ("AvarageTrackLength", "F"),           # average track length in the event [cm]
    ("CVN_NCScore", "F"),                  # CVN score for NC
    ("EventRecoEnergy_Charge", "F"),       # event energy from the charge deposited in the event
]
for expression, var_type in training_variables:
    dataloader.AddVariable(expression, var_type)

In [10]:

# Register the input trees with the dataloader: tree_s under the signal
# class first, then tree_b under the background class.
for add_tree, tree in ((dataloader.AddSignalTree, tree_s),
                       (dataloader.AddBackgroundTree, tree_b)):
    add_tree(tree)


DataSetInfo              : [dataset] : Added class "Signal"
                         : Add Tree Atm of type Signal with 3019964 events
DataSetInfo              : [dataset] : Added class "Background"
                         : Add Tree Atm of type Background with 3019964 events


In [11]:
# Selection cuts applied on top of the input trees. Both classes share the
# same event-number window; only the required CCNC value differs
# (1 for signal, 0 for background).
event_window = "event < 2300000"
sigCut = ROOT.TCut("CCNC == 1 && " + event_window)
bgCut = ROOT.TCut("CCNC == 0 && " + event_window)

In [12]:
# Train/test splitting options, joined with ':' into one TMVA option string.
split_options = [
    "nTrain_Signal=638496",       # number of signal events used for training
    "nTrain_Background=1967354",  # number of background events used for training
    "SplitMode=Random",           # how events are assigned to the training/test samples
    "NormMode=None",              # no renormalisation of the event weights
    "!V",                         # verbose output off
]

# Apply the signal/background cuts and build the training and test samples.
dataloader.PrepareTrainingAndTestTree(sigCut, bgCut, ":".join(split_options))

In [13]:
# Configuration of the boosted decision tree, joined with ':' into one
# TMVA option string.
bdt_options = [
    "!H",                        # no help text
    "!V",                        # verbose output off
    "NTrees=800",                # number of trees
    "MinNodeSize=5%",            # minimum node size, given as a percentage
    "MaxDepth=3",                # maximum depth of each tree
    "BoostType=AdaBoost",        # boosting algorithm
    "AdaBoostBeta=0.5",          # AdaBoost beta parameter
    "SeparationType=GiniIndex",  # separation criterion for node splitting
    "nCuts=20",                  # number of cut points scanned per variable
    "PruneMethod=NoPruning",     # trees are not pruned
    "DoBoostMonitor=True",       # enable boost monitoring
]

# Book the BDT on the prepared dataloader under the name "BDTAtmCCNC_NoNorm".
method = factory.BookMethod(dataloader, ROOT.TMVA.Types.kBDT, "BDTAtmCCNC_NoNorm",
                            ":".join(bdt_options))

Factory                  : Booking method: [1mBDTAtmCCNC_NoNorm[0m
                         : 
                         : Rebuilding Dataset dataset
                         : Building event vectors for type 2 Signal
                         : Dataset[dataset] :  create input formulas for tree Atm
                         : Building event vectors for type 2 Background
                         : Dataset[dataset] :  create input formulas for tree Atm
DataSetFactory           : [dataset] : Number of events in input trees
                         : Dataset[dataset] :     Signal     requirement: "CCNC == 1 && event < 2300000"
                         : Dataset[dataset] :     Signal          -- number of events passed: 685605  / sum of weights: 685605
                         : Dataset[dataset] :     Signal          -- efficiency             : 0.227024
                         : Dataset[dataset] :     Background requirement: "CCNC == 0 && event < 2300000"
                         : Dataset

In [14]:
# Run the full TMVA workflow for every booked method, strictly in this order:
# training, then testing, then evaluation.
for run_stage in (factory.TrainAllMethods,
                  factory.TestAllMethods,
                  factory.EvaluateAllMethods):
    run_stage()

Factory                  : [1mTrain all methods[0m
Factory                  : [dataset] : Create Transformation "I" with events from all classes.
                         : 
                         : Transformation, Variable selection : 
                         : Input : variable 'LongestTrack' <---> Output : variable 'LongestTrack'
                         : Input : variable 'log(HighestTrackSummedADC)' <---> Output : variable 'log(HighestTrackSummedADC)'
                         : Input : variable 'PIDALongestTrack' <---> Output : variable 'PIDALongestTrack'
                         : Input : variable 'nTracks' <---> Output : variable 'nTracks'
                         : Input : variable 'nShowers' <---> Output : variable 'nShowers'
                         : Input : variable 'NHits' <---> Output : variable 'NHits'
                         : Input : variable 'log(HighestShowerSummedADC)' <---> Output : variable 'log(HighestShowerSummedADC)'
                         : Input : vari

0%, time left: unknown
6%, time left: 13 mins
12%, time left: 12 mins
18%, time left: 11 mins
25%, time left: 10 mins
31%, time left: 9 mins
37%, time left: 8 mins
43%, time left: 7 mins
50%, time left: 6 mins
56%, time left: 6 mins
62%, time left: 5 mins
68%, time left: 261 sec
75%, time left: 208 sec
81%, time left: 156 sec
87%, time left: 104 sec
93%, time left: 52 sec
0%, time left: unknown
6%, time left: 86 sec
12%, time left: 76 sec
18%, time left: 72 sec
25%, time left: 64 sec
31%, time left: 59 sec
37%, time left: 52 sec
43%, time left: 47 sec
50%, time left: 40 sec
56%, time left: 35 sec
62%, time left: 30 sec
68%, time left: 24 sec
75%, time left: 19 sec
81%, time left: 14 sec
87%, time left: 9 sec
93%, time left: 4 sec
0%, time left: unknown
6%, time left: 4 sec
12%, time left: 3 sec
18%, time left: 3 sec
25%, time left: 3 sec
31%, time left: 3 sec
37%, time left: 2 sec
43%, time left: 2 sec
50%, time left: 2 sec
56%, time left: 1 sec
62%, time left: 1 sec
68%, time left: 1 