In [1]:

from ROOT import TMVA, TFile, TTree, TCut
from subprocess import call
from os.path import isfile
 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.optimizers import SGD
 

Welcome to JupyROOT 6.24/06


2022-02-08 14:24:21.492720: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /cvmfs/sft.cern.ch/lcg/releases/MCGenerators/thepeg/2.2.1-8d929/x86_64-centos7-gcc8-opt/lib/ThePEG:/cvmfs/sft.cern.ch/lcg/releases/MCGenerators/herwig++/7.2.1-f3599/x86_64-centos7-gcc8-opt/lib/Herwig:/cvmfs/sft.cern.ch/lcg/views/LCG_101swan/x86_64-centos7-gcc8-opt/lib/python3.9/site-packages/torch/lib:/cvmfs/sft.cern.ch/lcg/views/LCG_101swan/x86_64-centos7-gcc8-opt/lib/python3.9/site-packages/tensorflow:/cvmfs/sft.cern.ch/lcg/views/LCG_101swan/x86_64-centos7-gcc8-opt/lib/python3.9/site-packages/tensorflow/contrib/tensor_forest:/cvmfs/sft.cern.ch/lcg/views/LCG_101swan/x86_64-centos7-gcc8-opt/lib/python3.9/site-packages/tensorflow/python/framework:/cvmfs/sft.cern.ch/lcg/releases/java/8u222-884d8/x86_64-centos7-gcc8-opt/jre/lib/amd64:/cvmfs/sft.

In [2]:

# Setup TMVA
TMVA.Tools.Instance()
TMVA.PyMethodBase.PyInitialize()

# Fastest (and smallest) dataset
data = TFile.Open('10k_sample_data.root')
# Medium size dataset
# data = TFile.Open('50k_sample_data.root')
signal = data.Get('Mixing')
background = data.Get('Cosmic')




In [3]:

dataloader = TMVA.DataLoader('dataset')

variable_list = [
   'nhits',
   'residual',
   'r',
   'S0rawPerp',
   'S0axisrawZ',
   'phi_S0axisraw',
   'nCT',
   'nGT',
   'tracksdca',
   'curvemin',
   'curvemean',
   'lambdamin',
   'lambdamean',
   'curvesign',
   # Want to see the final tests fail? Enable these
   #'phi',
   #'X',
   #'Y',
   #'Z',
]
# Assign the variables for training (and testing)
for variable in variable_list:
    dataloader.AddVariable(variable)

In [4]:

dataloader.AddSignalTree(signal, 1.0)
dataloader.AddBackgroundTree(background, 1.0)

testing_events = 4000

dataloader.PrepareTrainingAndTestTree(TCut(''),
                                      'nTrain_Signal=' +
                                      str(signal.GetEntries() - testing_events) + 
                                      ':nTrain_Background=' +
                                      str(background.GetEntries() - testing_events) + 
                                      ':SplitMode=Random:NormMode=NumEvents:!V')
 

DataSetInfo              : [dataset] : Added class "Signal"
                         : Add Tree Mixing of type Signal with 10000 events
DataSetInfo              : [dataset] : Added class "Background"
                         : Add Tree Cosmic of type Background with 10000 events
                         : Dataset[dataset] : Class index : 0  name : Signal
                         : Dataset[dataset] : Class index : 1  name : Background


# Select the classifiers of interest here:

In [5]:
# Maybe pick one or two out of this list if memory is limited
active_mva_list = [
    'Fisher',
    'PyKeras',
    'Cuts',
    'KNN',
    'SVM',
    'BDT',
    'BDTB',
    'BDTG',
]
 

In [6]:
output = TFile.Open('TMVA.root', 'RECREATE')
factory = TMVA.Factory('TMVAClassification', output,
                       '!V:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=Classification')


# Book methods

if 'Fisher' in active_mva_list:
    factory.BookMethod(dataloader, TMVA.Types.kFisher, 'Fisher',
                       '!H:!V:Fisher:VarTransform=D,G')
if 'PyKeras' in active_mva_list:
    # Generate model
    
    # Define model
    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=len(variable_list)))
    model.add(Dense(2, activation='softmax'))
    
    # Set loss and optimizer
    model.compile(loss='categorical_crossentropy',
                  optimizer=SGD(lr=0.01), metrics=['accuracy', ])
    # Store model to file
    model.save('model.h5')
    model.summary()
    factory.BookMethod(dataloader, TMVA.Types.kPyKeras, 'PyKeras',
                       'H:!V:VarTransform=D,G:FilenameModel=model.h5:NumEpochs=20:BatchSize=32')
# Cut optimisation
if 'Cuts' in active_mva_list:
    factory.BookMethod( dataloader, TMVA.Types.kCuts, 'Cuts',
                        '!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart' )
#// K-Nearest Neighbour classifier (KNN)
if 'KNN' in active_mva_list:
    factory.BookMethod( dataloader, TMVA.Types.kKNN, 'KNN',
                       '!H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim' )

# Support Vector Machine
if 'SVM' in active_mva_list:
    factory.BookMethod( dataloader, TMVA.Types.kSVM, 'SVM', 'Gamma=0.25:Tol=0.001:VarTransform=Norm' )

# "BDT"  // Adaptive Boost
if 'BDT' in active_mva_list:
    factory.BookMethod( dataloader, TMVA.Types.kBDT, 'BDT',
                       '!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20:SigToBkgFraction=1.0' )

# "BDTB" // Bagging random forest
if 'BDTB' in active_mva_list:
    factory.BookMethod( dataloader, TMVA.Types.kBDT, 'BDTB',
                      '!H:!V:NTrees=500:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=1.0:SeparationType=GiniIndex:nCuts=50:MaxDepth=4:MinNodeSize=5.0%:UseRandomisedTrees:UseNvars=4:CreateMVAPdfs' )

# "BDTG" // Gradient Boost
if 'BDTG' in active_mva_list:  
    factory.BookMethod( dataloader, TMVA.Types.kBDT, 'BDTG',
                        '!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2:SigToBkgFraction=1.0' )
 



Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                960       
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 130       
Total params: 1,090
Trainable params: 1,090
Non-trainable params: 0
_________________________________________________________________
Factory                  : Booking method: [1mFisher[0m
                         : 
Fisher                   : [dataset] : Create Transformation "D" with events from all classes.
                         : 
                         : Transformation, Variable selection : 
                         : Input : variable 'nhits' <---> Output : variable 'nhits'
                         : Input : variable 'residual' <---> Output : variable 'residual'
                         : Input : variable 'r' <---> Output : varia

2022-02-08 14:24:29.210838: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /cvmfs/sft.cern.ch/lcg/releases/MCGenerators/thepeg/2.2.1-8d929/x86_64-centos7-gcc8-opt/lib/ThePEG:/cvmfs/sft.cern.ch/lcg/releases/MCGenerators/herwig++/7.2.1-f3599/x86_64-centos7-gcc8-opt/lib/Herwig:/cvmfs/sft.cern.ch/lcg/views/LCG_101swan/x86_64-centos7-gcc8-opt/lib/python3.9/site-packages/torch/lib:/cvmfs/sft.cern.ch/lcg/views/LCG_101swan/x86_64-centos7-gcc8-opt/lib/python3.9/site-packages/tensorflow:/cvmfs/sft.cern.ch/lcg/views/LCG_101swan/x86_64-centos7-gcc8-opt/lib/python3.9/site-packages/tensorflow/contrib/tensor_forest:/cvmfs/sft.cern.ch/lcg/views/LCG_101swan/x86_64-centos7-gcc8-opt/lib/python3.9/site-packages/tensorflow/python/framework:/cvmfs/sft.cern.ch/lcg/releases/java/8u222-884d8/x86_64-centos7-gcc8-opt/jre/lib/amd64:/cvmfs/sft.cern.ch/lc

# Train our classifiers! This can take some time

In [7]:

# Run training, test and evaluation
factory.TrainAllMethods()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                960       
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 130       
Total params: 1,090
Trainable params: 1,090
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20

Epoch 00001: val_loss improved from inf to 0.31050, saving model to dataset/weights/TrainedModel_PyKeras.h5
Epoch 2/20

Epoch 00002: val_loss improved from 0.31050 to 0.25406, saving model to dataset/weights/TrainedModel_PyKeras.h5
Epoch 3/20

Epoch 00003: val_loss improved from 0.25406 to 0.23543, saving model to dataset/weights/TrainedModel_PyKeras.h5
Epoch 4/20

Epoch 00004: val_loss improved from 0.23543 to 0.22527, saving model to dataset/weights/TrainedModel_PyKeras.h5
Epoch 5/20

Epoch 00005:

0%, time left: unknown
7%, time left: 0 sec
13%, time left: 0 sec
19%, time left: 0 sec
25%, time left: 0 sec
32%, time left: 0 sec
38%, time left: 0 sec
44%, time left: 0 sec
50%, time left: 0 sec
57%, time left: 0 sec
63%, time left: 0 sec
69%, time left: 0 sec
75%, time left: 0 sec
82%, time left: 0 sec
88%, time left: 0 sec
94%, time left: 0 sec
2022-02-08 14:24:55.491681: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)
2022-02-08 14:24:55.494322: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 2399930000 Hz
0%, time left: unknown
7%, time left: 1 sec
13%, time left: 1 sec
19%, time left: 1 sec
25%, time left: 1 sec
32%, time left: 1 sec
38%, time left: 0 sec
44%, time left: 0 sec
50%, time left: 0 sec
57%, time left: 0 sec
63%, time left: 0 sec
69%, time left: 0 sec
75%, time left: 0 sec
82%, time left: 0 sec
88%, time left: 0 sec
94%, time left: 0 sec
0%, time left: unknown
7

In [8]:
factory.TestAllMethods()

Factory                  : [1mTest all methods[0m
Factory                  : Test method: Fisher for Classification performance
                         : 
Fisher                   : [dataset] : Evaluation of Fisher on testing sample (8000 events)
                         : Elapsed time for evaluation of 8000 events: 0.0718 sec       
Factory                  : Test method: PyKeras for Classification performance
                         : 
                         : Setting up tf.keras
                         : Using TensorFlow version 2
                         : Use Keras version from TensorFlow : tf.keras
                         :  Loading Keras Model 
                         : Loaded model from file: dataset/weights/TrainedModel_PyKeras.h5
PyKeras                  : [dataset] : Evaluation of PyKeras on testing sample (8000 events)
                         : Elapsed time for evaluation of 8000 events: 0.318 sec       
Factory                  : Test method: Cuts for Classificat

0%, time left: unknown
7%, time left: 0 sec
13%, time left: 0 sec
19%, time left: 0 sec
25%, time left: 0 sec
32%, time left: 0 sec
38%, time left: 0 sec
44%, time left: 0 sec
50%, time left: 0 sec
57%, time left: 0 sec
63%, time left: 0 sec
69%, time left: 0 sec
75%, time left: 0 sec
82%, time left: 0 sec
88%, time left: 0 sec
94%, time left: 0 sec
0%, time left: unknown
7%, time left: 0 sec
13%, time left: 0 sec
19%, time left: 0 sec
25%, time left: 0 sec
32%, time left: 0 sec
38%, time left: 0 sec
44%, time left: 0 sec
50%, time left: 0 sec
57%, time left: 0 sec
63%, time left: 0 sec
69%, time left: 0 sec
75%, time left: 0 sec
82%, time left: 0 sec
88%, time left: 0 sec
94%, time left: 0 sec
0%, time left: unknown
7%, time left: 2 sec
13%, time left: 2 sec
19%, time left: 1 sec
25%, time left: 1 sec
32%, time left: 1 sec
38%, time left: 1 sec
44%, time left: 1 sec
50%, time left: 1 sec
57%, time left: 0 sec
63%, time left: 0 sec
69%, time left: 0 sec
75%, time left: 0 sec
82%, time 

In [9]:
factory.EvaluateAllMethods()

Factory                  : [1mEvaluate all methods[0m
Factory                  : Evaluate classifier: Fisher
                         : 
TFHandler_Fisher         :      Variable             Mean             RMS     [        Min             Max ]
                         : ------------------------------------------------------------------------------------
                         :         nhits:      -0.067373        0.98861   [        -2.0098         5.7307 ]
                         :      residual:      0.0050044         1.0109   [        -4.3770         3.8136 ]
                         :             r:      0.0048427         1.0168   [        -5.7307         5.7307 ]
                         :     S0rawPerp:      -0.020035         1.0023   [        -5.7307         3.6124 ]
                         :    S0axisrawZ:       0.012440         1.0089   [        -2.9285         5.7307 ]
                         : phi_S0axisraw:     -0.0034774        0.98613   [        -3.6364         3

In [10]:
# The TMVA.root file has lots of nice diagnostic plots inside it now:
output.ls()

# TMVA::Gui() in the root program doesn't have a batch mode yet (so I don't believe it can be used from jupyter)

TFile**		TMVA.root	
 TFile*		TMVA.root	
  TDirectoryFile*		dataset	dataset
   TDirectoryFile*		InputVariables_Deco_Gauss	InputVariables_Deco_Gauss
    TDirectoryFile*		CorrelationPlots	CorrelationPlots
     KEY: TH2F	scat_residual_vs_nhits_Signal_Deco_Gauss;1	residual versus nhits (Signal)_Deco_Gauss
     KEY: TProfile	prof_residual_vs_nhits_Signal_Deco_Gauss;1	profile residual versus nhits (Signal)_Deco_Gauss
     KEY: TH2F	scat_residual_vs_nhits_Background_Deco_Gauss;1	residual versus nhits (Background)_Deco_Gauss
     KEY: TProfile	prof_residual_vs_nhits_Background_Deco_Gauss;1	profile residual versus nhits (Background)_Deco_Gauss
     KEY: TH2F	scat_r_vs_nhits_Signal_Deco_Gauss;1	r versus nhits (Signal)_Deco_Gauss
     KEY: TProfile	prof_r_vs_nhits_Signal_Deco_Gauss;1	profile r versus nhits (Signal)_Deco_Gauss
     KEY: TH2F	scat_r_vs_nhits_Background_Deco_Gauss;1	r versus nhits (Background)_Deco_Gauss
     KEY: TProfile	prof_r_vs_nhits_Background_Deco_Gauss;1	profile r versus nhit

In [11]:
output.Close()


# Test on a real experiment (not labeled training data)

In [12]:
from array import array
from ROOT import TString
experiment_data = TFile.Open('sample_experiment.root')
testsignal = experiment_data.Get('sample_experiment')


reader = TMVA.Reader( "!Color:!Silent" );
#for variable in variable_list:
#    reader.AddVariable(variable)

    
    
branches = {}
for branch in testsignal.GetListOfBranches():
    branchName = branch.GetName()
    branches[branchName] = array('f', [-999])
    reader.AddVariable(branchName, branches[branchName])
    testsignal.SetBranchAddress(branchName, branches[branchName])
 
# Book methods
#reader.BookMVA('PyKeras', TString('dataset/weights/TMVAClassification_PyKeras.weights.xml'))
for method in active_mva_list:
    reader.BookMVA(TString(method), TString('dataset/weights/TMVAClassification_' + method + '.weights.xml'))

# Print some example classifications
print('Some signal example classifications:')
threshold = 0.5
efficiency = 0.5



for method in active_mva_list:
    print(method)
    counts = 0
    mean_model_output = 0.0
    for i in range(testsignal.GetEntries()):
        testsignal.GetEntry(i)
        mean_model_output += reader.EvaluateMVA(TString(method))
        if reader.EvaluateMVA(TString(method)) > threshold:
            counts += 1
        #print(reader.EvaluateMVA(TString(method)))
    print('Mean model output ' + str ( mean_model_output / testsignal.GetEntries() ))
    print( '\tTotal Signal = ' + str(counts) + '/' + str(testsignal.GetEntries()))
    print( '\tEstimated Population = ' + str(counts/efficiency) + '(' + str(testsignal.GetEntries() - 10) + ')')
print('')

TypeError: none of the 2 overloaded methods succeeded. Full details:
  TMVA::IMethod* TMVA::Reader::BookMVA(const TString& methodTag, const TString& weightfile) =>
    runtime_error: FATAL error
  TMVA::IMethod* TMVA::Reader::BookMVA(TMVA::Types::EMVA methodType, const char* xmlstr) =>
    TypeError: could not convert argument 1 (an integer is required)

                         : Booking "Fisher" of type "Fisher" from dataset/weights/TMVAClassification_Fisher.weights.xml.
                         : Reading weight file: dataset/weights/TMVAClassification_Fisher.weights.xml
<FATAL>                          : Dataset[Default] : You declared 18 variables in the Reader while there are 14 variables declared in the file
***> abort program execution
