<div>
    <div style="float:left;">
        <img src="https://root.cern.ch/root/htmldoc/guides/users-guide/pictures/rootlogo.png" width="400px" />
    </div>
    <div style="float:left;">
        <img src="https://rawgit.com/qati/GSOC16/master/img/tmva-logo.svg" width="500px"/>
    </div>
</div>

<hr style="border-top-width: 4px; border-top-color: #34609b;">

<!--<script src="JsRoot/scripts/JSRootCore.js?jq2d&onload=JsRootLoadedCall" type="text/javascript"></script>-->

In [1]:
import ROOT
from ROOT import TFile, TMVA, TCut

Welcome to JupyROOT 6.09/01


## Enable JS visualization

In [2]:
# Switch on the JsMVA (JavaScript) visualization for the cells that follow.
%jsmva on

# Dataset info

In [3]:
# Name of the dataset; used for the DataLoader and for the drawing calls.
dataset = "tmva_class_example"

# Input ROOT file and the names of the signal / background trees read from it.
infname = "tmva_class_example.root"
treeNameSig, treeNameBkg = "TreeS", "TreeB"

# ROOT file that is opened as the factory's output file.
outfname = "TMVA.root"

## Declare Factory and DataLoader

In [4]:
# Open the output ROOT file in 'RECREATE' mode; this handle is passed to the
# factory as its TargetFile and is closed in the last cell of the notebook.
outputFile = TFile( outfname, 'RECREATE' )

# Make sure the TMVA Tools instance exists before the factory is created.
# The trailing ';' keeps the returned object from being echoed as cell output.
TMVA.Tools.Instance();

In [5]:
# Create the TMVA factory used below to book, train, test and evaluate methods.
#
# Transformations: each list entry is one transformation to test. A chained
# transformation is written as a single comma-separated entry, so
# Gaussianisation followed by decorrelation is "G,D" (the TMVA option format
# is "I;D;P;G,D"). Listing "G" and "D" as separate entries only repeated the
# "D" transformation that is already in the list.
factory = TMVA.Factory(JobName="TMVAClassification", TargetFile=outputFile,
                       V=False, Color=False, DrawProgressBar=True,
                       Transformations=["I", "D", "P", "G,D"],
                       AnalysisType="Classification")

In [6]:
# Create the DataLoader for this dataset; variables, trees and the
# train/test split are registered on it in the following cells.
loader = TMVA.DataLoader(dataset)

## Adding variables to DataLoader

In [7]:
# Input variables used for training.
# The long form of AddVariable is (expression, title, unit, type). With only
# three arguments the 'F' would be taken as the *unit* string rather than the
# variable type, so the unit is passed explicitly (empty) and 'F' (float) is
# given as the fourth argument.
loader.AddVariable( "myvar1 := var1+var2", 'F' )
loader.AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' )
loader.AddVariable( "var3",                "Variable 3",   "", 'F' )
loader.AddVariable( "var4",                "Variable 4",   "", 'F' )

# Spectators are carried along with the events but are not used in training.
# AddSpectator takes (expression, title, unit, ...); it has no type argument
# in that position, so the former third argument 'F' (which ended up as the
# unit) is dropped.
loader.AddSpectator( "spec1:=var1*2",  "Spectator 1" )
loader.AddSpectator( "spec2:=var1*3",  "Spectator 2" )

## If the dataset is not available on the local computer, download it from the CERN server

In [8]:
# TSystem.AccessPathName uses an inverted convention: a non-zero (true) result
# means the path could NOT be accessed. Only then is the file fetched with wget.
localPath = "./"+infname
remoteUrl = "https://root.cern.ch/files/" + infname
if ROOT.gSystem.AccessPathName( localPath ) != 0:
    ROOT.gSystem.Exec( "wget " + remoteUrl )

## Setting up dataset from Trees

In [9]:
# Open the input ROOT file. The handle is named inputFile (not `input`) so it
# does not shadow Python's built-in input() for the rest of the notebook.
inputFile = TFile.Open( infname )
if not inputFile or inputFile.IsZombie():
    # Fail here with a clear message instead of on the first tree access.
    raise IOError("Could not open input file: " + infname)

# Get the signal and background trees for training
signal      = inputFile.Get( treeNameSig )
background  = inputFile.Get( treeNameBkg )

# Global event weights (see below for setting event-wise weights)
signalWeight     = 1.0
backgroundWeight = 1.0

# Empty cuts: no selection is applied to signal or background events.
mycuts = TCut("")
mycutb = TCut("")

# Register the trees with their global weights.
loader.AddSignalTree(signal, signalWeight)
loader.AddBackgroundTree(background, backgroundWeight)

# The trees and weights are additionally stored as Python attributes on the
# loader object.
# NOTE(review): presumably the JsMVA drawing helpers read these back - confirm.
loader.fSignalWeight = signalWeight
loader.fBackgroundWeight = backgroundWeight
loader.fTreeS = signal
loader.fTreeB = background

# Split into training and test samples; the trailing ';' suppresses the
# cell's return-value output.
loader.PrepareTrainingAndTestTree(SigCut=mycuts, BkgCut=mycutb,
            nTrain_Signal=1000, nTrain_Background=1000, nTest_Signal=2000, nTest_Background=2000,
                                  SplitMode="Random", NormMode="NumEvents", V=False);

0,1,2,3
DataSetInfo,"Dataset: tmva_class_exampleAdded class ""Signal""",,
DataSetInfo,Dataset: tmva_class_example,"Added class ""Signal""",
Add Tree TreeS of type Signal with 6000 events,,,
DataSetInfo,"Dataset: tmva_class_exampleAdded class ""Background""",,
DataSetInfo,Dataset: tmva_class_example,"Added class ""Background""",
Add Tree TreeB of type Background with 6000 events,,,

0,1,2
Dataset: tmva_class_example,"Added class ""Signal""",

0,1,2
Dataset: tmva_class_example,"Added class ""Background""",


## Visualizing input variables

In [10]:
# Plot the input variable "myvar1" from the dataset prepared on the loader.
loader.DrawInputVariable("myvar1")

0,1,2,3
DataSetFactory,Dataset: tmva_class_exampleNumber of events in input trees,,
DataSetFactory,Dataset: tmva_class_example,Number of events in input trees,
Number of training and testing eventsSignaltraining events1000testing events2000training and testing events3000Backgroundtraining events1000testing events2000training and testing events3000,,,
Number of training and testing events,Number of training and testing events,Number of training and testing events,
Signal,training events,1000,
Signal,testing events,2000,
Signal,training and testing events,3000,
Background,training events,1000,
Background,testing events,2000,
Background,training and testing events,3000,

0,1,2
Dataset: tmva_class_example,Number of events in input trees,

0,1,2,3
Number of training and testing events,Number of training and testing events,Number of training and testing events,
Signal,training events,1000,
Signal,testing events,2000,
Signal,training and testing events,3000,
Background,training events,1000,
Background,testing events,2000,
Background,training and testing events,3000,

0,1
Dataset: tmva_class_example,
Dataset: tmva_class_example,


### We can also visualize transformations on input variables

In [11]:
# Plot "myvar1" after applying the transformations listed in processTrfs
# (here "D" and "N"). Transformation codes noted by the author: I;N;D;P;U;G,D
loader.DrawInputVariable("myvar1", processTrfs=["D", "N"])

0,1,2,3,4,5
DataLoader,"Dataset: tmva_class_exampleCreate Transformation ""D"" with events from all classes.",,,,
Dataset: tmva_class_example,"Create Transformation ""D"" with events from all classes.",,,,
Dataset: tmva_class_example,,"Transformation, Variable selection :",,,
Input : variable 'myvar1' <---> Output : variable 'myvar1',,,,,
Input : variable 'myvar2' <---> Output : variable 'myvar2',,,,,
Input : variable 'var3' <---> Output : variable 'var3',,,,,
Input : variable 'var4' <---> Output : variable 'var4',,,,,
DataLoader,"Dataset: tmva_class_exampleCreate Transformation ""N"" with events from all classes.",,,,
Dataset: tmva_class_example,"Create Transformation ""N"" with events from all classes.",,,,
Dataset: tmva_class_example,,"Transformation, Variable selection :",,,

0,1,2
Dataset: tmva_class_example,"Create Transformation ""D"" with events from all classes.",
Dataset: tmva_class_example,,

0,1,2
Dataset: tmva_class_example,"Create Transformation ""N"" with events from all classes.",
Dataset: tmva_class_example,,

0,1,2,3,4,5
Variable,Mean,RMS,Min,Max,
myvar1,-0.088539,1.0000,-2.9291,3.2844,
myvar2,-0.073882,1.0000,-3.5849,3.1248,
var3,-0.098159,1.0000,-3.2055,3.9118,
var4,0.30901,1.0000,-3.3243,3.0764,

0,1,2,3,4,5
Variable,Mean,RMS,Min,Max,
myvar1,-0.085681,0.32188,-1.0000,1.0000,
myvar2,0.046556,0.29808,-1.0000,1.0000,
var3,-0.12682,0.28100,-1.0000,1.0000,
var4,0.13529,0.31247,-1.0000,1.0000,

0,1,2
Dataset: tmva_class_example,"Create Transformation ""D"" with events from all classes.",
Dataset: tmva_class_example,,

0,1,2
Dataset: tmva_class_example,"Create Transformation ""N"" with events from all classes.",
Dataset: tmva_class_example,,

0,1,2,3,4,5
Variable,Mean,RMS,Min,Max,
myvar1,-0.088539,1.0000,-2.9291,3.2844,
myvar2,-0.073882,1.0000,-3.5849,3.1248,
var3,-0.098159,1.0000,-3.2055,3.9118,
var4,0.30901,1.0000,-3.3243,3.0764,

0,1,2,3,4,5
Variable,Mean,RMS,Min,Max,
myvar1,-0.085681,0.32188,-1.0000,1.0000,
myvar2,0.046556,0.29808,-1.0000,1.0000,
var3,-0.12682,0.28100,-1.0000,1.0000,
var4,0.13529,0.31247,-1.0000,1.0000,


## Correlation matrix of input variables

In [12]:
# Draw the correlation matrix of the input variables for the "Signal" class.
loader.DrawCorrelationMatrix("Signal")

In [13]:
# Book the DNN method on the factory; the training output further down
# includes a "DNN" method, which comes from this call.
# NOTE(review): no method options are passed here, so the network layout is
# presumably configured interactively by JsMVA - confirm, since that would
# make the result depend on manual input rather than on code alone.
factory.BookDNN(loader)

## Booking methods

In [14]:
# Support Vector Machine (loader, method type and title given by keyword).
factory.BookMethod(
    DataLoader=loader, Method=TMVA.Types.kSVM, MethodTitle="SVM",
    Gamma=0.25,
    Tol=0.001,
    VarTransform="Norm"
)

# Multi-layer perceptron.
factory.BookMethod(
    loader, TMVA.Types.kMLP, "MLP",
    H=False,
    V=False,
    NeuronType="tanh",
    VarTransform="N",
    NCycles=600,
    HiddenLayers="N+5",
    TestRate=5,
    UseRegulator=False
)

# Linear discriminant, with MVA PDFs created.
factory.BookMethod(
    loader, TMVA.Types.kLD, "LD",
    H=False,
    V=False,
    VarTransform="None",
    CreateMVAPdfs=True,
    PDFInterpolMVAPdf="Spline2",
    NbinsMVAPdf=50,
    NsmoothMVAPdf=10
)

# Likelihood: the indexed smoothing options are passed as a plain option
# string, the remaining options as keyword arguments.
factory.BookMethod(
    loader, TMVA.Types.kLikelihood, "Likelihood",
    "NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10",
    NSmooth=1,
    NAvEvtPerBin=50,
    H=True,
    V=False,
    TransformOutput=True,
    PDFInterpol="Spline2"
)

# Boosted decision trees. This is the last expression of the cell, so the
# booked method object it returns is shown as the cell's output.
factory.BookMethod(
    loader, TMVA.Types.kBDT, "BDT",
    H=False,
    V=False,
    NTrees=850,
    MinNodeSize="2.5%",
    MaxDepth=3,
    BoostType="AdaBoost",
    AdaBoostBeta=0.5,
    UseBaggedBoost=True,
    BaggedSampleFraction=0.5,
    SeparationType="GiniIndex",
    nCuts=20
)

<ROOT.TMVA::MethodBDT object ("BDT") at 0x7176530>

0,1,2
Factory,Booking method: SVM,
Factory,,
SVM,"Dataset: tmva_class_exampleCreate Transformation ""Norm"" with events from all classes.",
Dataset: tmva_class_example,"Create Transformation ""Norm"" with events from all classes.",
Dataset: tmva_class_example,,"Transformation, Variable selection :"
Input : variable 'myvar1' <---> Output : variable 'myvar1',,
Input : variable 'myvar2' <---> Output : variable 'myvar2',,
Input : variable 'var3' <---> Output : variable 'var3',,
Input : variable 'var4' <---> Output : variable 'var4',,
Factory,Booking method: MLP,

0,1,2
Dataset: tmva_class_example,"Create Transformation ""Norm"" with events from all classes.",
Dataset: tmva_class_example,,

0,1,2
Dataset: tmva_class_example,"Create Transformation ""N"" with events from all classes.",
Dataset: tmva_class_example,,


# Train Methods

In [15]:
# Train every method that has been booked on the factory.
factory.TrainAllMethods()

0,1,2,3,4,5,6
TFHandler_DNN,VariableMeanRMSMinMaxmyvar10.0330140.38087-1.00001.0000myvar20.0777380.30037-1.00001.0000var30.113290.37003-1.00001.0000var40.131010.42584-1.00001.0000,,,,,
Variable,Mean,RMS,Min,Max,,
myvar1,0.033014,0.38087,-1.0000,1.0000,,
myvar2,0.077738,0.30037,-1.0000,1.0000,,
var3,0.11329,0.37003,-1.0000,1.0000,,
var4,0.13101,0.42584,-1.0000,1.0000,,
TFHandler_DNN,VariableMeanRMSMinMaxmyvar10.0330140.38087-1.00001.0000myvar20.0777380.30037-1.00001.0000var30.113290.37003-1.00001.0000var40.131010.42584-1.00001.0000,,,,,
Variable,Mean,RMS,Min,Max,,
myvar1,0.033014,0.38087,-1.0000,1.0000,,
myvar2,0.077738,0.30037,-1.0000,1.0000,,

0,1,2,3,4,5
Variable,Mean,RMS,Min,Max,
myvar1,0.033014,0.38087,-1.0000,1.0000,
myvar2,0.077738,0.30037,-1.0000,1.0000,
var3,0.11329,0.37003,-1.0000,1.0000,
var4,0.13101,0.42584,-1.0000,1.0000,

0,1,2,3,4,5
Variable,Mean,RMS,Min,Max,
myvar1,0.033014,0.38087,-1.0000,1.0000,
myvar2,0.077738,0.30037,-1.0000,1.0000,
var3,0.11329,0.37003,-1.0000,1.0000,
var4,0.13101,0.42584,-1.0000,1.0000,

0,1,2,3,4,5
Variable,Mean,RMS,Min,Max,
myvar1,0.019343,0.38955,-1.1686,1.0267,
myvar2,0.096013,0.29861,-0.88775,1.1677,
var3,0.097554,0.37953,-1.0397,1.1012,
var4,0.11846,0.43598,-1.0872,1.1028,

0,1,2
Dataset: tmva_class_example,Evaluation of DNN on training sample (2000 events),

0,1,2,3,4,5
Variable,Mean,RMS,Min,Max,
myvar1,0.033014,0.38087,-1.0000,1.0000,
myvar2,0.077738,0.30037,-1.0000,1.0000,
var3,0.11329,0.37003,-1.0000,1.0000,
var4,0.13101,0.42584,-1.0000,1.0000,

0,1,2
Dataset: tmva_class_example,Evaluation of SVM on training sample (2000 events),

0,1,2,3,4,5
Variable,Mean,RMS,Min,Max,
myvar1,0.033014,0.38087,-1.0000,1.0000,
myvar2,0.077738,0.30037,-1.0000,1.0000,
var3,0.11329,0.37003,-1.0000,1.0000,
var4,0.13101,0.42584,-1.0000,1.0000,

0,1,2
Dataset: tmva_class_example,Evaluation of MLP on training sample (2000 events),

0,1,2
Dataset: tmva_class_example,Evaluation of LD on training sample (2000 events),

0,1,2
Dataset: tmva_class_example,Separation from histogram (PDF): 0.469 (0.000),
Dataset: tmva_class_example,Evaluation of LD on training sample,

0,1
Dataset: Likelihood,
Dataset: Likelihood,

0,1,2
Dataset: tmva_class_example,Evaluation of Likelihood on training sample (2000 events),

0,1,2
Dataset: tmva_class_example,Evaluation of BDT on training sample (2000 events),


## Testing the methods

In [16]:
# Run every booked method on the test sample.
factory.TestAllMethods()

0,1,2,3
Factory,Test method: DNN for Classification performance,,
Factory,,,
DNN,Dataset: tmva_class_exampleEvaluation of DNN on testing sample (4000 events),,
DNN,Dataset: tmva_class_example,Evaluation of DNN on testing sample (4000 events),
Elapsed time for evaluation of 4000 events : 0.14 sec,,,
Factory,Test method: SVM for Classification performance,,
Factory,,,
SVM,Dataset: tmva_class_exampleEvaluation of SVM on testing sample (4000 events),,
SVM,Dataset: tmva_class_example,Evaluation of SVM on testing sample (4000 events),
Elapsed time for evaluation of 4000 events : 0.253 sec,,,

0,1,2
Dataset: tmva_class_example,Evaluation of DNN on testing sample (4000 events),

0,1,2
Dataset: tmva_class_example,Evaluation of SVM on testing sample (4000 events),

0,1,2
Dataset: tmva_class_example,Evaluation of MLP on testing sample (4000 events),

0,1,2
Dataset: tmva_class_example,Evaluation of LD on testing sample (4000 events),

0,1,2
Dataset: tmva_class_example,Evaluation of LD on testing sample,

0,1,2
Dataset: tmva_class_example,Evaluation of Likelihood on testing sample (4000 events),

0,1,2
Dataset: tmva_class_example,Evaluation of BDT on testing sample (4000 events),


## Evaluate the methods

In [17]:
# Evaluate every booked method; the drawing cells below follow this step.
factory.EvaluateAllMethods()

0,1,2,3,4,5,6
Factory,Evaluate classifier: DNN,,,,,
Factory,,,,,,
DNN,Dataset: tmva_class_exampleLoop over test events and fill histograms with classifier response...,,,,,
Dataset: tmva_class_example,Loop over test events and fill histograms with classifier response...,,,,,
Dataset: tmva_class_example,TFHandler_DNN,VariableMeanRMSMinMaxmyvar10.0193430.38955-1.16861.0267myvar20.0960130.29861-0.887751.1677var30.0975540.37953-1.03971.1012var40.118460.43598-1.08721.1028,,,,
Variable,Mean,RMS,Min,Max,,
myvar1,0.019343,0.38955,-1.1686,1.0267,,
myvar2,0.096013,0.29861,-0.88775,1.1677,,
var3,0.097554,0.37953,-1.0397,1.1012,,
var4,0.11846,0.43598,-1.0872,1.1028,,

0,1,2
Dataset: tmva_class_example,Loop over test events and fill histograms with classifier response...,
Dataset: tmva_class_example,,

0,1,2,3,4,5
Variable,Mean,RMS,Min,Max,
myvar1,0.019343,0.38955,-1.1686,1.0267,
myvar2,0.096013,0.29861,-0.88775,1.1677,
var3,0.097554,0.37953,-1.0397,1.1012,
var4,0.11846,0.43598,-1.0872,1.1028,

0,1,2,3,4,5
Variable,Mean,RMS,Min,Max,
myvar1,0.019343,0.38955,-1.1686,1.0267,
myvar2,0.096013,0.29861,-0.88775,1.1677,
var3,0.097554,0.37953,-1.0397,1.1012,
var4,0.11846,0.43598,-1.0872,1.1028,

0,1,2
Dataset: tmva_class_example,Loop over test events and fill histograms with classifier response...,
Dataset: tmva_class_example,,

0,1,2,3,4,5
Variable,Mean,RMS,Min,Max,
myvar1,0.019343,0.38955,-1.1686,1.0267,
myvar2,0.096013,0.29861,-0.88775,1.1677,
var3,0.097554,0.37953,-1.0397,1.1012,
var4,0.11846,0.43598,-1.0872,1.1028,

0,1,2,3,4,5
Variable,Mean,RMS,Min,Max,
myvar1,0.019343,0.38955,-1.1686,1.0267,
myvar2,0.096013,0.29861,-0.88775,1.1677,
var3,0.097554,0.37953,-1.0397,1.1012,
var4,0.11846,0.43598,-1.0872,1.1028,

0,1,2
Dataset: tmva_class_example,Loop over test events and fill histograms with classifier response...,
Dataset: tmva_class_example,,

0,1,2,3,4,5
Variable,Mean,RMS,Min,Max,
myvar1,0.019343,0.38955,-1.1686,1.0267,
myvar2,0.096013,0.29861,-0.88775,1.1677,
var3,0.097554,0.37953,-1.0397,1.1012,
var4,0.11846,0.43598,-1.0872,1.1028,

0,1,2
Dataset: tmva_class_example,Loop over test events and fill histograms with classifier response...,
Dataset: tmva_class_example,,

0,1,2,3,4,5
Variable,Mean,RMS,Min,Max,
myvar1,0.00033763,3.0558,-9.3187,7.9024,
myvar2,0.017344,1.1178,-3.6652,4.0291,
var3,-0.012729,1.7422,-5.2331,4.5943,
var4,0.14935,2.1621,-5.8296,5.0307,

0,1,2
Dataset: tmva_class_example,Loop over test events and fill histograms with classifier response...,
Dataset: tmva_class_example,,

0,1,2,3,4,5
Variable,Mean,RMS,Min,Max,
myvar1,0.00033763,3.0558,-9.3187,7.9024,
myvar2,0.017344,1.1178,-3.6652,4.0291,
var3,-0.012729,1.7422,-5.2331,4.5943,
var4,0.14935,2.1621,-5.8296,5.0307,

0,1,2
Dataset: tmva_class_example,Loop over test events and fill histograms with classifier response...,
Dataset: tmva_class_example,,

0,1,2,3,4,5
Variable,Mean,RMS,Min,Max,
myvar1,0.00033763,3.0558,-9.3187,7.9024,
myvar2,0.017344,1.1178,-3.6652,4.0291,
var3,-0.012729,1.7422,-5.2331,4.5943,
var4,0.14935,2.1621,-5.8296,5.0307,


## Classifier Output Distributions

In [18]:
# Draw the classifier output distribution of the "MLP" method for this dataset.
factory.DrawOutputDistribution(dataset, "MLP")

## Classifier Probability Distributions

In [19]:
# Draw the classifier probability distribution of the "LD" method
# (LD is the method booked with CreateMVAPdfs=True above).
factory.DrawProbabilityDistribution(dataset, "LD")

## ROC curve

In [20]:
# Draw the ROC curve for this dataset; no method name is passed here.
factory.DrawROCCurve(dataset)

## Classifier Cut Efficiencies

In [21]:
# Draw the cut efficiencies of the "MLP" method for this dataset.
factory.DrawCutEfficiencies(dataset, "MLP")

## Draw Neural Network

* Hover over a node or a weight to focus on it
* Zooming and click-and-drag panning are supported
* Double-click to reset the view

In [22]:
# Draw the network of the trained "MLP" method for this dataset.
factory.DrawNeuralNetwork(dataset, "MLP")

## Draw Deep Neural Network

In [23]:
# Draw the network of the "DNN" method (booked via factory.BookDNN above).
factory.DrawNeuralNetwork(dataset, "DNN")

## Close the factory's output file

In [None]:
# Close the output ROOT file that was opened for the factory at the start.
outputFile.Close()