In [None]:
# Useful when the notebook is running in tmux: report the machine hosting the kernel.
import socket

hostname = socket.gethostname()
print("%s" % hostname)
from VarInfo import GetVarInfo

In [None]:
import ROOT
from ROOT import TMVA

In [None]:
import os
import sys

# Put the local "sample" directory at the front of the module search path.
# Any earlier copy of the same entry is dropped first, so re-running this
# cell keeps exactly one entry instead of stacking duplicates.
sampleDir = "%s/%s"%(os.getcwd(), "sample")
while sampleDir in sys.path:
    sys.path.remove(sampleDir)
sys.path.insert(0, sampleDir)
from SampleInfo import getSamples

In [None]:
# ---------------------------------------------------------------------------
# Configuration: signal mass point, labels used in output names, input paths.
# ---------------------------------------------------------------------------
#inputFileS = ROOT.TFile("sig_1000.root")
mass    = "M800"
method  = "BDT"
package = "TMVA"

dirNtuple = "root://cmseos.fnal.gov//store/user/rverma/Output/cms-TT-run2/Ntuple_Skim/"
dirFile = "2016/Semilep/JetBase/"
sigFile = "Semilep_JetBase__TstarTstarToTgammaTgluon_%s_2016_Ntuple.root"%mass

# Background selection switch.
#   False -> train against the single TTGamma file below (current behaviour).
#   True  -> train against every background file returned by getSamples().
useAllBkgs = False
singleBkgFile = "Semilep_JetBase__TTGamma_SingleLept_2016_Ntuple.root"

# ---------------------------------------------------------------------------
# Signal tree. Fail early with a readable message instead of handing a null
# file/tree to TMVA further down.
# ---------------------------------------------------------------------------
sigPath = "%s/%s/%s"%(dirNtuple, dirFile, sigFile)
inputFileS = ROOT.TFile.Open(sigPath)
if not inputFileS or inputFileS.IsZombie():
    raise IOError("Could not open signal file: %s"%sigPath)
sig = inputFileS.Get("AnalysisTree")
if not sig:
    raise RuntimeError("No 'AnalysisTree' found in %s"%sigPath)

# ---------------------------------------------------------------------------
# Background file list. Samples whose name contains "TT_tytg" or "Data" are
# skipped (presumably the signal and real-data entries -- confirm against
# SampleInfo).
# ---------------------------------------------------------------------------
allSamples = getSamples("2016", "Semilep", "JetBase")

allBkgFiles = []
for sampleName in allSamples.keys():
    if "TT_tytg" not in sampleName and "Data" not in sampleName:
        sampleFiles = allSamples[sampleName]
        print("%s, files: %s"%(sampleName, len(sampleFiles)))
        allBkgFiles.extend(sampleFiles)

if useAllBkgs:
    bkgList = allBkgFiles
else:
    bkgList = [singleBkgFile]
print("\nTotal files from all bkgs = %s"%len(bkgList))

# Chain all selected background files into one tree.
bkg = ROOT.TChain("AnalysisTree")
for bkgFile in bkgList:
    bkg.Add("%s/%s/%s"%(dirNtuple, dirFile, bkgFile))
print(bkg.GetEntries())

In [None]:
# Build the TMVA DataLoader: register the trees, the input variables,
# the per-event weight expression and the event preselection.
loader = ROOT.TMVA.DataLoader("dataset")

# Global weights applied to each tree as a whole.
sigWeight = 1.0
bkgWeight = 1.0
loader.AddSignalTree(sig, sigWeight)
loader.AddBackgroundTree(bkg, bkgWeight)

# Every entry of the variable dictionary carries the variable expression
# in slot 0; each one is booked as a float ('F') input.
varDict = GetVarInfo()
print("\nTotal vars = %s \n"%len(varDict))
for varInfo in varDict.values():
    varExpr = varInfo[0]
    print(varExpr)
    loader.AddVariable(varExpr, 'F')

# Same per-event weight branch for both classes.
weightExpr = "Weight_lumi"
loader.SetSignalWeightExpression(weightExpr)
loader.SetBackgroundWeightExpression(weightExpr)

# Preselection applied to both trees (simpler alternative: "pt_j1 > 50").
evtSel = ROOT.TCut("Event_pass_presel_mu &&((Jet_size>=5 && FatJet_size==0) || (Jet_size>=2 && FatJet_size==1))  && Jet_b_size >=1 && Photon_size==1 && Photon_et[0] > 100")

# Alternative split options: 'SplitMode=Random:NormMode=NumEvents:!V'
splitOptions = "SplitMode=Random:!V"
loader.PrepareTrainingAndTestTree(evtSel, splitOptions)

In [None]:
# Keras/Theano set-up for the (currently commented-out) PyKeras method.
# The two environment variables are assigned BEFORE the keras imports below;
# keep that order -- presumably Keras/Theano read them at import time
# (NOTE(review): confirm against the installed Keras version).
from os import environ
# Select Theano as the Keras backend.
environ['KERAS_BACKEND'] = 'theano'
# Extra compiler flags handed to Theano's gcc invocation.
environ['THEANO_FLAGS'] = 'gcc.cxxflags=-march=corei7'
import ROOT
# Model-building pieces; none of them are referenced in the cells visible here.
from keras.models import Sequential
from keras.layers import Dense, Activation, AlphaDropout,Conv1D,MaxPooling1D, Flatten, Reshape,Dropout,LSTM
# NOTE(review): this module path looks specific to older Keras releases -- verify
# it still resolves in the environment this notebook targets.
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam, SGD, Nadam,Adadelta,RMSprop
from keras.regularizers import l2

In [None]:
#https://github.com/ViniciusMikuni/ttbb-analysis/blob/5d48e5e03bdd0ca162d3dd058f4ee02ef33a8460/python/MVA_cfg.py

# Batch size used only by the (commented-out) PyDNN entry below.
batchs = 64

# ---- Option string for the TMVA DNN method --------------------------------
layoutString = "Layout=RELU|64,RELU|64,RELU|64,SOFTSIGN"
training0 =  "LearningRate=1e-3,Momentum=0.0,Repetitions=1,ConvergenceSteps=20,BatchSize=256,TestRepetitions=10,Regularization=L2,Multithreading=True,DropConfig=0.1,DropRepetitions=1"
#training1 = "LearningRate=1e-2,Momentum=0.0,Repetitions=1,ConvergenceSteps=10,BatchSize=256,TestRepetitions=7,Regularization=L2,Multithreading=True"

# Several training phases can be chained with "|"; only one is active here.
trainingStrategyString  = "TrainingStrategy="
trainingStrategyString += training0
#trainingStrategyString += training0 + "|" + training1

nnOptions = "!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=None:WeightInitialization=XAVIERUNIFORM"
nnOptions += ":" + layoutString + ":" +  trainingStrategyString + ":Architecture=CPU"

# ---- Methods to book -------------------------------------------------------
# Maps a method title to [TMVA method type, option string].
#
# Every key must be unique: a repeated key in a dict literal silently replaces
# the earlier value. This dict previously listed "LH" and "SVM" twice; the
# effective (last) values are kept here, in the same iteration order, so the
# set of trained methods is unchanged.
#   * "SVM": both copies were identical.
#   * "LH" : the decorrelated variant won. The shadowed plain-likelihood
#            configuration is preserved below as a commented-out "LHPlain"
#            entry; enable it under that distinct key if both are wanted.
methodList = {"BDTP":[ROOT.TMVA.Types.kBDT,":".join(["!H","!V","NTrees=850","MaxDepth=5","BoostType=Grad","Shrinkage=0.01","UseBaggedBoost","BaggedSampleFraction=0.50","SeparationType=GiniIndex","nCuts=50"])],
              "BDTCW":[ROOT.TMVA.Types.kBDT,":".join(["!H","!V","NTrees=500","MaxDepth=8","BoostType=Grad","Shrinkage=0.01","UseBaggedBoost","BaggedSampleFraction=0.50","SeparationType=GiniIndex","nCuts=50"])],
              #"BDTFish":[ROOT.TMVA.Types.kBDT,":".join(["!H","!V","NTrees=500","MaxDepth=4","BoostType=Grad","Shrinkage=0.01","UseFisherCuts","MinLinCorrForFisher=0.5","UseBaggedBoost","BaggedSampleFraction=0.50","SeparationType=GiniIndex","nCuts=50"])],
              "LH":[ROOT.TMVA.Types.kLikelihood,"H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50:VarTransform=Decorrelate"],
              #"LHPlain":[ROOT.TMVA.Types.kLikelihood,"H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50"],
              "Cuts":[ROOT.TMVA.Types.kCuts,"H:!V:PopSize=500:Steps=50"],
              "MLP": [ROOT.TMVA.Types.kMLP, "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator"],
              "SVM": [ROOT.TMVA.Types.kSVM,"VarTransform=Norm"],
              "BDTA": [ROOT.TMVA.Types.kBDT, "!H:!V:NTrees=850:MaxDepth=6:BoostType=AdaBoost:AdaBoostBeta=0.05:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=30"],
              "DNN": [ROOT.TMVA.Types.kDNN, nnOptions],
              #"PyDNN":[ROOT.TMVA.Types.kPyKeras,":".join(["H","V","NumEpochs=700","TriesEarlyStopping=20","BatchSize="+str(batchs)])],
              #"Fish" : [ROOT.TMVA.Types.kFisher, "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ],
              #"FishG" : [ROOT.TMVA.Types.kFisher, "H:!V:Fisher:VarTransform=Gauss:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ],

              # NOTE(review): the option string below contains "::" after "!V" -- looks
              # like a typo for a single ":"; left untouched, confirm TMVA accepts it.
              "PDEFoam": [ROOT.TMVA.Types.kPDEFoam, "!H:!V::SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Compress=T"],
              #"PyGTB": [ROOT.TMVA.Types.kPyGTB,"!V:NEstimators=850:NJobs=4"],
              "PyAda": [ROOT.TMVA.Types.kPyAdaBoost,"!V:NEstimators=1000"],
              "PyForest": [ROOT.TMVA.Types.kPyRandomForest, "!V:VarTransform=None:NEstimators=850:Criterion=gini:MaxFeatures=auto:MaxDepth=4:MinSamplesLeaf=1:MinWeightFractionLeaf=0:Bootstrap=kTRUE"]}

In [None]:
#method = "DNN"

# One-time TMVA set-up. Neither call depends on the method being booked, so
# both are done once here instead of on every loop iteration.
ROOT.TMVA.Tools.Instance()
## For PYMVA methods
ROOT.TMVA.PyMethodBase.PyInitialize()

# Train, test and evaluate each configured method with its own Factory and
# its own output file "<package>_<method>_Classification_<mass>.root".
print("Number of methods = %s \n"%len(methodList))
for m, (methodType, methodOptions) in methodList.items():
    print("Method: %s"%m)

    outputName = "%s_%s_Classification_%s.root"%(package, m, mass)
    outputFile = ROOT.TFile.Open(outputName, "RECREATE")
    if not outputFile or outputFile.IsZombie():
        raise IOError("Could not create output file: %s"%outputName)

    try:
        factory = ROOT.TMVA.Factory("%s_%s_Classification"%(package, m), outputFile,
                              "!V:ROC:!Silent:Color:!DrawProgressBar:AnalysisType=Classification" )

        factory.BookMethod(loader, methodType, m, methodOptions)

        factory.TrainAllMethods()
        factory.TestAllMethods()
        factory.EvaluateAllMethods()

        # Optional ROC display; keep it before the file is closed.
        #%jsroot on
        #c1 = factory.GetROCCurve(loader)
        #c1.Draw()
    finally:
        # Always close the file so the results are flushed to disk and the
        # handle is released, even when training of this method fails.
        outputFile.Close()