In [None]:
# Print the hostname of the machine this kernel runs on.
# Useful when the notebook is running in tmux and it is unclear which host is in use.
import socket
hostname = socket.gethostname()
print(hostname)

In [None]:
import ROOT
from ROOT import TMVA

In [None]:
# Base location of the skimmed ntuples (read remotely through XRootD) and the
# year/channel/selection sub-directory used for this training.
dirNtuple = "root://cmseos.fnal.gov//store/user/rverma/Output/cms-TT-run2/Ntuple_Skim/"
dirFile = "2016/Semilep/JetBase/"

# Signal sample: a single file holding the "AnalysisTree" tree.
sigFile = "Semilep_JetBase__TstarTstarToTgammaTgluon_M800_2016_Ntuple.root"
sigPath = "%s/%s/%s"%(dirNtuple, dirFile, sigFile)
inputFileS = ROOT.TFile.Open(sigPath)
# TFile.Open hands back a null pointer (or a zombie file) when the file cannot be
# opened; fail here with a clear message instead of a null-pointer error later.
if not inputFileS or inputFileS.IsZombie():
    raise IOError("Could not open signal file: %s"%sigPath)
sig = inputFileS.Get("AnalysisTree")
# Get returns a null object when the key does not exist in the file.
if not sig:
    raise IOError("Tree 'AnalysisTree' not found in signal file: %s"%sigPath)

# Background: all TTGamma samples chained into one "AnalysisTree" chain.
bkg = ROOT.TChain("AnalysisTree")
bkgList = ["TTGamma_SingleLept", 
           "TTGamma_Dilepton", 
           "TTGamma_Hadronic", 
           "TTGamma_SingleLept_Pt100", 
           "TTGamma_Dilepton_Pt100", 
           "TTGamma_Hadronic_Pt100",
           "TTGamma_SingleLept_Pt200", 
           "TTGamma_Dilepton_Pt200",
           "TTGamma_Hadronic_Pt200"
          ]
for b in bkgList:
    fPath = "%s/%s/Semilep_JetBase__%s_2016_Ntuple.root"%(dirNtuple, dirFile, b)
    bkg.Add(fPath)
# Total number of background entries across the chained files.
print(bkg.GetEntries())

In [None]:
# DataLoader that hands the signal tree and the background chain to TMVA.
loader = ROOT.TMVA.DataLoader("dataset")

# Global per-tree weights; the per-event weight comes from the expression set below.
sigWeight = 1.0
bkgWeight = 1.0
loader.AddSignalTree(sig, sigWeight)
loader.AddBackgroundTree(bkg, bkgWeight)

# Training inputs, registered in this order, each as a float ('F') variable.
for varName in ("Reco_ht", "Reco_st", "Photon_et", "Reco_mass_T"):
    loader.AddVariable(varName, 'F')

# Signal and background use the same per-event weight expression.
eventWeightExpr = "Weight_lumi"
loader.SetSignalWeightExpression(eventWeightExpr)
loader.SetBackgroundWeightExpression(eventWeightExpr)

# Selection applied to both classes before the train/test split.
cut1 = ROOT.TCut("Event_pass_presel_mu &&((Jet_size>=5 && FatJet_size==0) || (Jet_size>=2 && FatJet_size==1))  && Jet_b_size >=1 && Photon_size==1 && Photon_et[0] > 100")

# Random train/test splitting, non-verbose.
splitOptions = "SplitMode=Random:!V"
loader.PrepareTrainingAndTestTree(cut1, splitOptions)

In [None]:
ROOT.TMVA.Tools.Instance()
# Required before any PyMVA method can be used.
TMVA.PyMethodBase.PyInitialize()

# File that receives the TMVA training/testing output.
outputFile = ROOT.TFile.Open("mT800_Test.root", "RECREATE")

# Factory-level options, joined with ':' as TMVA expects.
factoryOptions = ":".join([
    "!V",
    "ROC",
    "!Silent",
    "Color",
    "!DrawProgressBar",
    "AnalysisType=Classification",
])
factory = ROOT.TMVA.Factory("TMVA_TT_Classification", outputFile, factoryOptions)

# Gradient-boosted decision tree configuration.
bdtOptions = ":".join([
    "!V",
    "NTrees=200",
    "MinNodeSize=1%",
    "MaxDepth=4",
    "BoostType=Grad",
    "Shrinkage=0.02",
    "UseBaggedBoost",
    "BaggedSampleFraction=0.5",
    "SeparationType=GiniIndex",
    "nCuts=-1",
])
factory.BookMethod(loader, TMVA.Types.kBDT, "BDTG_mT800", bdtOptions)

In [None]:
# Run the three TMVA stages in order for every booked method.
factory.TrainAllMethods()      # fit on the training sample
factory.TestAllMethods()       # apply to the test sample
factory.EvaluateAllMethods()   # compute the evaluation/performance output

In [None]:
# Enable the JSROOT display (notebook magic) for the canvas drawn below.
%jsroot on
# Fetch the ROC curve canvas for the methods booked on `loader` and draw it.
c1 = factory.GetROCCurve(loader);
c1.Draw();
# Close the TMVA output file; a later cell re-opens "mT800_Test.root" for reading.
outputFile.Close()

In [None]:
#--------------------------------------------
# Read output directly from classification
#--------------------------------------------
# Input: the TMVA output file written by the training step.
inFile = ROOT.TFile.Open("mT800_Test.root", "read")
# TFile.Open returns a null pointer (or a zombie file) on failure; stop here with a
# clear message rather than failing on the first Get() call further down.
if not inFile or inFile.IsZombie():
    raise IOError("Could not open TMVA output file: mT800_Test.root")
# Output: file that receives the re-organised histograms (overwritten on each run).
outputFile = ROOT.TFile("Disc_Ntuple.root","RECREATE")

# Region label used as the middle component of every output directory path.
CR = "ttyg_Enriched_SR"
def getHistDir(sample, sysType, CR):
    """Build the directory path ``<sample>/<CR>/<sysType>``.

    Note the argument order differs from the path order: ``sysType`` is the
    second argument but the last path component.
    """
    return "{}/{}/{}".format(sample, CR, sysType)

def writeHist(hist, procDir, outputFile):
    """Write ``hist`` into the ``<procDir>/<CR>/Base`` directory of ``outputFile``.

    The directory is created if it does not exist yet, and any object already
    stored there under the histogram's name is deleted before writing. A summary
    line (histogram name, process directory, rounded integral) is printed.

    Parameters:
        hist: histogram to write; must provide GetName(), Integral() and Write().
        procDir: name of the process, used as the top-level directory.
        outputFile: open, writable ROOT file.
    """
    outHistDir = getHistDir(procDir, "Base", CR)
    if not outputFile.GetDirectory(outHistDir):
        outputFile.mkdir(outHistDir)
    outputFile.cd(outHistDir)
    # "<name>;*" targets every cycle of that name in the current directory.
    ROOT.gDirectory.Delete("%s;*"%(hist.GetName()))
    # print() with one pre-formatted string behaves the same on Python 2 and 3.
    print("%20s, %10s, %10s"%(hist.GetName(), procDir, round(hist.Integral())))
    hist.Write()

def getHist(inHistName, procDir, sysType):
    """Fetch an input-variable histogram from ``inFile`` and clone it.

    The object is read from ``dataset/InputVariables_Id/<inHistName>__<procDir>_Id``
    and the clone is named ``inHistName``.

    Returns:
        tuple ``(clone, procDir, sysType)``.
    """
    print(inHistName, procDir)
    histKey = "dataset/InputVariables_Id/%s__%s_Id"%(inHistName, procDir)
    clonedHist = inFile.Get(histKey).Clone(inHistName)
    return clonedHist, procDir, sysType

def getDisc(inHistName, procDir, sysType, s):
    """Fetch the BDTG_mT800 response histogram from ``inFile`` and clone it.

    ``s`` is the suffix of the stored object ``MVA_BDTG_mT800_<s>`` (called below
    with "S" and "B"); the clone is named ``inHistName``.

    Returns:
        tuple ``(clone, procDir, sysType)``.
    """
    discKey = "dataset/Method_BDTG_mT800/BDTG_mT800/MVA_BDTG_mT800_%s"%(s)
    clonedDisc = inFile.Get(discKey).Clone(inHistName)
    return clonedDisc, procDir, sysType

procList = ["Signal", "Background"]
histList = ["Reco_st", "Reco_ht", "Photon_et", "Reco_mass_T"]

# Input-variable histograms for every (process, variable) pair, process-major order,
# followed by the BDT discriminator for signal ("S") and background ("B").
writeList = [getHist(h, p, "Base") for p in procList for h in histList]
writeList += [
    getDisc("BDT_Disc", "Signal", "Base", "S"),
    getDisc("BDT_Disc", "Background", "Base", "B"),
]

for hist, procDir, sysLabel in writeList:
    writeHist(hist, procDir, outputFile)
    # Background histograms are additionally stored under "data_obs".
    if "Back" in procDir:
        writeHist(hist, "data_obs", outputFile)

# List what was written, then close the output file.
outputFile.ls()
outputFile.Close()

In [None]:
#DNN model
#inputLayoutString = "InputLayout=1|1|21"; 
#batchLayoutString= "BatchLayout=1|256|21";
#layoutString = ("Layout=DENSE|100|RELU,DENSE|100|RELU,DENSE|64|RELU,DENSE|64|RELU,DENSE|1|LINEAR")     

In [None]:
##Training strategies 
## one can catenate several training strategies

#training1  = "Optimizer=ADAM,LearningRate=1e-3,Momentum=0.,Regularization=None,WeightDecay=1e-4,"
#training1 += "DropConfig=0.+0.+0.+0.,MaxEpochs=30,ConvergenceSteps=10,BatchSize=256,TestRepetitions=1"
 
# we add regularization in the second phase
#training2  = "Optimizer=ADAM,LearningRate=1e-3,Momentum=0.,Regularization=L2,WeightDecay=1e-4,"
#training2 += "DropConfig=0.0+0.0+0.0+0,MaxEpochs=20,ConvergenceSteps=10,BatchSize=128,TestRepetitions=1"
     
            

#trainingStrategyString = "TrainingStrategy=" + training1 ## + training2

In [None]:
## General Options.                                                                                                                                                                
#dnnOptions = "!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=G,N:WeightInitialization=XAVIER::Architecture=CPU"

#dnnOptions +=  ":" + inputLayoutString
#dnnOptions +=  ":" + batchLayoutString
#dnnOptions +=  ":" + layoutString
#dnnOptions +=  ":" + trainingStrategyString

In [None]:
#we can now book the method
              
#factory.BookMethod(loader, ROOT.TMVA.Types.kDL, "DL_CPU", dnnOptions)

In [None]:
#factory.BookMethod(loader, ROOT.TMVA.Types.kPyGTB, "PyGTB","H:!V:VarTransform=G:NEstimators=1000:LearningRate=0.01:"
#                                                  "MaxDepth=4")

#factory.BookMethod(loader, ROOT.TMVA.Types.kPyRandomForest, "PyRandomForest","!V:VarTransform=G:NEstimators=400:"
#                          "Criterion=gini:MaxFeatures=auto:MaxDepth=6:MinSamplesLeaf=3:MinWeightFractionLeaf=0:"
#                           "Bootstrap=kTRUE" )
      
#factory.BookMethod(loader, ROOT.TMVA.Types.kPyAdaBoost, "PyAdaBoost","!V:VarTransform=G:NEstimators=400" )
