# MSSMHbb-SPECT stage 1: Training

## Description
This is a standalone plugin to evaluate classification techniques for the MSSM Hbb analysis of the DESY CMS Higgs -> bb group. 
In this stage the trainings will be performed. 

This code is independent of CMSSW, however, running in the CMSSW area of the main analysis framework is recommended.


## Installation
Download the repository . It is recommended to clone it in the analysis-test area


## Inputs
This code reads the output of the MSSM  Hbb analysis, with the variables relevant for the training saved in a flat ntuple, ant their corresponding weight saved in a branch in an event-basis.


In [22]:
import ROOT
from ROOT import TMVA, TFile, TTree, TCut, TCanvas, TString


In [23]:

#load the input files and trees
signalFile = TFile.Open("rootfiles_signal/mssmHbb_2018_FH_600_sr.root")
backgroundFileQCDbEnriched1 = TFile.Open("rootfiles_QCD/mssmHbb_2018_FH_QCD_bEnriched_HT100to200_sr.root")
backgroundFileQCDbEnriched2 = TFile.Open("rootfiles_QCD/mssmHbb_2018_FH_QCD_bEnriched_HT200to300_sr.root")
backgroundFileQCDbEnriched3 = TFile.Open("rootfiles_QCD/mssmHbb_2018_FH_QCD_bEnriched_HT300to500_sr.root")
backgroundFileQCDbEnriched4 = TFile.Open("rootfiles_QCD/mssmHbb_2018_FH_QCD_bEnriched_HT500to700_sr.root")
backgroundFileQCDbEnriched5 = TFile.Open("rootfiles_QCD/mssmHbb_2018_FH_QCD_bEnriched_HT700to1000_sr.root")
backgroundFileQCDbEnriched6 = TFile.Open("rootfiles_QCD/mssmHbb_2018_FH_QCD_bEnriched_HT1000to1500_sr.root")
backgroundFileQCDbEnriched7 = TFile.Open("rootfiles_QCD/mssmHbb_2018_FH_QCD_bEnriched_HT1500to2000_sr.root")
backgroundFileQCDbEnriched8 = TFile.Open("rootfiles_QCD/mssmHbb_2018_FH_QCD_bEnriched_HT2000toInf_sr.root")

backgroundFileQCDbGenFilter1 = TFile.Open("rootfiles_QCD/mssmHbb_2018_FH_QCD_HT100to200_BGenFilter_sr.root")
backgroundFileQCDbGenFilter2 = TFile.Open("rootfiles_QCD/mssmHbb_2018_FH_QCD_HT200to300_BGenFilter_sr.root")
backgroundFileQCDbGenFilter3 = TFile.Open("rootfiles_QCD/mssmHbb_2018_FH_QCD_HT300to500_BGenFilter_sr.root")
backgroundFileQCDbGenFilter4 = TFile.Open("rootfiles_QCD/mssmHbb_2018_FH_QCD_HT500to700_BGenFilter_sr.root")
backgroundFileQCDbGenFilter5 = TFile.Open("rootfiles_QCD/mssmHbb_2018_FH_QCD_HT700to1000_BGenFilter_sr.root")
backgroundFileQCDbGenFilter6 = TFile.Open("rootfiles_QCD/mssmHbb_2018_FH_QCD_HT1000to1500_BGenFilter_sr.root")
backgroundFileQCDbGenFilter7 = TFile.Open("rootfiles_QCD/mssmHbb_2018_FH_QCD_HT1500to2000_BGenFilter_sr.root")
backgroundFileQCDbGenFilter8 = TFile.Open("rootfiles_QCD/mssmHbb_2018_FH_QCD_HT2000toInf_BGenFilter_sr.root")

#output file
outputFile = TFile.Open("TMVAOutput.root", "RECREATE")

In [35]:
# Create a TMVA factory
factory = TMVA.Factory("TMVA_Classification", outputFile, "AnalysisType=Classification")

# Define the DataLoader and set up the variables
loader = TMVA.DataLoader("dataset")

# Get the signal and background TTrees
signalTree = signalFile.Get("mssmhbb_MVA")
backgroundTreeQCDbEnriched1 = backgroundFileQCDbEnriched1.Get("mssmhbb_MVA")
backgroundTreeQCDbEnriched2 = backgroundFileQCDbEnriched2.Get("mssmhbb_MVA")
backgroundTreeQCDbEnriched3 = backgroundFileQCDbEnriched3.Get("mssmhbb_MVA")
backgroundTreeQCDbEnriched4 = backgroundFileQCDbEnriched4.Get("mssmhbb_MVA")
backgroundTreeQCDbEnriched5 = backgroundFileQCDbEnriched5.Get("mssmhbb_MVA")
backgroundTreeQCDbEnriched6 = backgroundFileQCDbEnriched6.Get("mssmhbb_MVA")
backgroundTreeQCDbEnriched7 = backgroundFileQCDbEnriched7.Get("mssmhbb_MVA")
backgroundTreeQCDbEnriched8 = backgroundFileQCDbEnriched8.Get("mssmhbb_MVA")
backgroundTreeQCDbGenFilter1 = backgroundFileQCDbGenFilter1.Get("mssmhbb_MVA")
backgroundTreeQCDbGenFilter2 = backgroundFileQCDbGenFilter2.Get("mssmhbb_MVA")
backgroundTreeQCDbGenFilter3 = backgroundFileQCDbGenFilter3.Get("mssmhbb_MVA")
backgroundTreeQCDbGenFilter4 = backgroundFileQCDbGenFilter4.Get("mssmhbb_MVA")
backgroundTreeQCDbGenFilter5 = backgroundFileQCDbGenFilter5.Get("mssmhbb_MVA")
backgroundTreeQCDbGenFilter6 = backgroundFileQCDbGenFilter6.Get("mssmhbb_MVA")
backgroundTreeQCDbGenFilter7 = backgroundFileQCDbGenFilter7.Get("mssmhbb_MVA")
backgroundTreeQCDbGenFilter8 = backgroundFileQCDbGenFilter8.Get("mssmhbb_MVA")


# Define the input variables based on your feature names and data types
input_variables = {
    "dr_jets12": "F",
    "dr_jets13": "F",
    "dr_jets23": "F",
    "eta_jet1": "F",
    "eta_jet2": "F",
    "eta_jet3": "F",
    "deta_jet12": "F",
    "deta_jet13": "F",
    "deta_jet23": "F",
    "phi_jet1": "F",
    "phi_jet2": "F",
    "phi_jet3": "F",
    "dphi_jet12": "F",
    "dphi_jet13": "F",
    "dphi_jet23": "F",
    "pT_jet1": "F",
    "pT_jet2": "F",
    "pT_jet3": "F",
    "dpT_jet12": "F",
    "dpT_jet13": "F",
    "dpT_jet23": "F",
    "pTratio_jet12": "F",
    "pTratio_jet13": "F",
    "pTratio_jet23": "F",
    "pTimbalance_jet12": "F",
    "pTimbalance_jet13": "F",
    "pTimbalance_jet23": "F",
    "qglikelyhood_jet1": "F",
    "qglikelyhood_jet2": "F",
    "qglikelyhood_jet3": "F"
}

# Add the input variables to the DataLoader
for var_name, var_type in input_variables.items():
    loader.AddVariable(var_name, var_type)

In [None]:


# load the signal and background trees
loader.AddSignalTree(signalTree, 1.0)
#loader.AddBackgroundTree(backgroundTreeQCDbEnriched1, 1.0)
loader.AddBackgroundTree(backgroundTreeQCDbEnriched2, 1.0)
loader.AddBackgroundTree(backgroundTreeQCDbEnriched3, 1.0)
loader.AddBackgroundTree(backgroundTreeQCDbEnriched4, 1.0)
loader.AddBackgroundTree(backgroundTreeQCDbEnriched5, 1.0)
loader.AddBackgroundTree(backgroundTreeQCDbEnriched6, 1.0)
loader.AddBackgroundTree(backgroundTreeQCDbEnriched7, 1.0)
loader.AddBackgroundTree(backgroundTreeQCDbEnriched8, 1.0)
#loader.AddBackgroundTree(backgroundTreeQCDbGenFilter1, 1.0)
#loader.AddBackgroundTree(backgroundTreeQCDbGenFilter2, 1.0)
loader.AddBackgroundTree(backgroundTreeQCDbGenFilter3, 1.0)
loader.AddBackgroundTree(backgroundTreeQCDbGenFilter4, 1.0)
loader.AddBackgroundTree(backgroundTreeQCDbGenFilter5, 1.0)
loader.AddBackgroundTree(backgroundTreeQCDbGenFilter6, 1.0)
loader.AddBackgroundTree(backgroundTreeQCDbGenFilter7, 1.0)
loader.AddBackgroundTree(backgroundTreeQCDbGenFilter8, 1.0)

# Define the signal and background cuts if needed
# factory.PrepareTrainingAndTestTree(signalCut, backgroundCut, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V")

# Define the method (e.g., BDT) and add it to the factory ----> factory.BookMethod(loader, methodType, methodTitle, options)
factory.BookMethod(loader, TMVA.Types.kBDT, "BDT", "NTrees=1000:MaxDepth=3")

In [26]:
# Train and test the classifier
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()

<HEADER> Factory                  : Train all methods
                         : ...nothing found to train
<HEADER> Factory                  : Test all methods
                         : ...nothing found to test
<HEADER> Factory                  : Evaluate all methods
                         : ...nothing found to evaluate


In [27]:
# Save the factory and cleanup
factory.Close()
outputFile.Close()
signalFile.Close()
backgroundFileQCDbEnriched1.Close()
backgroundFileQCDbEnriched2.Close()
backgroundFileQCDbEnriched3.Close()
backgroundFileQCDbEnriched4.Close()
backgroundFileQCDbEnriched5.Close()
backgroundFileQCDbEnriched6.Close()
backgroundFileQCDbEnriched7.Close()
backgroundFileQCDbEnriched8.Close()
backgroundFileQCDbGenFilter1.Close()
backgroundFileQCDbGenFilter2.Close()
backgroundFileQCDbGenFilter3.Close()
backgroundFileQCDbGenFilter4.Close()
backgroundFileQCDbGenFilter5.Close()
backgroundFileQCDbGenFilter6.Close()
backgroundFileQCDbGenFilter7.Close()
backgroundFileQCDbGenFilter8.Close()

AttributeError: 'Factory' object has no attribute 'Close'