In [1]:
import ROOT
import json
import os

Welcome to JupyROOT 6.18/00


In [2]:
class args:
    """Notebook-level configuration, accessed as plain class attributes."""

    # Name of the output directory; also reused as the TMVA DataLoader name.
    out_dir: str = "BDT_1"
    # Path to the JSON file that describes the input datasets.
    datasets: str = "../datasets_2016.json"

In [3]:
# The output directory name doubles as the TMVA DataLoader name.
dataloader_name = args.out_dir
os.makedirs(dataloader_name, exist_ok=True)

# "recreate" overwrites any training_output.root left over from a previous run.
output_file = ROOT.TFile(f"{dataloader_name}/training_output.root", "recreate")
# Fail fast: older PyROOT versions flag a file that could not be opened as a
# "zombie" instead of raising, so the problem would only surface much later.
if output_file.IsZombie():
    raise OSError(f"Could not create {dataloader_name}/training_output.root")

# Factory named "VBS" writing into the file above; the options are joined
# into the colon-separated string that TMVA expects.
factory = ROOT.TMVA.Factory(
    "VBS",
    output_file,
    ":".join([
        "!V", "!Silent",
        "Color", "DrawProgressBar",
        "Transformations=I",
        "AnalysisType=Classification",  # plain literal: nothing to interpolate
    ])
)
dataloader = ROOT.TMVA.DataLoader(dataloader_name)

In [4]:
# Dataset keys that take part in the training, mapped to their TMVA class.
# Every other key in the JSON file is ignored.
class_by_key = {"VBS_EWK": "Signal", "WJets": "Background"}

# Read the dataset description; the context manager closes the file handle.
with open(args.datasets, "r") as datasets_file:
    samples_dict = json.load(datasets_file)

# Collect (open TFile, cross-section weight, TMVA class) for every used sample.
input_trees = []
for key, dataset in samples_dict.items():
    tree_class = class_by_key.get(key)
    if tree_class is None:
        # Skip unused datasets *before* opening any of their ROOT files.
        continue

    location = dataset["location"]
    lumi = dataset["lumi"]

    for sample in dataset["filelist"]:
        file_path = location + sample["name"]
        xs = sample["xs"]
        nMC = sample["nMC"]
        nMCneg = sample["nMCneg"]

        # Normalise to lumi * xs; the denominator is nMC - 2 * nMCneg.
        xs_weight = (lumi * xs) / (nMC - (2 * nMCneg))

        root_file = ROOT.TFile.Open(file_path)
        # TFile.Open hands back a null/zombie object on failure instead of raising.
        if not root_file or root_file.IsZombie():
            raise OSError(f"Could not open ROOT file: {file_path}")

        # The open TFile is stored so that its tree can be fetched below.
        input_trees.append((root_file, xs_weight, tree_class))

for i_tree, treeWeight, treeClass in input_trees:
    tree = i_tree.Get("otree")
    if not tree:
        raise KeyError(f"No 'otree' tree found in {i_tree.GetName()}")
    dataloader.AddTree(tree, treeClass, treeWeight)

DataSetInfo              : [BDT_1] : Added class "Signal"
                         : Add Tree otree of type Signal with 26095 events
                         : Add Tree otree of type Signal with 23888 events
                         : Add Tree otree of type Signal with 16938 events
                         : Add Tree otree of type Signal with 104489 events
                         : Add Tree otree of type Signal with 28705 events
                         : Add Tree otree of type Signal with 101123 events
                         : Add Tree otree of type Signal with 28317 events
                         : Add Tree otree of type Signal with 32276 events
                         : Add Tree otree of type Signal with 20595 events
DataSetInfo              : [BDT_1] : Added class "Background"
                         : Add Tree otree of type Background with 93 events
                         : Add Tree otree of type Background with 7226 events
                         : Add Tree otree of type

In [5]:
# Input variables, kept in the order in which they are registered with TMVA.
# Entries written as "name := expression" use TMVA's syntax for a derived
# variable computed from tree branches.
variable_groups = (
    # general
    (
        "njets",
        "l_pt1",
        "l_eta1",
        "pfMET_Corr",
        "vbf_maxpt_jj_m",
        "vbf_maxpt_jj_Deta",
        "vbf_maxpt_j1_pt",
        "vbf_maxpt_j1_eta",
        "vbf_maxpt_j2_pt",
        "vbf_maxpt_j2_eta",
    ),
    # AK8 jet
    (
        "PuppiAK8_jet_mass_so_corr",
        "ungroomed_PuppiAK8_jet_pt",
        "ungroomed_PuppiAK8_jet_eta",
    ),
    # WV
    (
        "mass_lvj_type0_PuppiAK8",
        "pt_lvj_type0_PuppiAK8",
        "eta_lvj_type0_PuppiAK8",
        "BosonCentrality_type0",
        "ZeppenfeldWH_dEtajj := ZeppenfeldWH/vbf_maxpt_jj_Deta",
        "ZeppenfeldWL_dEtajj := ZeppenfeldWL_type0/vbf_maxpt_jj_Deta",
    ),
    # angles
    (
        "costheta1_type0",
        "costheta2_type0",
        "phi_type0",
        "phi1_type0",
        "costhetastar_type0",
    ),
    # W
    (
        "v_pt_type0",
        "v_eta_type0",
        "v_mt_type0",
        "ht := ungroomed_PuppiAK8_jet_pt+vbf_maxpt_j1_pt+vbf_maxpt_j2_pt",
    ),
)

# Flatten the groups into the single ordered list used for registration.
variables = [name for group in variable_groups for name in group]

# Every variable is registered with type code "F".
for var in variables:
    dataloader.AddVariable(var, "F")

# Signal and background share the same per-event weight expression.
weight_expression = "genWeight"
dataloader.SetSignalWeightExpression(weight_expression)
dataloader.SetBackgroundWeightExpression(weight_expression)

In [6]:
# Event preselection, later wrapped in a ROOT.TCut for the DataLoader.
# The cut is assembled from adjacent string literals, one requirement per
# line. Whitespace is significant for reproducing the exact cut string: the
# fragments ending in a space yield the doubled spaces of the original text,
# and the string both starts and ends with a single space.
preselection = (
    " (type==1 || type==0) "
    " && (l_pt2<0)"
    " && (l_pt1>30)"
    " && (pfMET_Corr>50)"
    " && (nBTagJet_loose==0)"
    " && (vbf_maxpt_j1_pt>30)"
    " && (vbf_maxpt_j2_pt>30) "
    " && (vbf_maxpt_jj_m>500)"
    " && (vbf_maxpt_jj_Deta>2.5)"
    " && (ungroomed_PuppiAK8_jet_pt>200)"
    " && (abs(ungroomed_PuppiAK8_jet_eta)<2.4)"
    " && (PuppiAK8_jet_mass_so_corr>65)"
    " && (PuppiAK8_jet_mass_so_corr<105)"
    " && (BosonCentrality_type0>0.0)"
    " && (abs(ZeppenfeldWL_type0/vbf_maxpt_jj_Deta)<1.0)"
    " && (abs(ZeppenfeldWH/vbf_maxpt_jj_Deta)<1.0)"
    " "
)

In [7]:
# Requested event counts per class for the training and test samples.
# NOTE(review): 0 presumably leaves the split sizes to TMVA - confirm in the
# TMVA documentation.
nTrain = 0
nTest = 0

# Common options first, then the nTrain_/nTest_ pair for each class
# (Signal before Background, matching the original option order).
split_options = ["!V", "SplitMode=Random", "NormMode=NumEvents"]
for event_class in ("Signal", "Background"):
    split_options.append(f"nTrain_{event_class}={nTrain}")
    split_options.append(f"nTest_{event_class}={nTest}")

# Apply the preselection cut and hand TMVA the colon-separated option string.
dataloader.PrepareTrainingAndTestTree(
    ROOT.TCut(preselection),
    ":".join(split_options)
)

                         : Dataset[BDT_1] : Class index : 0  name : Signal
                         : Dataset[BDT_1] : Class index : 1  name : Background


In [8]:
# Colon-separated configuration of the "BDTG" method booked below.
bdtg_options = ":".join((
    "!H",
    "!V",
    "NTrees=500",
    "MinNodeSize=2.5%",
    "BoostType=Grad",
    "Shrinkage=0.10",
    "UseBaggedBoost",
    "BaggedSampleFraction=0.5",
    "NegWeightTreatment=Pray",
))

# The call stays the last expression of the cell, so the booked method
# object is still shown as the cell's output.
factory.BookMethod(dataloader, ROOT.TMVA.Types.kBDT, "BDTG", bdtg_options)

<ROOT.TMVA::MethodBDT object ("BDTG") at 0x76a2910>

Factory                  : Booking method: [1mBDTG[0m
                         : 
                         : 
DataSetFactory           : [BDT_1] : Number of events in input trees
                         : Dataset[BDT_1] :     Signal     requirement: " (type==1 || type==0)  && (l_pt2<0) && (l_pt1>30) && (pfMET_Corr>50) && (nBTagJet_loose==0) && (vbf_maxpt_j1_pt>30) && (vbf_maxpt_j2_pt>30)  && (vbf_maxpt_jj_m>500) && (vbf_maxpt_jj_Deta>2.5) && (ungroomed_PuppiAK8_jet_pt>200) && (abs(ungroomed_PuppiAK8_jet_eta)<2.4) && (PuppiAK8_jet_mass_so_corr>65) && (PuppiAK8_jet_mass_so_corr<105) && (BosonCentrality_type0>0.0) && (abs(ZeppenfeldWL_type0/vbf_maxpt_jj_Deta)<1.0) && (abs(ZeppenfeldWH/vbf_maxpt_jj_Deta)<1.0) "
                         : Dataset[BDT_1] :     Signal          -- number of events passed: 7719   / sum of weights: 111.918
                         : Dataset[BDT_1] :     Signal          -- efficiency             : 0.0209004
                         : Dataset[BDT_1] :     Backg

In [9]:
# Run the three factory stages strictly in order: train, test, evaluate.
tmva_stages = (
    factory.TrainAllMethods,
    factory.TestAllMethods,
    factory.EvaluateAllMethods,
)
for run_stage in tmva_stages:
    run_stage()

# Make the output file the current ROOT directory, then close it.
output_file.cd()
output_file.Close()

Factory                  : [1mTrain all methods[0m
Factory                  : [BDT_1] : Create Transformation "I" with events from all classes.
                         : 
                         : Transformation, Variable selection : 
                         : Input : variable 'njets' <---> Output : variable 'njets'
                         : Input : variable 'l_pt1' <---> Output : variable 'l_pt1'
                         : Input : variable 'l_eta1' <---> Output : variable 'l_eta1'
                         : Input : variable 'pfMET_Corr' <---> Output : variable 'pfMET_Corr'
                         : Input : variable 'vbf_maxpt_jj_m' <---> Output : variable 'vbf_maxpt_jj_m'
                         : Input : variable 'vbf_maxpt_jj_Deta' <---> Output : variable 'vbf_maxpt_jj_Deta'
                         : Input : variable 'vbf_maxpt_j1_pt' <---> Output : variable 'vbf_maxpt_j1_pt'
                         : Input : variable 'vbf_maxpt_j1_eta' <---> Output : variable 'vbf_maxpt_j

0%, time left: unknown
6%, time left: 32 sec
12%, time left: 29 sec
19%, time left: 26 sec
25%, time left: 24 sec
31%, time left: 22 sec
37%, time left: 20 sec
44%, time left: 18 sec
50%, time left: 16 sec
56%, time left: 14 sec
62%, time left: 12 sec
69%, time left: 10 sec
75%, time left: 8 sec
81%, time left: 6 sec
87%, time left: 4 sec
94%, time left: 2 sec
0%, time left: unknown
6%, time left: 2 sec
12%, time left: 2 sec
18%, time left: 2 sec
25%, time left: 2 sec
31%, time left: 2 sec
37%, time left: 1 sec
43%, time left: 1 sec
50%, time left: 1 sec
56%, time left: 1 sec
62%, time left: 1 sec
68%, time left: 0 sec
75%, time left: 0 sec
81%, time left: 0 sec
87%, time left: 0 sec
93%, time left: 0 sec
0%, time left: unknown
6%, time left: 1 sec
12%, time left: 1 sec
18%, time left: 1 sec
25%, time left: 1 sec
31%, time left: 1 sec
37%, time left: 1 sec
43%, time left: 0 sec
50%, time left: 0 sec
56%, time left: 0 sec
62%, time left: 0 sec
68%, time left: 0 sec
75%, time left: 0 sec