In [None]:
import ROOT
import json
import os

In [None]:
class args:
    """Notebook-level settings, read as ``args.<name>`` by the cells below."""

    # Path to the JSON file describing the input samples (loaded with json.load).
    datasets = "../datasets_2016.json"
    # Output directory for the training; the same string names the TMVA DataLoader.
    out_dir = "BDT_4"

In [None]:
# The output directory and the TMVA DataLoader share one name.
dataloader_name = args.out_dir
os.makedirs(dataloader_name, exist_ok=True)

# "recreate" starts from an empty file, replacing any previous training output.
output_file = ROOT.TFile(f"{dataloader_name}/training_output.root", "recreate")

# Factory configuration, joined into TMVA's colon-separated option string.
factory_options = [
    "!V", "!Silent",
    "Color", "DrawProgressBar",
    "Transformations=I",
    "AnalysisType=Classification",  # plain literal: nothing to interpolate here
]
factory = ROOT.TMVA.Factory("VBS", output_file, ":".join(factory_options))
dataloader = ROOT.TMVA.DataLoader(dataloader_name)

In [None]:
# Dataset keys of the JSON catalogue that take part in the training, mapped to
# the TMVA class their trees are added to. Any other dataset is skipped.
tree_class_by_dataset = {"VBS_EWK": "Signal", "WJets": "Background"}

# Read the sample catalogue; the context manager closes the JSON file handle.
with open(args.datasets, "r") as datasets_file:
    samples_dict = json.load(datasets_file)

# Entries are (open TFile, cross-section weight, TMVA class name). The TFile
# objects are stored so they stay referenced while their trees are in use.
input_trees = []
for key, dataset in samples_dict.items():
    tmva_class = tree_class_by_dataset.get(key)
    if tmva_class is None:
        # Decide before opening anything, so unused datasets cost no file handles.
        continue

    location = dataset["location"]
    lumi = dataset["lumi"]

    for sample in dataset["filelist"]:
        file_path = location + sample["name"]

        # lumi * xs divided by (nMC - 2 * nMCneg); presumably the net generated
        # event count when negative-weight events count as -1 -- TODO confirm.
        xs_weight = (lumi * sample["xs"]) / (sample["nMC"] - (2 * sample["nMCneg"]))

        root_file = ROOT.TFile.Open(file_path)
        # Fail here with the offending path rather than later on a null file.
        if not root_file or root_file.IsZombie():
            raise OSError(f"Could not open ROOT file: {file_path}")

        input_trees.append((root_file, xs_weight, tmva_class))

for i_tree, treeWeight, treeClass in input_trees:
    tree = i_tree.Get("otree")
    # A missing tree comes back as a null pointer; do not hand that to TMVA.
    if not tree:
        raise KeyError(f"Tree 'otree' not found in {i_tree.GetName()}")
    dataloader.AddTree(tree, treeClass, treeWeight)

In [None]:
# Training inputs, grouped by topic. Flattening the groups in order gives the
# sequence in which the variables are registered with the dataloader.
# Entries of the form "name := expression" define a variable computed from
# tree branches.
variable_groups = (
    # event, lepton, MET and VBF-jet quantities (grouping read off the branch names)
    (
        "njets",
        "l_pt1",
        "l_eta1",
        "pfMET_Corr",
        "vbf_maxpt_jj_m",
        "vbf_maxpt_jj_Deta",
        "vbf_maxpt_j1_pt",
        "vbf_maxpt_j1_eta",
        "vbf_maxpt_j2_pt",
        "vbf_maxpt_j2_eta",
    ),
    # AK8 jet
    (
        "PuppiAK8_jet_mass_so_corr",
        "ungroomed_PuppiAK8_jet_pt",
        "ungroomed_PuppiAK8_jet_eta",
    ),
    # WV
    (
        "mass_lvj_type0_PuppiAK8",
        "pt_lvj_type0_PuppiAK8",
        "eta_lvj_type0_PuppiAK8",
        "BosonCentrality_type0",
        "ZeppenfeldWH_dEtajj := ZeppenfeldWH/vbf_maxpt_jj_Deta",
        "ZeppenfeldWL_dEtajj := ZeppenfeldWL_type0/vbf_maxpt_jj_Deta",
    ),
    # angles
    (
        "costheta1_type0",
        "costheta2_type0",
        "phi_type0",
        "phi1_type0",
        "costhetastar_type0",
    ),
    # W
    (
        "v_pt_type0",
        "v_eta_type0",
        "v_mt_type0",
    ),
    # sum of the three jet pT branches
    (
        "ht := ungroomed_PuppiAK8_jet_pt+vbf_maxpt_j1_pt+vbf_maxpt_j2_pt",
    ),
)
variables = [name for group in variable_groups for name in group]

# Every input is registered with type code "F".
for expression in variables:
    dataloader.AddVariable(expression, "F")

# gen weights: the same per-event weight expression for both classes
dataloader.SetSignalWeightExpression("genWeight")
dataloader.SetBackgroundWeightExpression("genWeight")

In [None]:
# Event preselection as a single-line cut expression. Adjacent string literals
# are concatenated by Python, so every requirement sits on its own source line
# while the resulting string contains no newlines.
preselection = (
    " (type==1 || type==0) "
    " && (l_pt2<0)"
    " && (l_pt1>30)"
    " && (pfMET_Corr>50)"
    " && (nBTagJet_loose==0)"
    " && (vbf_maxpt_j1_pt>30)"
    " && (vbf_maxpt_j2_pt>30) "
    " && (vbf_maxpt_jj_m>500)"
    " && (vbf_maxpt_jj_Deta>2.5)"
    " && (ungroomed_PuppiAK8_jet_pt>200)"
    " && (abs(ungroomed_PuppiAK8_jet_eta)<2.4)"
    " && (PuppiAK8_jet_mass_so_corr>65)"
    " && (PuppiAK8_jet_mass_so_corr<105)"
    " && (BosonCentrality_type0>0.0)"
    " && (abs(ZeppenfeldWL_type0/vbf_maxpt_jj_Deta)<1.0)"
    " && (abs(ZeppenfeldWH/vbf_maxpt_jj_Deta)<1.0)"
    " "
)

In [None]:
# Requested number of training / testing events per class. Both are 0 here,
# which presumably leaves the split sizes to TMVA -- confirm against the
# TMVA Users Guide before changing.
nTrain = 0
nTest = 0

# Assemble the colon-separated option string: general split settings first,
# then the per-class event counts (Signal before Background).
split_options = ["!V", "SplitMode=Random", "NormMode=NumEvents"]
for class_name in ("Signal", "Background"):
    split_options.append(f"nTrain_{class_name}={nTrain}")
    split_options.append(f"nTest_{class_name}={nTest}")

# The preselection cut is applied to the events of both classes.
dataloader.PrepareTrainingAndTestTree(ROOT.TCut(preselection), ":".join(split_options))

In [None]:
# Boost-specific BDT settings. Both configurations are kept here as data so the
# alternative can be booked by changing `bdt_boost_type` instead of
# commenting code in and out.
bdt_boost_options = {
    "AdaBoost": [
        "BoostType=AdaBoost",
        "AdaBoostBeta=0.5",
    ],
    "Grad": [
        "BoostType=Grad",
        "Shrinkage=0.05",
        "UseBaggedBoost", "BaggedSampleFraction=0.5",
    ],
}
bdt_boost_type = "Grad"

# Settings shared by both configurations surround the boost-specific ones.
bdt_options = [
    "!H", "!V",
    "NTrees=500",
    "MinNodeSize=10%",
    *bdt_boost_options[bdt_boost_type],
    "NegWeightTreatment=Pray",
]

# Book a single BDT under the method title "BDT".
factory.BookMethod(
    dataloader,
    ROOT.TMVA.Types.kBDT,
    "BDT",
    ":".join(bdt_options)
)

In [None]:
# Run the three TMVA stages in order: train, test, evaluate.
try:
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()
finally:
    # Close the output file even if a stage raises, so the handle is released
    # and whatever was produced so far is written out.
    output_file.cd()
    output_file.Close()