In [None]:
import glob
import math
import os
import sys

import ROOT
import scipy.optimize
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
import mplhep as hep
# Apply the CMS style sheet provided by mplhep to matplotlib.
plt.style.use(hep.style.CMS)

In [None]:
# Base directory of the PreprocessRDF outputs.
# NOTE(review): `indir` is not referenced in the cells visible here — confirm it is still used.
indir = '/data_CMS/cms/vernazza/cmt/PreprocessRDF/'

In [None]:
# Declare the C++ helper `find_bb_tautau` to the ROOT interpreter so that it can be
# called from RDataFrame expression strings.
#
# find_bb_tautau(GenPart_pdgId, GenPart_genPartIdxMother, pdgid_bb, pdgid_tt) walks the
# generator particles in index order, skips every entry whose mother index is -1, and
# returns the four indices {index_b1, index_b2, index_t1, index_t2}:
#   * index_b1 / index_b2: the first two particles with |pdgId| == 5 whose mother has
#     pdgId == pdgid_bb (signed comparison on the mother);
#   * index_t1 / index_t2: the first two particles with |pdgId| == 15 whose mother has
#     pdgId == pdgid_tt.
# A slot that is never filled keeps its initial value of -1. Once both members of a pair
# are found, later candidates for that pair are ignored.
#
# NOTE(review): re-running this cell in the same kernel declares the function a second
# time; presumably the interpreter reports a redefinition error — confirm, and restart
# the kernel (or add an include guard) if the helper has to be edited.
ROOT.gInterpreter.Declare("""
    using Vfloat = const ROOT::RVec<float>&;
    using Vint   = const ROOT::RVec<int>&;
    ROOT::RVec<int> find_bb_tautau(Vint GenPart_pdgId, Vint GenPart_genPartIdxMother, int pdgid_bb, int pdgid_tt) {
        bool Foundbjets = false;
        bool Foundtaus = false;
        int index_b1 = -1;
        int index_b2 = -1;
        int index_t1 = -1;
        int index_t2 = -1;
        for (int i_gen = 0; i_gen < GenPart_pdgId.size(); i_gen ++) {
            if (GenPart_genPartIdxMother.at(i_gen) == -1) continue; // it is the incoming parton
            if ((Foundbjets != true) && (fabs(GenPart_pdgId.at(i_gen)) == 5) && (GenPart_pdgId.at(GenPart_genPartIdxMother.at(i_gen)) == pdgid_bb)) {
                if (index_b1 == -1) index_b1 = i_gen;
                else if (index_b2 == -1) index_b2 = i_gen;
                if ((index_b1 != -1) && (index_b2 != -1)) {
                    Foundbjets = true;
                }
            }
            if ((Foundtaus != true) && (fabs(GenPart_pdgId.at(i_gen)) == 15) && (GenPart_pdgId.at(GenPart_genPartIdxMother.at(i_gen)) == pdgid_tt)) {
                if (index_t1 == -1) index_t1 = i_gen;
                else if (index_t2 == -1) index_t2 = i_gen;
                if ((index_t1 != -1) && (index_t2 != -1)) {
                    Foundtaus = true;
                }
            }
        }
        return {index_b1, index_b2, index_t1, index_t2} ;
    }
""")

In [None]:
def ComputeHHBTagPurity(folder, mass=None):
    """Build the RDataFrames used to study the HHBTag purity of one sample.

    An RDataFrame is created over the "Events" tree of every file found directly
    inside ``folder``. The generator-level b quarks are located with
    ``find_bb_tautau`` (mother pdgId 23), each event is flagged through
    ``check_gen_tag_matching`` and the pT of the two selected b jets is defined
    both on the full frame and on the frame restricted to ``matched == 1``.

    Parameters
    ----------
    folder : str
        Directory containing the input ROOT files; every entry matching
        ``folder + '/*'`` is added to the chain.
    mass : optional
        Label appended to the pT column names (``bjet1_pt_<mass>``, ...). When
        omitted, the notebook-level variable ``mass`` is used, which is what this
        function implicitly relied on before the parameter existed.

    Raises
    ------
    NameError
        If ``mass`` is not given and no notebook-level ``mass`` is defined.
    FileNotFoundError
        If ``folder`` contains no files.

    Notes
    -----
    * ``check_gen_tag_matching`` is not declared in the cells visible here; it
      presumably has to be declared to the ROOT interpreter before this function
      is called — TODO confirm.
    * NOTE(review): the function returns ``None`` and never triggers the event
      loop, so the frames built here are currently discarded. The purity itself
      (matched / total) still has to be computed from ``df`` and ``df_matched``.
    """
    # Resolve the mass label first so a missing value fails before any file is opened.
    if mass is None:
        try:
            mass = globals()["mass"]
        except KeyError:
            raise NameError(
                "ComputeHHBTagPurity needs a mass label: pass mass=... "
                "or define a notebook-level `mass` variable"
            ) from None

    # Sorted so that the order of the files in the chain is reproducible.
    files = sorted(glob.glob(folder + '/*'))
    if not files:
        raise FileNotFoundError("No input files found in '%s'" % folder)

    dataframe_files = ROOT.vector(str)()
    for f in files:
        dataframe_files.push_back(f)
    df = ROOT.RDataFrame("Events", dataframe_files)

    # find index for gen level b jets
    # (the gen-particle scan is run once per event and both indices are read from it)
    df = df.Define("gen_bb_tautau_idx", "find_bb_tautau(GenPart_pdgId, GenPart_genPartIdxMother, 23, 23)")
    df = df.Define("index_b1", "gen_bb_tautau_idx.at(0)")
    df = df.Define("index_b2", "gen_bb_tautau_idx.at(1)")

    # check matching between tagged b jets and gen level b jets
    df = df.Define("matched", "check_gen_tag_matching(index_b1, index_b2, bjet1_JetIdx, bjet2_JetIdx,"
            "GenPart_pt, GenPart_eta, GenPart_phi, GenPart_mass, Jet_pt, Jet_eta, Jet_phi)")
    df_matched = df.Filter("matched == 1")

    # pT of the two selected b jets, for all events and for matched events only
    df = df.Define("bjet1_pt_%s" %mass, "Jet_pt.at(bjet1_JetIdx)")
    df = df.Define("bjet2_pt_%s" %mass, "Jet_pt.at(bjet2_JetIdx)")
    df_matched = df_matched.Define("bjet1_pt_matched_%s" %mass, "Jet_pt.at(bjet1_JetIdx)")
    df_matched = df_matched.Define("bjet2_pt_matched_%s" %mass, "Jet_pt.at(bjet2_JetIdx)")


# Compute purity for HHBTag on ZZ samples (non-resonant and resonant)

In [None]:
# Mass hypotheses of the resonant samples (NOTE(review): presumably in GeV — confirm).
mass_points = [ 200, 210, 220, 230, 240, 250, 260, 270, 280, 300, 320, 350, 360, 400, 450, 500, 550,
                600, 650, 700, 750, 800, 850, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600, 1700,
                1800, 1900, 2000, 2200, 2400, 2500, 2600, 2800, 3000, 3500, 4000, 4500, 5000]

# Pieces of the input path; the example below shows the layout
# <PreprocessRDF>/<cfg>/<sample name>/<cat>/<ver>/data_N.root
cfg = 'ul_2018_ZZ_v12'
ver = 'prod_240305'
sm_name = 'zz_sl_signal'              # NOTE(review): presumably the non-resonant sample — confirm
res_name = 'GluGluToXToZZTo2B2Tau'    # resonant sample prefix, completed with _M<mass> in the example below
cat = 'cat_base_selection'

# /data_CMS/cms/vernazza/cmt/PreprocessRDF/ul_2018_ZZ_v12/GluGluToXToZZTo2B2Tau_M200/cat_base_selection/prod_240305/data_0.root

odir = f'/data_CMS/cms/vernazza/FrameworkNanoAOD/hhbbtt-analysis/nicePlots/HHBTag/{cfg}/{ver}'
print(" ### INFO: Saving output in ", odir)
# Create the output directory without going through a shell: no quoting issues with the
# path, and a failure (e.g. missing permissions) raises instead of being silently ignored.
os.makedirs(odir, exist_ok=True)