In [None]:
import glob
import os
import json
import pickle
import yaml

import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None
import pyarrow.parquet as pq
from sklearn.metrics import roc_auc_score, roc_curve
from scipy.special import softmax

import hist as hist2
import matplotlib.pyplot as plt
import mplhep as hep

plt.style.use(hep.style.CMS)

import utils
plt.rcParams.update({"font.size": 20})

In [None]:
! ls ../Feb22_2017

# Read parquets
- loads the parquet dataframes and combines the different pt-bins of each sample
- saves the combined dataframe under `events[ch][sample]`

In [None]:
# Maps a substring of a sample directory name to the combined process label.
# The lookup loops in this notebook walk the keys in insertion order and stop at
# the first key contained in the directory name, so the order matters: the three
# signal keys all contain "WW" and must therefore come before the "WW" entry.
combine_samples = {
    # data
    "SingleElectron_": "SingleElectron",    
    "SingleMuon_": "SingleMuon",  
    "EGamma_": "EGamma",    
    
    # signal
    "GluGluHToWW_Pt-200ToInf_M-125": "HWW",    
    "HToWW_M-125": "VH",
    "VBFHToWWToLNuQQ_M-125_withDipoleRecoil": "VBF",
    
    # bkg
    "QCD_Pt": "QCD",
    "DYJets": "DYJets",    
    "WJetsToLNu_": "WJetsLNu",
    "JetsToQQ": "WZQQ",
    "TT": "TTbar",
    "ST_": "SingleTop",
    "WW": "Diboson",
    "WZ": "Diboson",
    "ZZ": "Diboson",
}

# Sample labels that get the truth label 1 when the ROC inputs are built;
# every other sample gets 0. "ttHToNonbb_M125" has no entry in combine_samples,
# so it is matched through its raw directory name.
signals = [
        "HWW",        
        "ttHToNonbb_M125",
        "VH",
        "VBF"]

# Data sample label per lepton channel.
# NOTE(review): not referenced in the visible part of the notebook - confirm it is still needed.
data_by_ch = {
    "ele": "SingleElectron",
    "mu": "SingleMuon",
}

In [None]:
# Directory (relative to the notebook) holding one sub-directory per processed
# sample; the parquet files are read from "<samples_dir>/<sample>/outfiles/".
samples_dir = "../Feb22_2017"
# Names of all entries found in samples_dir.
samples = os.listdir(samples_dir)

# Lepton channels handled by this notebook.
channels = ["mu", "ele"]

# Event-weight column names per channel; every name maps to 1.
# NOTE(review): presumably 1 means "apply this weight" - confirm against the consumer.
weights = {
    "mu": dict.fromkeys(
        [
            "weight_genweight",
            "weight_L1Prefiring",
            "weight_pileup",
            "weight_trigger_iso_muon",
            "weight_trigger_noniso_muon",
            "weight_isolation_muon",
            "weight_id_muon",
            "weight_vjets_nominal",
        ],
        1,
    ),
    "ele": dict.fromkeys(
        [
            "weight_genweight",
            "weight_L1Prefiring",
            "weight_pileup",
            "weight_trigger_electron",
            "weight_reco_electron",
            "weight_id_electron",
            "weight_vjets_nominal",
        ],
        1,
    ),
}

# Make ROCs

In [None]:
# Label name -> pandas query expression over the gen-matching columns (fj_*).
# get_scores_and_labels evaluates these with DataFrame.query to build one
# column per label; the keys also drive the colour assignment in color_by_label.
labels = {
    ### HWW
    "label_H_WqqWqq_0c": "( (fj_H_VV_4q==1) & (fj_nprongs==4) & (fj_ncquarks==0) )",
    "label_H_WqqWqq_1c": "( (fj_H_VV_4q==1) & (fj_nprongs==4) & (fj_ncquarks==1) )",
    "label_H_WqqWqq_2c": "( (fj_H_VV_4q==1) & (fj_nprongs==4) & (fj_ncquarks==2) )",
    "label_H_WqqWq_0c": "( (fj_H_VV_4q==1) & (fj_nprongs==3) & (fj_ncquarks==0) )",
    "label_H_WqqWq_1c": "( (fj_H_VV_4q==1) & (fj_nprongs==3) & (fj_ncquarks==1) )",
    "label_H_WqqWq_2c": "( (fj_H_VV_4q==1) & (fj_nprongs==3) & (fj_ncquarks==2) )",
    "label_H_WW2lep": "(fj_lepinprongs==2)",
    "label_H_WW0lep": "(fj_lepinprongs==0)",
    "label_H_WqWev": "(fj_H_VV_elenuqq==1) & (fj_nprongs==1) & (fj_lepinprongs==1)",
    "label_H_WqWmv": "(fj_H_VV_munuqq==1) & (fj_nprongs==1) & (fj_lepinprongs==1)",
    
    "label_H_WqqWev_0c": "( (fj_H_VV_elenuqq==1) & (fj_nprongs>=2) & (fj_ncquarks==0) )",
    "label_H_WqqWev_1c": "( (fj_H_VV_elenuqq==1) & (fj_nprongs>=2) & (fj_ncquarks==1) )",
    "label_H_WqqWmv_0c": "( (fj_H_VV_munuqq==1) & (fj_nprongs>=2) & (fj_ncquarks==0) )",
    "label_H_WqqWmv_1c": "( (fj_H_VV_munuqq==1) & (fj_nprongs>=2) & (fj_ncquarks==1) )",
    "label_H_WqqWtauev_0c": "( (fj_H_VV_leptauelvqq==1) & (fj_nprongs>=2) & (fj_ncquarks==0) )",
    "label_H_WqqWtauev_1c": "( (fj_H_VV_leptauelvqq==1) & (fj_nprongs>=2) & (fj_ncquarks==1) )",
    "label_H_WqqWtaumv_0c": "( (fj_H_VV_leptaumuvqq==1) & (fj_nprongs>=2) & (fj_ncquarks==0) )",
    "label_H_WqqWtaumv_1c": "( (fj_H_VV_leptaumuvqq==1) & (fj_nprongs>=2) & (fj_ncquarks==1) )",
    "label_H_WqqWtauhv_0c": "( (fj_H_VV_hadtauvqq==1) & (fj_nprongs>=2) & (fj_ncquarks==0) )",
    "label_H_WqqWtauhv_1c": "( (fj_H_VV_hadtauvqq==1) & (fj_nprongs>=2) & (fj_ncquarks==1) )",
    
    ### Top    
    "label_Top_nob": "(fj_Top_bmerged==0)",
    "label_Top_bWqq_0c": "( (fj_Top_2q==1) & (fj_nprongs == 2)  & (fj_Top_bmerged==1) & (fj_ncquarks==0) )",
    "label_Top_bWqq_1c": "( (fj_Top_2q==1) & (fj_nprongs == 2) & (fj_Top_bmerged==1) & (fj_ncquarks==1) )",
    "label_Top_bWq_0c": "( (fj_Top_2q==1) & (fj_nprongs == 1) & (fj_Top_bmerged==1) & (fj_ncquarks==0) )",
    "label_Top_bWq_1c": "( (fj_Top_2q==1) & (fj_nprongs == 1) & (fj_Top_bmerged==1) & (fj_ncquarks==1) )",
    "label_Top_bWev": "( (fj_Top_elenu==1) & (fj_Top_bmerged==1) )",
    "label_Top_bWmv": "( (fj_Top_munu==1) & (fj_Top_bmerged==1) )",
    "label_Top_bWtauhv": "( (fj_Top_hadtauvqq==1) & (fj_Top_bmerged==1) )",
    "label_Top_bWtauev": "( (fj_Top_leptauelvnu==1) & (fj_Top_bmerged==1) )",
    "label_Top_bWtaumv": "( (fj_Top_leptaumuvnu==1) & (fj_Top_bmerged==1) )",
    
    ### Wjets    
    "label_W_ev": "(fj_V_elenu==1) & (fj_lepinprongs==1)",
    "label_W_ev_0lep": "(fj_V_elenu==1) & (fj_lepinprongs==0)",
    "label_W_mv": "(fj_V_munu==1) & (fj_lepinprongs==1)",
    "label_W_mv_0lep": "(fj_V_munu==1) & (fj_lepinprongs==0)",
    "label_W_tauv": "(fj_V_taunu==1) & (fj_lepinprongs==1)",
    "label_W_tauv_0lep": "(fj_V_taunu==1) & (fj_lepinprongs==0)",
    "label_Z_2lep": "(fj_lepinprongs==2)",
    "label_Z_1lep": "(fj_lepinprongs==1)",
}

# labels to check per sample
# Structure: channel -> combined sample label -> list of keys of `labels`.
# get_scores_and_labels indexes this as labels_per_sample[ch][sample_to_use], so a
# sample that is processed there must have an entry (an empty list is enough,
# as for "QCD"). Note that "SingleTop" only has an entry for the "mu" channel.
labels_per_sample = {
    "ele": {
        "TTbar": [
            "label_Top_bWev",
            "label_Top_bWtauev",
            "label_Top_nob",
        ],    
        "HWW": [
            "label_H_WqqWev_0c","label_H_WqqWev_1c",
            "label_H_WqqWtauev_0c","label_H_WqqWtauev_1c",            
        ],
        "WJetsLNu": [
            "label_W_ev",#"label_W_ev_0lep",
            "label_W_tauv",#"label_W_tauv_0lep",
        ],
#         "DYJets": [
#             "label_Z_2lep",
#             "label_Z_1lep",
#         ],
        "QCD": [],
    },
    "mu": {
        "TTbar": [
            "label_Top_bWmv",
            "label_Top_bWtaumv",
            "label_Top_nob",
        ],
        "SingleTop": [
#             "label_Top_bWev",
#             "label_Top_bWtauev",
#             "label_Top_nob",
        ],            
        "HWW": [
            "label_H_WqqWmv_0c","label_H_WqqWmv_1c",
            "label_H_WqqWtaumv_0c","label_H_WqqWtaumv_1c",
        ], 
        "WJetsLNu": [
            "label_W_mv",#"label_W_mv_0lep",
            "label_W_tauv",#"label_W_tauv_0lep",
        ],
#         "DYJets": [
#             "label_Z_2lep",
#             "label_Z_1lep",
#         ],
        "QCD": [],
        }
}

# Combined sample label -> name of the column whose values are copied into each
# label column for the rows selected by the label's query.
# Only looked up for samples that have at least one label in labels_per_sample,
# which is why "QCD" needs no entry here.
match_labels = {
    "HWW": "fj_H_VV_isMatched",
    "TTbar": "fj_Top_isMatched",
    "SingleTop": "fj_Top_isMatched",    
    "WJetsLNu": "fj_V_isMatched",
    "DYJets": "fj_V_isMatched",
}

In [None]:
# aesthetics
# LaTeX axis titles for the discriminant scores: channel -> background
# combination -> "signal / (signal + backgrounds)" expression.
# NOTE(review): not referenced in the visible part of the notebook; the keys use
# "WJets" whereas the sample label elsewhere is "WJetsLNu" - confirm before use.
nicex_scores = {
    "mu": {
    "QCD": r"$\frac{H_{WqqW{\mu}{\nu}}}{H_{WqqW{\mu}{\nu}}+QCD}$",
    "TTbar": r"$\frac{H_{WqqW{\mu}{\nu}}}{H_{WqqW{\mu}{\nu}}+Top_{bW{\mu}{\nu}}}$",
    "TTbar+QCD": r"$\frac{H_{WqqW{\mu}{\nu}}}{H_{WqqW{\mu}{\nu}}+Top_{bW{\mu}{\nu}}+QCD}$",
    "WJets": r"$\frac{H_{WqqW{\mu}{\nu}}}{H_{WqqW{\mu}{\nu}}+WJets}$",
    "TTbar+QCD+WJets": r"$\frac{H_{WqqW{\mu}{\nu}}}{H_{WqqW{\mu}{\nu}}+Top_{bW{\mu}{\nu}}+QCD+WJets}$",
    },
    "ele": {
    "QCD": r"$\frac{H_{WqqWe{\nu}}}{H_{WqqWe{\nu}}+QCD}$",
    "TTbar": r"$\frac{H_{WqqWe{\nu}}}{H_{WqqWe{\nu}}+Top_{bWe{\nu}}}$", 
    "TTbar+QCD": r"$\frac{H_{WqqWe{\nu}}}{H_{WqqWe{\nu}}+Top_{bWe{\nu}}+QCD}$",
    "WJets": r"$\frac{H_{WqqWe{\nu}}}{H_{WqqWe{\nu}}+WJets}$",
    "TTbar+QCD+WJets": r"$\frac{H_{WqqWe{\nu}}}{H_{WqqWe{\nu}}+Top_{bWe{\nu}}+QCD+WJets}$",        
    }
}    

# Channel key -> human-readable channel name used in the legend titles.
nice_channel = {
    "mu": "Muon",
    "ele": "Electron"
}

import matplotlib.colors as mcolors

# One Tableau colour per label, cycling through the palette when there are
# more labels than colours; unmatched jets are always drawn in red.
tableau_palette = list(mcolors.TABLEAU_COLORS.values())
color_by_label = {
    label: tableau_palette[i % len(tableau_palette)] for i, label in enumerate(labels)
}
color_by_label["unmatched"] = "r"

In [None]:
def disc_score(df, sigs, bkgs):
    """Return the per-row discriminant ``sum(sigs) / (sum(sigs) + sum(bkgs))``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the score columns.
    sigs, bkgs : list of str
        Names of the signal and background score columns to sum.

    Returns
    -------
    pandas.Series
        One value per row of ``df``; rows whose denominator is 0 yield NaN.
    """
    sig_total = df[sigs].sum(axis=1)
    bkg_total = df[bkgs].sum(axis=1)
    return sig_total / (sig_total + bkg_total)

In [None]:
plt.rcParams.update({"font.size": 20})

def make_roc(roc_scores, roc_labels, channels, bkgs, PNs, scores_on_plot=False):
    """Draw one ROC panel (mistag rate vs. signal efficiency) per channel.

    Parameters
    ----------
    roc_scores, roc_labels : dict
        ``[channel][tagger] -> list`` of discriminant scores and 0/1 truth
        labels, as returned by ``get_scores_and_labels``.
    channels : list of str
        Channels to plot, one panel each (keys of ``nice_channel``).
    bkgs : str
        Background name, used only in the y-axis label and the log message.
    PNs : list of str
        Tagger versions to overlay in every panel.
    scores_on_plot : list of float or False
        Score thresholds to mark with a star. Markers are only drawn for the
        tagger named "ParT".

    Returns
    -------
    None
        The figure is left to the inline backend so that calling this as the
        last statement of a cell prints no extra output.
    """
    n_channels = len(channels)
    # Same figure sizes as before for one and two channels; wider for more.
    figsize = (7, 7) if n_channels == 1 else (8 * n_channels, 7)
    # squeeze=False always returns a 2D array, so any channel count works.
    fig, axes = plt.subplots(1, n_channels, figsize=figsize, squeeze=False)
    ax = dict(zip(channels, axes[0]))

    for ch in channels:
        print(f"Plotting {bkgs} score for {ch} channel")

        panel = ax[ch]
        panel.grid()
        # reference line at a 1% mistag rate
        panel.hlines(y=1e-2, xmin=0, xmax=1, linestyle="dashed", lw=1, color='gray')

        for pn_version in PNs:
            labels_ = np.asarray(roc_labels[ch][pn_version])
            scores_ = np.asarray(roc_scores[ch][pn_version])

            # scores are NaN when numerator and denominator are both 0
            is_nan = np.isnan(scores_)
            if is_nan.any():
                print(f"Found {is_nan.sum()} NaN scores out of {len(scores_)}. Will remove them.")
                labels_ = labels_[~is_nan]
                scores_ = scores_[~is_nan]

            fpr, tpr, _ = roc_curve(labels_, scores_)

            panel.plot(
                tpr,
                fpr,
                linewidth=2,
                linestyle='solid',
                label=f"{pn_version} - AUC={roc_auc_score(labels_, scores_):.3f}",
            )

            # first ROC point whose mistag rate is closest to 1%
            wp = np.argmin(np.abs(fpr - 0.01))
            panel.vlines(x=tpr[wp], ymin=0, ymax=1e-2, linestyle="dashed", lw=1, color='gray')

            if scores_on_plot and pn_version == "ParT":
                is_sig = labels_ == 1
                is_bkg = labels_ == 0
                for score_on_plot in scores_on_plot:
                    passed = scores_ > score_on_plot
                    # fraction of signal / background above the threshold
                    x = passed[is_sig].mean()
                    y = passed[is_bkg].mean()
                    panel.plot([x], [y], marker="*", markersize=10, label=f"Threshold={score_on_plot:.2f}")

        panel.set_xlim(0, 1)
        panel.set_ylim(1e-4, 1)
        panel.set_xlabel("Signal efficiency (HWW)", fontsize=16, ha='right', x=1)
        panel.set_ylabel(f"Mistag rate ({bkgs})", fontsize=16, ha='right', y=1)
        panel.set_yscale("log")
        panel.legend(title=f"{nice_channel[ch]} Channel")

In [None]:
def _combined_sample_name(sample):
    """Return the combined process label for a sample directory name.

    The first key of ``combine_samples`` (in insertion order) that is a
    substring of ``sample`` wins; without a match the name is returned as is.
    """
    return next((combine_samples[key] for key in combine_samples if key in sample), sample)


def get_scores_and_labels(channels, presel, PNs, sigs, bkgs, samples=None, matched_Top=True, from_W=False, from_Wstar=False):
    """Collect discriminant scores and truth labels for ROC curves.

    For every channel and every sample directory under ``samples_dir`` the
    ``*_{ch}.parquet`` files are read, the preselection and gen-matching
    queries are applied, and ``disc_score`` is evaluated once per tagger.

    Parameters
    ----------
    channels : list of str
        Channels to process (keys of ``presel``, ``sigs`` and ``bkgs``).
    presel : dict
        ``[channel][name] -> pandas query string``; all are applied in order.
    PNs : list of str
        Tagger names. Each one replaces the substring "PN" in the score
        column names before the score is computed.
    sigs, bkgs : dict
        ``[channel] -> list`` of score-column templates for the numerator and
        for the background part of the denominator.
    samples : list of str, optional
        Combined sample labels to keep. Every sample is processed when empty or None.
    matched_Top : bool
        When False, the ``matched["TTbar"]`` query is skipped for TTbar.
    from_W, from_Wstar : bool
        For HWW only: keep rows with ``fj_H_VV_isVlepton`` True (``from_W``)
        or False (``from_Wstar``). ``from_W`` takes precedence if both are set.

    Returns
    -------
    (roc_scores, roc_labels) : tuple of dict
        ``[channel][tagger] -> list``; labels are 1 for samples listed in
        ``signals`` and 0 otherwise.
    """
    roc_scores, roc_labels = {}, {}

    for ch in channels:
        print(f"Processing channel {ch}")

        roc_scores[ch] = {pn_version: [] for pn_version in PNs}
        roc_labels[ch] = {pn_version: [] for pn_version in PNs}

        for sample in os.listdir(samples_dir):

            # group samples of the same process under one label
            sample_to_use = _combined_sample_name(sample)

            if samples and sample_to_use not in samples:
                continue

            print(f"Finding {sample} samples and should combine them under {sample_to_use}")

            out_files = f"{samples_dir}/{sample}/outfiles/"
            parquet_files = glob.glob(f"{out_files}/*_{ch}.parquet")

            if not parquet_files:
                print(f"No parquet file for {sample}")
                continue

            data = pd.read_parquet(parquet_files)

            # apply preselection
            for selection_query in presel[ch].values():
                data = data.query(selection_query)

            # Build one column per gen-level label: rows passing the label query get
            # the value of the sample's matching column, all other rows become NaN.
            # Samples without an entry (e.g. other signals) simply get no label columns.
            for label in labels_per_sample[ch].get(sample_to_use, []):
                match_label = match_labels[sample_to_use]
                data[label] = data.query(labels[label])[match_label]

            # Rows that failed a label query are NaN at this point; turn them into False.
            # NOTE(review): this fills NaN in *every* column, including the score columns.
            data = data.fillna(value=False)

            if sample_to_use == "HWW":
                if from_W:
                    data = data.query("(fj_H_VV_isVlepton==True)")
                elif from_Wstar:
                    data = data.query("(fj_H_VV_isVlepton==False)")

            # keep only gen-matched jets (for HWW, and for TTbar unless disabled)
            if sample_to_use in matched and (sample_to_use != "TTbar" or matched_Top):
                data = data.query(matched[sample_to_use][ch])

            label = [1 if sample_to_use in signals else 0] * len(data)

            for pn_version in PNs:

                new_sig = [s.replace("PN", pn_version) for s in sigs[ch]]
                new_bkg = [b.replace("PN", pn_version) for b in bkgs[ch]]

                roc_scores[ch][pn_version] += disc_score(data, new_sig, new_bkg).tolist()
                roc_labels[ch][pn_version] += label
        print("-------------------------------------------------")
    return roc_scores, roc_labels

In [None]:
# scores definition
# Score-column templates; "PN" is replaced by the tagger name before use.
hwwev = ["fj_PN_probHWqqWev0c","fj_PN_probHWqqWev1c","fj_PN_probHWqqWtauev0c","fj_PN_probHWqqWtauev1c"]
# The third entry used to be the electron column "fj_PN_probHWqqWtauev0c" (copy-paste slip);
# the muon score must sum the tau->mu 0c column instead.
hwwmv = ["fj_PN_probHWqqWmv0c","fj_PN_probHWqqWmv1c","fj_PN_probHWqqWtaumv0c","fj_PN_probHWqqWtaumv1c"]

qcd = ["fj_PN_probQCDbb","fj_PN_probQCDcc","fj_PN_probQCDb","fj_PN_probQCDc","fj_PN_probQCDothers"]

tope = ["fj_PN_probTopbWev","fj_PN_probTopbWtauev"]
topm = ["fj_PN_probTopbWmv","fj_PN_probTopbWtaumv"]

tophad = ["fj_PN_probTopbWqq0c","fj_PN_probTopbWqq1c","fj_PN_probTopbWq0c","fj_PN_probTopbWq1c","fj_PN_probTopbWtauhv"]

In [None]:
def _any_label_set(label_names):
    """Build the query "( (a==1) | (b==1) | ... )" that is true when any label column equals 1."""
    return "( " + " | ".join(f"({name}==1)" for name in label_names) + " )"


# Gen-matching query per combined sample and channel: a jet is kept when at
# least one of the listed label columns equals 1.
matched = {
    "HWW": {
        "mu": _any_label_set(
            ["label_H_WqqWmv_0c", "label_H_WqqWmv_1c", "label_H_WqqWtaumv_0c", "label_H_WqqWtaumv_1c"]
        ),
        "ele": _any_label_set(
            ["label_H_WqqWev_0c", "label_H_WqqWev_1c", "label_H_WqqWtauev_0c", "label_H_WqqWtauev_1c"]
        ),
    },
    "TTbar": {
        "mu": _any_label_set(["label_Top_bWmv", "label_Top_bWtaumv"]),
        "ele": _any_label_set(["label_Top_bWev", "label_Top_bWtauev"]),
    },
}

# QCD

In [None]:
# Preselection per channel: the isolation cut is only applied below the
# channel's lepton-pt threshold (55 for muons, 120 for electrons).
presel = {
    "mu": {
        "preselection": "( ( (lep_pt < 55) & (lep_isolation<0.15)) |  (lep_pt >= 55) )",
    },
    "ele": {
        "preselection": "( ( (lep_pt < 120) & (lep_isolation<0.15)) |  (lep_pt >= 120) )",
    }
}

# `sigs` was only defined much further down, so this cell raised a NameError
# after "Restart Kernel -> Run All". Define the signal scores where they are first used.
sigs = {
    "ele": hwwev,
    "mu": hwwmv,
}

# QCD scores enter the denominator for both channels.
bkgs = {
    "ele": qcd,
    "mu": qcd,
}

roc_scores, roc_labels = get_scores_and_labels(["mu", "ele"],
                                               presel,
                                               ["ParT", "ParticleNet"],
                                               sigs,
                                               bkgs,
                                               ["HWW", "QCD"]
                                              )

In [None]:
# HWW vs QCD ROC for both channels and both taggers, without threshold markers.
make_roc(
    roc_scores,
    roc_labels,
    channels=["mu", "ele"],
    bkgs="QCD",
    PNs=["ParT", "ParticleNet"],
)

In [None]:
# Same ROC, additionally marking the 0.9 and 0.99 score thresholds.
make_roc(
    roc_scores,
    roc_labels,
    channels=["mu", "ele"],
    bkgs="QCD",
    PNs=["ParT", "ParticleNet"],
    scores_on_plot=[0.9, 0.99],
)

# QCD with and without miso

In [None]:
# QCD scores enter the denominator for both channels.
bkgs = {
    "ele": qcd,
    "mu": qcd,
}

# `sigs` was only defined further down the notebook, which made this cell fail
# with a NameError on a fresh kernel; define it here.
sigs = {
    "ele": hwwev,
    "mu": hwwmv,
}

# Baseline preselection, and the same selection with the muon mini-isolation cut added.
presel_base = {
    "mu": {
        "preselection": "( ( (lep_pt < 55) & (lep_isolation<0.15)) |  (lep_pt >= 55) )",
    },
    "ele": {
        "preselection": "( ( (lep_pt < 120) & (lep_isolation<0.15)) |  (lep_pt >= 120) )",
    }
}
presel_miso = {
    **presel_base,
    "mu": {
        **presel_base["mu"],
        "mini-isolation": "( (lep_pt < 55) |  ( (lep_misolation < 0.1) & (lep_pt >= 55) ) )",
    },
}

# One set of ROC inputs per selection; the dict key is used as the legend entry.
roc_scores = {}
roc_labels = {}
for selection_name, presel in [
    ("preselection", presel_base),
    ("preselection + mini-isolation", presel_miso),
]:
    roc_scores[selection_name], roc_labels[selection_name] = get_scores_and_labels(
        ["mu"],
        presel,
        ["ParT"],
        sigs,
        bkgs,
        ["HWW", "QCD"],
    )

In [None]:
plt.rcParams.update({"font.size": 20})

scores_on_plot = [0.9,0.99]  # not used by this cell
ch = "mu"
# `pn_version` only exists as a local inside make_roc, so reading it here raised
# a NameError on a fresh kernel. The cell above only computes the "ParT" scores.
tagger = "ParT"

fig, ax = plt.subplots(figsize=(8,8))

ax.grid()
# reference line at a 1% mistag rate
ax.hlines(y=1e-2, xmin=0, xmax=1, linestyle="dashed", lw=1, color='gray')

# one ROC curve per selection
for selection in roc_labels:
    labels_ = np.asarray(roc_labels[selection][ch][tagger])
    scores_ = np.asarray(roc_scores[selection][ch][tagger])

    is_nan = np.isnan(scores_)
    if is_nan.any():
        print(f"Found {is_nan.sum()} NaN scores out of {len(scores_)}. Will remove them.")
        labels_ = labels_[~is_nan]
        scores_ = scores_[~is_nan]

    fpr, tpr, _ = roc_curve(labels_, scores_)

    ax.plot(tpr,fpr,
            linewidth=2,
            linestyle='solid',
            label=f"{selection} \n AUC={roc_auc_score(labels_, scores_):.3f}"
           )
    # first ROC point whose mistag rate is closest to 1%
    wp = np.argmin(np.abs(fpr - 0.01))
    ax.vlines(x=tpr[wp], ymin=0, ymax=1e-2,  linestyle="dashed", lw=1, color='gray')

ax.set_xlim(0, 1)
ax.set_ylim(1e-4,1)
ax.set_xlabel("Signal efficiency (HWW)", fontsize=16, ha='right', x=1)
ax.set_ylabel("Mistag rate (QCD)", fontsize=16, ha='right', y=1)
ax.set_yscale("log")
xch = nice_channel[ch]
ax.legend(title=f"{xch} Channel");

# Top with and without miso

In [None]:
# All top scores (muonic, electronic and hadronic) enter the denominator.
bkgs = {
    "ele": topm+tope+tophad,
    "mu": topm+tope+tophad,
}

# `sigs` was only defined further down the notebook, which made this cell fail
# with a NameError on a fresh kernel; define it here.
sigs = {
    "ele": hwwev,
    "mu": hwwmv,
}

# Baseline preselection, and the same selection with the muon mini-isolation cut added.
presel_base = {
    "mu": {
        "preselection": "( ( (lep_pt < 55) & (lep_isolation<0.15)) |  (lep_pt >= 55) )",
    },
    "ele": {
        "preselection": "( ( (lep_pt < 120) & (lep_isolation<0.15)) |  (lep_pt >= 120) )",
    }
}
presel_miso = {
    **presel_base,
    "mu": {
        **presel_base["mu"],
        "mini-isolation": "( (lep_pt < 55) |  ( (lep_misolation < 0.1) & (lep_pt >= 55) ) )",
    },
}

# One set of ROC inputs per selection; TTbar is used without gen-matching.
roc_scores = {}
roc_labels = {}
for selection_name, presel in [
    ("preselection", presel_base),
    ("preselection + mini-isolation", presel_miso),
]:
    roc_scores[selection_name], roc_labels[selection_name] = get_scores_and_labels(
        ["mu"],
        presel,
        ["ParT"],
        sigs,
        bkgs,
        ["HWW", "TTbar"],
        matched_Top=False,
    )

In [None]:
plt.rcParams.update({"font.size": 20})

scores_on_plot = [0.9,0.99]  # not used by this cell
ch = "mu"

fig, ax = plt.subplots(figsize=(8,8))

ax.grid()
# reference line at a 1% mistag rate
ax.hlines(y=1e-2, xmin=0, xmax=1, linestyle="dashed", lw=1, color='gray')

# one ROC curve per selection
for selection in roc_labels:
    labels_ = np.asarray(roc_labels[selection][ch]["ParT"])
    scores_ = np.asarray(roc_scores[selection][ch]["ParT"])

    is_nan = np.isnan(scores_)
    if is_nan.any():
        print(f"Found {is_nan.sum()} NaN scores out of {len(scores_)}. Will remove them.")
        labels_ = labels_[~is_nan]
        scores_ = scores_[~is_nan]

    fpr, tpr, _ = roc_curve(labels_, scores_)

    ax.plot(tpr,fpr,
            linewidth=2,
            linestyle='solid',
            label=f"{selection} \n AUC={roc_auc_score(labels_, scores_):.3f}"
           )
    # First ROC point whose mistag rate is closest to 1%; vectorised instead of
    # a Python-level min() over the array followed by an np.where scan.
    wp = np.argmin(np.abs(fpr - 0.01))
    ax.vlines(x=tpr[wp], ymin=0, ymax=1e-2,  linestyle="dashed", lw=1, color='gray')

ax.set_xlim(0, 1)
ax.set_ylim(1e-4,1)
ax.set_xlabel("Signal efficiency (HWW)", fontsize=16, ha='right', x=1)
ax.set_ylabel("Mistag rate (Top)", fontsize=16, ha='right', y=1)
ax.set_yscale("log")
xch = nice_channel[ch]
ax.legend(title=f"{xch} Channel");

# Top

In [None]:
# Preselection per channel: the isolation cut is only applied below the
# channel's lepton-pt threshold.
presel = dict(
    mu={"preselection": "( ( (lep_pt < 55) & (lep_isolation<0.15)) |  (lep_pt >= 55) )"},
    ele={"preselection": "( ( (lep_pt < 120) & (lep_isolation<0.15)) |  (lep_pt >= 120) )"},
)

# Numerator: HWW scores of the channel's lepton flavour.
sigs = dict(ele=hwwev, mu=hwwmv)

# Denominator adds the leptonic top scores of the same flavour.
bkgs = dict(ele=tope, mu=topm)

roc_scores, roc_labels = get_scores_and_labels(
    channels=["mu"],
    presel=presel,
    PNs=["ParT", "ParticleNet"],
    sigs=sigs,
    bkgs=bkgs,
    samples=["HWW", "TTbar"],
)

In [None]:
# HWW vs Top ROC in the muon channel for both taggers, without threshold markers.
make_roc(
    roc_scores,
    roc_labels,
    channels=["mu"],
    bkgs="Top",
    PNs=["ParT", "ParticleNet"],
)

In [None]:
# Same ROC, additionally marking the 0.9 and 0.99 score thresholds.
make_roc(
    roc_scores,
    roc_labels,
    channels=["mu"],
    bkgs="Top",
    PNs=["ParT", "ParticleNet"],
    scores_on_plot=[0.9, 0.99],
)

In [None]:
### combining the semileptonic score definition
# Preselection per channel: the isolation cut is only applied below the
# channel's lepton-pt threshold.
presel = dict(
    mu={"preselection": "( ( (lep_pt < 55) & (lep_isolation<0.15)) |  (lep_pt >= 55) )"},
    ele={"preselection": "( ( (lep_pt < 120) & (lep_isolation<0.15)) |  (lep_pt >= 120) )"},
)

# Numerator: HWW scores of the channel's lepton flavour.
sigs = dict(ele=hwwev, mu=hwwmv)

# Denominator: every top score (both lepton flavours and hadronic) for both channels.
bkgs = {ch_name: tope + topm + tophad for ch_name in ("ele", "mu")}

# TTbar is used without gen-matching here.
roc_scores, roc_labels = get_scores_and_labels(
    channels=["mu"],
    presel=presel,
    PNs=["ParT", "ParticleNet"],
    sigs=sigs,
    bkgs=bkgs,
    samples=["HWW", "TTbar"],
    matched_Top=False,
)

In [None]:
# HWW vs Top ROC with the combined top score, marking the 0.9 and 0.99 thresholds.
make_roc(
    roc_scores,
    roc_labels,
    channels=["mu"],
    bkgs="Top",
    PNs=["ParT", "ParticleNet"],
    scores_on_plot=[0.9, 0.99],
)

In [None]:
# QCD and all top scores enter the denominator for both channels.
bkgs = {
    "ele": qcd+topm+tope+tophad,
    "mu": qcd+topm+tope+tophad,
}

# Both variants share the same preselection, so it is defined once.
presel = {
    "mu": {
        "preselection": "( ( (lep_pt < 55) & (lep_isolation<0.15)) |  (lep_pt >= 55) )",
    },
    "ele": {
        "preselection": "( ( (lep_pt < 120) & (lep_isolation<0.15)) |  (lep_pt >= 120) )",
    }
}

roc_scores = {}
roc_labels = {}

# The dict keys become the legend entries. They used to be swapped relative to the
# flags (from_W=True was stored under "lepton from W*" and vice versa); each key
# now names the flag that produced it.
# NOTE(review): assumes fj_H_VV_isVlepton==True means the lepton comes from the W
# rather than the W*, as the flag names suggest - confirm against the ntuple producer.
roc_scores["lepton from W"], roc_labels["lepton from W"] = get_scores_and_labels(["mu"],
                                               presel,
                                               ["ParT"],
                                               sigs,
                                               bkgs,
                                               ["HWW", "TTbar", "QCD"],
                                               from_W=True
                                              )

roc_scores["lepton from W*"], roc_labels["lepton from W*"] = get_scores_and_labels(["mu"],
                                               presel,
                                               ["ParT"],
                                               sigs,
                                               bkgs,
                                               ["HWW", "TTbar", "QCD"],
                                               from_Wstar=True
                                              )

In [None]:
plt.rcParams.update({"font.size": 20})

scores_on_plot = [0.9,0.99]  # not used by this cell
ch = "mu"

fig, ax = plt.subplots(figsize=(8,8))

ax.grid()
# reference line at a 1% mistag rate
ax.hlines(y=1e-2, xmin=0, xmax=1, linestyle="dashed", lw=1, color='gray')

# one ROC curve per entry of roc_labels
for selection in roc_labels:
    labels_ = np.asarray(roc_labels[selection][ch]["ParT"])
    scores_ = np.asarray(roc_scores[selection][ch]["ParT"])

    is_nan = np.isnan(scores_)
    if is_nan.any():
        print(f"Found {is_nan.sum()} NaN scores out of {len(scores_)}. Will remove them.")
        labels_ = labels_[~is_nan]
        scores_ = scores_[~is_nan]

    fpr, tpr, _ = roc_curve(labels_, scores_)

    ax.plot(tpr,fpr,
            linewidth=2,
            linestyle='solid',
            label=f"{selection} \n AUC={roc_auc_score(labels_, scores_):.3f}"
           )
    # First ROC point whose mistag rate is closest to 1%; vectorised instead of
    # a Python-level min() over the array followed by an np.where scan.
    wp = np.argmin(np.abs(fpr - 0.01))
    ax.vlines(x=tpr[wp], ymin=0, ymax=1e-2,  linestyle="dashed", lw=1, color='gray')

ax.set_xlim(0, 1)
ax.set_ylim(1e-4,1)
ax.set_xlabel("Signal efficiency (HWW)", fontsize=16, ha='right', x=1)
ax.set_ylabel("Mistag rate (Top+QCD)", fontsize=16, ha='right', y=1)
ax.set_yscale("log")
xch = nice_channel[ch]
ax.legend(title=f"{xch} Channel");

# Top with and without bjet veto

In [None]:
# Denominator adds the leptonic top scores of the channel's flavour.
bkgs = dict(ele=tope, mu=topm)

# Baseline preselection, and the same selection with the b-jet veto added to both channels.
presel_base = dict(
    mu={"preselection": "( ( (lep_pt < 55) & (lep_isolation<0.15)) |  (lep_pt >= 55) )"},
    ele={"preselection": "( ( (lep_pt < 120) & (lep_isolation<0.15)) |  (lep_pt >= 120) )"},
)
presel_bveto = {
    ch_name: {**cuts, "bjet": "( (fj_bjets < 0.3040) )"}
    for ch_name, cuts in presel_base.items()
}

# One set of ROC inputs per selection (the key is the legend entry);
# TTbar is used without gen-matching.
roc_scores, roc_labels = {}, {}
for selection_name, presel in (
    ("preselection", presel_base),
    ("preselection + bjet veto", presel_bveto),
):
    roc_scores[selection_name], roc_labels[selection_name] = get_scores_and_labels(
        ["mu", "ele"],
        presel,
        ["ParT"],
        sigs,
        bkgs,
        ["HWW", "TTbar"],
        matched_Top=False,
    )

In [None]:
plt.rcParams.update({"font.size": 20})

scores_on_plot = [0.9,0.99]  # not used by this cell
ch = "mu"
# `pn_version` only exists as a local inside make_roc, so reading it here raised
# a NameError on a fresh kernel. The cell above only computes the "ParT" scores.
tagger = "ParT"

fig, ax = plt.subplots(figsize=(8,8))

ax.grid()
# reference line at a 1% mistag rate
ax.hlines(y=1e-2, xmin=0, xmax=1, linestyle="dashed", lw=1, color='gray')

# one ROC curve per selection
for selection in roc_labels:
    labels_ = np.asarray(roc_labels[selection][ch][tagger])
    scores_ = np.asarray(roc_scores[selection][ch][tagger])

    is_nan = np.isnan(scores_)
    if is_nan.any():
        print(f"Found {is_nan.sum()} NaN scores out of {len(scores_)}. Will remove them.")
        labels_ = labels_[~is_nan]
        scores_ = scores_[~is_nan]

    fpr, tpr, _ = roc_curve(labels_, scores_)

    ax.plot(tpr,fpr,
            linewidth=2,
            linestyle='solid',
            label=f"{selection} \n AUC={roc_auc_score(labels_, scores_):.3f}"
           )
    # first ROC point whose mistag rate is closest to 1%
    wp = np.argmin(np.abs(fpr - 0.01))
    ax.vlines(x=tpr[wp], ymin=0, ymax=1e-2,  linestyle="dashed", lw=1, color='gray')

ax.set_xlim(0, 1)
ax.set_ylim(1e-4,1)
ax.set_xlabel("Signal efficiency (HWW)", fontsize=16, ha='right', x=1)
ax.set_ylabel("Mistag rate (Top)", fontsize=16, ha='right', y=1)
ax.set_yscale("log")
xch = nice_channel[ch]
ax.legend(title=f"{xch} Channel");

# Top + QCD

In [None]:
# Preselection per channel: the isolation cut is only applied below the
# channel's lepton-pt threshold.
presel = dict(
    mu={"preselection": "( ( (lep_pt < 55) & (lep_isolation<0.15)) |  (lep_pt >= 55) )"},
    ele={"preselection": "( ( (lep_pt < 120) & (lep_isolation<0.15)) |  (lep_pt >= 120) )"},
)

# Numerator: HWW scores of the channel's lepton flavour.
sigs = dict(ele=hwwev, mu=hwwmv)

# Denominator adds the same-flavour leptonic top scores and the QCD scores.
bkgs = dict(ele=tope + qcd, mu=topm + qcd)

roc_scores, roc_labels = get_scores_and_labels(
    channels=["mu"],
    presel=presel,
    PNs=["ParT", "ParticleNet"],
    sigs=sigs,
    bkgs=bkgs,
    samples=["HWW", "TTbar", "QCD"],
)

In [None]:
# HWW vs TTbar+QCD ROC for ParT only, marking the 0.9 and 0.99 score thresholds.
make_roc(
    roc_scores,
    roc_labels,
    channels=["mu"],
    bkgs="TTbar+QCD",
    PNs=["ParT"],
    scores_on_plot=[0.9, 0.99],
)

# Top + QCD + WJets

In [None]:
# Preselection per channel: the isolation cut is only applied below the
# channel's lepton-pt threshold.
presel = {
    "mu": {
        "preselection": "( ( (lep_pt < 55) & (lep_isolation<0.15)) |  (lep_pt >= 55) )",
    },
    "ele": {
        "preselection": "( ( (lep_pt < 120) & (lep_isolation<0.15)) |  (lep_pt >= 120) )",
    }
}

sigs = {
    "ele": hwwev,
    "mu": hwwmv,
}
# NOTE(review): the score definition is the same as in the Top + QCD section;
# no W+jets score enters the denominator even though WJetsLNu events are added below.
bkgs = {
    "ele": tope+qcd,
    "mu": topm+qcd,
}

# `PNs` was never defined at notebook level (it is only a parameter name of the
# helper functions), so this cell raised a NameError on a fresh kernel.
# Use the same tagger list as the Top + QCD section.
PNs = ["ParT", "ParticleNet"]

roc_scores, roc_labels = get_scores_and_labels(["mu"],
                                               presel,
                                               PNs,
                                               sigs,
                                               bkgs,
                                               ["HWW", "TTbar", "QCD", "WJetsLNu"]
                                              )

In [None]:
# HWW vs TTbar+QCD+WJetsLNu ROC for ParT only, marking the 0.9 and 0.99 thresholds.
make_roc(
    roc_scores,
    roc_labels,
    channels=["mu"],
    bkgs="TTbar+QCD+WJetsLNu",
    PNs=["ParT"],
    scores_on_plot=[0.9, 0.99],
)

# Lepton coming from W

In [None]:
# Collect ParT discriminator scores and truth labels (1 = HWW, 0 = background) per
# channel, keeping only HWW jets with fj_H_VV_isVlepton == True.
# NOTE(review): presumably "the lepton comes from the on-shell W" (see the section
# title) -- confirm against the ntuple definition of fj_H_VV_isVlepton.

# Selections applied per channel before anything else; each entry is a pandas-query string.
presel = {
    "mu": {
        "preselection": "( ( (lep_pt < 55) & (lep_isolation<0.15)) |  (lep_pt >= 55) )",
#         "bjet": "( (fj_bjets<0.3040) )",
#         "bjet_ophem": "( (fj_bjets_ophem<0.3040) )",                
#         "iso": "( ( (lep_pt < 55) & (lep_isolation<0.15)) |  (lep_pt >= 55) )",
#         "mini-iso": "( (lep_pt < 55) |  ( (lep_misolation < 0.1) & (lep_pt >= 55) ) )",  
    },
    "ele": {
        # FIX: the electron isolation threshold is 120 GeV in the other preselection
        # cells of this notebook; the 55 GeV value here was copied from the muon entry.
        "preselection": "( ( (lep_pt < 120) & (lep_isolation<0.15)) |  (lep_pt >= 120) )",
    }    
}

# scores definition
hwwev = ["fj_PN_probHWqqWev0c","fj_PN_probHWqqWev1c","fj_PN_probHWqqWtauev0c","fj_PN_probHWqqWtauev1c"]
# FIX: third entry was "fj_PN_probHWqqWtauev0c" (the tau->e score) in the muon list.
hwwmv = ["fj_PN_probHWqqWmv0c","fj_PN_probHWqqWmv1c","fj_PN_probHWqqWtaumv0c","fj_PN_probHWqqWtaumv1c"]
tope = ["fj_PN_probTopbWev","fj_PN_probTopbWtauev"]
topm = ["fj_PN_probTopbWmv","fj_PN_probTopbWtaumv"]
qcd = ["fj_PN_probQCDbb","fj_PN_probQCDcc","fj_PN_probQCDb","fj_PN_probQCDc","fj_PN_probQCDothers"]

sigs = {
    "ele": hwwev,
    "mu": hwwmv,
}
# both channels use the same background partners (tophad is defined in an earlier cell)
bkgs = {
    "ele": tope+topm+tophad+qcd,
    "mu": tope+topm+tophad+qcd,
}

# truth-label columns whose OR defines a matched jet, per channel
top_match_cols = {
    "mu": ["label_Top_bWmv", "label_Top_bWtaumv"],
    "ele": ["label_Top_bWev", "label_Top_bWtauev"],
}
hww_match_cols = {
    "mu": ["label_H_WqqWmv_0c", "label_H_WqqWmv_1c", "label_H_WqqWtaumv_0c", "label_H_WqqWtaumv_1c"],
    "ele": ["label_H_WqqWev_0c", "label_H_WqqWev_1c", "label_H_WqqWtauev_0c", "label_H_WqqWtauev_1c"],
}

# processes entering the ROC; add "WJetsLNu" to include W+jets as background
roc_samples = ["HWW", "QCD", "TTbar"]

channels = ["mu", "ele"]

roc_scores, roc_labels = {}, {}
for ch in channels:
    print(f"Processing channel {ch}")
    
    roc_labels[ch], roc_scores[ch] = [], []

    # The discriminator is built from the ParT columns: swap the tagger prefix once
    # per channel. (The original looped over PNs without using the loop variable,
    # appending the very same ParT scores/labels once per entry.)
    sig_cols = [s.replace("PN","ParT") for s in sigs[ch]]
    bkg_cols = [b.replace("PN","ParT") for b in bkgs[ch]]

    for sample in os.listdir(samples_dir):

        ### get a combined label to combine samples of the same process
        ### (first matching key wins; unmatched samples keep their own name)
        sample_to_use = next(
            (combine_samples[key] for key in combine_samples if key in sample),
            sample,
        )

        if sample_to_use not in roc_samples:
            continue

        print(f"Finding {sample} samples and should combine them under {sample_to_use}")

        out_files = f"{samples_dir}/{sample}/outfiles/"
        parquet_files = glob.glob(f"{out_files}/*_{ch}.parquet")

        if not parquet_files:
            print(f"No parquet file for {sample}")
            continue                

        data = pd.read_parquet(parquet_files)

        # apply preselection
        for selection in presel[ch]:
            data = data.query(presel[ch][selection])  

        # query labels from ucsd to match a pku label and save the column
        for label_name in labels_per_sample[ch][sample_to_use]:
            match_label = match_labels[sample_to_use]
            data[label_name] = data.query(labels[label_name])[match_label]            

        # fill NaN rows that didn't pass with False
        # NOTE(review): this fills NaNs in *every* column, score columns included -- confirm intended.
        data = data.fillna(value=False)

        if "TTbar" in sample_to_use or "HWW" in sample_to_use:
            match_cols = top_match_cols[ch] if "TTbar" in sample_to_use else hww_match_cols[ch]

            # OR of the label columns; a boolean Series mask (rather than a plain list)
            # also works when the preselection left no rows.
            matched = data[match_cols[0]].astype(bool)
            for col in match_cols[1:]:
                matched = matched | data[col].astype(bool)

            ## keep only the matched
            data = data[matched]

            if "HWW" in sample_to_use:
                data = data.loc[data["fj_H_VV_isVlepton"] == True]

        elif "WJetsLNu" in sample_to_use:
            data = data.loc[data[match_labels[sample_to_use]] == True]

        # signal is labelled 1, every background 0
        target = 1 if "HWW" in sample_to_use else 0

        roc_scores[ch] += disc_score(data, sig_cols, bkg_cols).tolist()
        roc_labels[ch] += [target] * len(data)
    print("-------------------------------------------------")

In [None]:
# Keep a handle on the dictionaries built by the previous cell (HWW jets with
# fj_H_VV_isVlepton == True); the next cell rebinds roc_scores / roc_labels.
roc_scores1, roc_labels1 = roc_scores, roc_labels

In [None]:
# Collect ParT discriminator scores and truth labels (1 = HWW, 0 = background) per
# channel, keeping only HWW jets with fj_H_VV_isVlepton == False.
# NOTE(review): presumably "the lepton comes from the off-shell W*" -- confirm
# against the ntuple definition of fj_H_VV_isVlepton.

# Selections applied per channel before anything else; each entry is a pandas-query string.
presel = {
    "mu": {
        "preselection": "( ( (lep_pt < 55) & (lep_isolation<0.15)) |  (lep_pt >= 55) )",
#         "bjet": "( (fj_bjets<0.3040) )",
#         "bjet_ophem": "( (fj_bjets_ophem<0.3040) )",                
#         "iso": "( ( (lep_pt < 55) & (lep_isolation<0.15)) |  (lep_pt >= 55) )",
#         "mini-iso": "( (lep_pt < 55) |  ( (lep_misolation < 0.1) & (lep_pt >= 55) ) )",  
    },
    "ele": {
        # FIX: the electron isolation threshold is 120 GeV in the other preselection
        # cells of this notebook; the 55 GeV value here was copied from the muon entry.
        "preselection": "( ( (lep_pt < 120) & (lep_isolation<0.15)) |  (lep_pt >= 120) )",
    }    
}

# scores definition
hwwev = ["fj_PN_probHWqqWev0c","fj_PN_probHWqqWev1c","fj_PN_probHWqqWtauev0c","fj_PN_probHWqqWtauev1c"]
# FIX: third entry was "fj_PN_probHWqqWtauev0c" (the tau->e score) in the muon list.
hwwmv = ["fj_PN_probHWqqWmv0c","fj_PN_probHWqqWmv1c","fj_PN_probHWqqWtaumv0c","fj_PN_probHWqqWtaumv1c"]
tope = ["fj_PN_probTopbWev","fj_PN_probTopbWtauev"]
topm = ["fj_PN_probTopbWmv","fj_PN_probTopbWtaumv"]
qcd = ["fj_PN_probQCDbb","fj_PN_probQCDcc","fj_PN_probQCDb","fj_PN_probQCDc","fj_PN_probQCDothers"]

sigs = {
    "ele": hwwev,
    "mu": hwwmv,
}
# both channels use the same background partners (tophad is defined in an earlier cell)
bkgs = {
    "ele": tope+topm+tophad+qcd,
    "mu": tope+topm+tophad+qcd,
}

# truth-label columns whose OR defines a matched jet, per channel
top_match_cols = {
    "mu": ["label_Top_bWmv", "label_Top_bWtaumv"],
    "ele": ["label_Top_bWev", "label_Top_bWtauev"],
}
hww_match_cols = {
    "mu": ["label_H_WqqWmv_0c", "label_H_WqqWmv_1c", "label_H_WqqWtaumv_0c", "label_H_WqqWtaumv_1c"],
    "ele": ["label_H_WqqWev_0c", "label_H_WqqWev_1c", "label_H_WqqWtauev_0c", "label_H_WqqWtauev_1c"],
}

# processes entering the ROC; add "WJetsLNu" to include W+jets as background
roc_samples = ["HWW", "QCD", "TTbar"]

channels = ["mu", "ele"]

roc_scores, roc_labels = {}, {}
for ch in channels:
    print(f"Processing channel {ch}")
    
    roc_labels[ch], roc_scores[ch] = [], []

    # The discriminator is built from the ParT columns: swap the tagger prefix once
    # per channel. (The original looped over PNs without using the loop variable,
    # appending the very same ParT scores/labels once per entry.)
    sig_cols = [s.replace("PN","ParT") for s in sigs[ch]]
    bkg_cols = [b.replace("PN","ParT") for b in bkgs[ch]]

    for sample in os.listdir(samples_dir):

        ### get a combined label to combine samples of the same process
        ### (first matching key wins; unmatched samples keep their own name)
        sample_to_use = next(
            (combine_samples[key] for key in combine_samples if key in sample),
            sample,
        )

        if sample_to_use not in roc_samples:
            continue

        print(f"Finding {sample} samples and should combine them under {sample_to_use}")

        out_files = f"{samples_dir}/{sample}/outfiles/"
        parquet_files = glob.glob(f"{out_files}/*_{ch}.parquet")

        if not parquet_files:
            print(f"No parquet file for {sample}")
            continue                

        data = pd.read_parquet(parquet_files)

        # apply preselection
        for selection in presel[ch]:
            data = data.query(presel[ch][selection])  

        # query labels from ucsd to match a pku label and save the column
        for label_name in labels_per_sample[ch][sample_to_use]:
            match_label = match_labels[sample_to_use]
            data[label_name] = data.query(labels[label_name])[match_label]            

        # fill NaN rows that didn't pass with False
        # NOTE(review): this fills NaNs in *every* column, score columns included -- confirm intended.
        data = data.fillna(value=False)

        if "TTbar" in sample_to_use or "HWW" in sample_to_use:
            match_cols = top_match_cols[ch] if "TTbar" in sample_to_use else hww_match_cols[ch]

            # OR of the label columns; a boolean Series mask (rather than a plain list)
            # also works when the preselection left no rows.
            matched = data[match_cols[0]].astype(bool)
            for col in match_cols[1:]:
                matched = matched | data[col].astype(bool)

            ## keep only the matched
            data = data[matched]

            if "HWW" in sample_to_use:
                data = data.loc[data["fj_H_VV_isVlepton"] == False]

        elif "WJetsLNu" in sample_to_use:
            data = data.loc[data[match_labels[sample_to_use]] == True]

        # signal is labelled 1, every background 0
        target = 1 if "HWW" in sample_to_use else 0

        roc_scores[ch] += disc_score(data, sig_cols, bkg_cols).tolist()
        roc_labels[ch] += [target] * len(data)
    print("-------------------------------------------------")

In [None]:
# Keep a handle on the dictionaries built by the previous cell (HWW jets with
# fj_H_VV_isVlepton == False) under their own names for the comparison plot.
roc_scores2, roc_labels2 = roc_scores, roc_labels

In [None]:
# Overlay the ROC curves built from (roc_scores1, roc_labels1) and
# (roc_scores2, roc_labels2), each merged over the muon and electron channels.

MISTAG_WP = 0.01    # mistag rate highlighted by the dashed guide lines
MISTAG_MIN = 1e-4   # lower edge of the (log-scale) mistag-rate axis


def _plot_merged_roc(ax, scores_by_ch, labels_by_ch, legend, channels=("mu", "ele")):
    """Draw one ROC curve (signal efficiency vs. mistag rate) on ``ax``.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    scores_by_ch, labels_by_ch : dicts mapping a channel name to a list of
        discriminator scores / truth labels (1 = signal, 0 = background).
    legend : legend text; the AUC is appended to it.
    channels : channels whose entries are concatenated before the ROC is computed.
    """
    scores, truth = [], []
    for ch in channels:
        scores += scores_by_ch[ch]
        truth += labels_by_ch[ch]
    scores = np.array(scores)
    truth = np.array(truth)

    # roc_curve cannot handle NaN scores: drop them together with their labels
    is_nan = np.isnan(scores)
    if is_nan.any():
        print(f"Found {is_nan.sum()} NaN scores out of {len(scores)}. Will remove them.")
        scores, truth = scores[~is_nan], truth[~is_nan]

    fpr, tpr, _ = roc_curve(truth, scores)
    ax.plot(
        tpr,
        fpr,
        linewidth=2,
        linestyle="solid",
        label=legend + f" - AUC={roc_auc_score(truth, scores):.3f}",
    )

    # mark the signal efficiency at the ROC point(s) closest to the mistag working point
    distance = np.abs(fpr - MISTAG_WP)
    at_wp = distance == distance.min()
    ax.vlines(tpr[at_wp], MISTAG_MIN, MISTAG_WP, linestyles="dashed", linewidth=1, colors="gray")


fig, ax = plt.subplots(figsize=(8,8))
ax.grid()

# fix the axes first so that the guide lines are drawn against the final (log) range
ax.set_yscale('log')
ax.set_xlim(0, 1)
ax.set_ylim(MISTAG_MIN, 1)
ax.axhline(MISTAG_WP, linestyle="dashed", lw=2, color='gray')

### from W
_plot_merged_roc(ax, roc_scores1, roc_labels1, r"W($\ell\nu$)W*(qq)")

### from W*
_plot_merged_roc(ax, roc_scores2, roc_labels2, r"W(qq)W*($\ell\nu$)")

ax.set_xlabel('Signal efficiency (HWW)', fontsize=16, ha='right', x=1)
ax.set_ylabel('Mistag rate (Top+QCD)', fontsize=16, ha='right', y=1)
ax.legend(title=f"Semi-Leptonic  Channel");
# optional titles:
# ax.set_title("bjets_ophem<0.3");
# ax.set_title("At pre-selection level");