In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
from trainlib.FileCollection import FileCollection
from trainlib.config import Config
from trainlib.ConfigFileHandler import ConfigFileHandler
from trainlib.ConfigFileUtils import ConfigFileUtils
import trainlib.cuts as cuts
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd
import copy
import re
from scipy import interpolate
import scipy.integrate as integrate
import pickle
import os

Welcome to JupyROOT 6.10/09


In [3]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error



In [4]:
# Event-level branches offered to the BDT as candidate input variables.
candidate_branches = [
    "PFMET", "nCleanedJetsPt30", "nCleanedJetsPt30BTagged_bTagSF", "nExtraLep",
    "ZZMass_masked", "nExtraZ",
    "Z1Mass", "Z2Mass", "Z1Pt", "Z2Pt",
    "ZZMassErr", "ZZPt", "ZZEta", "ZZPhi",
    "Z1Flav", "Z2Flav",
    "costhetastar", "helphi", "helcosthetaZ1", "helcosthetaZ2",
    "phistarZ1", "phistarZ2", "xi", "xistar",
    "D_bkg_ME",
    "D_VBF2j_ggH_ME", "D_VBF1j_ggH_ME", "D_WHh_ggH_ME", "D_ZHh_ggH_ME",
    "D_WHh_ZHh_ME", "D_VBF2j_WHh_ME", "D_VBF2j_ZHh_ME",
]
# Earlier, reduced selection kept for reference:
#candidate_branches = ["PFMET", "nCleanedJetsPt30", "nCleanedJetsPt30BTagged_bTagSF", "nExtraLep", "ZZMass_masked", "nExtraZ", "Z1Mass", "Z2Mass", "Z1Pt", "Z2Pt", "ZZMassErr", "ZZPt", "ZZEta", "ZZPhi", "Z1Flav", "Z2Flav", "D_VBF2j_ggH_ME", "D_VBF1j_ggH_ME", "D_WHh_ggH_ME", "D_ZHh_ggH_ME", "D_WHh_ZHh_ME", "D_VBF2j_WHh_ME", "D_VBF2j_ZHh_ME"]

# Optional branches that get_feature_importance_list_BDT only adds for jet-binned categories
# (currently none).
MELA_branches = []

# Base names of the list-valued object collections that are unrolled into per-object columns.
#list_branches = ["Jet", "Lep", "ExtraLep"]
list_branches = ["Jet", "ExtraLep"]

# Minimum Pt per collection. prepare_data pairs these with list_branches via zip, so with two
# collections configured only the first two values take effect.
pt_limits = [30.0, 0.0, 0.0]

In [5]:
# Every branch requested from the input files: the per-object list branches, the candidate and
# optional scalar inputs, and a few extra branches (including the per-event "training_weight").
allbranches = (
    ["JetPt", "JetEta", "JetPhi", "LepPt", "LepEta", "LepPhi", "ExtraLepPt", "ExtraLepEta", "ExtraLepPhi"]
    + candidate_branches
    + MELA_branches
    + ["LHEAssociatedParticleId", "GenAssocLep1Id", "GenAssocLep2Id", "training_weight"]
)

In [6]:
# Root directory of the simulated samples; every file path in `collections` is built by
# prefixing this string. The commented-out line is an alternative sample location.
#MC_path = "/data_CMS/cms/wind/CJLST_NTuples_randomizeda/"
MC_path = "/data_CMS/cms/wind/CJLST_NTuples/trainval/"

In [7]:
# these are the cuts without any m4l restriction imposed
def WHhadr0j_cut(row):
    """Accept rows that pass cuts.WHhadr_cut and whose "nCleanedJetsPt30" entry equals 0."""
    preselection = cuts.WHhadr_cut(row)
    if not preselection:
        # same short-circuit as `and`: the falsy preselection result is returned unchanged
        return preselection
    return row["nCleanedJetsPt30"] == 0

def WHhadr01j_cut(row):
    """Accept rows that pass cuts.WHhadr_cut and whose "nCleanedJetsPt30" entry equals 0 or 1."""
    preselection = cuts.WHhadr_cut(row)
    if not preselection:
        # same short-circuit as `and`: the falsy preselection result is returned unchanged
        return preselection
    jet_count = row["nCleanedJetsPt30"]
    return jet_count == 0 or jet_count == 1

def WHhadr1j_cut(row):
    """Accept rows that pass cuts.WHhadr_cut and whose "nCleanedJetsPt30" entry equals 1."""
    preselection = cuts.WHhadr_cut(row)
    if not preselection:
        # same short-circuit as `and`: the falsy preselection result is returned unchanged
        return preselection
    return row["nCleanedJetsPt30"] == 1

def WHhadr2j_cut(row):
    """Accept rows that pass cuts.WHhadr_cut and whose "nCleanedJetsPt30" entry is at least 2."""
    preselection = cuts.WHhadr_cut(row)
    if not preselection:
        # same short-circuit as `and`: the falsy preselection result is returned unchanged
        return preselection
    return row["nCleanedJetsPt30"] >= 2

def ZHhadr0j_cut(row):
    """Accept rows that pass cuts.ZHhadr_cut and whose "nCleanedJetsPt30" entry equals 0."""
    preselection = cuts.ZHhadr_cut(row)
    if not preselection:
        # same short-circuit as `and`: the falsy preselection result is returned unchanged
        return preselection
    return row["nCleanedJetsPt30"] == 0

def ZHhadr01j_cut(row):
    """Accept rows that pass cuts.ZHhadr_cut and whose "nCleanedJetsPt30" entry equals 0 or 1."""
    preselection = cuts.ZHhadr_cut(row)
    if not preselection:
        # same short-circuit as `and`: the falsy preselection result is returned unchanged
        return preselection
    jet_count = row["nCleanedJetsPt30"]
    return jet_count == 0 or jet_count == 1

def ZHhadr1j_cut(row):
    """Accept rows that pass cuts.ZHhadr_cut and whose "nCleanedJetsPt30" entry equals 1."""
    preselection = cuts.ZHhadr_cut(row)
    if not preselection:
        # same short-circuit as `and`: the falsy preselection result is returned unchanged
        return preselection
    return row["nCleanedJetsPt30"] == 1

def ZHhadr2j_cut(row):
    """Accept rows that pass cuts.ZHhadr_cut and whose "nCleanedJetsPt30" entry is at least 2."""
    preselection = cuts.ZHhadr_cut(row)
    if not preselection:
        # same short-circuit as `and`: the falsy preselection result is returned unchanged
        return preselection
    return row["nCleanedJetsPt30"] >= 2

def mZZ0j_cut(row):
    """Accept rows whose "nCleanedJetsPt30" entry equals 0; no other requirement is applied."""
    jet_count = row["nCleanedJetsPt30"]
    return jet_count == 0

def mZZ01j_cut(row):
    """Accept rows whose "nCleanedJetsPt30" entry equals 0 or 1; no other requirement is applied."""
    jet_count = row["nCleanedJetsPt30"]
    return jet_count == 0 or jet_count == 1

def mZZ1j_cut(row):
    """Accept rows whose "nCleanedJetsPt30" entry equals 1; no other requirement is applied."""
    jet_count = row["nCleanedJetsPt30"]
    return jet_count == 1

def mZZ2j_cut(row):
    """Accept rows whose "nCleanedJetsPt30" entry is at least 2; no other requirement is applied."""
    jet_count = row["nCleanedJetsPt30"]
    return jet_count >= 2

In [8]:
# Maps each event-category name to a dict of {input file path: cut function}. The functions in
# this notebook build a FileCollection from one of these dicts, e.g.
# FileCollection(collections[name], start_fraction, end_fraction).
# Category names carrying a "0j" / "1j" / "01j" / "2j" suffix use the jet-multiplicity cuts defined
# above; names without a suffix use the plain cut from the `cuts` module (or cuts.no_cut).
# NOTE(review): presumably FileCollection applies the cut to select events from the file -- confirm
# against trainlib.
collections = {"VBF2j": {MC_path + "VBFH125/ZZ4lAnalysis.root": mZZ2j_cut},
            "VBF1j": {MC_path + "VBFH125/ZZ4lAnalysis.root": mZZ1j_cut},
            "VBF0j": {MC_path + "VBFH125/ZZ4lAnalysis.root": mZZ0j_cut},
            "VBF01j": {MC_path + "VBFH125/ZZ4lAnalysis.root": mZZ01j_cut},
            "VBF": {MC_path + "VBFH125/ZZ4lAnalysis.root": cuts.no_cut},
            "ggH2j": {MC_path + "ggH125/ZZ4lAnalysis.root": mZZ2j_cut},
            "ggH1j": {MC_path + "ggH125/ZZ4lAnalysis.root": mZZ1j_cut},
            "ggH0j": {MC_path + "ggH125/ZZ4lAnalysis.root": mZZ0j_cut},
            "ggH01j": {MC_path + "ggH125/ZZ4lAnalysis.root": mZZ01j_cut},
            "ggH" : {MC_path + "ggH125/ZZ4lAnalysis.root": cuts.no_cut},
            "WHh2j": {MC_path + "WplusH125/ZZ4lAnalysis.root": WHhadr2j_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": WHhadr2j_cut},
            "WHh1j": {MC_path + "WplusH125/ZZ4lAnalysis.root": WHhadr1j_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": WHhadr1j_cut},
            "WHh0j": {MC_path + "WplusH125/ZZ4lAnalysis.root": WHhadr0j_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": WHhadr0j_cut},
            "WHh": {MC_path + "WplusH125/ZZ4lAnalysis.root": cuts.WHhadr_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": cuts.WHhadr_cut},
            "WHh01j": {MC_path + "WplusH125/ZZ4lAnalysis.root": WHhadr01j_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": WHhadr01j_cut},
            "WHl": {MC_path + "WplusH125/ZZ4lAnalysis.root": cuts.WHlept_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": cuts.WHlept_cut},
            "ZHh2j": {MC_path + "ZH125/ZZ4lAnalysis.root": ZHhadr2j_cut},
            "ZHh1j": {MC_path + "ZH125/ZZ4lAnalysis.root": ZHhadr1j_cut},
            "ZHh01j": {MC_path + "ZH125/ZZ4lAnalysis.root": ZHhadr01j_cut},
            "ZHh0j": {MC_path + "ZH125/ZZ4lAnalysis.root": ZHhadr0j_cut},
            "ZHh": {MC_path + "ZH125/ZZ4lAnalysis.root": cuts.ZHhadr_cut},
            "ZHl": {MC_path + "ZH125/ZZ4lAnalysis.root": cuts.ZHlept_cut},
            "ttHh": {MC_path + "ttH125/ZZ4lAnalysis.root": cuts.ttHhadr_cut},
            "ttHl": {MC_path + "ttH125/ZZ4lAnalysis.root": cuts.ttHlept_cut},
            "ZHMET": {MC_path + "ZH125/ZZ4lAnalysis.root": cuts.ZHMET_cut},
            "bck": {MC_path + "bkg/ZZ4lAnalysis.root": cuts.no_cut}
          }

In [9]:
# all the model combinations for which neural networks are currently trained
discriminant_pairs = [("ggH", "bck"), ("VBF", "bck"), ("ZHh", "bck"), ("WHh", "bck"), ("ZHl", "bck"),
                      ("WHl", "bck"), ("ZHMET", "bck"), ("ttHh", "bck"), ("ttHl", "bck"), ("VBF", "ggH"), ("WHh", "ggH"), ("ZHh", "ggH"), ("WHh", "ZHh"), ("VBF", "WHh"),
                     ("VBF", "ZHh"), ("WHl", "ggH"), ("WHl", "VBF"), ("WHl", "WHh"), ("WHl", "ZHh"),
                     ("WHl", "ZHl"), ("WHl", "ZHMET"), ("WHl", "ttHh"), ("WHl", "ttHl"), ("ZHh", "ZHl"),
                     ("ZHh", "ZHMET"), ("ZHh", "ttHh"), ("ZHh", "ttHl"), ("ZHl", "ggH"), ("ZHl", "VBF"),
                     ("ZHl", "WHh"), ("ZHl", "ZHMET"), ("ZHl", "ttHh"), ("ZHl", "ttHl"), ("ZHMET", "ggH"),
                     ("ZHMET", "VBF"), ("ZHMET", "WHh"), ("ZHMET", "ttHh"), ("ZHMET", "ttHl"), ("ttHh", "ggH"),
                      ("ttHh", "VBF"), ("ttHh", "WHh"), ("ttHh", "ttHl"), ("ttHl", "ggH"), ("ttHl", "VBF"),
                     ("ttHl", "WHh")]

In [10]:
def extract_order(df, col_basename, sorted_column, columns, order):
    """
    Pick, for every row of `df`, the entry ranked `order` within a list-valued column group.

    The ranking is taken from the column `col_basename + sorted_column`, largest value first
    (order 0 is the largest). For each name in `columns`, the element at the ranked position is
    read from the column `col_basename + name`.

    Rows whose ranking list has `order` entries or fewer yield 0 in every extracted column.

    Returns a DataFrame with one column per entry of `columns`, labelled
    "<col_basename><name>(<col_basename>Pt|<order>)". The "Pt" inside the label is hard-coded
    and does not follow `sorted_column`.
    """
    def get_index(row, order, col_basename, sorted_column):
        # `sorted_column` arrives as a name suffix and is rebound here to the row's list of values
        sorted_column = row[col_basename + sorted_column]
        if order >= len(sorted_column):
            # not enough entries in this row: flag it with -1
            return -1
        else:
            # argsort is ascending; flipping it ranks the positions from largest to smallest value
            return np.flipud(np.argsort(sorted_column))[order]
    
    # NOTE(review): raw = True is requested although both callbacks index the row by label;
    # presumably the pandas version in use still hands over labelled rows here -- confirm.
    index_column = pd.DataFrame(df.transform(lambda row: get_index(row, order, col_basename, sorted_column), axis = 1, raw = True))
    index_column.columns = ["index"]
    # put the per-row position next to the original columns so one row-wise pass can use both
    df_temp = pd.concat([index_column, df], axis = 1)
    
    def get_element(row, column_name):
        if row["index"] == -1:
            # row flagged as too short by get_index
            return 0
        else:
            return row[column_name][row["index"]]
        
    extracted_cols = pd.DataFrame()
    for column in columns:
        extracted_col = pd.DataFrame(df_temp.transform(lambda row: get_element(row, col_basename + column), axis = 1, raw = True))
        extracted_col.columns = [col_basename + column + "(" + col_basename + "Pt|" + str(order) + ")"]
        extracted_cols = pd.concat([extracted_cols, extracted_col], axis = 1)
        
    return extracted_cols

In [11]:
def prepare_data(df, col_basenames, sorted_column, columns, orders, pt_limits):
    """
    Unroll list-valued column groups of `df` into flat per-rank columns.

    For every (basename, Pt limit) pair -- `col_basenames` and `pt_limits` are paired with zip,
    so surplus entries of the longer one are ignored -- and for every rank in `orders`,
    extract_order is called and its columns are added to the result. Whenever the extracted
    "<basename>Pt(<basename>Pt|<rank>)" value of a row lies below the Pt limit, all columns
    extracted for that basename and rank are set to 0.0 in that row.

    Returns the column-wise concatenation of all extracted frames.
    """
    combined = pd.DataFrame()
    for basename, min_pt in zip(col_basenames, pt_limits):
        for rank in orders:
            unrolled = extract_order(df, basename, sorted_column, columns, rank)

            pt_label = basename + "Pt(" + basename + "Pt|" + str(rank) + ")"
            below_limit = unrolled[pt_label] < min_pt
            # boolean row mask: zeroes every extracted column of the affected rows
            unrolled[below_limit] = 0.0

            combined = pd.concat([combined, unrolled], axis = 1)

    return combined

In [12]:
def get_data(H1_coll, H0_coll, read_branches, input_branches, list_branches, pt_limits):
    """
    Read both collections and return their input tables as the pair (H1_df, H0_df).

    Each collection is read in full (fractions 0.0 to 1.0) for `read_branches`. The list-valued
    groups named in `list_branches` are unrolled with prepare_data into "Pt", "Eta" and "Phi"
    columns for the four leading ranks. Each returned frame holds `input_branches` followed by
    the unrolled columns; the unrolled column names are taken from the H1 result and applied to
    both frames.
    """
    raw_frames = [coll.get_data(read_branches, 0.0, 1.0) for coll in (H1_coll, H0_coll)]
    unrolled_frames = [prepare_data(raw, list_branches, "Pt", ["Pt", "Eta", "Phi"], range(4), pt_limits)
                       for raw in raw_frames]

    # the column selection is defined by the H1 frame and reused for H0
    complete_input_branches = np.concatenate([input_branches, unrolled_frames[0].columns])

    H1_df, H0_df = [pd.concat([raw, unrolled], axis = 1)[complete_input_branches]
                    for raw, unrolled in zip(raw_frames, unrolled_frames)]

    return H1_df, H0_df

In [13]:
def get_data_dmatrix(H1_coll, H0_coll, read_branches, input_branches, list_branches, pt_limits):
    """
    Build the labelled, class-weighted xgboost DMatrix for one H1-vs-H0 classification task.

    The two input tables come from get_data. H1 rows receive label 1 and H0 rows label 0, with
    the H1 rows placed first. Every row of a class carries the same weight,
    1 + (size of the other class) / (size of this class), so the smaller class is weighted up.
    An empty class therefore raises ZeroDivisionError.

    Prints the number and names of the input variables and the class weights.
    Returns the xgb.DMatrix; its feature names are the columns of the H1 table.
    """
    H1_df, H0_df = get_data(H1_coll, H0_coll, read_branches, input_branches, list_branches, pt_limits)

    complete_input_branches = H1_df.columns
    # single-argument print(...) produces identical output under Python 2 and Python 3
    print("number of input variables: " + str(len(complete_input_branches)))
    print("final list of inputs: " + str(complete_input_branches))

    # try with the same weights as used later in the neural network training, to balance out some (very)
    # unbalanced datasets
    H1_class_weight = 1.0 + float(len(H0_df)) / float(len(H1_df))
    H0_class_weight = 1.0 + float(len(H1_df)) / float(len(H0_df))

    print("using class weights: " + str(H1_class_weight) + " (H1), " + str(H0_class_weight) + " (H0)")

    H1_weights = np.full(len(H1_df), H1_class_weight)
    H0_weights = np.full(len(H0_df), H0_class_weight)

    # .values replaces DataFrame.as_matrix(), which newer pandas releases no longer provide
    H1_data = H1_df.values
    H0_data = H0_df.values
    H1_target = np.ones(np.shape(H1_data)[0])
    H0_target = np.zeros(np.shape(H0_data)[0])

    target = np.concatenate([H1_target, H0_target])
    data = np.concatenate([H1_data, H0_data])
    weights = np.concatenate([H1_weights, H0_weights])

    dmatrix = xgb.DMatrix(data, label = target, feature_names = complete_input_branches, weight = weights)

    return dmatrix

In [14]:
def get_feature_correlation(source, corr_branches, mandatory_branches, optional_branches, list_branches, pt_limits):
    """
    Plot the correlation matrix of `corr_branches` for the first half of the `source` category.

    The inputs requested from get_data are `mandatory_branches` plus every optional branch that
    shares a jet tag ("0j", "1j" or "2j") with `source`. Only the columns listed in
    `corr_branches` enter the plot, so they have to be among the columns get_data returns.

    Returns the matplotlib figure.
    """
    coll = FileCollection(collections[source], 0.0, 0.5)

    input_branches = list(mandatory_branches)
    for optional_branch in optional_branches:
        # one independent check per tag: a branch matching several tags is appended once per match
        for jet_tag in ("0j", "1j", "2j"):
            if jet_tag in source and jet_tag in optional_branch:
                input_branches.append(optional_branch)

    # the same collection is handed over for both hypotheses; only the first frame is used
    frame, _ = get_data(coll, coll, allbranches, input_branches, list_branches, pt_limits)
    frame = frame[corr_branches]

    fig = plt.figure(figsize=(10,15))
    ax = fig.add_subplot(111)
    cax = ax.matshow(frame.corr(), vmin = -1.0, vmax = 1.0, cmap = "RdBu")
    fig.colorbar(cax)

    # one tick per matrix row / column
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_major_locator(ticker.MultipleLocator(1))

    # the leading empty label is there for the tick that precedes the first matrix cell
    tick_labels = [''] + corr_branches
    ax.set_yticklabels(tick_labels)
    ax.set_xticklabels(tick_labels, rotation = 'vertical')

    return fig

In [15]:
def get_interpolating_function(data, bins):
    """
    Build a callable that linearly interpolates `data` between the centres of `bins`.

    `bins` holds the bin edges; the centre of each pair of neighbouring edges is paired with the
    matching entry of `data`. The returned function evaluates the interpolation only strictly
    between the first and the last bin centre and yields 0 everywhere else, including exactly
    at those two centres.
    """
    bin_centers = [np.mean([low_edge, high_edge]) for low_edge, high_edge in zip(bins[:-1], bins[1:])]
    intf = interpolate.interp1d(bin_centers, data, kind = "linear")

    def interpolated_function(x):
        if x > bin_centers[0] and x < bin_centers[-1]:
            return intf(x)
        return 0

    return interpolated_function

In [16]:
def get_binned_data(df, branch):
    """
    Histogram the column `branch` of `df`, weighted by the "training_weight" column.

    The bin width is 2 * IQR / n**0.33 (a Freedman-Diaconis-style rule with the exponent
    approximated as 0.33), but never smaller than 0.005. The bins start at the smallest value and
    extend far enough to cover the largest one. The weights are scaled by
    1 / (sum of weights * bin width), so the histogram integrates to one.

    Returns the (bin contents, bin edges) tuple produced by np.histogram.
    """
    # .values replaces DataFrame.as_matrix(), which newer pandas releases no longer provide
    data = df[branch].values
    weights = df["training_weight"].values

    # set the bin width
    q75, q25 = np.percentile(data, [75, 25])
    bin_width = max(2 * (q75 - q25) / len(data)**0.33, 0.005)

    data_max = np.max(data)
    data_min = np.min(data)
    # stop one bin width past the maximum so the largest value always falls inside a bin
    bins = np.arange(data_min, data_max + bin_width, bin_width)

    weights = weights / (np.sum(weights) * bin_width)

    hist = np.histogram(data, bins = bins, weights = weights)
    return hist

In [17]:
def get_feature_importance_list_BDT(disc_pair, mandatory_branches, optional_branches, list_branches, pt_limits):
    """
    Rank the input variables for one discriminant by training gradient-boosted trees.

    `disc_pair` is (H1 category name, H0 category name); both are keys of `collections`. The
    first half of each collection is used for training, the second half for validation. Trees
    of depth 1 to 7 are trained in turn (at most 2000 rounds, early stopping after 10 rounds
    without improvement), and the depth with the lowest validation RMSE is kept.

    The inputs always contain `mandatory_branches`. An optional branch is added when either
    category name carries a jet tag and the branch fits it: a "0j" category takes branches
    containing "0j", a "1j" category those containing "1j", and a "2j" category every branch
    that does not contain "1j". Fully inclusive categories get no optional branches.

    Returns (best_params, feature_names, used_variables): the xgboost parameters of the best
    depth, the feature names of the training matrix, and a dict mapping each variable used by
    the best model to its f-score divided by the sum of all f-scores.
    """
    H1_name = disc_pair[0]
    H0_name = disc_pair[1]

    def category_has(jet_tag):
        return jet_tag in H1_name or jet_tag in H0_name

    # first assemble the list of branches that can serve as input: it will *always* contain the
    # mandatory branches, and *can* contain some of the optional branches, if the name of the
    # categories allows it
    input_branches = list(mandatory_branches)
    for optional_branch in optional_branches:
        accepted = ((category_has("0j") and "0j" in optional_branch)
                    or (category_has("1j") and "1j" in optional_branch)
                    or (category_has("2j") and "1j" not in optional_branch))
        if accepted:
            input_branches.append(optional_branch)

    def build_dmatrix(start_fraction, end_fraction):
        H1_coll = FileCollection(collections[H1_name], start_fraction, end_fraction)
        H0_coll = FileCollection(collections[H0_name], start_fraction, end_fraction)
        return get_data_dmatrix(H1_coll, H0_coll, allbranches, input_branches, list_branches, pt_limits)

    # training data from the first half, validation data from the second half
    dtrain = build_dmatrix(0.0, 0.5)
    dval = build_dmatrix(0.5, 1.0)

    evallist = [(dtrain, 'train'), (dval, 'eval')]

    # perform the training: try different tree depths and keep the one with the best RMSE
    # (i.e. avoid too deep trees to start with)
    params = {'eta': 0.01, 'silent': 1, 'gamma': 0.5, 'objective': 'binary:logistic',
              'nthread': 16, 'eval_metric': 'rmse'}
    max_num_rounds = 2000

    best_loss = 1e6
    best_imp = None
    best_params = None
    for tree_depth in range(1, 8):
        params['max_depth'] = tree_depth

        bst = xgb.train(params, dtrain, max_num_rounds, evals = evallist, early_stopping_rounds = 10, verbose_eval = False)

        pred = bst.predict(dval)
        cur_loss = np.sqrt(mean_squared_error(pred, dval.get_label()))
        cur_imp = bst.get_fscore()

        print("for max_depth = " + str(params['max_depth']) + ": loss = " + str(cur_loss))

        if cur_loss < best_loss:
            best_loss = cur_loss
            best_imp = copy.copy(cur_imp)
            # params is modified on every iteration, so keep a snapshot of the winning setup
            best_params = copy.copy(params)

    # normalize the usage score w.r.t. the total score (i.e. sum of all individuals)
    score_sum = sum(best_imp.values())
    ranked_scores = sorted(best_imp.items(), key = lambda item: item[1], reverse = True)
    used_variables = dict((name, score / float(score_sum)) for name, score in ranked_scores)
    return best_params, dtrain.feature_names, used_variables

In [18]:
def get_histogram(df, branch, label):
    """
    Draw the normalised, weighted distribution of `branch` on the current matplotlib axes.

    The column `branch` of `df` is histogrammed with the "training_weight" column as weights.
    The bin width is 2 * IQR / n**0.33 (a Freedman-Diaconis-style rule with the exponent
    approximated as 0.33), but never smaller than 0.005. The weights are scaled so that the
    histogram integrates to one. The histogram is drawn half-transparent and tagged with
    `label` for the legend.

    Returns whatever plt.hist returns.
    """
    # .values replaces DataFrame.as_matrix(), which newer pandas releases no longer provide
    data = df[branch].values
    weights = df["training_weight"].values

    # set the bin width
    q75, q25 = np.percentile(data, [75, 25])
    bin_width = max(2 * (q75 - q25) / len(data)**0.33, 0.005)

    data_max = np.max(data)
    data_min = np.min(data)
    # stop one bin width past the maximum so the largest value always falls inside a bin
    bins = np.arange(data_min, data_max + bin_width, bin_width)

    weights = weights / (np.sum(weights) * bin_width)

    fig = plt.hist(data, bins = bins, weights = weights, alpha = 0.5, label = label)
    return fig

In [19]:
def plot_branch(disc_pair, branch, start_fraction = 0.0, end_fraction = 1.0):
    """
    Overlay the normalised distributions of `branch` for the two categories in `disc_pair`.

    `disc_pair` is (H1 category name, H0 category name), both keys of `collections`;
    `start_fraction` and `end_fraction` are passed on to FileCollection. A new figure is
    opened, one histogram per category is drawn with get_histogram (labelled with the category
    name), and the figure is shown with a legend. Nothing is returned.
    """
    names = (disc_pair[0], disc_pair[1])
    H1_coll, H0_coll = [FileCollection(collections[name], start_fraction, end_fraction) for name in names]

    # all branches are requested both as read branches and as input branches
    frames = get_data(H1_coll, H0_coll, allbranches, allbranches, list_branches, pt_limits)

    plt.figure()
    for frame, name in zip(frames, names):
        get_histogram(frame, branch, name)

    plt.legend(loc = 'upper right')
    plt.show()

In [20]:
def plot_variables(discs):
    """
    Rank the input variables for each discriminant in `discs` and plot the scores as a matrix.

    `discs` is a non-empty sequence of (H1 category name, H0 category name) pairs. For each pair
    get_feature_importance_list_BDT is run with the notebook-level candidate_branches,
    MELA_branches, list_branches and pt_limits. The normalised scores are collected in one
    table (a variable that a discriminant does not use counts as 0.0), printed, and drawn with
    variables along the y axis and discriminants along the x axis.

    Returns (fig, sorted_implist). `sorted_implist` lists the (variable, score) pairs of the
    LAST pair in `discs` only, sorted by decreasing score.

    Raises ValueError if `discs` is empty.
    """
    if len(discs) == 0:
        # without this guard the function ended in a NameError on `cutimplist` further down
        raise ValueError("plot_variables needs at least one discriminant pair")

    plotframe = pd.DataFrame()

    for disc in discs:
        _, _, implist = get_feature_importance_list_BDT(disc, candidate_branches, MELA_branches, list_branches, pt_limits)

        # keep every variable with a non-zero score (no cumulative-importance cut is applied here)
        cutimplist = {key: val for key, val in implist.items() if val > 0.00}
        curframe = pd.DataFrame(cutimplist, index = [len(plotframe)])

        plotframe = pd.concat([plotframe, curframe])

    plotframe = plotframe.fillna(0.0)

    # single-argument print(...) produces identical output under Python 2 and Python 3
    print(plotframe)
    print("number of pre-selected input variables = " + str(len(plotframe.columns)))

    # start the plotting
    parameters = plotframe.columns
    # .values replaces DataFrame.as_matrix(), which newer pandas releases no longer provide
    plotdata = np.transpose(plotframe.values)

    plt.close('all')
    fig = plt.figure(figsize=(10,15))
    ax = fig.add_subplot(111)
    cax = ax.matshow(plotdata, cmap = 'Blues')

    # make axis labels: strip the jet tag from both category names and append it once at the end
    disclabels = []
    for disc in discs:
        for jet_tag in ("0j", "01j", "1j", "2j"):
            if jet_tag in disc[0] or jet_tag in disc[1]:
                disclabels.append('D_' + re.sub(jet_tag, '', disc[0]) + "_" + re.sub(jet_tag, '', disc[1]) + "_" + jet_tag)
                break
        else:
            # NOTE(review): categories without a jet tag are labelled "_2j" as well -- confirm that
            # this is intended
            disclabels.append('D_' + disc[0] + "_" + disc[1] + "_2j")

    # the leading empty label is there for the tick that precedes the first matrix cell
    disclabels = np.concatenate([[''], np.array(disclabels)])
    parameters = np.concatenate([[''], np.array(parameters)])

    ax.set_xticklabels(disclabels, rotation = 'vertical')
    ax.set_yticklabels(parameters)

    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    # sort the used variables according to their importance (last discriminant only)
    sorted_implist = sorted(cutimplist.items(), key = lambda x: x[1], reverse = True)

    return fig, sorted_implist

In [21]:
def append_variables(confhandler, impdict, threshold_fscore):
    """
    Add a section for one discriminant to `confhandler` and fill in its selected inputs.

    `impdict` maps each variable name to a one-element list holding its score, plus the key
    "discriminant" whose value is the section name. Variables with a score above
    `threshold_fscore` are selected. Names containing "phi" or "Phi" are written to
    "periodic_columns", all other selected names to "nonperiodic_columns"; both lists are
    serialised with ConfigFileUtils.serialize_list.
    """
    confhandler.new_section(impdict["discriminant"])
    cur_sec = confhandler.get_section(impdict["discriminant"])

    periodic_inputs = []
    nonperiodic_inputs = []
    for key, val in impdict.items():
        # The "discriminant" entry holds the section name, not a score. Skip it by equality
        # (`is not` compared object identity) and before val[0] is compared to the threshold.
        if key == "discriminant":
            continue
        if val[0] > threshold_fscore:
            if "phi" in key or "Phi" in key:
                periodic_inputs.append(key)
            else:
                nonperiodic_inputs.append(key)
    cur_sec["nonperiodic_columns"] = ConfigFileUtils.serialize_list(nonperiodic_inputs, lambda x: x)
    cur_sec["periodic_columns"] = ConfigFileUtils.serialize_list(periodic_inputs, lambda x: x)

In [22]:
def convert_varname(raw):
    """Return `raw` with round brackets turned into square ones: "(" -> "[" and ")" -> "]"."""
    return raw.replace('(', '[').replace(')', ']')

In [23]:
# Running summary table: the cells below add one row of variable scores per discriminant.
# Re-running one of those cells adds its row again, so re-run this cell first to start over.
df = pd.DataFrame()

In [24]:
# Location of the input-configuration file; the commented-out line is an alternative file name.
# NOTE(review): out_path is not used by any of the cells visible here -- presumably the
# configuration is written to it further down; confirm.
out_dir = "/data_CMS/cms/wind/InputConfigurations/"
#out_path = os.path.join(out_dir, "inclusive_99_fullmassrange_ZZMask.conf")
out_path = os.path.join(out_dir, "background_ZXmixed.conf")
# a variable is only written to the configuration if its normalised score exceeds this value
# (see append_variables)
threshold_fscore = 0.01

In [25]:
# Configuration handler that receives one section per discriminant through append_variables.
confhandler = ConfigFileHandler()

In [26]:
%%capture
fig, implist = plot_variables([("ggH", "bck")])

In [27]:
# Map every ranked variable (with "(" / ")" converted to "[" / "]") to a one-element list holding
# its score, tag the dict with the discriminant name and add it to the summary table.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ggH_bkg_ML"
# pd.concat replaces DataFrame.append, which newer pandas releases no longer provide
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [28]:
append_variables(confhandler, impdict, threshold_fscore)

In [29]:
implist

[('Z1Mass', 0.0918911400621262),
 ('Z2Mass', 0.07695071630580927),
 ('Z2Pt', 0.06829245676665585),
 ('Z1Pt', 0.06220733460058417),
 ('ZZPt', 0.0578955908943391),
 ('ZZMassErr', 0.057860818767675834),
 ('D_bkg_ME', 0.05563540266122676),
 ('PFMET', 0.045203764662246744),
 ('helcosthetaZ2', 0.03946636376280773),
 ('costhetastar', 0.038817284065093424),
 ('helcosthetaZ1', 0.03754230608744031),
 ('helphi', 0.03735685474523622),
 ('ZZEta', 0.034065093421113635),
 ('ZZPhi', 0.03340442301451157),
 ('JetPt(JetPt|0)', 0.03132968612360332),
 ('phistarZ1', 0.030367657285919607),
 ('xistar', 0.02846678102832769),
 ('D_VBF1j_ggH_ME', 0.02228893319115397),
 ('Z2Flav', 0.02220779822893968),
 ('ZZMass_masked', 0.01831332004265381),
 ('D_WHh_ZHh_ME', 0.018011961611572164),
 ('JetEta(JetPt|0)', 0.017861282396031342),
 ('JetPhi(JetPt|0)', 0.017594696091612964),
 ('Z1Flav', 0.007928044879224813),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.005354907506143076),
 ('D_ZHh_ggH_ME', 0.005354907506143076),
 ('JetPt(Je

In [30]:
%%capture
fig, implist = plot_variables([("VBF", "bck")])

In [31]:
# Map every ranked variable (with "(" / ")" converted to "[" / "]") to a one-element list holding
# its score, tag the dict with the discriminant name and add it to the summary table.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_bkg_ML"
# pd.concat replaces DataFrame.append, which newer pandas releases no longer provide
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [32]:
append_variables(confhandler, impdict, threshold_fscore)

In [33]:
implist

[('Z1Mass', 0.07739840654546495),
 ('Z2Pt', 0.06385261260745806),
 ('Z2Mass', 0.06356653459398377),
 ('D_bkg_ME', 0.059389795597259376),
 ('ZZPt', 0.05778775872180344),
 ('ZZMassErr', 0.056986740284075466),
 ('PFMET', 0.05010656406001917),
 ('Z1Pt', 0.04650198109024331),
 ('JetPt(JetPt|0)', 0.03824863040151049),
 ('costhetastar', 0.03694697544020254),
 ('helphi', 0.034372273318934075),
 ('helcosthetaZ2', 0.03365707828524839),
 ('ZZEta', 0.032727324741456995),
 ('JetEta(JetPt|0)', 0.032627197436741),
 ('D_VBF1j_ggH_ME', 0.028736536453490866),
 ('Z2Flav', 0.02820729212856346),
 ('xistar', 0.026605255253107522),
 ('ZZPhi', 0.026147530431548684),
 ('phistarZ1', 0.025346511993820715),
 ('helcosthetaZ1', 0.02448827795339789),
 ('D_WHh_ZHh_ME', 0.020969518387664317),
 ('ZZMass_masked', 0.017894179742815867),
 ('JetPhi(JetPt|0)', 0.016835691092961052),
 ('D_VBF2j_ggH_ME', 0.01571998684041138),
 ('JetEta(JetPt|1)', 0.010427543591137303),
 ('D_ZHh_ggH_ME', 0.009740956358799045),
 ('Z1Flav', 0.00

In [34]:
%%capture
fig, implist = plot_variables([("ZHl", "bck")])

In [35]:
# Map every ranked variable (with "(" / ")" converted to "[" / "]") to a one-element list holding
# its score, tag the dict with the discriminant name and add it to the summary table.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHl_bkg_ML"
# pd.concat replaces DataFrame.append, which newer pandas releases no longer provide
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [36]:
append_variables(confhandler, impdict, threshold_fscore)

In [37]:
implist

[('Z2Mass', 0.07316437053174189),
 ('Z2Pt', 0.06285477585836581),
 ('ZZPt', 0.06041545211868307),
 ('Z1Mass', 0.059602344205455494),
 ('PFMET', 0.05905004449081035),
 ('D_bkg_ME', 0.05300543094719401),
 ('ZZMassErr', 0.05187014820042343),
 ('ZZEta', 0.04415329385413151),
 ('helcosthetaZ2', 0.041315086987205055),
 ('JetPt(JetPt|0)', 0.04002638765303305),
 ('costhetastar', 0.03705010585744531),
 ('Z1Pt', 0.035546623300911295),
 ('JetEta(JetPt|0)', 0.029164493265011814),
 ('helcosthetaZ1', 0.028044552176981374),
 ('ZZPhi', 0.02772237734343837),
 ('D_VBF1j_ggH_ME', 0.026801877819029794),
 ('ZZMass_masked', 0.026541069620447364),
 ('phistarZ1', 0.025313736921235924),
 ('helphi', 0.02399435426958363),
 ('xistar', 0.02370286275352091),
 ('Z2Flav', 0.021785155411003038),
 ('JetPhi(JetPt|0)', 0.01799576570218772),
 ('D_WHh_ggH_ME', 0.013638734619987113),
 ('ExtraLepPt(ExtraLepPt|0)', 0.013592709643766684),
 ('JetPhi(JetPt|1)', 0.012534135190696818),
 ('D_VBF2j_ggH_ME', 0.011276119174005093),
 (

In [38]:
%%capture
fig, implist = plot_variables([("ZHh", "bck")])

In [39]:
# Map every ranked variable (with "(" / ")" converted to "[" / "]") to a one-element list holding
# its score, tag the dict with the discriminant name and add it to the summary table.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHh_bkg_ML"
# pd.concat replaces DataFrame.append, which newer pandas releases no longer provide
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [40]:
append_variables(confhandler, impdict, threshold_fscore)

In [41]:
implist

[('Z1Mass', 0.09117786697742673),
 ('Z2Pt', 0.088933488713367),
 ('ZZMassErr', 0.06366265268246364),
 ('D_bkg_ME', 0.05563468427640381),
 ('PFMET', 0.050800638784582845),
 ('Z2Mass', 0.04954896629116492),
 ('helcosthetaZ2', 0.048923130044455955),
 ('Z2Flav', 0.045642884889291725),
 ('ZZPt', 0.045642884889291725),
 ('Z1Pt', 0.039794553066597606),
 ('JetPt(JetPt|0)', 0.0352842159782468),
 ('ZZMass_masked', 0.034679960291769174),
 ('ZZEta', 0.028270533903060124),
 ('D_WHh_ZHh_ME', 0.0254866416332168),
 ('helcosthetaZ1', 0.025098191549052615),
 ('costhetastar', 0.02483922482627649),
 ('D_ZHh_ggH_ME', 0.024429194181880963),
 ('JetEta(JetPt|0)', 0.022098493676895852),
 ('D_WHh_ggH_ME', 0.0214294963097242),
 ('D_VBF1j_ggH_ME', 0.021407915749492856),
 ('helphi', 0.020997885105097328),
 ('phistarZ1', 0.020782079502783893),
 ('xistar', 0.019487245888903278),
 ('ZZPhi', 0.019379343087746557),
 ('JetPhi(JetPt|0)', 0.012171435970477793),
 ('JetPhi(JetPt|1)', 0.0074237127195822),
 ('D_VBF2j_ggH_ME',

In [42]:
%%capture
fig, implist = plot_variables([("WHl", "bck")])

In [43]:
# Map every ranked variable (with "(" / ")" converted to "[" / "]") to a one-element list holding
# its score, tag the dict with the discriminant name and add it to the summary table.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_bkg_ML"
# pd.concat replaces DataFrame.append, which newer pandas releases no longer provide
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [44]:
append_variables(confhandler, impdict, threshold_fscore)

In [45]:
implist

[('Z2Pt', 0.07856231154849332),
 ('Z1Mass', 0.07210640226419282),
 ('Z2Mass', 0.07023807321630071),
 ('PFMET', 0.06507704545034129),
 ('ZZMassErr', 0.06454059453559999),
 ('D_bkg_ME', 0.05913908877337724),
 ('ZZPt', 0.053275125326032666),
 ('helcosthetaZ2', 0.052202223496550064),
 ('Z2Flav', 0.04167668658317764),
 ('ZZMass_masked', 0.04052979152407555),
 ('Z1Pt', 0.03538726206552101),
 ('costhetastar', 0.03470282469154072),
 ('ZZEta', 0.034184872084204294),
 ('ExtraLepPt(ExtraLepPt|0)', 0.02946780369596182),
 ('helcosthetaZ1', 0.02909783754786437),
 ('helphi', 0.02608261344087016),
 ('JetPt(JetPt|0)', 0.025916128674226307),
 ('phistarZ1', 0.02044062968238406),
 ('xistar', 0.02029264322314508),
 ('ZZPhi', 0.019182744778852734),
 ('JetEta(JetPt|0)', 0.017110934349507022),
 ('JetPhi(JetPt|0)', 0.014003218705488448),
 ('D_WHh_ZHh_ME', 0.0110804861355186),
 ('D_VBF1j_ggH_ME', 0.010044580920845743),
 ('D_VBF2j_WHh_ME', 0.00995208938382138),
 ('ExtraLepEta(ExtraLepPt|0)', 0.0092676520098411),

In [46]:
%%capture
fig, implist = plot_variables([("WHh", "bck")])

In [47]:
# Map every ranked variable (with "(" / ")" converted to "[" / "]") to a one-element list holding
# its score, tag the dict with the discriminant name and add it to the summary table.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHh_bkg_ML"
# pd.concat replaces DataFrame.append, which newer pandas releases no longer provide
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [48]:
append_variables(confhandler, impdict, threshold_fscore)

In [49]:
implist

[('Z1Mass', 0.07594578668238067),
 ('Z2Pt', 0.06787271655863288),
 ('Z2Mass', 0.061661756040070716),
 ('ZZMassErr', 0.06064820271066588),
 ('D_bkg_ME', 0.059964643488509134),
 ('ZZPt', 0.055120801414260456),
 ('PFMET', 0.046434885091337655),
 ('Z1Pt', 0.04089569829110194),
 ('JetPt(JetPt|0)', 0.03849145550972304),
 ('ZZEta', 0.03705362404242781),
 ('costhetastar', 0.0326694166175604),
 ('helcosthetaZ2', 0.031997642899233944),
 ('phistarZ1', 0.030430170889805538),
 ('JetEta(JetPt|0)', 0.030088391278727165),
 ('Z2Flav', 0.029723040659988216),
 ('xistar', 0.029263406010606955),
 ('helcosthetaZ1', 0.028461991750147318),
 ('helphi', 0.025857395403653505),
 ('ZZPhi', 0.023806717737183264),
 ('D_WHh_ZHh_ME', 0.02347672362993518),
 ('D_VBF1j_ggH_ME', 0.02121390689451974),
 ('ZZMass_masked', 0.018467884502062465),
 ('JetPhi(JetPt|0)', 0.01730111962286388),
 ('JetPt(JetPt|1)', 0.013447259870359458),
 ('D_WHh_ggH_ME', 0.011667648791985858),
 ('D_VBF2j_ggH_ME', 0.01033588685916323),
 ('JetPhi(JetP

In [50]:
%%capture
fig, implist = plot_variables([("ZHMET", "bck")])

In [51]:
# Map every ranked variable (with "(" / ")" converted to "[" / "]") to a one-element list holding
# its score, tag the dict with the discriminant name and add it to the summary table.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHMET_bkg_ML"
# pd.concat replaces DataFrame.append, which newer pandas releases no longer provide
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [52]:
append_variables(confhandler, impdict, threshold_fscore)

In [53]:
implist

[('Z1Mass', 0.10670761431217705),
 ('Z2Pt', 0.08248025738520035),
 ('ZZPt', 0.0787754704104514),
 ('PFMET', 0.07765428487861947),
 ('ZZMassErr', 0.07331578434240031),
 ('D_bkg_ME', 0.06639368236326411),
 ('Z2Flav', 0.06190894023593643),
 ('Z2Mass', 0.057570439699717264),
 ('Z1Pt', 0.044262454908842744),
 ('JetPt(JetPt|0)', 0.03183191966461928),
 ('costhetastar', 0.028127132689870334),
 ('ZZMass_masked', 0.0268597055669299),
 ('helcosthetaZ2', 0.024129862532904358),
 ('D_WHh_ZHh_ME', 0.023398654577361802),
 ('xistar', 0.02330116018328946),
 ('ZZPhi', 0.02178999707516818),
 ('helphi', 0.020327581164083064),
 ('helcosthetaZ1', 0.020083845178902212),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.01969386760261285),
 ('phistarZ1', 0.019060154041142634),
 ('ZZEta', 0.016671541386370285),
 ('D_VBF2j_WHh_ME', 0.010090669786487276),
 ('D_ZHh_ggH_ME', 0.00931071463390855),
 ('JetPhi(JetPt|0)', 0.00926196743687238),
 ('Z1Flav', 0.008628253875402165),
 ('D_VBF1j_ggH_ME', 0.00809203470800429),
 ('JetEta(Je

In [54]:
%%capture
fig, implist = plot_variables([("ttHh", "bck")])

In [55]:
# Map every ranked variable (with "(" / ")" converted to "[" / "]") to a one-element list holding
# its score, tag the dict with the discriminant name and add it to the summary table.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHh_bkg_ML"
# pd.concat replaces DataFrame.append, which newer pandas releases no longer provide
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [56]:
append_variables(confhandler, impdict, threshold_fscore)

In [57]:
implist

[('Z2Pt', 0.10117451655000594),
 ('PFMET', 0.06624747894174872),
 ('ZZMassErr', 0.061312136671016725),
 ('Z2Flav', 0.05884446553565073),
 ('JetPt(JetPt|0)', 0.04669593071538735),
 ('ZZMass_masked', 0.04318424486890497),
 ('Z1Mass', 0.04287578597698422),
 ('Z2Mass', 0.0422351405860719),
 ('D_WHh_ZHh_ME', 0.04133349151738047),
 ('D_bkg_ME', 0.03575750385573615),
 ('JetPt(JetPt|1)', 0.03412029896784909),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.033977933325424134),
 ('JetPhi(JetPt|0)', 0.025673270850634713),
 ('D_WHh_ggH_ME', 0.02470043896073081),
 ('ZZPt', 0.024415707675880888),
 ('JetPt(JetPt|2)', 0.023490331000118637),
 ('JetEta(JetPt|0)', 0.021449756792027524),
 ('ZZEta', 0.020832839008186024),
 ('JetPt(JetPt|3)', 0.020666745758690236),
 ('Z1Pt', 0.017534701625341084),
 ('costhetastar', 0.015589037845533277),
 ('JetPhi(JetPt|1)', 0.015090758097045912),
 ('phistarZ1', 0.014711116383912683),
 ('helphi', 0.014450112706133587),
 ('helcosthetaZ2', 0.01411792620714201),
 ('D_ZHh_ggH_ME', 0.013

In [58]:
%%capture
fig, implist = plot_variables([("ttHl", "bck")])

In [59]:
# Map every ranked variable (with "(" / ")" converted to "[" / "]") to a one-element list holding
# its score, tag the dict with the discriminant name and add it to the summary table.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHl_bkg_ML"
# pd.concat replaces DataFrame.append, which newer pandas releases no longer provide
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [60]:
append_variables(confhandler, impdict, threshold_fscore)

In [61]:
implist

[('Z2Pt', 0.08650875386199794),
 ('PFMET', 0.07056097413218634),
 ('ZZMassErr', 0.0699248803537893),
 ('Z2Mass', 0.0621705942933301),
 ('Z1Mass', 0.04917610710607621),
 ('JetPt(JetPt|0)', 0.04052826073786878),
 ('Z2Flav', 0.04024050402859393),
 ('D_bkg_ME', 0.03942266917065487),
 ('ZZPt', 0.035409220330768765),
 ('Z1Pt', 0.0296086508753862),
 ('helcosthetaZ2', 0.02756406373053856),
 ('D_VBF2j_ggH_ME', 0.024504755558248017),
 ('D_WHh_ZHh_ME', 0.022899376022293572),
 ('ExtraLepPt(ExtraLepPt|0)', 0.022460168413400376),
 ('phistarZ1', 0.021702913915308656),
 ('costhetastar', 0.021112255406797117),
 ('ZZEta', 0.020521596898285577),
 ('helcosthetaZ1', 0.020415581268552734),
 ('xistar', 0.0204004361785909),
 ('JetEta(JetPt|0)', 0.020385291088629066),
 ('ZZMass_masked', 0.019113103531834978),
 ('JetPt(JetPt|1)', 0.01832555885381959),
 ('D_VBF1j_ggH_ME', 0.018310413763857756),
 ('JetPhi(JetPt|0)', 0.018128672684315745),
 ('ZZPhi', 0.017719755255346217),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.017

In [None]:
%%capture
fig, implist = plot_variables([("VBF", "ggH")])

In [None]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_ggH_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [None]:
append_variables(confhandler, impdict, threshold_fscore)

In [None]:
implist

In [30]:
%%capture
fig, implist = plot_variables([("WHh", "ggH")])

In [31]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHh_ggH_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [32]:
append_variables(confhandler, impdict, threshold_fscore)

In [33]:
implist

[('ZZEta', 0.0867813693705261),
 ('ZZPt', 0.08398870092767952),
 ('JetEta(JetPt|0)', 0.08140468012711456),
 ('ZZMassErr', 0.061567104291721506),
 ('JetPt(JetPt|0)', 0.0602991686193946),
 ('Z1Pt', 0.05580521940102077),
 ('PFMET', 0.05530767502327224),
 ('Z2Mass', 0.04770006098931082),
 ('Z1Mass', 0.04755561262157738),
 ('Z2Pt', 0.04468269508554553),
 ('ZZPhi', 0.04426539980098225),
 ('D_WHh_ggH_ME', 0.028697075723044328),
 ('JetPhi(JetPt|0)', 0.028584726992584984),
 ('JetPt(JetPt|1)', 0.028584726992584984),
 ('D_VBF1j_ggH_ME', 0.028071132796199404),
 ('D_VBF2j_ggH_ME', 0.025599460726093794),
 ('JetEta(JetPt|2)', 0.024620421789233782),
 ('D_WHh_ZHh_ME', 0.020784515134978976),
 ('JetEta(JetPt|1)', 0.02067216640451963),
 ('D_ZHh_ggH_ME', 0.01635476519115334),
 ('JetPt(JetPt|2)', 0.01627451609796809),
 ('JetPhi(JetPt|1)', 0.014139890219240522),
 ('D_VBF2j_ZHh_ME', 0.013353449106025102),
 ('JetPhi(JetPt|2)', 0.011973164703238854),
 ('D_VBF2j_WHh_ME', 0.011315122139119827),
 ('JetEta(JetPt|3)

In [34]:
%%capture
fig, implist = plot_variables([("ZHh", "ggH")])

In [35]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHh_ggH_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [36]:
append_variables(confhandler, impdict, threshold_fscore)

In [37]:
implist

[('ZZPt', 0.08227995166837067),
 ('ZZEta', 0.0685704991170183),
 ('ZZPhi', 0.06494562691699972),
 ('JetEta(JetPt|0)', 0.06471326331443443),
 ('PFMET', 0.0624825727298076),
 ('JetPt(JetPt|0)', 0.0591597732131239),
 ('Z1Pt', 0.05744028255414072),
 ('Z2Mass', 0.05507017380797472),
 ('Z1Mass', 0.05021377451436007),
 ('ZZMassErr', 0.0435217027604796),
 ('Z2Pt', 0.04331257551817083),
 ('D_ZHh_ggH_ME', 0.03276326796170648),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.03218235895529324),
 ('D_VBF1j_ggH_ME', 0.03132261362580165),
 ('JetPt(JetPt|1)', 0.028557486755274655),
 ('JetEta(JetPt|1)', 0.027604795984756948),
 ('D_VBF2j_ggH_ME', 0.022608978529603124),
 ('JetPhi(JetPt|0)', 0.020587415187285064),
 ('JetEta(JetPt|2)', 0.018844688168045357),
 ('JetPt(JetPt|2)', 0.016985779347523003),
 ('D_WHh_ZHh_ME', 0.015010688725718004),
 ('D_WHh_ggH_ME', 0.01357003438981318),
 ('JetPhi(JetPt|2)', 0.013500325309043592),
 ('D_VBF2j_WHh_ME', 0.011687889209034297),
 ('JetPhi(JetPt|1)', 0.01031694395389906),
 ('Extr

In [38]:
%%capture
fig, implist = plot_variables([("WHh", "ZHh")])

In [39]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHh_ZHh_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [40]:
append_variables(confhandler, impdict, threshold_fscore)

In [41]:
implist

[('D_WHh_ZHh_ME', 0.05863246905975248),
 ('PFMET', 0.05790446323570589),
 ('ZZMassErr', 0.0564484515876127),
 ('Z2Mass', 0.05129641037128297),
 ('Z1Mass', 0.04720837766702134),
 ('JetPhi(JetPt|1)', 0.046984375875007),
 ('JetEta(JetPt|0)', 0.04692837542700341),
 ('JetPt(JetPt|0)', 0.04659237273898191),
 ('JetPhi(JetPt|0)', 0.04356834854678837),
 ('Z1Pt', 0.042336338690709525),
 ('JetEta(JetPt|1)', 0.041944335554684437),
 ('D_WHh_ggH_ME', 0.03903231225849807),
 ('ZZEta', 0.038752310018480146),
 ('Z2Pt', 0.036344290754326034),
 ('JetPt(JetPt|1)', 0.03544828358626869),
 ('ZZPt', 0.03354426835414683),
 ('ZZPhi', 0.032368258946071565),
 ('D_ZHh_ggH_ME', 0.03141625133001064),
 ('JetPt(JetPt|2)', 0.026432211457691663),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.026096208769670157),
 ('JetPhi(JetPt|2)', 0.024864198913591307),
 ('D_VBF2j_ggH_ME', 0.0213921711373691),
 ('D_VBF2j_WHh_ME', 0.020664165313322505),
 ('JetEta(JetPt|2)', 0.019824158593268745),
 ('D_VBF2j_ZHh_ME', 0.01624012992103937),
 ('Jet

In [42]:
%%capture
fig, implist = plot_variables([("VBF", "WHh")])

In [43]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_WHh_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [44]:
append_variables(confhandler, impdict, threshold_fscore)

In [45]:
implist

[('JetEta(JetPt|0)', 0.11613351877607789),
 ('ZZEta', 0.0882012053778396),
 ('JetEta(JetPt|2)', 0.06957000463606862),
 ('JetPt(JetPt|0)', 0.05919680111265647),
 ('D_VBF2j_ggH_ME', 0.05542999536393139),
 ('ZZPt', 0.039087853500231806),
 ('JetEta(JetPt|1)', 0.03888502549837738),
 ('Z1Mass', 0.03453871117292536),
 ('Z1Pt', 0.034191006026889195),
 ('PFMET', 0.033872276309689385),
 ('D_ZHh_ggH_ME', 0.03294506258692629),
 ('Z2Pt', 0.03268428372739916),
 ('JetPt(JetPt|2)', 0.03259735744089012),
 ('JetPhi(JetPt|0)', 0.03167014371812703),
 ('ZZMassErr', 0.030627028280018544),
 ('D_WHh_ggH_ME', 0.02961288827074641),
 ('D_VBF1j_ggH_ME', 0.029381084840055634),
 ('Z2Mass', 0.028975428836346778),
 ('ZZPhi', 0.028019239684747333),
 ('D_WHh_ZHh_ME', 0.023991655076495134),
 ('D_VBF2j_ZHh_ME', 0.02005099675475197),
 ('JetPt(JetPt|1)', 0.019906119610570237),
 ('JetPhi(JetPt|1)', 0.016110338433008808),
 ('D_VBF2j_WHh_ME', 0.014516689847009736),
 ('JetPt(JetPt|3)', 0.010952712100139083),
 ('JetPhi(JetPt|2)

In [46]:
%%capture
fig, implist = plot_variables([("VBF", "ZHh")])

In [47]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_ZHh_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [48]:
append_variables(confhandler, impdict, threshold_fscore)

In [49]:
implist

[('JetEta(JetPt|0)', 0.09797265440829797),
 ('ZZEta', 0.08503064592173504),
 ('JetEta(JetPt|2)', 0.06115040075436115),
 ('D_VBF2j_ggH_ME', 0.057543611504007545),
 ('PFMET', 0.050424328147100426),
 ('JetPt(JetPt|0)', 0.043446487505893444),
 ('JetEta(JetPt|1)', 0.040594059405940595),
 ('Z2Mass', 0.04012258368694012),
 ('D_ZHh_ggH_ME', 0.03802451673738803),
 ('JetPhi(JetPt|0)', 0.0338991041961339),
 ('ZZPt', 0.03378123526638378),
 ('D_WHh_ZHh_ME', 0.032013201320132016),
 ('Z1Mass', 0.03154172560113154),
 ('ZZMassErr', 0.03026874115983027),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.030174446016030174),
 ('JetPt(JetPt|2)', 0.029655822725129656),
 ('JetPt(JetPt|1)', 0.027274870344177273),
 ('Z1Pt', 0.02633191890617633),
 ('Z2Pt', 0.025907590759075907),
 ('D_VBF2j_ZHh_ME', 0.02461103253182461),
 ('ZZPhi', 0.023196605374823195),
 ('D_WHh_ggH_ME', 0.02105139085337105),
 ('D_VBF2j_WHh_ME', 0.02050919377652051),
 ('JetPhi(JetPt|2)', 0.02010843941537011),
 ('JetPhi(JetPt|1)', 0.019000471475719),
 ('D_

In [50]:
%%capture
fig, implist = plot_variables([("WHl", "ggH")])

In [51]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_ggH_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [52]:
append_variables(confhandler, impdict, threshold_fscore)

In [53]:
implist

[('PFMET', 0.12065034797216223),
 ('Z1Mass', 0.08549316054715622),
 ('ZZPt', 0.08247340212782978),
 ('Z1Pt', 0.062375009999200065),
 ('Z2Pt', 0.061555075593952485),
 ('Z2Mass', 0.06121510279177666),
 ('ZZMassErr', 0.060455163586913045),
 ('ZZEta', 0.06003519718422526),
 ('ZZPhi', 0.05789536837053036),
 ('JetPt(JetPt|0)', 0.05411567074634029),
 ('D_VBF1j_ggH_ME', 0.05239580833533317),
 ('JetEta(JetPt|0)', 0.036137109031277495),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03505719542436605),
 ('JetPhi(JetPt|0)', 0.03283737301015919),
 ('ZZMass_masked', 0.026597872170226383),
 ('ExtraLepEta(ExtraLepPt|0)', 0.011019118470522359),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.010219182465402767),
 ('JetPhi(JetPt|1)', 0.009439244860411167),
 ('D_WHh_ZHh_ME', 0.00911927045836333),
 ('JetEta(JetPt|1)', 0.008459323254139669),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.007719382449404047),
 ('D_VBF2j_ggH_ME', 0.007599392048636109),
 ('JetPt(JetPt|1)', 0.00753939684825214),
 ('D_VBF2j_WHh_ME', 0.0042596592272618194),
 ('D_WHh

In [54]:
%%capture
fig, implist = plot_variables([("WHl", "VBF")])

In [55]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_VBF_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [56]:
append_variables(confhandler, impdict, threshold_fscore)

In [57]:
implist

[('PFMET', 0.10777479892761394),
 ('ZZEta', 0.0697646708370569),
 ('JetEta(JetPt|0)', 0.06082812034554662),
 ('JetPt(JetPt|0)', 0.05637970410088373),
 ('Z1Mass', 0.05298381491410982),
 ('D_VBF1j_ggH_ME', 0.05296395591301758),
 ('ZZPt', 0.05070002978850164),
 ('Z1Pt', 0.04815807764869427),
 ('Z2Pt', 0.04291530136034157),
 ('ZZMassErr', 0.04204150531228279),
 ('Z2Mass', 0.04088968324893258),
 ('ExtraLepEta(ExtraLepPt|0)', 0.04057193923145666),
 ('ZZPhi', 0.037156191043590504),
 ('ExtraLepPt(ExtraLepPt|0)', 0.036759011021745606),
 ('JetPhi(JetPt|0)', 0.0343759308906762),
 ('D_VBF2j_ggH_ME', 0.03189355575414557),
 ('ZZMass_masked', 0.0279416145367888),
 ('D_WHh_ZHh_ME', 0.02182504220037732),
 ('JetEta(JetPt|1)', 0.018508589017972397),
 ('JetPt(JetPt|1)', 0.01805183199285076),
 ('JetEta(JetPt|2)', 0.017058881938238506),
 ('D_VBF2j_ZHh_ME', 0.016800714924039322),
 ('JetPhi(JetPt|1)', 0.01181610564988581),
 ('D_ZHh_ggH_ME', 0.010227385562506205),
 ('D_VBF2j_WHh_ME', 0.008658524476218846),
 ('

In [58]:
%%capture
fig, implist = plot_variables([("WHl", "WHh")])

In [59]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_WHh_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [60]:
append_variables(confhandler, impdict, threshold_fscore)

In [61]:
implist

[('PFMET', 0.14229144667370644),
 ('JetPt(JetPt|0)', 0.0683078141499472),
 ('ZZPt', 0.06127903907074973),
 ('ZZMassErr', 0.05811114044350581),
 ('JetEta(JetPt|0)', 0.05154435058078141),
 ('Z1Mass', 0.05101636747624076),
 ('ExtraLepPt(ExtraLepPt|0)', 0.05081837381203801),
 ('ZZMass_masked', 0.047683474128827875),
 ('Z2Mass', 0.04454857444561774),
 ('Z1Pt', 0.03920274551214361),
 ('Z2Pt', 0.037519799366420276),
 ('ZZEta', 0.031019007391763463),
 ('ZZPhi', 0.0308870116156283),
 ('D_VBF1j_ggH_ME', 0.027917106652587117),
 ('JetPhi(JetPt|0)', 0.027092133051742344),
 ('D_WHh_ggH_ME', 0.0261681626187962),
 ('D_ZHh_ggH_ME', 0.02286826821541711),
 ('ExtraLepEta(ExtraLepPt|0)', 0.018875395987328406),
 ('D_VBF2j_WHh_ME', 0.017456441393875397),
 ('JetPhi(JetPt|1)', 0.017093453009503694),
 ('JetPt(JetPt|1)', 0.01616948257655755),
 ('JetEta(JetPt|1)', 0.015542502639915523),
 ('JetPt(JetPt|2)', 0.014849524815205913),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.012803590285110876),
 ('D_WHh_ZHh_ME', 0.01197861668

In [62]:
%%capture
fig, implist = plot_variables([("WHl", "ZHh")])

In [63]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_ZHh_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [64]:
append_variables(confhandler, impdict, threshold_fscore)

In [65]:
implist

[('PFMET', 0.1491066252042376),
 ('Z2Pt', 0.05544721446265746),
 ('D_VBF1j_ggH_ME', 0.05534180150740526),
 ('ExtraLepPt(ExtraLepPt|0)', 0.05354978126811785),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.04891161123702103),
 ('JetPt(JetPt|0)', 0.04801560111737733),
 ('ZZMass_masked', 0.04696147156485532),
 ('Z1Mass', 0.04480050598218521),
 ('JetEta(JetPt|0)', 0.0429557792652717),
 ('JetPhi(JetPt|0)', 0.0425341274442629),
 ('ZZMassErr', 0.04079481368260159),
 ('ZZPt', 0.040583987772097194),
 ('ZZEta', 0.04047857481684499),
 ('Z1Pt', 0.040056922995836186),
 ('ZZPhi', 0.03826490275654878),
 ('D_ZHh_ggH_ME', 0.030411637590259843),
 ('Z2Mass', 0.03009539872450324),
 ('D_WHh_ggH_ME', 0.01944869024403099),
 ('D_VBF2j_WHh_ME', 0.015495704422073472),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.013545564749907764),
 ('ExtraLepEta(ExtraLepPt|0)', 0.01296579349602066),
 ('JetPt(JetPt|1)', 0.012333315764507458),
 ('nExtraLep', 0.012175196331629157),
 ('D_WHh_ZHh_ME', 0.009856111316080746),
 ('JetPhi(JetPt|1)', 0.0080

In [66]:
%%capture
fig, implist = plot_variables([("WHl", "ZHl")])

In [67]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_ZHl_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [68]:
append_variables(confhandler, impdict, threshold_fscore)

In [69]:
implist

[('PFMET', 0.10971298789310768),
 ('Z1Mass', 0.09722145905452681),
 ('Z2Mass', 0.08606700001831602),
 ('ZZPt', 0.07584665824129531),
 ('Z2Pt', 0.07110280784658497),
 ('Z1Pt', 0.06886825283440481),
 ('ZZEta', 0.053812480539223766),
 ('ZZMassErr', 0.04525889700899317),
 ('JetPt(JetPt|0)', 0.043372346465923035),
 ('ZZMass_masked', 0.042035276663552945),
 ('ZZPhi', 0.03509350330604245),
 ('ExtraLepPt(ExtraLepPt|0)', 0.026283495430151838),
 ('JetEta(JetPt|0)', 0.0256973826400718),
 ('JetPhi(JetPt|0)', 0.02373756799824166),
 ('ExtraLepEta(ExtraLepPt|0)', 0.022638606516841586),
 ('D_VBF1j_ggH_ME', 0.019561514368921368),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.01928677399857135),
 ('nExtraLep', 0.013810282616260966),
 ('D_ZHh_ggH_ME', 0.012729637159550892),
 ('JetPhi(JetPt|1)', 0.011539095554700807),
 ('JetEta(JetPt|1)', 0.0100005494807407),
 ('D_WHh_ZHh_ME', 0.008810007875890617),
 ('D_VBF2j_ggH_ME', 0.008443687382090592),
 ('JetPt(JetPt|1)', 0.007912522666080554),
 ('nCleanedJetsPt30BTagged_bTagSF'

In [70]:
%%capture
fig, implist = plot_variables([("WHl", "ZHMET")])

In [71]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_ZHMET_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [72]:
append_variables(confhandler, impdict, threshold_fscore)

In [73]:
implist

[('PFMET', 0.10432389937106919),
 ('Z1Mass', 0.09347484276729559),
 ('ZZPt', 0.08852201257861635),
 ('ZZPhi', 0.07963836477987421),
 ('Z1Pt', 0.05911949685534591),
 ('Z2Pt', 0.05731132075471698),
 ('ZZEta', 0.05628930817610063),
 ('Z2Mass', 0.05566037735849057),
 ('ExtraLepPt(ExtraLepPt|0)', 0.05479559748427673),
 ('ExtraLepEta(ExtraLepPt|0)', 0.04504716981132075),
 ('JetPt(JetPt|0)', 0.04355345911949685),
 ('ZZMassErr', 0.039072327044025155),
 ('JetPhi(JetPt|0)', 0.030660377358490566),
 ('JetEta(JetPt|0)', 0.029716981132075472),
 ('ZZMass_masked', 0.0264937106918239),
 ('D_VBF1j_ggH_ME', 0.021776729559748426),
 ('nExtraLep', 0.014229559748427673),
 ('JetPhi(JetPt|1)', 0.013522012578616353),
 ('ExtraLepPhi(ExtraLepPt|1)', 0.013364779874213837),
 ('D_VBF2j_WHh_ME', 0.009669811320754717),
 ('JetPt(JetPt|1)', 0.009591194968553459),
 ('JetEta(JetPt|1)', 0.008254716981132075),
 ('D_VBF2j_ggH_ME', 0.006603773584905661),
 ('D_ZHh_ggH_ME', 0.006289308176100629),
 ('ExtraLepPhi(ExtraLepPt|0)', 

In [74]:
%%capture
fig, implist = plot_variables([("WHl", "ttHh")])

In [75]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_ttHh_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [76]:
append_variables(confhandler, impdict, threshold_fscore)

In [77]:
implist

[('PFMET', 0.12430020625499853),
 ('JetPt(JetPt|0)', 0.07261017805278444),
 ('ExtraLepPt(ExtraLepPt|0)', 0.06802205665698531),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.062129056699078165),
 ('Z1Mass', 0.043987035400092606),
 ('ZZMassErr', 0.04285052826535337),
 ('ZZEta', 0.035652649745338216),
 ('Z2Mass', 0.03434777118323021),
 ('JetPt(JetPt|1)', 0.03224312834112051),
 ('ZZMass_masked', 0.029254535505324746),
 ('Z2Pt', 0.028412678368480868),
 ('JetEta(JetPt|1)', 0.02811802837058551),
 ('D_WHh_ggH_ME', 0.027907564086374542),
 ('ZZPhi', 0.02757082123163699),
 ('JetPt(JetPt|2)', 0.027276171233741636),
 ('JetEta(JetPt|0)', 0.0261396640990024),
 ('JetPhi(JetPt|0)', 0.02601338552847582),
 ('JetPt(JetPt|3)', 0.024455949825314644),
 ('ZZPt', 0.02289851412215347),
 ('D_WHh_ZHh_ME', 0.022098749842151787),
 ('JetEta(JetPt|2)', 0.019615271288462348),
 ('Z1Pt', 0.01932062129056699),
 ('D_VBF1j_ggH_ME', 0.01919434272004041),
 ('D_ZHh_ggH_ME', 0.017510628446352653),
 ('D_VBF2j_ggH_ME', 0.016500399882140

In [78]:
%%capture
fig, implist = plot_variables([("WHl", "ttHl")])

In [79]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_ttHl_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [80]:
append_variables(confhandler, impdict, threshold_fscore)

In [81]:
implist

[('ZZMass_masked', 0.09034457567271399),
 ('JetPt(JetPt|0)', 0.06995007914282235),
 ('PFMET', 0.06733227809570194),
 ('ZZEta', 0.0647753561427006),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.06069645683672227),
 ('ZZPt', 0.057774260319006454),
 ('Z2Pt', 0.04803360525995373),
 ('Z1Mass', 0.0392061366126872),
 ('JetPt(JetPt|1)', 0.0371362474126385),
 ('Z2Mass', 0.031474491659564105),
 ('JetPhi(JetPt|0)', 0.030500426153658833),
 ('JetEta(JetPt|0)', 0.02861317423596737),
 ('ExtraLepEta(ExtraLepPt|0)', 0.02843053695361013),
 ('ExtraLepPt(ExtraLepPt|0)', 0.025630098624132472),
 ('ExtraLepPt(ExtraLepPt|1)', 0.025630098624132472),
 ('D_WHh_ZHh_ME', 0.024169000365274564),
 ('D_VBF2j_ggH_ME', 0.02386460489467917),
 ('Z1Pt', 0.023803725800560088),
 ('ZZMassErr', 0.023012297577012054),
 ('JetEta(JetPt|1)', 0.020942408376963352),
 ('JetPt(JetPt|2)', 0.016559113600389627),
 ('JetPhi(JetPt|1)', 0.016559113600389627),
 ('D_ZHh_ggH_ME', 0.015463289906246194),
 ('D_VBF1j_ggH_ME', 0.015402410812127116),
 ('Je

In [82]:
%%capture
fig, implist = plot_variables([("ZHh", "ZHl")])

In [83]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHh_ZHl_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [84]:
append_variables(confhandler, impdict, threshold_fscore)

In [85]:
implist

[('Z1Mass', 0.12084453160940413),
 ('Z2Pt', 0.08869034648071475),
 ('Z2Mass', 0.07617248977022348),
 ('ZZPt', 0.07580930243819763),
 ('Z1Pt', 0.07563981501658555),
 ('PFMET', 0.07101522965545629),
 ('ZZMassErr', 0.05672986126243917),
 ('JetPt(JetPt|0)', 0.04622164112249098),
 ('ZZMass_masked', 0.040095881455654826),
 ('JetPhi(JetPt|0)', 0.03864313212755139),
 ('ZZPhi', 0.037868332485896224),
 ('ZZEta', 0.03692404542262899),
 ('JetEta(JetPt|0)', 0.026512675237887702),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0260042129730515),
 ('D_VBF1j_ggH_ME', 0.021403840100723954),
 ('D_WHh_ZHh_ME', 0.0209438028134912),
 ('D_WHh_ggH_ME', 0.017069804605215372),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.01547178034430159),
 ('D_ZHh_ggH_ME', 0.013050531464129198),
 ('JetPhi(JetPt|1)', 0.010241882763129222),
 ('D_VBF2j_WHh_ME', 0.009854482942301639),
 ('JetEta(JetPt|1)', 0.00813539623737924),
 ('JetPt(JetPt|1)', 0.0070458342413016635),
 ('ExtraLepEta(ExtraLepPt|0)', 0.006997409263698216),
 ('D_VBF2j_ggH_ME', 0.00670

In [86]:
%%capture
fig, implist = plot_variables([("ZHh", "ZHMET")])

In [87]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHh_ZHMET_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [88]:
append_variables(confhandler, impdict, threshold_fscore)

In [89]:
implist

[('PFMET', 0.14271876009041007),
 ('ZZPt', 0.0738707709970222),
 ('ZZEta', 0.06185197144189718),
 ('JetPt(JetPt|0)', 0.05898181035410612),
 ('Z1Pt', 0.05837190112295053),
 ('Z1Mass', 0.0554299860079647),
 ('Z2Pt', 0.05385139740967962),
 ('ZZPhi', 0.047537043016539306),
 ('Z2Mass', 0.04585082337746206),
 ('JetEta(JetPt|0)', 0.04491802102392997),
 ('D_VBF1j_ggH_ME', 0.03878305169877659),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.03189466508807807),
 ('ZZMassErr', 0.031213001829727693),
 ('D_WHh_ggH_ME', 0.03067484662576687),
 ('D_WHh_ZHh_ME', 0.029203889068273956),
 ('D_ZHh_ggH_ME', 0.027123022279625444),
 ('JetPhi(JetPt|0)', 0.022279625443978043),
 ('D_VBF2j_ggH_ME', 0.02044989775051125),
 ('JetPt(JetPt|1)', 0.01808201485308363),
 ('JetEta(JetPt|1)', 0.017938506798694076),
 ('JetPhi(JetPt|1)', 0.017759121730707136),
 ('JetPt(JetPt|2)', 0.014960714670110859),
 ('D_VBF2j_ZHh_ME', 0.012090553582319807),
 ('D_VBF2j_WHh_ME', 0.009292146521723532),
 ('nCleanedJetsPt30', 0.007175402719477631),
 ('

In [90]:
%%capture
fig, implist = plot_variables([("ZHh", "ttHh")])

In [91]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHh_ttHh_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [92]:
append_variables(confhandler, impdict, threshold_fscore)

In [93]:
implist

[('nCleanedJetsPt30BTagged_bTagSF', 0.09986684420772303),
 ('JetPt(JetPt|0)', 0.06051191004586477),
 ('PFMET', 0.056961088918479064),
 ('nCleanedJetsPt30', 0.056813138038171326),
 ('JetPt(JetPt|2)', 0.04778813433939932),
 ('JetPt(JetPt|1)', 0.046308625536321944),
 ('ZZEta', 0.04053854120432016),
 ('JetEta(JetPt|2)', 0.04009468856339695),
 ('JetPt(JetPt|3)', 0.037875425358780884),
 ('ZZPt', 0.03757952359816541),
 ('D_VBF1j_ggH_ME', 0.03654386743601124),
 ('D_VBF2j_ZHh_ME', 0.03491640775262613),
 ('ZZMass_masked', 0.030477881343393992),
 ('D_WHh_ggH_ME', 0.030329930463086255),
 ('JetEta(JetPt|1)', 0.029442225181239828),
 ('Z1Pt', 0.028998372540316616),
 ('D_WHh_ZHh_ME', 0.0273709128569315),
 ('Z1Mass', 0.0273709128569315),
 ('D_ZHh_ggH_ME', 0.024263944370469005),
 ('ExtraLepPt(ExtraLepPt|0)', 0.02174877940523746),
 ('Z2Pt', 0.01908566355969818),
 ('JetPhi(JetPt|1)', 0.018789761799082705),
 ('JetEta(JetPt|0)', 0.01834590915815949),
 ('JetPhi(JetPt|3)', 0.017310252996005325),
 ('Z2Mass', 0

In [94]:
%%capture
fig, implist = plot_variables([("ZHh", "ttHl")])

In [95]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHh_ttHl_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [96]:
append_variables(confhandler, impdict, threshold_fscore)

In [97]:
implist

[('PFMET', 0.1061828868543009),
 ('Z1Mass', 0.061376737877246526),
 ('Z2Pt', 0.05011868429976263),
 ('ExtraLepPt(ExtraLepPt|0)', 0.04546173844240985),
 ('Z2Mass', 0.04543913190912174),
 ('ZZEta', 0.045416525375833613),
 ('ZZPhi', 0.043449756979767155),
 ('ZZPt', 0.042635921781394824),
 ('D_VBF1j_ggH_ME', 0.0404883011190234),
 ('ZZMass_masked', 0.039290154854753026),
 ('ZZMassErr', 0.03788854979088957),
 ('JetPt(JetPt|0)', 0.03653215779360235),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.031943031536113935),
 ('JetEta(JetPt|0)', 0.02981801740703063),
 ('Z1Pt', 0.029162427941675145),
 ('D_VBF2j_ZHh_ME', 0.027466937945066123),
 ('D_WHh_ZHh_ME', 0.02545495648242342),
 ('D_VBF2j_WHh_ME', 0.022538713688255906),
 ('JetPt(JetPt|2)', 0.020798010625070647),
 ('D_VBF2j_ggH_ME', 0.019848536226969596),
 ('JetPhi(JetPt|1)', 0.01797219396405561),
 ('JetPt(JetPt|3)', 0.015892392901548546),
 ('JetEta(JetPt|1)', 0.015801966768396066),
 ('JetPhi(JetPt|0)', 0.015530688368938624),
 ('D_WHh_ggH_ME', 0.01516898383

In [98]:
%%capture
fig, implist = plot_variables([("ZHl", "ggH")])

In [99]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHl_ggH_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [100]:
append_variables(confhandler, impdict, threshold_fscore)

In [101]:
implist

[('Z1Mass', 0.10357239456546616),
 ('Z2Mass', 0.08391070053887606),
 ('Z2Pt', 0.08274556311507812),
 ('ZZPt', 0.0793749869962341),
 ('Z1Pt', 0.07916692674198447),
 ('PFMET', 0.0625949274910014),
 ('ZZMassErr', 0.059193142334019934),
 ('ZZEta', 0.05700850966439881),
 ('JetPt(JetPt|0)', 0.04915423506647525),
 ('ZZPhi', 0.04690718432057924),
 ('JetEta(JetPt|0)', 0.04076940682021513),
 ('D_VBF1j_ggH_ME', 0.03238457857395502),
 ('JetPhi(JetPt|0)', 0.029513347065310114),
 ('ExtraLepPt(ExtraLepPt|0)', 0.01913114037825354),
 ('D_WHh_ZHh_ME', 0.01905831928926617),
 ('ZZMass_masked', 0.01749786738239394),
 ('D_VBF2j_ggH_ME', 0.016488775149283232),
 ('JetPhi(JetPt|1)', 0.012192330899028358),
 ('JetPt(JetPt|1)', 0.011422507958304726),
 ('D_ZHh_ggH_ME', 0.011297671805754946),
 ('JetEta(JetPt|1)', 0.010943969373530574),
 ('D_VBF2j_WHh_ME', 0.007979110750473337),
 ('D_WHh_ggH_ME', 0.0071156606953373695),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.007074048644487444),
 ('JetEta(JetPt|2)', 0.006949212491937

In [102]:
%%capture
fig, implist = plot_variables([("ZHl", "VBF")])

In [103]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHl_VBF_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [104]:
append_variables(confhandler, impdict, threshold_fscore)

In [105]:
implist

[('Z1Mass', 0.08884143713513301),
 ('Z2Pt', 0.07201347803941544),
 ('ZZPt', 0.06911413235121264),
 ('Z1Pt', 0.06895741096266113),
 ('PFMET', 0.06801708263135212),
 ('Z2Mass', 0.0653430239391921),
 ('ZZEta', 0.06310974415233318),
 ('JetEta(JetPt|0)', 0.055694863456490225),
 ('ZZMassErr', 0.052638796379735925),
 ('ZZPhi', 0.04667358852799436),
 ('JetPt(JetPt|0)', 0.04246170121067273),
 ('D_VBF1j_ggH_ME', 0.032921286682600005),
 ('JetPhi(JetPt|0)', 0.03174587626846374),
 ('D_VBF2j_ggH_ME', 0.028366571327821964),
 ('D_WHh_ZHh_ME', 0.0234396426752341),
 ('ExtraLepPt(ExtraLepPt|0)', 0.018404968068017082),
 ('ZZMass_masked', 0.01716099204638953),
 ('JetEta(JetPt|1)', 0.016592877012890334),
 ('D_ZHh_ggH_ME', 0.014163695490342045),
 ('JetEta(JetPt|2)', 0.014144105316773107),
 ('ExtraLepEta(ExtraLepPt|0)', 0.013096031030834933),
 ('JetPhi(JetPt|1)', 0.0126650472123183),
 ('D_VBF2j_WHh_ME', 0.011097833326803275),
 ('JetPt(JetPt|1)', 0.010412177251890451),
 ('JetPhi(JetPt|2)', 0.00961877522234847)

In [106]:
%%capture
fig, implist = plot_variables([("ZHl", "WHh")])

In [107]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHl_WHh_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [108]:
append_variables(confhandler, impdict, threshold_fscore)

In [109]:
implist

[('Z1Mass', 0.09206548089232697),
 ('Z2Pt', 0.0809794895541089),
 ('Z2Mass', 0.07900471222727644),
 ('ZZPt', 0.07594040258219159),
 ('Z1Pt', 0.07466020210824503),
 ('PFMET', 0.07387029117751205),
 ('ZZMassErr', 0.06283877645520661),
 ('JetPt(JetPt|0)', 0.050200201563478875),
 ('ZZPhi', 0.041538419633372375),
 ('ZZEta', 0.04061231716285784),
 ('JetEta(JetPt|0)', 0.032808541933375096),
 ('JetPhi(JetPt|0)', 0.031092528532127583),
 ('D_WHh_ZHh_ME', 0.026148775638057363),
 ('ZZMass_masked', 0.025454198785171467),
 ('ExtraLepPt(ExtraLepPt|0)', 0.02166807397924441),
 ('JetPhi(JetPt|1)', 0.02046958842916678),
 ('JetEta(JetPt|1)', 0.015907171846484897),
 ('D_VBF1j_ggH_ME', 0.015348786533380546),
 ('D_WHh_ggH_ME', 0.013755345517936425),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.013578296516220412),
 ('JetPt(JetPt|1)', 0.012624955737749572),
 ('JetPhi(JetPt|2)', 0.011603519189387955),
 ('D_ZHh_ggH_ME', 0.011549042573475336),
 ('D_VBF2j_ggH_ME', 0.010622940102960804),
 ('D_VBF2j_WHh_ME', 0.00932912047

In [110]:
%%capture
fig, implist = plot_variables([("ZHl", "ZHMET")])

In [111]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHl_ZHMET_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [112]:
append_variables(confhandler, impdict, threshold_fscore)

In [113]:
implist

[('PFMET', 0.11174477938338837),
 ('Z1Mass', 0.10647056301187771),
 ('Z2Pt', 0.09519317353458256),
 ('Z1Pt', 0.09304918313965953),
 ('ZZPt', 0.08074267827280134),
 ('Z2Mass', 0.07049440418506925),
 ('ZZMass_masked', 0.051284250246558895),
 ('JetPt(JetPt|0)', 0.04982633677801124),
 ('ZZMassErr', 0.042665408858968315),
 ('ZZEta', 0.042150851164186785),
 ('ExtraLepPt(ExtraLepPt|0)', 0.039235024227091464),
 ('ZZPhi', 0.03426096651087003),
 ('JetEta(JetPt|0)', 0.019210153938510357),
 ('JetPhi(JetPt|0)', 0.018524077012134987),
 ('D_VBF1j_ggH_ME', 0.016937524119891943),
 ('D_ZHh_ggH_ME', 0.014622014493375069),
 ('nExtraLep', 0.014021697182796621),
 ('D_VBF2j_WHh_ME', 0.013507139488015093),
 ('D_VBF2j_ggH_ME', 0.012349384674756657),
 ('D_WHh_ZHh_ME', 0.010848591398310535),
 ('ExtraLepEta(ExtraLepPt|0)', 0.008961879850778268),
 ('Z2Flav', 0.007932764461215214),
 ('D_WHh_ggH_ME', 0.006517730800566014),
 ('JetPt(JetPt|1)', 0.006346211568972171),
 ('JetPt(JetPt|2)', 0.005745894258393723),
 ('JetPh

In [114]:
%%capture
fig, implist = plot_variables([("ZHl", "ttHh")])

In [115]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHl_ttHh_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [116]:
append_variables(confhandler, impdict, threshold_fscore)

In [117]:
implist

[('JetPt(JetPt|0)', 0.07395215013349231),
 ('Z1Mass', 0.07270794992094144),
 ('Z2Mass', 0.05998081858005651),
 ('ZZMass_masked', 0.058607014178698257),
 ('Z2Pt', 0.05137510044324633),
 ('ZZMassErr', 0.04740921226574043),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.04520594105601493),
 ('ZZPt', 0.04367661162808782),
 ('PFMET', 0.04232872806449105),
 ('JetPt(JetPt|1)', 0.03955519842401307),
 ('JetPhi(JetPt|0)', 0.030431063531973352),
 ('D_WHh_ZHh_ME', 0.03030145934316597),
 ('Z1Pt', 0.02884989242852329),
 ('ZZEta', 0.028150029808963424),
 ('ExtraLepPt(ExtraLepPt|0)', 0.028098188133440474),
 ('D_WHh_ggH_ME', 0.027942663106871615),
 ('JetPt(JetPt|3)', 0.02607636278804531),
 ('JetEta(JetPt|1)', 0.02462479587340263),
 ('JetPt(JetPt|2)', 0.02379532906503538),
 ('ZZPhi', 0.023562041525182095),
 ('JetEta(JetPt|0)', 0.022291920474869747),
 ('JetPhi(JetPt|1)', 0.01876668653930895),
 ('JetPhi(JetPt|2)', 0.017133673760335935),
 ('JetPhi(JetPt|3)', 0.017029990409290027),
 ('JetEta(JetPt|2)', 0.01682262370

In [118]:
%%capture
fig, implist = plot_variables([("ZHl", "ttHl")])

In [119]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHl_ttHl_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [120]:
append_variables(confhandler, impdict, threshold_fscore)

In [121]:
implist

[('PFMET', 0.09512157822296988),
 ('Z1Mass', 0.08686343477595963),
 ('Z2Mass', 0.08328235713921599),
 ('Z1Pt', 0.07100983840546465),
 ('JetPt(JetPt|0)', 0.05880103991436),
 ('Z2Pt', 0.056800224295254116),
 ('ZZPt', 0.053270122852627826),
 ('ZZEta', 0.04324055666003976),
 ('ZZMass_masked', 0.0360146811439058),
 ('ZZPhi', 0.03412856196156395),
 ('JetEta(JetPt|0)', 0.029935769995412143),
 ('ZZMassErr', 0.028215323443951675),
 ('JetPhi(JetPt|0)', 0.02564102564102564),
 ('D_VBF1j_ggH_ME', 0.022544221848396797),
 ('D_VBF2j_ggH_ME', 0.020415965743997552),
 ('ExtraLepPt(ExtraLepPt|0)', 0.019676810929296018),
 ('JetPt(JetPt|1)', 0.01947290615282663),
 ('JetEta(JetPt|1)', 0.01867003109547841),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.014948768924912067),
 ('JetPt(JetPt|2)', 0.01477035224550135),
 ('D_WHh_ggH_ME', 0.014196870061681195),
 ('JetPhi(JetPt|1)', 0.013292042616098282),
 ('D_WHh_ZHh_ME', 0.013164602130804915),
 ('nExtraLep', 0.012552887801396748),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.012540143

In [122]:
%%capture
fig, implist = plot_variables([("ZHMET", "ggH")])

In [123]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHMET_ggH_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [124]:
append_variables(confhandler, impdict, threshold_fscore)

In [125]:
implist

[('ZZPt', 0.1499269158488202),
 ('PFMET', 0.14115681770724578),
 ('ZZPhi', 0.08093547713510127),
 ('D_VBF1j_ggH_ME', 0.07128836917936938),
 ('ZZEta', 0.06807266652745876),
 ('Z2Pt', 0.06732094382960953),
 ('JetPt(JetPt|0)', 0.06573397368970557),
 ('Z1Pt', 0.06059720192106912),
 ('Z1Mass', 0.05124243057005638),
 ('Z2Mass', 0.05015660889538526),
 ('ZZMassErr', 0.049488410941741494),
 ('JetPhi(JetPt|0)', 0.025600334098976822),
 ('JetEta(JetPt|0)', 0.022802255168093546),
 ('D_VBF2j_ggH_ME', 0.013614533305491752),
 ('D_WHh_ZHh_ME', 0.010816454374608478),
 ('JetPt(JetPt|1)', 0.010732929630403006),
 ('JetEta(JetPt|1)', 0.009855919816245562),
 ('JetPhi(JetPt|1)', 0.007893088327416998),
 ('nCleanedJetsPt30', 0.007433702234286907),
 ('Z2Flav', 0.00622259344330758),
 ('D_VBF2j_WHh_ME', 0.005930256838588431),
 ('D_WHh_ggH_ME', 0.003967425349759866),
 ('D_VBF2j_ZHh_ME', 0.0032157026519106283),
 ('D_ZHh_ggH_ME', 0.0031321779077051574),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0028398413029860096),
 ('Z

In [126]:
%%capture
fig, implist = plot_variables([("ZHMET", "VBF")])

In [127]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHMET_VBF_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [128]:
append_variables(confhandler, impdict, threshold_fscore)

In [129]:
implist

[('PFMET', 0.14550078247261347),
 ('ZZPt', 0.08493740219092331),
 ('JetPt(JetPt|0)', 0.08294209702660407),
 ('D_VBF1j_ggH_ME', 0.06815336463223787),
 ('Z1Pt', 0.0680359937402191),
 ('ZZEta', 0.05837245696400626),
 ('ZZMassErr', 0.0559076682316119),
 ('JetEta(JetPt|0)', 0.049178403755868544),
 ('Z2Pt', 0.04894366197183098),
 ('Z1Mass', 0.04632237871674491),
 ('ZZPhi', 0.04475743348982786),
 ('Z2Mass', 0.03888888888888889),
 ('D_VBF2j_ggH_ME', 0.03841940532081377),
 ('JetPhi(JetPt|0)', 0.029499217527386542),
 ('JetPhi(JetPt|1)', 0.02038341158059468),
 ('JetPt(JetPt|1)', 0.019131455399061033),
 ('D_VBF2j_ZHh_ME', 0.016940532081377153),
 ('ZZMass_masked', 0.01584507042253521),
 ('JetEta(JetPt|1)', 0.012754303599374022),
 ('D_WHh_ZHh_ME', 0.012245696400625979),
 ('JetPhi(JetPt|2)', 0.011697965571205008),
 ('D_VBF2j_WHh_ME', 0.008998435054773083),
 ('D_WHh_ggH_ME', 0.007942097026604068),
 ('JetPt(JetPt|2)', 0.0034428794992175274),
 ('JetEta(JetPt|2)', 0.0030125195618153364),
 ('Z2Flav', 0.00

In [130]:
%%capture
fig, implist = plot_variables([("ZHMET", "WHh")])

In [131]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHMET_WHh_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [132]:
append_variables(confhandler, impdict, threshold_fscore)

In [133]:
implist

[('PFMET', 0.1384194486771021),
 ('ZZPt', 0.07805790275661449),
 ('JetPt(JetPt|0)', 0.06295885856766865),
 ('Z1Pt', 0.06018839174400886),
 ('ZZEta', 0.05689846239091287),
 ('JetEta(JetPt|0)', 0.05132289790829755),
 ('Z2Pt', 0.050110818672946394),
 ('D_WHh_ZHh_ME', 0.04779055270813132),
 ('ZZPhi', 0.04678625848455465),
 ('D_VBF1j_ggH_ME', 0.04498545504917579),
 ('ZZMassErr', 0.044846931707992796),
 ('Z1Mass', 0.043046128272613934),
 ('Z2Mass', 0.03989472226070093),
 ('D_WHh_ggH_ME', 0.03778224130766034),
 ('JetPhi(JetPt|0)', 0.03303781687214295),
 ('D_ZHh_ggH_ME', 0.02375675301288267),
 ('JetPhi(JetPt|1)', 0.0182158193655631),
 ('JetPt(JetPt|1)', 0.017488571824352404),
 ('JetEta(JetPt|1)', 0.016865216789028952),
 ('D_VBF2j_ggH_ME', 0.015445352541903311),
 ('JetPt(JetPt|2)', 0.013575287435932955),
 ('D_VBF2j_WHh_ME', 0.010977974788751906),
 ('D_VBF2j_ZHh_ME', 0.007618783765064413),
 ('nCleanedJetsPt30', 0.007133952070923951),
 ('JetEta(JetPt|2)', 0.006510597035600499),
 ('JetPt(JetPt|3)'

In [134]:
%%capture
fig, implist = plot_variables([("ZHMET", "ttHh")])

In [135]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHMET_ttHh_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [136]:
append_variables(confhandler, impdict, threshold_fscore)

In [137]:
implist

[('PFMET', 0.2085315832649713),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.08498769483182937),
 ('JetPt(JetPt|0)', 0.07957342083675144),
 ('nCleanedJetsPt30', 0.04971287940935193),
 ('JetPt(JetPt|2)', 0.04561115668580804),
 ('JetEta(JetPt|0)', 0.04019688269073011),
 ('JetPt(JetPt|3)', 0.03970467596390484),
 ('JetPt(JetPt|1)', 0.03625922887612797),
 ('Z1Mass', 0.035438884331419195),
 ('ExtraLepPt(ExtraLepPt|0)', 0.031173092698933553),
 ('D_VBF2j_ggH_ME', 0.029532403609515995),
 ('D_WHh_ggH_ME', 0.029532403609515995),
 ('ZZEta', 0.026907301066447908),
 ('ZZMass_masked', 0.02395406070549631),
 ('Z2Pt', 0.02395406070549631),
 ('Z2Mass', 0.022149302707136997),
 ('ZZPhi', 0.02132895816242822),
 ('Z1Pt', 0.021164889253486464),
 ('D_ZHh_ggH_ME', 0.021164889253486464),
 ('ZZPt', 0.019688269073010665),
 ('JetEta(JetPt|2)', 0.01837571780147662),
 ('ZZMassErr', 0.016899097621000822),
 ('JetPhi(JetPt|0)', 0.014602132895816243),
 ('D_WHh_ZHh_ME', 0.013617719442165709),
 ('D_VBF1j_ggH_ME', 0.0113207547169

In [138]:
%%capture
fig, implist = plot_variables([("ZHMET", "ttHl")])

In [139]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHMET_ttHl_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [140]:
append_variables(confhandler, impdict, threshold_fscore)

In [141]:
implist

[('Z1Mass', 0.08568030704362398),
 ('PFMET', 0.07692307692307693),
 ('Z2Mass', 0.07546353856965242),
 ('ZZMass_masked', 0.06859830261095194),
 ('ExtraLepPt(ExtraLepPt|0)', 0.05362452024433753),
 ('JetPt(JetPt|0)', 0.04843505054327261),
 ('JetPt(JetPt|3)', 0.04562408778852911),
 ('ZZPt', 0.03886696578193416),
 ('nCleanedJetsPt30', 0.038812908805881396),
 ('Z2Pt', 0.035623547218768585),
 ('nExtraLep', 0.03486674955402995),
 ('ZZEta', 0.033893723985080275),
 ('D_WHh_ZHh_ME', 0.03356938212876372),
 ('D_VBF2j_ggH_ME', 0.03221795772744473),
 ('JetEta(JetPt|0)', 0.03221795772744473),
 ('ZZMassErr', 0.030812476350072976),
 ('ZZPhi', 0.02627169036164117),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.024055354343478025),
 ('Z1Pt', 0.02356884155900319),
 ('JetPt(JetPt|2)', 0.016865776528460996),
 ('D_ZHh_ggH_ME', 0.015676523055300286),
 ('D_VBF2j_WHh_ME', 0.015244067246878209),
 ('D_VBF1j_ggH_ME', 0.014865668414508893),
 ('JetPhi(JetPt|0)', 0.014703497486350614),
 ('D_WHh_ggH_ME', 0.012054705659765392),

In [142]:
%%capture
fig, implist = plot_variables([("ttHh", "ggH")])

In [143]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHh_ggH_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [144]:
append_variables(confhandler, impdict, threshold_fscore)

In [145]:
implist

[('JetPt(JetPt|0)', 0.07177153704427447),
 ('PFMET', 0.06250348947574116),
 ('JetPt(JetPt|1)', 0.05814862375076769),
 ('JetEta(JetPt|0)', 0.05510580090447211),
 ('ZZPt', 0.05359834738428899),
 ('Z1Pt', 0.04628440623080789),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.04458154206911954),
 ('JetEta(JetPt|1)', 0.042599519848138014),
 ('D_VBF2j_ggH_ME', 0.03902629668918542),
 ('JetPt(JetPt|2)', 0.03796549606387136),
 ('Z2Pt', 0.03589972642510189),
 ('D_ZHh_ggH_ME', 0.035788063201384626),
 ('JetPt(JetPt|3)', 0.03268940874323042),
 ('JetEta(JetPt|2)', 0.03146111328234046),
 ('D_WHh_ggH_ME', 0.030149070403662555),
 ('Z2Mass', 0.029814080732510746),
 ('ZZPhi', 0.029004522360560548),
 ('Z1Mass', 0.02802746915303445),
 ('ZZEta', 0.027776226899670593),
 ('JetPhi(JetPt|0)', 0.027497068840377423),
 ('D_VBF1j_ggH_ME', 0.023951761487354138),
 ('JetPhi(JetPt|1)', 0.02157891798336218),
 ('nCleanedJetsPt30', 0.017698620959187093),
 ('ZZMassErr', 0.017475294511752556),
 ('D_WHh_ZHh_ME', 0.014572050695103568),


In [146]:
%%capture
fig, implist = plot_variables([("ttHh", "VBF")])

In [147]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHh_VBF_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [148]:
append_variables(confhandler, impdict, threshold_fscore)

In [149]:
implist

[('JetEta(JetPt|2)', 0.07874455489779962),
 ('JetEta(JetPt|0)', 0.0629956439182397),
 ('D_VBF2j_ggH_ME', 0.05796939573327376),
 ('D_ZHh_ggH_ME', 0.056433597676756396),
 ('JetPt(JetPt|0)', 0.05590304925723221),
 ('PFMET', 0.05087680107226628),
 ('JetPt(JetPt|1)', 0.043588741204065674),
 ('JetEta(JetPt|1)', 0.04330950519378979),
 ('JetPt(JetPt|2)', 0.042723109572210434),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.03521166089578912),
 ('Z1Pt', 0.029263933876912767),
 ('ZZEta', 0.028900927063554117),
 ('D_WHh_ggH_ME', 0.028565843851223054),
 ('ZZPt', 0.026694962582374623),
 ('Z2Mass', 0.02527085892996761),
 ('JetPt(JetPt|3)', 0.024907852116608956),
 ('JetPhi(JetPt|1)', 0.024433150899139954),
 ('JetPhi(JetPt|0)', 0.02370713727242265),
 ('Z1Mass', 0.023651290070367474),
 ('ZZMassErr', 0.023260359655981236),
 ('D_VBF1j_ggH_ME', 0.022003797609739754),
 ('D_VBF2j_ZHh_ME', 0.020831006366581035),
 ('ZZPhi', 0.020775159164525856),
 ('JetEta(JetPt|3)', 0.02016083994191891),
 ('nCleanedJetsPt30', 0.01990

In [150]:
%%capture
fig, implist = plot_variables([("ttHh", "WHh")])

In [151]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHh_WHh_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [152]:
append_variables(confhandler, impdict, threshold_fscore)

In [153]:
implist

[('PFMET', 0.07118207571465361),
 ('JetPt(JetPt|0)', 0.06041720319340716),
 ('JetPt(JetPt|1)', 0.04666494978109709),
 ('D_WHh_ggH_ME', 0.04213237187741437),
 ('Z1Pt', 0.04112799381921195),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.04048416173062065),
 ('ZZPt', 0.037883080092711824),
 ('JetEta(JetPt|0)', 0.03754828740664435),
 ('Z1Mass', 0.03752253412310069),
 ('ZZEta', 0.03533350502189029),
 ('JetPt(JetPt|2)', 0.03417460726242596),
 ('Z2Pt', 0.034097347411795),
 ('JetPhi(JetPt|1)', 0.03373680144218388),
 ('JetEta(JetPt|2)', 0.03345351532320371),
 ('ZZPhi', 0.03219160442956477),
 ('D_VBF2j_WHh_ME', 0.031006953386556787),
 ('JetEta(JetPt|1)', 0.030466134432140097),
 ('D_VBF2j_ggH_ME', 0.029616276075199587),
 ('Z2Mass', 0.028869430852433684),
 ('JetPhi(JetPt|0)', 0.028302858614473344),
 ('D_WHh_ZHh_ME', 0.02753026010816379),
 ('ZZMassErr', 0.02737574040690188),
 ('JetPt(JetPt|3)', 0.02382178727787793),
 ('D_VBF2j_ZHh_ME', 0.021066185938707186),
 ('JetEta(JetPt|3)', 0.01900592325521504),
 ('D_

In [154]:
%%capture
fig, implist = plot_variables([("ttHh", "ttHl")])

In [155]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHh_ttHl_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [156]:
append_variables(confhandler, impdict, threshold_fscore)

In [157]:
implist

[('PFMET', 0.09821405693494248),
 ('Z1Mass', 0.059702257411535606),
 ('ZZMassErr', 0.04607066902400902),
 ('ZZPt', 0.044302662259461396),
 ('JetPt(JetPt|0)', 0.04232966920337203),
 ('ZZEta', 0.03733312834704179),
 ('JetPhi(JetPt|0)', 0.036308196889333025),
 ('Z2Pt', 0.03500140928075435),
 ('JetPt(JetPt|3)', 0.034463320265457245),
 ('JetPhi(JetPt|2)', 0.03423271068747277),
 ('ZZMass_masked', 0.03264406692802419),
 ('JetPt(JetPt|2)', 0.03228534091782612),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03059420401260666),
 ('Z1Pt', 0.03020985471596587),
 ('Z2Mass', 0.029723012273554207),
 ('nCleanedJetsPt30', 0.029492402695569735),
 ('D_WHh_ZHh_ME', 0.028493094524303687),
 ('D_WHh_ggH_ME', 0.028467471237860968),
 ('JetPhi(JetPt|1)', 0.028211238373433777),
 ('ZZPhi', 0.028006252081892023),
 ('JetEta(JetPt|0)', 0.027135060342839572),
 ('JetPhi(JetPt|3)', 0.022907218079790913),
 ('JetEta(JetPt|1)', 0.022036026340738462),
 ('JetEta(JetPt|2)', 0.021959156481410304),
 ('JetPt(JetPt|1)', 0.02006303328464909),


In [158]:
%%capture
fig, implist = plot_variables([("ttHl", "ggH")])

In [159]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHl_ggH_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [160]:
append_variables(confhandler, impdict, threshold_fscore)

In [161]:
implist

[('PFMET', 0.12164496980686658),
 ('Z1Mass', 0.06767367466684913),
 ('ZZPt', 0.06315753459808064),
 ('JetPt(JetPt|0)', 0.05953093727013018),
 ('ZZMass_masked', 0.054484492875104776),
 ('Z2Pt', 0.043621807482422975),
 ('Z2Mass', 0.04271515815043536),
 ('Z1Pt', 0.04192825495663479),
 ('ZZMassErr', 0.04165454949792155),
 ('D_VBF1j_ggH_ME', 0.04124399130985169),
 ('ZZPhi', 0.03831876421985391),
 ('ExtraLepPt(ExtraLepPt|0)', 0.037223942385000944),
 ('ZZEta', 0.03351181210120259),
 ('JetEta(JetPt|0)', 0.029218057717638607),
 ('JetEta(JetPt|1)', 0.02535196811331406),
 ('JetPt(JetPt|1)', 0.022478060796825017),
 ('JetPhi(JetPt|0)', 0.021058213729750072),
 ('D_WHh_ZHh_ME', 0.020870041226884718),
 ('D_VBF2j_ggH_ME', 0.019877858939049215),
 ('nExtraLep', 0.01956994029799682),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.018936996424722446),
 ('JetPt(JetPt|2)', 0.016028875925894248),
 ('D_WHh_ggH_ME', 0.015481465008467763),
 ('JetEta(JetPt|2)', 0.013565526797475067),
 ('nCleanedJetsPt30', 0.01117060403373

In [162]:
%%capture
fig, implist = plot_variables([("ttHl", "VBF")])

In [163]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHl_VBF_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [164]:
append_variables(confhandler, impdict, threshold_fscore)

In [165]:
implist

[('PFMET', 0.09475117396982412),
 ('D_VBF2j_ggH_ME', 0.06065742310150324),
 ('Z1Mass', 0.05121009197254717),
 ('ZZMass_masked', 0.049904137375309125),
 ('JetEta(JetPt|0)', 0.04690322042846425),
 ('JetPt(JetPt|0)', 0.042346272472144265),
 ('ZZPt', 0.03945650059740476),
 ('ZZEta', 0.03603878963016477),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03587207202200673),
 ('JetEta(JetPt|2)', 0.03514962905332185),
 ('D_VBF1j_ggH_ME', 0.03481619383700575),
 ('Z2Mass', 0.03423268220845258),
 ('D_WHh_ZHh_ME', 0.033871460724110146),
 ('D_ZHh_ggH_ME', 0.032148712106476976),
 ('ZZMassErr', 0.030509322292922837),
 ('ZZPhi', 0.030481536024896495),
 ('Z1Pt', 0.030314818416738446),
 ('Z2Pt', 0.030092528272527716),
 ('JetEta(JetPt|1)', 0.027841840562394065),
 ('JetPt(JetPt|2)', 0.026230237016866265),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.02514657256383895),
 ('JetPt(JetPt|1)', 0.019867181638834087),
 ('ExtraLepEta(ExtraLepPt|0)', 0.017032982300147267),
 ('JetPhi(JetPt|1)', 0.016227180527383367),
 ('JetPt(JetPt|3)', 0

In [166]:
%%capture
fig, implist = plot_variables([("ttHl", "WHh")])

In [167]:
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHl_WHh_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [168]:
append_variables(confhandler, impdict, threshold_fscore)

In [169]:
implist

[('PFMET', 0.12140742031005052),
 ('ZZMass_masked', 0.07407246124368577),
 ('ExtraLepPt(ExtraLepPt|0)', 0.060616617314056785),
 ('ZZPt', 0.055260407594495735),
 ('Z1Mass', 0.05465075770771643),
 ('JetPt(JetPt|0)', 0.045941473610869185),
 ('ZZMassErr', 0.04363351332520467),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0421093886082564),
 ('Z2Pt', 0.03866922139000174),
 ('Z2Mass', 0.03640480752482146),
 ('D_VBF1j_ggH_ME', 0.03592579689949486),
 ('D_VBF2j_WHh_ME', 0.028261626894269292),
 ('JetEta(JetPt|0)', 0.024081170527782615),
 ('D_WHh_ZHh_ME', 0.023602159902456017),
 ('ZZEta', 0.023297334959066364),
 ('D_WHh_ggH_ME', 0.021816756662602334),
 ('ZZPhi', 0.021555478139696917),
 ('JetEta(JetPt|1)', 0.021381292457759972),
 ('JetPt(JetPt|2)', 0.020989374673401846),
 ('JetPt(JetPt|1)', 0.02024908552516983),
 ('JetPhi(JetPt|1)', 0.01911687859257969),
 ('JetPt(JetPt|3)', 0.0187685072287058),
 ('Z1Pt', 0.018028218080473786),
 ('D_VBF2j_ggH_ME', 0.015241247169482669),
 ('nCleanedJetsPt30', 0.01463159728

In [62]:
# save the variable configuration
# NOTE(review): this cell's recorded execution count (62) is lower than that of the
# append_variables cells above (124-168), i.e. it was last executed before them. If
# append_variables modifies confhandler (as its arguments suggest), this cell must be
# re-run after those cells for the saved configuration to contain their additions -- TODO confirm.
confhandler.save_configuration(out_path)

In [171]:
df = df.fillna(0.0)

In [172]:
df.to_csv("input_parameters_table_inclusive_ZZMask.csv")

In [173]:
# now plot the data contained in the table to have a global picture of the relevant input variables
# all columns except the "discriminant" label column are used as data columns
datacol_labels = [col for col in df.columns.tolist() if col != "discriminant"]
# .values instead of .as_matrix() (deprecated in pandas 0.23, removed in 1.0); same array.
# After the transpose: one row per data column (variable), one column per table row (discriminant).
variable_data = df[datacol_labels].values.transpose()
# the leading '' is a padding entry expected by the tick labelling in the plotting cell
datacol_labels = np.concatenate([[''], np.array(datacol_labels)])

In [174]:
# Discriminant names in table-row order, with a leading '' padding entry expected by the tick labelling
# in the plotting cell. .values replaces .as_matrix() (deprecated in pandas 0.23, removed in 1.0).
discriminant_labels = np.concatenate([[''], df["discriminant"].values])

In [175]:
fig = plt.figure(figsize = (15, 10))
ax = fig.add_subplot(111)
# heat map of the table: rows of variable_data along y, columns along x
cax = ax.matshow(variable_data, interpolation = 'nearest', cmap = 'Blues', vmin = np.min(variable_data), vmax = np.max(variable_data))
# Fix one tick per matrix column/row *before* assigning the labels: set_*ticklabels on its own attaches
# the labels to whatever ticks the locator happens to produce, which is why the label arrays carry a
# leading '' padding entry. With explicit tick positions that padding entry is simply skipped ([1:]).
ax.set_xticks(np.arange(variable_data.shape[1]))
ax.set_xticklabels(discriminant_labels[1:], rotation = 'vertical')
ax.set_yticks(np.arange(variable_data.shape[0]))
ax.set_yticklabels(datacol_labels[1:])

In [176]:
# Act on the figure object created in the plotting cell rather than on pyplot's "current figure":
# with the inline notebook backend the figure is closed once its cell has finished, so
# plt.tight_layout()/plt.savefig() in a later cell would lay out and save a new, empty figure.
fig.tight_layout()
fig.savefig(os.path.join(out_dir, "input_variables_inclusive_fullmassrange_ZZMask.pdf"))