In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
from trainlib.FileCollection import FileCollection
from trainlib.config import Config
from trainlib.ConfigFileHandler import ConfigFileHandler
from trainlib.ConfigFileUtils import ConfigFileUtils
import trainlib.cuts as cuts
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd
import copy
import re
from scipy import interpolate
import scipy.integrate as integrate
import pickle
import os

Welcome to JupyROOT 6.10/09


In [3]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error



In [4]:
# Alternative candidate list (currently disabled), kept for reference.
#candidate_branches = ["PFMET", "nCleanedJetsPt30", "nCleanedJetsPt30BTagged_bTagSF", "nExtraLep", "ZZMass", "nExtraZ", "Z1Mass", "Z2Mass", "Z1Pt", "Z2Pt", "ZZMassErr", "ZZPt", "ZZEta", "ZZPhi", "Z1Flav", "Z2Flav", "costhetastar", "helphi", "helcosthetaZ1", "helcosthetaZ2", "phistarZ1", "phistarZ2", "xi", "xistar"]
# Branches that plot_variables() hands to the BDT as "mandatory" inputs (always offered to the training).
candidate_branches = ["PFMET", "nCleanedJetsPt30", "nCleanedJetsPt30BTagged_bTagSF", "nExtraLep", "ZZMass", "nExtraZ", "Z1Mass", "Z2Mass", "Z1Pt", "Z2Pt", "ZZMassErr", "ZZPt", "ZZEta", "ZZPhi", "Z1Flav", "Z2Flav", "D_VBF2j_ggH_ME", "D_VBF1j_ggH_ME", "D_WHh_ggH_ME", "D_ZHh_ggH_ME", "D_WHh_ZHh_ME", "D_VBF2j_WHh_ME", "D_VBF2j_ZHh_ME"]
#list_branches = ["Jet", "Lep", "ExtraLep"]
# Branches that plot_variables() passes as "optional" inputs; empty here, so the optional-branch
# selection loops further down never add anything.
MELA_branches = []
# Base names of the list-valued branches; prepare_data() unrolls <base>Pt / <base>Eta / <base>Phi for each.
list_branches = ["Jet", "ExtraLep"]
# Minimum Pt per entry of list_branches (paired up via zip in prepare_data()).
# NOTE(review): there are three limits but only two list branches, so the last value is never used.
pt_limits = [30.0, 0.0, 0.0]

In [5]:
# Full set of branches requested from each FileCollection (passed as `read_branches` to get_data()):
# the Pt/Eta/Phi list branches, the candidate and MELA branches, plus a few bookkeeping columns
# including the per-event "training_weight" used by the histogram helpers.
allbranches = ["JetPt", "JetEta", "JetPhi", "LepPt", "LepEta", "LepPhi", "ExtraLepPt", "ExtraLepEta", "ExtraLepPhi"] + candidate_branches + MELA_branches + ["LHEAssociatedParticleId", "GenAssocLep1Id", "GenAssocLep2Id", "training_weight"]

In [6]:
# Alternative input location (currently disabled).
#MC_path = "/data_CMS/cms/wind/CJLST_NTuples_randomizeda/"
# Directory prefix that is prepended to every ROOT file path in `collections`.
MC_path = "/data_CMS/cms/wind/CJLST_NTuples/"

In [7]:
# these are the cuts without any m4l restriction imposed
def WHhadr0j_cut(row):
    """Pass rows accepted by ``cuts.WHhadr_cut`` whose ``nCleanedJetsPt30`` entry equals 0.

    ``row`` is indexed by branch name. When the base cut fails, its (falsy) result is returned as-is.
    """
    base_selection = cuts.WHhadr_cut(row)
    if not base_selection:
        return base_selection
    return row["nCleanedJetsPt30"] == 0

def WHhadr01j_cut(row):
    """Pass rows accepted by ``cuts.WHhadr_cut`` whose ``nCleanedJetsPt30`` entry equals 0 or 1.

    ``row`` is indexed by branch name. When the base cut fails, its (falsy) result is returned as-is.
    """
    base_selection = cuts.WHhadr_cut(row)
    if not base_selection:
        return base_selection
    return row["nCleanedJetsPt30"] == 0 or row["nCleanedJetsPt30"] == 1

def WHhadr1j_cut(row):
    """Pass rows accepted by ``cuts.WHhadr_cut`` whose ``nCleanedJetsPt30`` entry equals 1.

    ``row`` is indexed by branch name. When the base cut fails, its (falsy) result is returned as-is.
    """
    base_selection = cuts.WHhadr_cut(row)
    if not base_selection:
        return base_selection
    return row["nCleanedJetsPt30"] == 1

def WHhadr2j_cut(row):
    """Pass rows accepted by ``cuts.WHhadr_cut`` whose ``nCleanedJetsPt30`` entry is at least 2.

    ``row`` is indexed by branch name. When the base cut fails, its (falsy) result is returned as-is.
    """
    base_selection = cuts.WHhadr_cut(row)
    if not base_selection:
        return base_selection
    return row["nCleanedJetsPt30"] >= 2

def ZHhadr0j_cut(row):
    """Pass rows accepted by ``cuts.ZHhadr_cut`` whose ``nCleanedJetsPt30`` entry equals 0.

    ``row`` is indexed by branch name. When the base cut fails, its (falsy) result is returned as-is.
    """
    base_selection = cuts.ZHhadr_cut(row)
    if not base_selection:
        return base_selection
    return row["nCleanedJetsPt30"] == 0

def ZHhadr01j_cut(row):
    """Pass rows accepted by ``cuts.ZHhadr_cut`` whose ``nCleanedJetsPt30`` entry equals 0 or 1.

    ``row`` is indexed by branch name. When the base cut fails, its (falsy) result is returned as-is.
    """
    base_selection = cuts.ZHhadr_cut(row)
    if not base_selection:
        return base_selection
    return row["nCleanedJetsPt30"] == 0 or row["nCleanedJetsPt30"] == 1

def ZHhadr1j_cut(row):
    """Pass rows accepted by ``cuts.ZHhadr_cut`` whose ``nCleanedJetsPt30`` entry equals 1.

    ``row`` is indexed by branch name. When the base cut fails, its (falsy) result is returned as-is.
    """
    base_selection = cuts.ZHhadr_cut(row)
    if not base_selection:
        return base_selection
    return row["nCleanedJetsPt30"] == 1

def ZHhadr2j_cut(row):
    """Pass rows accepted by ``cuts.ZHhadr_cut`` whose ``nCleanedJetsPt30`` entry is at least 2.

    ``row`` is indexed by branch name. When the base cut fails, its (falsy) result is returned as-is.
    """
    base_selection = cuts.ZHhadr_cut(row)
    if not base_selection:
        return base_selection
    return row["nCleanedJetsPt30"] >= 2

def mZZ0j_cut(row):
    """Pass rows whose ``nCleanedJetsPt30`` entry equals 0 (no further selection is applied)."""
    n_jets = row["nCleanedJetsPt30"]
    return n_jets == 0

def mZZ01j_cut(row):
    """Pass rows whose ``nCleanedJetsPt30`` entry equals 0 or 1 (no further selection is applied)."""
    n_jets = row["nCleanedJetsPt30"]
    return n_jets == 0 or n_jets == 1

def mZZ1j_cut(row):
    """Pass rows whose ``nCleanedJetsPt30`` entry equals 1 (no further selection is applied)."""
    n_jets = row["nCleanedJetsPt30"]
    return n_jets == 1

def mZZ2j_cut(row):
    """Pass rows whose ``nCleanedJetsPt30`` entry is at least 2 (no further selection is applied)."""
    n_jets = row["nCleanedJetsPt30"]
    return n_jets >= 2

In [8]:
# Registry of event categories. Each key is a category name; each value maps a ROOT file path
# (below MC_path) to the cut function associated with that file. The inner dicts are handed
# unchanged to FileCollection(...) by the functions below.
# NOTE(review): presumably FileCollection applies the cut function to every event of the file -- confirm in trainlib.
# Categories with a "0j"/"1j"/"01j"/"2j" suffix use the jet-multiplicity cuts defined above; the
# un-suffixed ones use the plain trainlib cut (or cuts.no_cut).
collections = {"VBF2j": {MC_path + "VBFH125/ZZ4lAnalysis.root": mZZ2j_cut},
            "VBF1j": {MC_path + "VBFH125/ZZ4lAnalysis.root": mZZ1j_cut},
            "VBF0j": {MC_path + "VBFH125/ZZ4lAnalysis.root": mZZ0j_cut},
            "VBF01j": {MC_path + "VBFH125/ZZ4lAnalysis.root": mZZ01j_cut},
            "VBF": {MC_path + "VBFH125/ZZ4lAnalysis.root": cuts.no_cut},
            "ggH2j": {MC_path + "ggH125/ZZ4lAnalysis.root": mZZ2j_cut},
            "ggH1j": {MC_path + "ggH125/ZZ4lAnalysis.root": mZZ1j_cut},
            "ggH0j": {MC_path + "ggH125/ZZ4lAnalysis.root": mZZ0j_cut},
            "ggH01j": {MC_path + "ggH125/ZZ4lAnalysis.root": mZZ01j_cut},
            "ggH" : {MC_path + "ggH125/ZZ4lAnalysis.root": cuts.no_cut},
            "WHh2j": {MC_path + "WplusH125/ZZ4lAnalysis.root": WHhadr2j_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": WHhadr2j_cut},
            "WHh1j": {MC_path + "WplusH125/ZZ4lAnalysis.root": WHhadr1j_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": WHhadr1j_cut},
            "WHh0j": {MC_path + "WplusH125/ZZ4lAnalysis.root": WHhadr0j_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": WHhadr0j_cut},
            "WHh": {MC_path + "WplusH125/ZZ4lAnalysis.root": cuts.WHhadr_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": cuts.WHhadr_cut},
            "WHh01j": {MC_path + "WplusH125/ZZ4lAnalysis.root": WHhadr01j_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": WHhadr01j_cut},
            "WHl": {MC_path + "WplusH125/ZZ4lAnalysis.root": cuts.WHlept_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": cuts.WHlept_cut},
            "ZHh2j": {MC_path + "ZH125/ZZ4lAnalysis.root": ZHhadr2j_cut},
            "ZHh1j": {MC_path + "ZH125/ZZ4lAnalysis.root": ZHhadr1j_cut},
            "ZHh01j": {MC_path + "ZH125/ZZ4lAnalysis.root": ZHhadr01j_cut},
            "ZHh0j": {MC_path + "ZH125/ZZ4lAnalysis.root": ZHhadr0j_cut},
            "ZHh": {MC_path + "ZH125/ZZ4lAnalysis.root": cuts.ZHhadr_cut},
            "ZHl": {MC_path + "ZH125/ZZ4lAnalysis.root": cuts.ZHlept_cut},
            "ttHh": {MC_path + "ttH125/ZZ4lAnalysis.root": cuts.ttHhadr_cut},
            "ttHl": {MC_path + "ttH125/ZZ4lAnalysis.root": cuts.ttHlept_cut},
            "ZHMET": {MC_path + "ZH125/ZZ4lAnalysis.root": cuts.ZHMET_cut}
          }

In [9]:
# all the model combinations for which neural networks are currently trained
# Each entry is a pair of category names; every name used here is a key of `collections`.
# NOTE(review): presumably ordered as (H1, H0), matching the `disc_pair` argument of the functions
# below -- this list itself is not referenced in the visible part of the notebook.
discriminant_pairs = [("VBF", "ggH"), ("WHh", "ggH"), ("ZHh", "ggH"), ("WHh", "ZHh"), ("VBF", "WHh"),
                     ("VBF", "ZHh"), ("WHl", "ggH"), ("WHl", "VBF"), ("WHl", "WHh"), ("WHl", "ZHh"),
                     ("WHl", "ZHl"), ("WHl", "ZHMET"), ("WHl", "ttHh"), ("WHl", "ttHl"), ("ZHh", "ZHl"),
                     ("ZHh", "ZHMET"), ("ZHh", "ttHh"), ("ZHh", "ttHl"), ("ZHl", "ggH"), ("ZHl", "VBF"),
                     ("ZHl", "WHh"), ("ZHl", "ZHMET"), ("ZHl", "ttHh"), ("ZHl", "ttHl"), ("ZHMET", "ggH"),
                     ("ZHMET", "VBF"), ("ZHMET", "WHh"), ("ZHMET", "ttHh"), ("ZHMET", "ttHl"), ("ttHh", "ggH"),
                      ("ttHh", "VBF"), ("ttHh", "WHh"), ("ttHh", "ttHl"), ("ttHl", "ggH"), ("ttHl", "VBF"),
                     ("ttHl", "WHh")]

In [10]:
def extract_order(df, col_basename, sorted_column, columns, order):
    """Pick, per row, the element ranked `order` (0 = largest) from list-valued columns.

    The ranking is taken from column ``col_basename + sorted_column`` in descending order; the
    element at the resulting position is then read from each ``col_basename + column`` for
    ``column`` in ``columns``. Rows whose ranking list has ``order`` or fewer entries yield 0.

    Returns a DataFrame with one column per entry of ``columns``, named
    ``<col_basename><column>(<col_basename>Pt|<order>)``.

    NOTE(review): assumes every cell of the involved columns holds a sequence (it is passed to
    ``len``/``np.argsort`` and indexed) -- confirm against what FileCollection returns.
    NOTE(review): the output name always contains the literal "Pt", independent of `sorted_column`.
    """
    def get_index(row, order, col_basename, sorted_column):
        # the parameter `sorted_column` (a name suffix) is rebound here to the row's actual values
        sorted_column = row[col_basename + sorted_column]
        if order >= len(sorted_column):
            # not enough entries in this row: flag with -1
            return -1
        else:
            # argsort is ascending, so flip it to obtain descending order and take position `order`
            return np.flipud(np.argsort(sorted_column))[order]
    
    # per-row position of the requested element, stored in a helper column called "index"
    index_column = pd.DataFrame(df.transform(lambda row: get_index(row, order, col_basename, sorted_column), axis = 1, raw = True))
    index_column.columns = ["index"]
    df_temp = pd.concat([index_column, df], axis = 1)
    
    def get_element(row, column_name):
        # rows flagged with -1 above get a 0 placeholder
        if row["index"] == -1:
            return 0
        else:
            return row[column_name][row["index"]]
        
    extracted_cols = pd.DataFrame()
    for column in columns:
        extracted_col = pd.DataFrame(df_temp.transform(lambda row: get_element(row, col_basename + column), axis = 1, raw = True))
        extracted_col.columns = [col_basename + column + "(" + col_basename + "Pt|" + str(order) + ")"]
        extracted_cols = pd.concat([extracted_cols, extracted_col], axis = 1)
        
    return extracted_cols

In [11]:
def prepare_data(df, col_basenames, sorted_column, columns, orders, pt_limits):
    """Unroll list-valued branches into flat columns, one group per (base name, order).

    For every base name (paired with its Pt threshold through ``zip``, so surplus entries of
    either list are ignored) and every requested order, the columns produced by
    ``extract_order`` are appended side by side. Rows whose extracted
    ``<base>Pt(<base>Pt|<order>)`` value lies below the threshold have *all* extracted
    columns of that group set to 0.0.

    Returns the column-wise concatenation of all groups (an empty DataFrame if there are none).
    """
    unrolled = pd.DataFrame()
    for basename, min_pt in zip(col_basenames, pt_limits):
        for rank in orders:
            group = extract_order(df, basename, sorted_column, columns, rank)

            # blank out the complete row of this group when its Pt entry fails the threshold
            pt_column = basename + "Pt(" + basename + "Pt|" + str(rank) + ")"
            below_threshold = group[pt_column] < min_pt
            group[below_threshold] = 0.0

            unrolled = pd.concat([unrolled, group], axis = 1)

    return unrolled

In [12]:
def get_data(H1_coll, H0_coll, read_branches, input_branches, list_branches, pt_limits):
    """Load both collections and reduce them to the final set of input columns.

    ``read_branches`` are requested from each collection via ``get_data(read_branches, 0.0, 1.0)``.
    The list-valued branches named by ``list_branches`` are unrolled with ``prepare_data``
    (Pt/Eta/Phi, orders 0-3, ranked by Pt) and attached to the loaded frames. Both frames are
    then restricted to ``input_branches`` followed by the unrolled columns; the unrolled column
    names are taken from the H1 frame and used for both.

    Returns the tuple ``(H1_df, H0_df)``.
    """
    unrolled_components = ["Pt", "Eta", "Phi"]

    H1_raw = H1_coll.get_data(read_branches, 0.0, 1.0)
    H0_raw = H0_coll.get_data(read_branches, 0.0, 1.0)

    H1_unrolled = prepare_data(H1_raw, list_branches, "Pt", unrolled_components, range(4), pt_limits)
    H0_unrolled = prepare_data(H0_raw, list_branches, "Pt", unrolled_components, range(4), pt_limits)

    H1_full = pd.concat([H1_raw, H1_unrolled], axis = 1)
    H0_full = pd.concat([H0_raw, H0_unrolled], axis = 1)

    # final column selection: the requested scalar inputs first, then everything that was unrolled
    selected_columns = np.concatenate([input_branches, H1_unrolled.columns])

    return H1_full[selected_columns], H0_full[selected_columns]

In [13]:
def get_data_dmatrix(H1_coll, H0_coll, read_branches, input_branches, list_branches, pt_limits):
    """Build a labelled, class-weighted ``xgb.DMatrix`` from an H1 and an H0 collection.

    The data is loaded through ``get_data``. H1 rows get label 1 and H0 rows label 0 (H1 rows
    come first). Every row of a class carries the weight ``1 + N_other / N_own``, which
    up-weights the smaller class. The feature names are the column names of the H1 frame.

    Raises ``ZeroDivisionError`` if either collection yields no rows.
    """
    H1_df, H0_df = get_data(H1_coll, H0_coll, read_branches, input_branches, list_branches, pt_limits)

    complete_input_branches = H1_df.columns
    # single-argument print(...) gives the same output as a Python 2 print statement and is also valid Python 3
    print("number of input variables: " + str(len(complete_input_branches)))
    print("final list of inputs: " + str(complete_input_branches))

    # try with the same weights as used later in the neural network training, to balance out some (very)
    # unbalanced datasets
    H1_class_weight = 1.0 + float(len(H0_df)) / float(len(H1_df))
    H0_class_weight = 1.0 + float(len(H1_df)) / float(len(H0_df))

    print("using class weights: " + str(H1_class_weight) + " (H1), " + str(H0_class_weight) + " (H0)")

    H1_weights = np.full(len(H1_df), H1_class_weight)
    H0_weights = np.full(len(H0_df), H0_class_weight)

    # `.values` exists in every pandas release; `as_matrix()` was deprecated in 0.23 and removed in 1.0
    H1_data = H1_df.values
    H0_data = H0_df.values
    H1_target = np.ones(np.shape(H1_data)[0])
    H0_target = np.zeros(np.shape(H0_data)[0])

    target = np.concatenate([H1_target, H0_target])
    data = np.concatenate([H1_data, H0_data])
    weights = np.concatenate([H1_weights, H0_weights])

    # hand xgboost a plain list of names rather than a pandas Index
    dmatrix = xgb.DMatrix(data, label = target, feature_names = list(complete_input_branches), weight = weights)

    return dmatrix

In [14]:
def get_feature_correlation(source, corr_branches, mandatory_branches, optional_branches, list_branches, pt_limits):
    """Draw the correlation matrix of ``corr_branches`` for one category and return the figure.

    The data comes from ``FileCollection(collections[source], 0.0, 0.5)``. The loaded input
    columns are all ``mandatory_branches`` plus every optional branch that shares a jet tag
    ("0j", "1j" or "2j") with ``source``. ``corr_branches`` must be a list of column names
    contained in the loaded data (it is concatenated with a list to build the tick labels).
    """
    coll = FileCollection(collections[source], 0.0, 0.5)

    input_branches = list(mandatory_branches)
    for optional_branch in optional_branches:
        # one independent check per jet tag, in the order 0j, 1j, 2j
        for jet_tag in ("0j", "1j", "2j"):
            if jet_tag in source and jet_tag in optional_branch:
                input_branches.append(optional_branch)

    # get_data expects two collections; the same one is passed twice and the second frame is dropped
    loaded, _ = get_data(coll, coll, allbranches, input_branches, list_branches, pt_limits)
    corr_matrix = loaded[corr_branches].corr()

    fig = plt.figure(figsize=(10,15))
    ax = fig.add_subplot(111)

    cax = ax.matshow(corr_matrix, vmin = -1.0, vmax = 1.0, cmap = "RdBu")
    fig.colorbar(cax)

    for axis in (ax.xaxis, ax.yaxis):
        axis.set_major_locator(ticker.MultipleLocator(1))

    # the leading empty label is consumed by the tick located before the first matrix cell
    tick_labels = [''] + corr_branches
    ax.set_yticklabels(tick_labels)
    ax.set_xticklabels(tick_labels, rotation = 'vertical')

    return fig

In [15]:
def get_interpolating_function(data, bins):
    """Return a callable that linearly interpolates binned ``data`` between the bin centres.

    ``bins`` holds the bin edges (one more entry than ``data``). The returned function takes a
    scalar ``x`` and gives the ``interp1d`` result strictly between the first and last bin
    centre, and the integer 0 everywhere else (including exactly at those two centres).
    """
    bin_centers = [np.mean([bins[k - 1], bins[k]]) for k in range(1, len(bins))]
    linear_interp = interpolate.interp1d(bin_centers, data, kind = "linear")

    def interpolated_function(x):
        if bin_centers[0] < x < bin_centers[-1]:
            return linear_interp(x)
        return 0

    return interpolated_function

In [16]:
def get_binned_data(df, branch):
    """Histogram ``df[branch]``, weighted by ``df["training_weight"]`` and normalised to unit area.

    The bin width is ``2 * IQR / n**0.33`` (a Freedman-Diaconis-style rule), but never smaller
    than 0.005. The bins start at the smallest value and extend to cover the largest one.

    Returns the ``(hist, bin_edges)`` tuple produced by ``np.histogram``.
    """
    # `.values` exists in every pandas release; `as_matrix()` was deprecated in 0.23 and removed in 1.0
    data = df[branch].values
    weights = df["training_weight"].values

    # set the bin width
    q75, q25 = np.percentile(data, [75, 25])
    bin_width = max(2 * (q75 - q25) / len(data)**0.33, 0.005)

    data_max = np.max(data)
    data_min = np.min(data)
    bins = np.arange(data_min, data_max + bin_width, bin_width)

    # scale the weights such that sum(hist * bin_width) == 1
    weights = weights / (np.sum(weights) * bin_width)

    hist = np.histogram(data, bins = bins, weights = weights)
    return hist

In [17]:
def get_feature_importance_list_BDT(disc_pair, mandatory_branches, optional_branches, list_branches, pt_limits):
    """Train BDTs separating ``disc_pair[0]`` (H1) from ``disc_pair[1]`` (H0) and rank the inputs.

    Inputs are all ``mandatory_branches`` plus those ``optional_branches`` that fit the jet tag
    found in either category name. Fully inclusive categories (no "xxj" tag in either name)
    therefore never receive optional branches. The first half of each collection is used for
    training, the second half for validation. Trees of depth 1 to 7 are tried and the depth
    with the lowest validation RMSE is kept.

    Returns ``(best_params, feature_names, used_variables)``, where ``used_variables`` maps each
    feature used by the best model to its f-score divided by the sum of all f-scores.
    """
    H1_name, H0_name = disc_pair[0], disc_pair[1]

    def tag_in_names(jet_tag):
        return jet_tag in H1_name or jet_tag in H0_name

    # the mandatory branches are *always* inputs; an optional branch is added at most once, by the
    # first matching rule below
    input_branches = list(mandatory_branches)
    for optional_branch in optional_branches:
        accept = (
            (tag_in_names("0j") and "0j" in optional_branch)
            or (tag_in_names("1j") and "1j" in optional_branch)
            or (tag_in_names("2j") and "1j" not in optional_branch)
        )
        if accept:
            input_branches.append(optional_branch)

    def load_split(start_fraction, end_fraction):
        H1_coll = FileCollection(collections[H1_name], start_fraction, end_fraction)
        H0_coll = FileCollection(collections[H0_name], start_fraction, end_fraction)
        return get_data_dmatrix(H1_coll, H0_coll, allbranches, input_branches, list_branches, pt_limits)

    # training data first, validation data second
    dtrain = load_split(0.0, 0.5)
    dval = load_split(0.5, 1.0)

    evallist = [(dtrain, 'train'), (dval, 'eval')]

    params = {'eta': 0.01, 'silent': 1, 'gamma': 0.5, 'objective': 'binary:logistic'}
    params['nthread'] = 4
    params['eval_metric'] = 'rmse'
    max_num_rounds = 2000

    # scan the tree depth and remember the configuration with the smallest validation RMSE
    best_loss, best_imp, best_params = 1e6, None, None
    for tree_depth in range(1,8):
        params['max_depth'] = tree_depth

        bst = xgb.train(params, dtrain, max_num_rounds, evals = evallist, early_stopping_rounds = 10, verbose_eval = False)

        cur_loss = np.sqrt(mean_squared_error(bst.predict(dval), dval.get_label()))
        cur_imp = bst.get_fscore()

        print("for max_depth = " + str(params['max_depth']) + ": loss = " + str(cur_loss))

        if cur_loss < best_loss:
            best_loss = cur_loss
            # `params` keeps being modified by the loop, so store copies
            best_imp = copy.copy(cur_imp)
            best_params = copy.copy(params)

    # normalize the usage score w.r.t. the total score (i.e. sum of all individuals)
    score_sum = sum(best_imp.values())
    ranked = sorted(best_imp.items(), key = lambda item: item[1], reverse = True)
    used_variables = {name: score / float(score_sum) for name, score in ranked}
    return best_params, dtrain.feature_names, used_variables

In [18]:
def get_histogram(df, branch, label):
    """Draw a unit-area, weighted histogram of ``df[branch]`` on the current matplotlib axes.

    Weights come from ``df["training_weight"]``. The bin width is ``2 * IQR / n**0.33``
    (a Freedman-Diaconis-style rule), but never smaller than 0.005. ``label`` is used as the
    legend label.

    Returns whatever ``plt.hist`` returns.
    """
    # `.values` exists in every pandas release; `as_matrix()` was deprecated in 0.23 and removed in 1.0
    data = df[branch].values
    weights = df["training_weight"].values

    # set the bin width
    q75, q25 = np.percentile(data, [75, 25])
    bin_width = max(2 * (q75 - q25) / len(data)**0.33, 0.005)

    data_max = np.max(data)
    data_min = np.min(data)
    bins = np.arange(data_min, data_max + bin_width, bin_width)

    # scale the weights such that the histogram integrates to one
    weights = weights / (np.sum(weights) * bin_width)

    fig = plt.hist(data, bins = bins, weights = weights, alpha = 0.5, label = label)
    return fig

In [19]:
def plot_branch(disc_pair, branch, start_fraction = 0.0, end_fraction = 1.0):
    """Overlay the normalised distributions of ``branch`` for the two categories in ``disc_pair``.

    Both categories are loaded from ``collections`` for the given fraction range, drawn into a
    new figure with ``get_histogram`` (H1 first, then H0), and the figure is shown. Nothing is
    returned.
    """
    H1_name, H0_name = disc_pair[0], disc_pair[1]

    H1_coll = FileCollection(collections[H1_name], start_fraction, end_fraction)
    H0_coll = FileCollection(collections[H0_name], start_fraction, end_fraction)

    H1_df, H0_df = get_data(H1_coll, H0_coll, allbranches, allbranches, list_branches, pt_limits)

    plt.figure()
    for frame, category in ((H1_df, H1_name), (H0_df, H0_name)):
        get_histogram(frame, branch, category)

    plt.legend(loc = 'upper right')
    plt.show()

In [20]:
def plot_variables(discs):
    """Rank the BDT inputs for each discriminant pair and show the scores as a matrix plot.

    For every pair in ``discs`` the normalised feature scores are obtained from
    ``get_feature_importance_list_BDT`` (with ``candidate_branches`` as mandatory and
    ``MELA_branches`` as optional inputs). Variables with a non-zero score form one row of the
    plotted table; variables missing for a pair are filled with 0.

    Returns ``(fig, sorted_implist)``. ``sorted_implist`` is the list of ``(variable, score)``
    tuples, sorted by descending score, of the *last* pair in ``discs`` only.

    Raises ``ValueError`` if ``discs`` is empty.
    """
    if len(discs) == 0:
        # previously this ended in a NameError on `cutimplist` after attempting an empty plot
        raise ValueError("plot_variables needs at least one discriminant pair")

    plotframe = pd.DataFrame()
    cutimplist = {}

    for disc in discs:
        _, _, implist = get_feature_importance_list_BDT(disc, candidate_branches, MELA_branches, list_branches, pt_limits)

        # keep only the variables that were actually used, i.e. have a non-zero normalised score
        cutimplist = {key: val for key, val in implist.items() if val > 0.00}
        curframe = pd.DataFrame(cutimplist, index = [len(plotframe)])

        plotframe = pd.concat([plotframe, curframe])

    plotframe = plotframe.fillna(0.0)

    # single-argument print(...) gives the same output as a Python 2 print statement and is also valid Python 3
    print(plotframe)
    print("number of pre-selected input variables = " + str(len(plotframe.columns)))

    # start the plotting
    parameters = plotframe.columns
    # `.values` exists in every pandas release; `as_matrix()` was deprecated in 0.23 and removed in 1.0
    plotdata = np.transpose(plotframe.values)

    plt.close('all')
    fig = plt.figure(figsize=(10,15))
    ax = fig.add_subplot(111)
    cax = ax.matshow(plotdata, cmap = 'Blues')

    def make_label(disc):
        # "01j" has to be tested before "1j", since every "01j" name also contains "1j"
        for jet_tag in ("0j", "01j", "1j", "2j"):
            if jet_tag in disc[0] or jet_tag in disc[1]:
                return 'D_' + disc[0].replace(jet_tag, '') + "_" + disc[1].replace(jet_tag, '') + "_" + jet_tag
        # inclusive categories carry no jet tag; they used to be mislabelled with a "_2j" suffix
        return 'D_' + disc[0] + "_" + disc[1]

    # make axis labels; the leading empty label is consumed by the tick located before the first cell
    disclabels = np.concatenate([[''], np.array([make_label(disc) for disc in discs])])
    parameters = np.concatenate([[''], np.array(parameters)])

    ax.set_xticklabels(disclabels, rotation = 'vertical')
    ax.set_yticklabels(parameters)

    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    # sort the used variables (of the last discriminant) according to their importance
    sorted_implist = sorted(cutimplist.items(), key = lambda x: x[1], reverse = True)

    return fig, sorted_implist

In [21]:
def append_variables(confhandler, impdict, threshold_fscore):
    """Write the selected input variables of one discriminant into a new config section.

    ``impdict`` maps variable names to one-element lists holding their score, plus the special
    key ``"discriminant"`` whose value is the section name. A section of that name is created in
    ``confhandler``. Every variable whose score exceeds ``threshold_fscore`` is stored under
    ``"periodic_columns"`` if its name contains "phi" or "Phi", and under
    ``"nonperiodic_columns"`` otherwise (both serialised with ``ConfigFileUtils.serialize_list``).
    """
    section_name = impdict["discriminant"]
    confhandler.new_section(section_name)
    cur_sec = confhandler.get_section(section_name)

    periodic_inputs = []
    nonperiodic_inputs = []
    for key, val in impdict.items():
        # Skip the bookkeeping entry *before* looking at its value: that value is the section name
        # (a string), not a score list. The comparison uses equality; the former identity test
        # (`is not`) only worked when the key happened to be the interned literal.
        if key == "discriminant":
            continue

        if val[0] > threshold_fscore:
            if "phi" in key or "Phi" in key:
                periodic_inputs.append(key)
            else:
                nonperiodic_inputs.append(key)
    cur_sec["nonperiodic_columns"] = ConfigFileUtils.serialize_list(nonperiodic_inputs, lambda x: x)
    cur_sec["periodic_columns"] = ConfigFileUtils.serialize_list(periodic_inputs, lambda x: x)

In [22]:
def convert_varname(raw):
    """Return ``raw`` with every "(" replaced by "[" and every ")" replaced by "]"."""
    return raw.replace('(', '[').replace(')', ']')

In [23]:
# Running table of importance scores; the cells below add one row per discriminant.
df = pd.DataFrame()

In [24]:
# Target location of the generated input configuration.
# NOTE(review): out_path is not used in the visible part of the notebook -- presumably the config is saved further down.
out_dir = "/data_CMS/cms/wind/InputConfigurations/"
out_path = os.path.join(out_dir, "inclusive_99_fullmassrange.conf")
# A variable is written to the config only if its normalised score exceeds this value (see append_variables).
threshold_fscore = 0.01

In [25]:
# Config handler that receives one section per discriminant through append_variables().
confhandler = ConfigFileHandler()

In [26]:
%%capture
fig, implist = plot_variables([("VBF", "ggH")])

In [27]:
# Map each variable name (brackets converted by convert_varname) to a one-element list holding
# its score, so that DataFrame.from_dict yields a single-row frame for this discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_ggH_ML"
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [28]:
append_variables(confhandler, impdict, threshold_fscore)

In [29]:
implist

[('ZZPt', 0.08845336323620479),
 ('JetEta(JetPt|0)', 0.08286331151985807),
 ('JetPt(JetPt|0)', 0.06100520510803528),
 ('ZZEta', 0.05874575306699694),
 ('PFMET', 0.055766623709183415),
 ('ZZMass', 0.04984183835712731),
 ('Z1Pt', 0.047984066678940236),
 ('D_VBF2j_ggH_ME', 0.046929655726455674),
 ('ZZPhi', 0.04234380491723711),
 ('Z2Mass', 0.0414232874190363),
 ('Z1Mass', 0.03976635592227485),
 ('Z2Pt', 0.03784163751694589),
 ('ZZMassErr', 0.0374566938358801),
 ('JetPt(JetPt|1)', 0.03526418852198363),
 ('JetPhi(JetPt|0)', 0.03166580193810775),
 ('D_VBF1j_ggH_ME', 0.02970761016920785),
 ('JetEta(JetPt|1)', 0.02510502267820382),
 ('JetPhi(JetPt|1)', 0.02384977154429363),
 ('D_ZHh_ggH_ME', 0.022008736547892014),
 ('D_WHh_ggH_ME', 0.020686538686839947),
 ('JetEta(JetPt|2)', 0.02003380809720665),
 ('D_WHh_ZHh_ME', 0.01979949455221008),
 ('JetPt(JetPt|2)', 0.018594453463656294),
 ('D_VBF2j_ZHh_ME', 0.01703794205760766),
 ('D_VBF2j_WHh_ME', 0.01621784465011967),
 ('JetPhi(JetPt|2)', 0.0082679208

In [30]:
%%capture
fig, implist = plot_variables([("WHh", "ggH")])

In [31]:
# Map each variable name (brackets converted by convert_varname) to a one-element list holding
# its score, so that DataFrame.from_dict yields a single-row frame for this discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHh_ggH_ML"
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [32]:
append_variables(confhandler, impdict, threshold_fscore)

In [33]:
implist

[('ZZEta', 0.08010279001468429),
 ('JetEta(JetPt|0)', 0.07851688693098385),
 ('ZZPt', 0.07795888399412629),
 ('ZZMass', 0.05804698972099853),
 ('JetPt(JetPt|0)', 0.05697503671071953),
 ('PFMET', 0.05568281938325991),
 ('ZZMassErr', 0.05568281938325991),
 ('Z1Pt', 0.0555359765051395),
 ('ZZPhi', 0.04615271659324523),
 ('Z2Mass', 0.044963289280469894),
 ('Z2Pt', 0.04315712187958884),
 ('Z1Mass', 0.04196769456681351),
 ('JetPhi(JetPt|0)', 0.02882525697503671),
 ('JetPt(JetPt|1)', 0.02684287812041116),
 ('D_WHh_ggH_ME', 0.02681350954478708),
 ('D_VBF1j_ggH_ME', 0.02591776798825257),
 ('D_VBF2j_ggH_ME', 0.02433186490455213),
 ('JetEta(JetPt|2)', 0.023083700440528633),
 ('JetEta(JetPt|1)', 0.020822320117474303),
 ('D_WHh_ZHh_ME', 0.01709251101321586),
 ('D_ZHh_ggH_ME', 0.015580029368575624),
 ('JetPt(JetPt|2)', 0.015183553597650513),
 ('JetPhi(JetPt|1)', 0.014185022026431718),
 ('JetPhi(JetPt|2)', 0.011336270190895741),
 ('D_VBF2j_ZHh_ME', 0.011160058737151249),
 ('D_VBF2j_WHh_ME', 0.0098972

In [34]:
%%capture
fig, implist = plot_variables([("ZHh", "ggH")])

In [35]:
# Map each variable name (brackets converted by convert_varname) to a one-element list holding
# its score, so that DataFrame.from_dict yields a single-row frame for this discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHh_ggH_ML"
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [36]:
append_variables(confhandler, impdict, threshold_fscore)

In [37]:
implist

[('ZZPt', 0.08632427291385093),
 ('ZZEta', 0.06690193722781199),
 ('JetEta(JetPt|0)', 0.06464934674876108),
 ('ZZPhi', 0.06202132452320168),
 ('JetPt(JetPt|0)', 0.0591680432497372),
 ('PFMET', 0.05679030885518346),
 ('Z2Mass', 0.05563898483255744),
 ('Z1Pt', 0.05556389848325574),
 ('Z1Mass', 0.0435751113780848),
 ('ZZMass', 0.04287430545126896),
 ('Z2Pt', 0.040796916453922014),
 ('ZZMassErr', 0.03924513190168694),
 ('D_ZHh_ggH_ME', 0.03371377083646193),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.03246233168143365),
 ('D_VBF1j_ggH_ME', 0.03078540321369575),
 ('JetPt(JetPt|1)', 0.027756920458527307),
 ('JetEta(JetPt|1)', 0.025404214847074134),
 ('D_VBF2j_ggH_ME', 0.020748861190368926),
 ('JetPhi(JetPt|0)', 0.020248285528357612),
 ('JetEta(JetPt|2)', 0.017344946688691996),
 ('JetPt(JetPt|2)', 0.016443910497071633),
 ('D_WHh_ZHh_ME', 0.01381588827151224),
 ('JetPhi(JetPt|2)', 0.012189017369975472),
 ('D_WHh_ggH_ME', 0.012013815888271513),
 ('JetPhi(JetPt|1)', 0.010136657155729088),
 ('D_VBF2j_W

In [38]:
%%capture
fig, implist = plot_variables([("WHh", "ZHh")])

In [39]:
# Map each variable name (brackets converted by convert_varname) to a one-element list holding
# its score, so that DataFrame.from_dict yields a single-row frame for this discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHh_ZHh_ML"
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [40]:
append_variables(confhandler, impdict, threshold_fscore)

In [41]:
implist

[('PFMET', 0.05654778887303852),
 ('D_WHh_ZHh_ME', 0.05609129814550642),
 ('ZZMassErr', 0.05352353780313837),
 ('JetPt(JetPt|0)', 0.047075606276747506),
 ('Z2Mass', 0.04553495007132668),
 ('JetEta(JetPt|0)', 0.044907275320970046),
 ('JetPhi(JetPt|1)', 0.04456490727532097),
 ('ZZMass', 0.04433666191155492),
 ('Z1Pt', 0.04365192582025677),
 ('JetPhi(JetPt|0)', 0.042738944365192585),
 ('Z1Mass', 0.04251069900142653),
 ('D_WHh_ggH_ME', 0.0385734664764622),
 ('ZZEta', 0.03754636233951498),
 ('JetEta(JetPt|1)', 0.035606276747503565),
 ('ZZPhi', 0.034293865905848785),
 ('JetPt(JetPt|1)', 0.03372325249643367),
 ('Z2Pt', 0.03320970042796006),
 ('D_ZHh_ggH_ME', 0.032924393723252496),
 ('ZZPt', 0.030813124108416547),
 ('JetPt(JetPt|2)', 0.02579172610556348),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.025677603423680456),
 ('JetPhi(JetPt|2)', 0.024308131241084167),
 ('D_VBF2j_ggH_ME', 0.021854493580599144),
 ('D_VBF2j_WHh_ME', 0.019514978601997145),
 ('JetEta(JetPt|2)', 0.018716119828815977),
 ('D_VBF2

In [42]:
%%capture
fig, implist = plot_variables([("VBF", "WHh")])

In [43]:
# Map each variable name (brackets converted by convert_varname) to a one-element list holding
# its score, so that DataFrame.from_dict yields a single-row frame for this discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_WHh_ML"
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [44]:
append_variables(confhandler, impdict, threshold_fscore)

In [45]:
implist

[('JetEta(JetPt|0)', 0.11353795560427514),
 ('ZZEta', 0.08503699643738011),
 ('JetEta(JetPt|2)', 0.06363387229377912),
 ('JetPt(JetPt|0)', 0.0562893943546177),
 ('D_VBF2j_ggH_ME', 0.054343655796108524),
 ('JetEta(JetPt|1)', 0.039380652233488626),
 ('ZZMass', 0.03798300904357358),
 ('ZZPt', 0.03757193751712798),
 ('PFMET', 0.035955056179775284),
 ('Z1Mass', 0.034365579610852286),
 ('JetPhi(JetPt|0)', 0.03214579336804604),
 ('JetPt(JetPt|2)', 0.03189915045217868),
 ('Z2Mass', 0.03113181693614689),
 ('Z1Pt', 0.03080295971499041),
 ('ZZPhi', 0.030391888188544807),
 ('ZZMassErr', 0.03017265004110715),
 ('D_ZHh_ggH_ME', 0.029926007125239792),
 ('Z2Pt', 0.029213483146067417),
 ('D_WHh_ggH_ME', 0.027898054261441492),
 ('D_VBF1j_ggH_ME', 0.027158125513839407),
 ('D_WHh_ZHh_ME', 0.02170457659632776),
 ('JetPt(JetPt|1)', 0.019375171279802685),
 ('D_VBF2j_ZHh_ME', 0.016525075363113182),
 ('JetPhi(JetPt|1)', 0.015894765689229925),
 ('D_VBF2j_WHh_ME', 0.012030693340641272),
 ('JetPt(JetPt|3)', 0.010

In [46]:
%%capture
fig, implist = plot_variables([("VBF", "ZHh")])

In [47]:
# Map each variable name (brackets converted by convert_varname) to a one-element list holding
# its score, so that DataFrame.from_dict yields a single-row frame for this discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_ZHh_ML"
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [48]:
append_variables(confhandler, impdict, threshold_fscore)

In [49]:
implist

[('JetEta(JetPt|0)', 0.09752560101964576),
 ('ZZEta', 0.08308794444688612),
 ('JetEta(JetPt|2)', 0.057442974552806225),
 ('D_VBF2j_ggH_ME', 0.05412473080472905),
 ('PFMET', 0.04891662637893904),
 ('JetPt(JetPt|0)', 0.04014855183931789),
 ('D_ZHh_ggH_ME', 0.03797301454753219),
 ('JetEta(JetPt|1)', 0.03757746231266207),
 ('Z2Mass', 0.03570957675910869),
 ('ZZMass', 0.03524809915176021),
 ('ZZPt', 0.03291873599085835),
 ('JetPhi(JetPt|0)', 0.03261108425262603),
 ('ZZMassErr', 0.030281721091724167),
 ('Z1Mass', 0.03017184547092691),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.029073089262954335),
 ('JetPt(JetPt|2)', 0.028567661407286952),
 ('Z2Pt', 0.027710631565068342),
 ('JetPt(JetPt|1)', 0.026985452467806443),
 ('D_WHh_ZHh_ME', 0.026567925108776865),
 ('Z1Pt', 0.02518349228673142),
 ('ZZPhi', 0.024546213686107328),
 ('D_VBF2j_ZHh_ME', 0.023469432602294203),
 ('D_WHh_ggH_ME', 0.020986243572276183),
 ('JetPhi(JetPt|1)', 0.019294159011998418),
 ('D_VBF2j_WHh_ME', 0.018678855535533775),
 ('JetPhi

In [50]:
%%capture
fig, implist = plot_variables([("WHl", "ggH")])

In [51]:
# Map each variable name (brackets converted by convert_varname) to a one-element list holding
# its score, so that DataFrame.from_dict yields a single-row frame for this discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_ggH_ML"
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [52]:
append_variables(confhandler, impdict, threshold_fscore)

In [53]:
implist

[('PFMET', 0.11870082035827892),
 ('ZZMass', 0.11156454043194375),
 ('ZZPt', 0.0882303699983258),
 ('Z1Mass', 0.06470785200066968),
 ('ZZMassErr', 0.060334003013561024),
 ('ZZEta', 0.05275824543780345),
 ('Z2Pt', 0.051272392432613424),
 ('Z1Pt', 0.05051900217646074),
 ('JetPt(JetPt|0)', 0.04999581449857693),
 ('ZZPhi', 0.047170601038004356),
 ('D_VBF1j_ggH_ME', 0.04497321279089235),
 ('Z2Mass', 0.04124811652435962),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03894609074167085),
 ('JetEta(JetPt|0)', 0.033588648920140636),
 ('JetPhi(JetPt|0)', 0.02592918131592165),
 ('ExtraLepEta(ExtraLepPt|0)', 0.015632847815168258),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.013707517160555835),
 ('D_WHh_ZHh_ME', 0.010798593671521849),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.010568391093252971),
 ('JetEta(JetPt|1)', 0.008810480495563369),
 ('JetPt(JetPt|1)', 0.006717729784028127),
 ('JetPhi(JetPt|1)', 0.006654947262682069),
 ('D_VBF2j_WHh_ME', 0.006027122049221497),
 ('D_VBF2j_ggH_ME', 0.005838774485183325),
 ('ExtraLepPhi(E

In [54]:
%%capture
fig, implist = plot_variables([("WHl", "VBF")])

In [55]:
# Map each variable name (brackets converted by convert_varname) to a one-element list holding
# its score, so that DataFrame.from_dict yields a single-row frame for this discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_VBF_ML"
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [56]:
append_variables(confhandler, impdict, threshold_fscore)

In [57]:
implist

[('PFMET', 0.10766624311732317),
 ('ZZMass', 0.08235916984328674),
 ('ZZEta', 0.06361711139347734),
 ('JetEta(JetPt|0)', 0.06304531977975435),
 ('JetPt(JetPt|0)', 0.05622617534942821),
 ('D_VBF1j_ggH_ME', 0.052244811520542146),
 ('ZZMassErr', 0.044684455739093604),
 ('ZZPt', 0.043244387971198645),
 ('Z1Mass', 0.04273612875900042),
 ('Z1Pt', 0.04133841592545531),
 ('ExtraLepEta(ExtraLepPt|0)', 0.03979246082168573),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03962304108428632),
 ('Z2Mass', 0.03888182973316391),
 ('Z2Pt', 0.035959339263024144),
 ('D_VBF2j_ggH_ME', 0.03202033036848793),
 ('ZZPhi', 0.030199068191444304),
 ('JetPhi(JetPt|0)', 0.0295425667090216),
 ('JetPt(JetPt|1)', 0.017598475222363404),
 ('D_VBF2j_ZHh_ME', 0.017090216010165183),
 ('D_WHh_ZHh_ME', 0.016412537060567556),
 ('JetEta(JetPt|1)', 0.015607793307920373),
 ('JetEta(JetPt|2)', 0.015523083439220669),
 ('JetPhi(JetPt|1)', 0.0120288013553579),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.01012282930961457),
 ('D_VBF2j_WHh_ME', 0.0097839898348

In [58]:
%%capture
fig, implist = plot_variables([("WHl", "WHh")])

In [59]:
# Map each variable name (brackets converted by convert_varname) to a one-element list holding
# its score, so that DataFrame.from_dict yields a single-row frame for this discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_WHh_ML"
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [60]:
append_variables(confhandler, impdict, threshold_fscore)

In [61]:
implist

[('PFMET', 0.1185586181247159),
 ('ZZMass', 0.09886772180668622),
 ('ZZPt', 0.061469482210008676),
 ('JetPt(JetPt|0)', 0.05785363031530229),
 ('ZZMassErr', 0.05204760527294516),
 ('Z1Mass', 0.05045663043927435),
 ('Z2Pt', 0.04646886234968387),
 ('Z2Mass', 0.046386214306376294),
 ('JetEta(JetPt|0)', 0.045353113765031615),
 ('ExtraLepPt(ExtraLepPt|0)', 0.04353485681226497),
 ('Z1Pt', 0.04225381214099756),
 ('ZZPhi', 0.03721228149923551),
 ('ZZEta', 0.037191619488408614),
 ('JetPhi(JetPt|0)', 0.03467085416752758),
 ('D_VBF1j_ggH_ME', 0.02570354146865573),
 ('D_WHh_ggH_ME', 0.022087689573949337),
 ('D_ZHh_ggH_ME', 0.021013265010950865),
 ('JetPhi(JetPt|1)', 0.015393198066035787),
 ('ExtraLepEta(ExtraLepPt|0)', 0.015289888011901318),
 ('JetPt(JetPt|1)', 0.014752675730402082),
 ('D_VBF2j_WHh_ME', 0.013698913178230505),
 ('JetEta(JetPt|1)', 0.012789784701847183),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.012727798669366503),
 ('D_WHh_ZHh_ME', 0.012107938344559692),
 ('D_VBF2j_ggH_ME', 0.00991776519690

In [62]:
%%capture
fig, implist = plot_variables([("WHl", "ZHh")])

In [63]:
# Map each variable name (brackets converted by convert_varname) to a one-element list holding
# its score, so that DataFrame.from_dict yields a single-row frame for this discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_ZHh_ML"
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [64]:
append_variables(confhandler, impdict, threshold_fscore)

In [65]:
implist

[('PFMET', 0.12824846234831275),
 ('ZZMass', 0.10544688867955893),
 ('Z2Pt', 0.05859699673075858),
 ('ZZPhi', 0.04757023327976949),
 ('JetPt(JetPt|0)', 0.046739070205574336),
 ('JetEta(JetPt|0)', 0.046489721283315785),
 ('D_VBF1j_ggH_ME', 0.04435640272621488),
 ('JetPhi(JetPt|0)', 0.04258325483459855),
 ('ZZMassErr', 0.04191832437524242),
 ('Z1Pt', 0.04039452540588463),
 ('Z1Mass', 0.03984041668975453),
 ('ZZPt', 0.03981271125394802),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03657117526458691),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.03637723721394138),
 ('ZZEta', 0.035352136089100684),
 ('Z2Mass', 0.026929683603923088),
 ('D_ZHh_ggH_ME', 0.023798969357787997),
 ('ExtraLepEta(ExtraLepPt|0)', 0.01883969634842356),
 ('D_WHh_ggH_ME', 0.017315897379065772),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.015902920152934007),
 ('JetPt(JetPt|1)', 0.01576439297390148),
 ('D_VBF2j_WHh_ME', 0.009586080789050811),
 ('D_WHh_ZHh_ME', 0.009586080789050811),
 ('JetPt(JetPt|2)', 0.009198204687759738),
 ('JetPhi(JetPt|1)', 0.0

In [66]:
%%capture
# Evaluate the (WHl, ZHl) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("WHl", "ZHl")])

In [67]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHl_ZHl_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [68]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [69]:
# Echo the importance list behind D_WHl_ZHl_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('PFMET', 0.10454989259910173),
 ('ZZMass', 0.10015621948838117),
 ('Z2Mass', 0.09193516891232181),
 ('Z1Mass', 0.08808826401093536),
 ('Z2Pt', 0.06992774848662371),
 ('Z1Pt', 0.0568638937707479),
 ('ZZPt', 0.05264596758445616),
 ('ZZEta', 0.05036125756688147),
 ('ZZMassErr', 0.0431361062292521),
 ('JetPt(JetPt|0)', 0.03690685413005272),
 ('ZZPhi', 0.03669205233352861),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03141964460066393),
 ('JetEta(JetPt|0)', 0.02472173403632103),
 ('JetPhi(JetPt|0)', 0.02327670376879516),
 ('ExtraLepEta(ExtraLepPt|0)', 0.023159539152509276),
 ('D_VBF1j_ggH_ME', 0.01816051552431166),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.01527045498925991),
 ('nExtraLep', 0.012087482913493458),
 ('JetPhi(JetPt|1)', 0.010076157000585824),
 ('D_ZHh_ggH_ME', 0.01003710212849053),
 ('JetEta(JetPt|1)', 0.010017574692442882),
 ('D_VBF2j_ggH_ME', 0.009529388791251708),
 ('ExtraLepPt(ExtraLepPt|1)', 0.007830501855106425),
 ('nExtraZ', 0.007459480570201133),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.007

In [70]:
%%capture
# Evaluate the (WHl, ZHMET) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("WHl", "ZHMET")])

In [71]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHl_ZHMET_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [72]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [73]:
# Echo the importance list behind D_WHl_ZHMET_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('ZZMass', 0.13284155715348375),
 ('PFMET', 0.11796429456979916),
 ('ZZPt', 0.08269278452764692),
 ('Z1Mass', 0.07122489461939004),
 ('Z1Pt', 0.060190924869823956),
 ('ZZMassErr', 0.05145053310190925),
 ('Z2Mass', 0.05083064716092239),
 ('ZZEta', 0.04704934292090256),
 ('Z2Pt', 0.046925365732705185),
 ('ExtraLepPt(ExtraLepPt|0)', 0.044817753533349866),
 ('ZZPhi', 0.04456979915695512),
 ('JetPt(JetPt|0)', 0.03973468881725763),
 ('JetEta(JetPt|0)', 0.03880485990577734),
 ('ExtraLepEta(ExtraLepPt|0)', 0.025477312174559882),
 ('D_VBF1j_ggH_ME', 0.024547483263079595),
 ('JetPhi(JetPt|0)', 0.0205802132407637),
 ('JetEta(JetPt|1)', 0.013079593354822713),
 ('JetPhi(JetPt|1)', 0.011839821472848996),
 ('ExtraLepPhi(ExtraLepPt|1)', 0.01177783287875031),
 ('nExtraLep', 0.010538060996776594),
 ('JetPt(JetPt|1)', 0.0065707909744607),
 ('D_ZHh_ggH_ME', 0.006446813786263328),
 ('D_VBF2j_ggH_ME', 0.0061988594098685845),
 ('D_VBF2j_WHh_ME', 0.006012893627572527),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.004153

In [74]:
%%capture
# Evaluate the (WHl, ttHh) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("WHl", "ttHh")])

In [75]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHl_ttHh_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [76]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [77]:
# Echo the importance list behind D_WHl_ttHh_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('PFMET', 0.12721868365180466),
 ('JetPt(JetPt|0)', 0.06900212314225053),
 ('ExtraLepPt(ExtraLepPt|0)', 0.06768577494692145),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.060764331210191085),
 ('ZZMass', 0.05549893842887473),
 ('Z1Mass', 0.039193205944798304),
 ('ZZMassErr', 0.03707006369426752),
 ('ZZEta', 0.03592356687898089),
 ('Z2Mass', 0.0326963906581741),
 ('JetPt(JetPt|1)', 0.03150743099787686),
 ('D_WHh_ggH_ME', 0.030658174097664543),
 ('JetPt(JetPt|2)', 0.02781316348195329),
 ('ZZPhi', 0.026454352441613586),
 ('Z2Pt', 0.02543524416135881),
 ('JetPhi(JetPt|0)', 0.024670912951167728),
 ('JetEta(JetPt|0)', 0.02416135881104034),
 ('JetEta(JetPt|1)', 0.02394904458598726),
 ('JetPt(JetPt|3)', 0.023524416135881104),
 ('D_WHh_ZHh_ME', 0.0229723991507431),
 ('ZZPt', 0.020552016985138005),
 ('D_VBF1j_ggH_ME', 0.019787685774946922),
 ('Z1Pt', 0.019447983014861996),
 ('D_ZHh_ggH_ME', 0.018004246284501063),
 ('D_VBF2j_ggH_ME', 0.017579617834394906),
 ('JetEta(JetPt|2)', 0.017494692144373672),
 (

In [78]:
%%capture
# Evaluate the (WHl, ttHl) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("WHl", "ttHl")])

In [79]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHl_ttHl_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [80]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [81]:
# Echo the importance list behind D_WHl_ttHl_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('ZZMass', 0.12014992120618424),
 ('JetPt(JetPt|0)', 0.07087184292346352),
 ('PFMET', 0.07036074790238085),
 ('ZZEta', 0.055283444780442094),
 ('ZZPt', 0.05081136334596874),
 ('Z2Pt', 0.04420971932365092),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.04344307679202692),
 ('Z1Mass', 0.03603219898632821),
 ('JetPhi(JetPt|0)', 0.03185825631415307),
 ('JetPt(JetPt|1)', 0.03130457004131351),
 ('JetEta(JetPt|0)', 0.030239788747391284),
 ('Z2Mass', 0.029089824949955278),
 ('ExtraLepEta(ExtraLepPt|0)', 0.026321393585757486),
 ('Z1Pt', 0.02615102857872993),
 ('D_VBF2j_ggH_ME', 0.02559734230589037),
 ('D_WHh_ZHh_ME', 0.024489969760211252),
 ('ZZMassErr', 0.02372332722858725),
 ('ZZPhi', 0.0211678521231739),
 ('ExtraLepPt(ExtraLepPt|0)', 0.020358618339793008),
 ('ExtraLepPt(ExtraLepPt|1)', 0.02018825333276545),
 ('JetEta(JetPt|1)', 0.018867924528301886),
 ('D_ZHh_ggH_ME', 0.018867924528301886),
 ('JetPt(JetPt|2)', 0.017888325737893437),
 ('JetPhi(JetPt|1)', 0.0176327782273521),
 ('D_VBF1j_ggH_ME', 0.01

In [82]:
%%capture
# Evaluate the (ZHh, ZHl) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("ZHh", "ZHl")])

In [83]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHh_ZHl_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [84]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [85]:
# Echo the importance list behind D_ZHh_ZHl_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('ZZMass', 0.14115536995395653),
 ('Z1Mass', 0.10865724381625441),
 ('ZZMassErr', 0.07690866259770854),
 ('Z2Pt', 0.07463325837884142),
 ('Z2Mass', 0.06850305171859943),
 ('PFMET', 0.06799443195202913),
 ('JetPt(JetPt|0)', 0.05080843773423279),
 ('ZZPt', 0.04920227005032659),
 ('ZZPhi', 0.04063604240282685),
 ('Z1Pt', 0.040555734018631544),
 ('JetPhi(JetPt|0)', 0.039458186101295645),
 ('JetEta(JetPt|0)', 0.031079344683584965),
 ('ZZEta', 0.0300085662276475),
 ('ExtraLepPt(ExtraLepPt|0)', 0.02623407217046793),
 ('D_VBF1j_ggH_ME', 0.02422636256558518),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.015660134918085447),
 ('D_WHh_ggH_ME', 0.014401970232358926),
 ('D_ZHh_ggH_ME', 0.010333012099796552),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.009074847414070029),
 ('JetEta(JetPt|1)', 0.00851268872470286),
 ('JetPhi(JetPt|1)', 0.008325302494913802),
 ('ExtraLepEta(ExtraLepPt|0)', 0.007896991112538816),
 ('D_VBF2j_ggH_ME', 0.006478209658421673),
 ('nCleanedJetsPt30', 0.0064514401970232355),
 ('nExtraLep', 0.

In [86]:
%%capture
# Evaluate the (ZHh, ZHMET) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("ZHh", "ZHMET")])

In [87]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHh_ZHMET_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [88]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [89]:
# Echo the importance list behind D_ZHh_ZHMET_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('PFMET', 0.13909844290014492),
 ('ZZPt', 0.0678211750715374),
 ('ZZMass', 0.06187520903786837),
 ('JetPt(JetPt|0)', 0.058121817979114795),
 ('Z1Pt', 0.05641235274443495),
 ('ZZEta', 0.05418261548180906),
 ('Z2Pt', 0.04968597866884685),
 ('ZZPhi', 0.046601508788881044),
 ('Z2Mass', 0.0448920435542012),
 ('Z1Mass', 0.04318257831952135),
 ('JetEta(JetPt|0)', 0.04132446393399978),
 ('D_VBF1j_ggH_ME', 0.03597309450369765),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.032108216581812773),
 ('D_WHh_ggH_ME', 0.029358207291240848),
 ('D_WHh_ZHh_ME', 0.02917239585268869),
 ('ZZMassErr', 0.02909807127726783),
 ('D_ZHh_ggH_ME', 0.026794009439221077),
 ('JetPhi(JetPt|0)', 0.0204392582407373),
 ('JetEta(JetPt|1)', 0.018692630718347023),
 ('D_VBF2j_ggH_ME', 0.017986547251848825),
 ('JetPt(JetPt|1)', 0.01791222267642796),
 ('JetPhi(JetPt|1)', 0.015645323126091643),
 ('JetPt(JetPt|2)', 0.015459511687539484),
 ('D_VBF2j_ZHh_ME', 0.01047976513434167),
 ('nCleanedJetsPt30', 0.006875023226429819),
 ('D_VBF2j_WH

In [90]:
%%capture
# Evaluate the (ZHh, ttHh) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("ZHh", "ttHh")])

In [91]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHh_ttHh_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [92]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [93]:
# Echo the importance list behind D_ZHh_ttHh_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('nCleanedJetsPt30BTagged_bTagSF', 0.09059597119298612),
 ('PFMET', 0.06502452771109488),
 ('ZZMass', 0.061371464356538986),
 ('JetPt(JetPt|0)', 0.060223358730821415),
 ('nCleanedJetsPt30', 0.05970149253731343),
 ('JetPt(JetPt|2)', 0.0472810771318234),
 ('ZZEta', 0.04185366871934036),
 ('JetPt(JetPt|1)', 0.040809936332324395),
 ('JetEta(JetPt|2)', 0.040183696900114814),
 ('JetPt(JetPt|3)', 0.03757436593257489),
 ('D_WHh_ggH_ME', 0.03653063354555892),
 ('ZZPt', 0.03548690115854295),
 ('D_VBF2j_ZHh_ME', 0.034338795532825386),
 ('Z1Pt', 0.030476985700866297),
 ('JetEta(JetPt|1)', 0.027972027972027972),
 ('D_ZHh_ggH_ME', 0.027554535017221583),
 ('D_VBF1j_ggH_ME', 0.027137042062415198),
 ('D_WHh_ZHh_ME', 0.022231499843440142),
 ('JetEta(JetPt|0)', 0.021396513933827365),
 ('Z2Mass', 0.020874647740319382),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.018995929443690638),
 ('ExtraLepPt(ExtraLepPt|0)', 0.018995929443690638),
 ('Z1Mass', 0.016073478760045924),
 ('Z2Pt', 0.013986013986013986),
 ('D_VBF2j_gg

In [94]:
%%capture
# Evaluate the (ZHh, ttHl) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("ZHh", "ttHl")])

In [95]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHh_ttHl_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [96]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [97]:
# Echo the importance list behind D_ZHh_ttHl_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('PFMET', 0.11440391320185253),
 ('ZZMass', 0.11021491387833689),
 ('Z1Mass', 0.05711089139824114),
 ('Z2Pt', 0.053858562730915334),
 ('ZZPhi', 0.045220377790498),
 ('ExtraLepPt(ExtraLepPt|0)', 0.044726023833064475),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.03921007441327991),
 ('ZZEta', 0.039001925378571056),
 ('D_VBF1j_ggH_ME', 0.03829942238642868),
 ('ZZPt', 0.03369412499349534),
 ('JetPt(JetPt|0)', 0.03171670916376125),
 ('D_VBF2j_ZHh_ME', 0.031638653275745436),
 ('ZZMassErr', 0.030832075766248634),
 ('Z2Mass', 0.026955299994796275),
 ('JetPhi(JetPt|0)', 0.021621480980381955),
 ('JetEta(JetPt|0)', 0.02128323879898007),
 ('JetPt(JetPt|2)', 0.0193838788572618),
 ('Z1Pt', 0.018551282718426394),
 ('JetPt(JetPt|3)', 0.018525264089087785),
 ('D_WHh_ZHh_ME', 0.01829109642504033),
 ('D_VBF2j_WHh_ME', 0.01652182963001509),
 ('D_VBF2j_ggH_ME', 0.01639173648332206),
 ('JetPt(JetPt|1)', 0.015897382525888536),
 ('ExtraLepEta(ExtraLepPt|0)', 0.014544413800281002),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.

In [98]:
%%capture
# Evaluate the (ZHl, ggH) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("ZHl", "ggH")])

In [99]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHl_ggH_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [100]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [101]:
# Echo the importance list behind D_ZHl_ggH_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('ZZMass', 0.13273106323835998),
 ('Z1Mass', 0.09458520368874782),
 ('ZZMassErr', 0.07488308322220762),
 ('Z2Pt', 0.06791503108389835),
 ('Z1Pt', 0.060233270101233964),
 ('ZZPt', 0.059519561256878836),
 ('PFMET', 0.05533121724921586),
 ('ZZEta', 0.054842890145183405),
 ('Z2Mass', 0.05471141746332851),
 ('ZZPhi', 0.043724057622598275),
 ('JetPt(JetPt|0)', 0.04301034877824315),
 ('JetEta(JetPt|0)', 0.03831489585485416),
 ('D_VBF1j_ggH_ME', 0.028341753845575943),
 ('ExtraLepPt(ExtraLepPt|0)', 0.02734631782581748),
 ('JetPhi(JetPt|0)', 0.025562045714929663),
 ('D_VBF2j_ggH_ME', 0.01667824878387769),
 ('D_WHh_ggH_ME', 0.01027365099637511),
 ('D_ZHh_ggH_ME', 0.009860451139116879),
 ('JetPhi(JetPt|2)', 0.009541160340326428),
 ('JetEta(JetPt|1)', 0.009015269612906862),
 ('ExtraLepEta(ExtraLepPt|0)', 0.008639633379035741),
 ('JetPt(JetPt|1)', 0.007155870255244821),
 ('JetPhi(JetPt|1)', 0.006874143079841482),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.006122870612099243),
 ('JetPt(JetPt|2)', 0.00582

In [102]:
%%capture
# Evaluate the (ZHl, VBF) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("ZHl", "VBF")])

In [103]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHl_VBF_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [104]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [105]:
# Echo the importance list behind D_ZHl_VBF_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('ZZMass', 0.11754224452825707),
 ('Z1Mass', 0.09102248091943099),
 ('JetEta(JetPt|0)', 0.06866035102307487),
 ('Z2Pt', 0.05817717459091854),
 ('PFMET', 0.05526683099218959),
 ('ZZEta', 0.04944614379473168),
 ('Z2Mass', 0.046327918510379235),
 ('ZZPt', 0.045644878686187745),
 ('D_VBF2j_ggH_ME', 0.04430849642146527),
 ('ZZMassErr', 0.044011522584860276),
 ('Z1Pt', 0.039735099337748346),
 ('JetPt(JetPt|0)', 0.039438125501143347),
 ('D_VBF1j_ggH_ME', 0.0364386897514329),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03352834615270395),
 ('ZZPhi', 0.02545065779704808),
 ('JetPhi(JetPt|0)', 0.021619695304843645),
 ('D_ZHh_ggH_ME', 0.01897662815905919),
 ('JetEta(JetPt|2)', 0.018620259555133193),
 ('ExtraLepEta(ExtraLepPt|0)', 0.017966917114602205),
 ('JetEta(JetPt|1)', 0.014789297062928756),
 ('JetPt(JetPt|2)', 0.013779586018471773),
 ('D_WHh_ZHh_ME', 0.013660796483829774),
 ('JetPhi(JetPt|2)', 0.012621388055712291),
 ('D_WHh_ggH_ME', 0.011730466545897307),
 ('nExtraLep', 0.009354675853057345),
 ('JetPh

In [106]:
%%capture
# Evaluate the (ZHl, WHh) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("ZHl", "WHh")])

In [107]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHl_WHh_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [108]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [109]:
# Echo the importance list behind D_ZHl_WHh_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('ZZMass', 0.15121786197564277),
 ('Z1Mass', 0.11007273342354533),
 ('Z2Mass', 0.07717354533152909),
 ('PFMET', 0.07632780784844384),
 ('Z2Pt', 0.07341001353179973),
 ('ZZMassErr', 0.06529093369418133),
 ('ZZPt', 0.05192828146143437),
 ('JetPt(JetPt|0)', 0.04507780784844384),
 ('ExtraLepPt(ExtraLepPt|0)', 0.04393606224627875),
 ('Z1Pt', 0.035055818673883625),
 ('JetEta(JetPt|0)', 0.03141914749661705),
 ('ZZEta', 0.020593707713125846),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.019198240866035183),
 ('JetPhi(JetPt|1)', 0.01767591339648173),
 ('ZZPhi', 0.01725304465493911),
 ('D_VBF1j_ggH_ME', 0.01649188092016238),
 ('JetPhi(JetPt|0)', 0.014927266576454669),
 ('JetPt(JetPt|1)', 0.013320365358592693),
 ('D_ZHh_ggH_ME', 0.013151217861975643),
 ('D_WHh_ggH_ME', 0.01226319350473613),
 ('nExtraLep', 0.011671177266576455),
 ('D_WHh_ZHh_ME', 0.010952300405953993),
 ('JetPhi(JetPt|2)', 0.01074086603518268),
 ('ExtraLepEta(ExtraLepPt|0)', 0.008034506089309877),
 ('JetPhi(JetPt|3)', 0.0070619079837618

In [110]:
%%capture
# Evaluate the (ZHl, ZHMET) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("ZHl", "ZHMET")])

In [111]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHl_ZHMET_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [112]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [113]:
# Echo the importance list behind D_ZHl_ZHMET_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('ZZMass', 0.16440014903129657),
 ('PFMET', 0.12239195230998509),
 ('Z1Mass', 0.10846684053651266),
 ('Z2Pt', 0.09235283159463488),
 ('Z2Mass', 0.07288561847988077),
 ('ZZPt', 0.05635245901639344),
 ('Z1Pt', 0.045081967213114756),
 ('JetPt(JetPt|0)', 0.040238450074515646),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03851527570789866),
 ('ZZMassErr', 0.03562779433681073),
 ('ZZEta', 0.025055886736214606),
 ('JetEta(JetPt|1)', 0.022075260804769),
 ('D_VBF1j_ggH_ME', 0.021190387481371087),
 ('D_VBF2j_ggH_ME', 0.020491803278688523),
 ('ZZPhi', 0.019700074515648286),
 ('D_VBF2j_WHh_ME', 0.015368852459016393),
 ('JetEta(JetPt|0)', 0.013505961251862892),
 ('JetPhi(JetPt|0)', 0.013040238450074515),
 ('D_ZHh_ggH_ME', 0.00912816691505216),
 ('JetPhi(JetPt|1)', 0.008476154992548435),
 ('ExtraLepEta(ExtraLepPt|0)', 0.007311847988077496),
 ('nCleanedJetsPt30', 0.006985842026825634),
 ('D_WHh_ZHh_ME', 0.00647354694485842),
 ('nExtraLep', 0.005868107302533532),
 ('JetPt(JetPt|2)', 0.005448956780923994),
 ('Z2F

In [114]:
%%capture
# Evaluate the (ZHl, ttHh) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("ZHl", "ttHh")])

In [115]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHl_ttHh_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [116]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [117]:
# Echo the importance list behind D_ZHl_ttHh_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('ZZMass', 0.10585170762511183),
 ('Z1Mass', 0.07817186759985266),
 ('JetPt(JetPt|0)', 0.07030468873335789),
 ('Z2Mass', 0.06504236173235804),
 ('Z2Pt', 0.06475293374730305),
 ('JetPt(JetPt|1)', 0.041177708782823765),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.04073041098773878),
 ('PFMET', 0.0367573541019839),
 ('ZZMassErr', 0.031100352575909068),
 ('D_WHh_ZHh_ME', 0.031100352575909068),
 ('JetPhi(JetPt|0)', 0.02796926801031416),
 ('ZZPt', 0.027811398200284165),
 ('D_WHh_ggH_ME', 0.02568015576487923),
 ('Z1Pt', 0.02528548123980424),
 ('JetPt(JetPt|2)', 0.025180234699784244),
 ('ExtraLepPt(ExtraLepPt|0)', 0.025101299794769247),
 ('JetPt(JetPt|3)', 0.02428563910961427),
 ('JetEta(JetPt|1)', 0.022917434089354313),
 ('JetPhi(JetPt|1)', 0.021601852339104353),
 ('JetEta(JetPt|0)', 0.02131242435404936),
 ('ZZPhi', 0.020154712413829396),
 ('ExtraLepEta(ExtraLepPt|0)', 0.019602168078724412),
 ('JetPhi(JetPt|2)', 0.019102247013629427),
 ('ZZEta', 0.01797084670841446),
 ('JetEta(JetPt|2)', 0.0125769

In [118]:
%%capture
# Evaluate the (ZHl, ttHl) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("ZHl", "ttHl")])

In [119]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHl_ttHl_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [120]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [121]:
# Echo the importance list behind D_ZHl_ttHl_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('PFMET', 0.09440200798851199),
 ('Z1Mass', 0.08365009834799503),
 ('Z2Mass', 0.08220202970954156),
 ('Z1Pt', 0.0667680314713584),
 ('Z2Pt', 0.05984143648408934),
 ('JetPt(JetPt|0)', 0.059551822756398656),
 ('ZZMass', 0.051587445244904606),
 ('ZZPt', 0.05093581435760055),
 ('ZZEta', 0.04356273154014167),
 ('ZZPhi', 0.03421062158346306),
 ('JetEta(JetPt|0)', 0.03142308945444014),
 ('ZZMassErr', 0.027983926438113167),
 ('JetPhi(JetPt|0)', 0.02584802519639431),
 ('D_VBF1j_ggH_ME', 0.022312324270837104),
 ('JetPt(JetPt|1)', 0.01910243878893193),
 ('ExtraLepPt(ExtraLepPt|0)', 0.01843874066297409),
 ('D_VBF2j_ggH_ME', 0.018330135515090083),
 ('JetEta(JetPt|1)', 0.016544184194330812),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.015144384510492465),
 ('JetPhi(JetPt|1)', 0.014082467508959924),
 ('D_WHh_ggH_ME', 0.013949727883768357),
 ('D_WHh_ZHh_ME', 0.01339463490569453),
 ('JetPt(JetPt|2)', 0.01317742460992651),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.012248247233585538),
 ('D_ZHh_ggH_ME', 0.012163776563

In [122]:
%%capture
# Evaluate the (ZHMET, ggH) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("ZHMET", "ggH")])

In [123]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHMET_ggH_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [124]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [125]:
# Echo the importance list behind D_ZHMET_ggH_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('ZZPt', 0.14483097835807549),
 ('PFMET', 0.13155070649257736),
 ('ZZPhi', 0.07440529422285816),
 ('D_VBF1j_ggH_ME', 0.06988910749418709),
 ('ZZMass', 0.06868181005186907),
 ('Z2Pt', 0.0672509390091218),
 ('JetPt(JetPt|0)', 0.06340547308173851),
 ('Z1Pt', 0.05669826506886067),
 ('ZZEta', 0.051511357538901806),
 ('Z2Mass', 0.04708460025040243),
 ('Z1Mass', 0.044535861205508856),
 ('ZZMassErr', 0.04301556072258988),
 ('JetPhi(JetPt|0)', 0.025263816848506527),
 ('JetEta(JetPt|0)', 0.022804507243784654),
 ('D_VBF2j_ggH_ME', 0.01560543730996244),
 ('JetPt(JetPt|1)', 0.01046324450008943),
 ('D_WHh_ZHh_ME', 0.008808799856912896),
 ('JetEta(JetPt|1)', 0.008764085136827044),
 ('JetPhi(JetPt|1)', 0.008406367376140225),
 ('D_VBF2j_WHh_ME', 0.006260060812019317),
 ('nCleanedJetsPt30', 0.005857628331246647),
 ('Z2Flav', 0.00433732784832767),
 ('D_WHh_ggH_ME', 0.004069039527812556),
 ('D_ZHh_ggH_ME', 0.003219459846181363),
 ('D_VBF2j_ZHh_ME', 0.003174745126095511),
 ('nCleanedJetsPt30BTagged_bTagSF

In [126]:
%%capture
# Evaluate the (ZHMET, VBF) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("ZHMET", "VBF")])

In [127]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHMET_VBF_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [128]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [129]:
# Echo the importance list behind D_ZHMET_VBF_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('PFMET', 0.1393362062448023),
 ('JetPt(JetPt|0)', 0.07926967566341574),
 ('ZZPt', 0.07911846979662811),
 ('Z1Pt', 0.07031072805624858),
 ('D_VBF1j_ggH_ME', 0.06619036818628563),
 ('ZZEta', 0.05534134724427308),
 ('ZZMassErr', 0.05477432524381946),
 ('ZZMass', 0.05428290617675966),
 ('Z1Mass', 0.047894458304982235),
 ('JetEta(JetPt|0)', 0.04615559083692447),
 ('Z2Pt', 0.045701973236561576),
 ('ZZPhi', 0.04120359869962954),
 ('D_VBF2j_ggH_ME', 0.0376124593634233),
 ('Z2Mass', 0.03602479776215317),
 ('JetPhi(JetPt|0)', 0.026612232554623118),
 ('JetPhi(JetPt|1)', 0.02011038028275497),
 ('JetPt(JetPt|1)', 0.017993498147728132),
 ('D_VBF2j_ZHh_ME', 0.017350873213880697),
 ('JetEta(JetPt|1)', 0.01311710894382702),
 ('D_WHh_ZHh_ME', 0.012172072276404324),
 ('JetPhi(JetPt|2)', 0.010395403341649656),
 ('D_WHh_ggH_ME', 0.008467528540107357),
 ('D_VBF2j_WHh_ME', 0.00839192560671354),
 ('JetEta(JetPt|2)', 0.0032131246692371665),
 ('JetPt(JetPt|2)', 0.0031753232025402587),
 ('Z2Flav', 0.0015876616

In [130]:
%%capture
# Evaluate the (ZHMET, WHh) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("ZHMET", "WHh")])

In [131]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHMET_WHh_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [132]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [133]:
# Echo the importance list behind D_ZHMET_WHh_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('PFMET', 0.13626567744214804),
 ('ZZPt', 0.07804274863098393),
 ('JetPt(JetPt|0)', 0.06369899311075782),
 ('Z1Pt', 0.05560854972619678),
 ('ZZEta', 0.050909733262674436),
 ('ZZMass', 0.04889595477830772),
 ('JetEta(JetPt|0)', 0.04847200141317788),
 ('Z2Pt', 0.0450803744921392),
 ('D_WHh_ZHh_ME', 0.044833068362480126),
 ('D_VBF1j_ggH_ME', 0.04409114997350291),
 ('ZZPhi', 0.044020491079314605),
 ('ZZMassErr', 0.042430665960077725),
 ('Z2Mass', 0.03949832185126303),
 ('Z1Mass', 0.038261791202967674),
 ('D_WHh_ggH_ME', 0.03607136548313019),
 ('JetPhi(JetPt|0)', 0.029394099982335276),
 ('D_ZHh_ggH_ME', 0.023458752870517578),
 ('JetEta(JetPt|1)', 0.017382087970323264),
 ('D_VBF2j_ggH_ME', 0.0172054407348525),
 ('JetPhi(JetPt|1)', 0.017064122946475888),
 ('JetPt(JetPt|1)', 0.016781487369722663),
 ('JetPt(JetPt|2)', 0.013107224871930754),
 ('D_VBF2j_WHh_ME', 0.008443737855502561),
 ('D_VBF2j_ZHh_ME', 0.00794912559618442),
 ('JetPhi(JetPt|2)', 0.006323971029853383),
 ('nCleanedJetsPt30', 0.00

In [134]:
%%capture
# Evaluate the (ZHMET, ttHh) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("ZHMET", "ttHh")])

In [135]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHMET_ttHh_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [136]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [137]:
# Echo the importance list behind D_ZHMET_ttHh_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('PFMET', 0.22811816192560175),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.08588621444201312),
 ('JetPt(JetPt|0)', 0.08096280087527352),
 ('nCleanedJetsPt30', 0.07494529540481401),
 ('ZZMass', 0.054978118161925604),
 ('JetPt(JetPt|2)', 0.04950765864332604),
 ('D_WHh_ggH_ME', 0.046225382932166305),
 ('JetEta(JetPt|0)', 0.04513129102844639),
 ('JetPt(JetPt|1)', 0.0437636761487965),
 ('ExtraLepPt(ExtraLepPt|0)', 0.04321663019693654),
 ('JetEta(JetPt|2)', 0.029814004376367616),
 ('JetPt(JetPt|3)', 0.02762582056892779),
 ('Z1Pt', 0.026531728665207876),
 ('D_VBF2j_ggH_ME', 0.024343544857768053),
 ('ZZEta', 0.024070021881838075),
 ('Z2Pt', 0.02188183807439825),
 ('D_ZHh_ggH_ME', 0.021334792122538294),
 ('ZZPt', 0.0175054704595186),
 ('D_WHh_ZHh_ME', 0.013949671772428883),
 ('Z2Mass', 0.012035010940919038),
 ('JetPhi(JetPt|0)', 0.010667396061269147),
 ('ZZPhi', 0.006838074398249453),
 ('JetPhi(JetPt|2)', 0.004102844638949671),
 ('D_VBF2j_WHh_ME', 0.0035557986870897156),
 ('JetPhi(JetPt|1)', 0.0013

In [138]:
%%capture
# Evaluate the (ZHMET, ttHl) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("ZHMET", "ttHl")])

In [139]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHMET_ttHl_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [140]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [141]:
# Echo the importance list behind D_ZHMET_ttHl_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('ZZMass', 0.20171001926782273),
 ('PFMET', 0.07496387283236994),
 ('Z1Mass', 0.06328275529865125),
 ('JetPt(JetPt|3)', 0.04588150289017341),
 ('Z2Mass', 0.045640655105973024),
 ('ExtraLepPt(ExtraLepPt|0)', 0.04359344894026975),
 ('nCleanedJetsPt30', 0.04335260115606936),
 ('JetPt(JetPt|0)', 0.040402215799614644),
 ('nExtraLep', 0.040161368015414256),
 ('ZZMassErr', 0.03395953757225433),
 ('JetEta(JetPt|0)', 0.030647880539499038),
 ('Z2Pt', 0.02980491329479769),
 ('ZZEta', 0.029503853564547208),
 ('D_VBF2j_ggH_ME', 0.028901734104046242),
 ('Z1Pt', 0.02661368015414258),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.020712909441233142),
 ('ZZPhi', 0.02005057803468208),
 ('D_WHh_ZHh_ME', 0.01740125240847784),
 ('ZZPt', 0.016919556840077073),
 ('D_WHh_ggH_ME', 0.016738921001926782),
 ('JetPhi(JetPt|0)', 0.016678709055876685),
 ('JetPt(JetPt|2)', 0.016377649325626204),
 ('JetPt(JetPt|1)', 0.014270231213872832),
 ('D_ZHh_ggH_ME', 0.014270231213872832),
 ('ExtraLepEta(ExtraLepPt|0)', 0.0105370905587

In [142]:
%%capture
# Evaluate the (ttHh, ggH) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("ttHh", "ggH")])

In [143]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ttHh_ggH_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [144]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [145]:
# Echo the importance list behind D_ttHh_ggH_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('JetPt(JetPt|0)', 0.07197969204048231),
 ('PFMET', 0.058853001102241224),
 ('JetPt(JetPt|1)', 0.058685994856207624),
 ('ZZPt', 0.057617154881592574),
 ('JetEta(JetPt|0)', 0.04970105881959985),
 ('ZZMass', 0.04849861384815792),
 ('Z1Pt', 0.04552590266875981),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.04295400647984235),
 ('JetEta(JetPt|1)', 0.04248638899094826),
 ('D_VBF2j_ggH_ME', 0.04111693777347273),
 ('JetPt(JetPt|2)', 0.0384448378369351),
 ('Z2Pt', 0.03390226794482114),
 ('D_ZHh_ggH_ME', 0.03296703296703297),
 ('JetPt(JetPt|3)', 0.03233240923210528),
 ('D_WHh_ggH_ME', 0.029626908046360933),
 ('JetEta(JetPt|2)', 0.028725074317779486),
 ('JetPhi(JetPt|0)', 0.02819065433047196),
 ('ZZEta', 0.025919369384414977),
 ('Z2Mass', 0.0243161094224924),
 ('D_VBF1j_ggH_ME', 0.02428270817328568),
 ('Z1Mass', 0.02351447944153111),
 ('ZZPhi', 0.02324726944787735),
 ('JetPhi(JetPt|1)', 0.01810347707004242),
 ('nCleanedJetsPt30', 0.017502254584321454),
 ('ZZMassErr', 0.014896957146197268),
 ('D_WHh_ZH

In [146]:
%%capture
# Evaluate the (ttHh, VBF) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("ttHh", "VBF")])

In [147]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ttHh_VBF_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [148]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [149]:
# Echo the importance list behind D_ttHh_VBF_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('JetEta(JetPt|2)', 0.07562189054726368),
 ('JetEta(JetPt|0)', 0.06135986733001658),
 ('D_VBF2j_ggH_ME', 0.05740345889599621),
 ('D_ZHh_ggH_ME', 0.050888415067519545),
 ('JetPt(JetPt|0)', 0.049372186685619525),
 ('PFMET', 0.04619758351101635),
 ('JetEta(JetPt|1)', 0.0445392087183132),
 ('ZZMass', 0.04318881781568349),
 ('JetPt(JetPt|1)', 0.042620232172470976),
 ('JetPt(JetPt|2)', 0.04041696280502251),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.033665008291873966),
 ('Z1Pt', 0.027315801942667615),
 ('ZZEta', 0.02703150912106136),
 ('D_WHh_ggH_ME', 0.026699834162520728),
 ('JetPhi(JetPt|0)', 0.025657427149964464),
 ('Z1Mass', 0.02515991471215352),
 ('ZZPt', 0.025112532575219143),
 ('Z2Mass', 0.025065150438284767),
 ('JetPhi(JetPt|1)', 0.0248045486851457),
 ('JetPt(JetPt|3)', 0.023833214877990998),
 ('ZZPhi', 0.023619995261786306),
 ('JetEta(JetPt|3)', 0.02155887230514096),
 ('ZZMassErr', 0.02125088841506752),
 ('D_VBF1j_ggH_ME', 0.020161099265576878),
 ('Z2Pt', 0.020090026060175313),
 ('nCle

In [150]:
%%capture
# Evaluate the (ttHh, WHh) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("ttHh", "WHh")])

In [151]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ttHh_WHh_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [152]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [153]:
# Echo the importance list behind D_ttHh_WHh_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('PFMET', 0.06787287453227869),
 ('ZZMass', 0.05970255292947473),
 ('JetPt(JetPt|0)', 0.05828162743333491),
 ('JetPt(JetPt|1)', 0.04812201013593521),
 ('Z1Pt', 0.03971486761710794),
 ('ZZEta', 0.0381992137545588),
 ('D_WHh_ggH_ME', 0.03791502865533084),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.037536115189693554),
 ('JetEta(JetPt|0)', 0.036801970350021314),
 ('ZZPt', 0.03445744328139061),
 ('Z2Pt', 0.03381802680812769),
 ('JetPt(JetPt|2)', 0.03239710131198788),
 ('JetPhi(JetPt|1)', 0.03185241320513428),
 ('D_VBF2j_WHh_ME', 0.03057358025860844),
 ('JetEta(JetPt|1)', 0.030407805617392127),
 ('D_VBF2j_ggH_ME', 0.028868469663240658),
 ('ZZPhi', 0.028607966655615023),
 ('JetEta(JetPt|2)', 0.028134324823568417),
 ('Z1Mass', 0.028063278548761426),
 ('JetPhi(JetPt|0)', 0.027258087434282197),
 ('D_WHh_ZHh_ME', 0.027258087434282197),
 ('ZZMassErr', 0.026547624686212287),
 ('Z2Mass', 0.025600341022119073),
 ('JetPt(JetPt|3)', 0.023516316961114007),
 ('D_ZHh_ggH_ME', 0.0201534599535831),
 ('D_VBF2j_

In [154]:
%%capture
# Evaluate the (ttHh, ttHl) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("ttHh", "ttHl")])

In [155]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ttHh_ttHl_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [156]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [157]:
# Echo the importance list behind D_ttHh_ttHl_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('PFMET', 0.10206678091083762),
 ('ZZMass', 0.07233733187931661),
 ('Z1Mass', 0.054681414550553045),
 ('ZZMassErr', 0.042841564106558656),
 ('ZZPt', 0.04027107026016513),
 ('JetPt(JetPt|3)', 0.04019317650724412),
 ('nCleanedJetsPt30', 0.03596094926520226),
 ('JetPt(JetPt|0)', 0.03385781793633484),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03281923456405463),
 ('JetPhi(JetPt|2)', 0.032299942877914524),
 ('Z2Pt', 0.030352599054889132),
 ('JetPt(JetPt|2)', 0.029625590694292984),
 ('JetPhi(JetPt|1)', 0.02915822817676689),
 ('D_WHh_ZHh_ME', 0.028301396894635716),
 ('Z1Pt', 0.027937892714337643),
 ('JetPhi(JetPt|0)', 0.027885963545723633),
 ('Z2Mass', 0.02697720309497845),
 ('ZZEta', 0.026250194734382302),
 ('JetPhi(JetPt|3)', 0.02599054889131225),
 ('ZZPhi', 0.02588669055408423),
 ('JetEta(JetPt|1)', 0.025808796801163214),
 ('JetEta(JetPt|2)', 0.02305655086462066),
 ('JetPt(JetPt|1)', 0.02287479877447162),
 ('JetEta(JetPt|0)', 0.0212909591317443),
 ('D_WHh_ggH_ME', 0.02040816326530612),
 ('nCleanedJ

In [158]:
%%capture
# Evaluate the (ttHl, ggH) class pair: plot_variables returns two values, bound to fig and
# implist (the (variable, importance) tuples used by the following cells).
fig, implist = plot_variables([("ttHl", "ggH")])

In [159]:
# One-row table of importances for this discriminant: every score is wrapped in a
# single-element list (one row), keyed by the converted variable name.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ttHl_ggH_ML"  # row label; stays in impdict for later cells
# DataFrame.append is deprecated since pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise append on every pandas version.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [160]:
# Forward this discriminant's importances (impdict also holds the "discriminant" label).
# NOTE(review): presumably registers variables above threshold_fscore in confhandler — confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [161]:
# Echo the importance list behind D_ttHl_ggH_ML; the saved output shows
# (variable, importance) tuples in descending order of importance.
implist

[('ZZMass', 0.13970722781335773),
 ('PFMET', 0.1207227813357731),
 ('ZZPt', 0.05546660567246112),
 ('JetPt(JetPt|0)', 0.05519213174748399),
 ('Z1Mass', 0.048284537968892954),
 ('D_VBF1j_ggH_ME', 0.04707227813357731),
 ('ExtraLepPt(ExtraLepPt|0)', 0.042703568161024705),
 ('Z2Pt', 0.04153705397987191),
 ('ZZMassErr', 0.03989021043000915),
 ('Z1Pt', 0.036207685269899356),
 ('ZZPhi', 0.03156450137236962),
 ('Z2Mass', 0.030237877401646844),
 ('ZZEta', 0.02884263494967978),
 ('JetEta(JetPt|0)', 0.024382433668801464),
 ('JetPt(JetPt|1)', 0.021340347666971637),
 ('JetEta(JetPt|1)', 0.021180237877401645),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.019876486733760294),
 ('JetPhi(JetPt|0)', 0.019304666056724613),
 ('D_VBF2j_ggH_ME', 0.0192131747483989),
 ('JetPt(JetPt|2)', 0.015667886550777676),
 ('nCleanedJetsPt30', 0.014569990850869168),
 ('nExtraLep', 0.013838060384263495),
 ('D_WHh_ZHh_ME', 0.013014638609332113),
 ('D_ZHh_ggH_ME', 0.01187099725526075),
 ('JetPt(JetPt|3)', 0.011550777676120769),
 (

In [162]:
%%capture
# Run plot_variables for the (ttHl, VBF) pair and keep both return values; the
# %%capture magic above suppresses the output of this cell.
# implist holds (variable name, score) tuples and is displayed a few cells further down.
fig, implist = plot_variables([("ttHl", "VBF")])

In [163]:
# Turn the (variable, score) pairs into a one-row table: each converted variable name
# becomes a column holding a single-element list, plus a column naming the discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHl_VBF_ML"
# pd.concat is the drop-in replacement for DataFrame.append, which was deprecated in
# pandas 1.4 and removed in 2.0. The existing row index is kept, exactly as append did.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [164]:
# Hand this discriminant's scores (impdict also carries the "discriminant" name) to
# append_variables together with the configuration handler.
# NOTE(review): presumably only variables scoring above threshold_fscore are added to the
# configuration held by confhandler -- confirm against the append_variables definition.
append_variables(confhandler, impdict, threshold_fscore)

In [165]:
# Display the (variable name, score) tuples returned by plot_variables for D_ttHl_VBF_ML.
implist

[('ZZMass', 0.10641869387972609),
 ('PFMET', 0.08297141030081799),
 ('D_VBF2j_ggH_ME', 0.050065279369054916),
 ('JetEta(JetPt|0)', 0.044016945991313845),
 ('JetPt(JetPt|0)', 0.041885374756867655),
 ('Z1Mass', 0.04145906050997842),
 ('D_VBF1j_ggH_ME', 0.03922091071380992),
 ('ZZMassErr', 0.03887453038821241),
 ('ZZPt', 0.03796861261357278),
 ('ExtraLepPt(ExtraLepPt|0)', 0.036103487783432364),
 ('JetEta(JetPt|2)', 0.03586368601955717),
 ('D_ZHh_ggH_ME', 0.031600543550664786),
 ('ZZEta', 0.03130745250592843),
 ('JetEta(JetPt|1)', 0.030374890090858225),
 ('Z2Pt', 0.0289893687884682),
 ('ZZPhi', 0.02704431003703605),
 ('JetPt(JetPt|2)', 0.025632144094215447),
 ('Z1Pt', 0.024726226319575818),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.02336734965761637),
 ('D_WHh_ZHh_ME', 0.021182489142309024),
 ('Z2Mass', 0.019956835682502463),
 ('ExtraLepEta(ExtraLepPt|0)', 0.019850257120780156),
 ('JetPhi(JetPt|0)', 0.01878447150355706),
 ('JetPt(JetPt|1)', 0.01878447150355706),
 ('D_WHh_ggH_ME', 0.01787855372

In [166]:
%%capture
# Run plot_variables for the (ttHl, WHh) pair and keep both return values; the
# %%capture magic above suppresses the output of this cell.
# implist holds (variable name, score) tuples and is displayed a few cells further down.
fig, implist = plot_variables([("ttHl", "WHh")])

In [167]:
# Turn the (variable, score) pairs into a one-row table: each converted variable name
# becomes a column holding a single-element list, plus a column naming the discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHl_WHh_ML"
# pd.concat is the drop-in replacement for DataFrame.append, which was deprecated in
# pandas 1.4 and removed in 2.0. The existing row index is kept, exactly as append did.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [168]:
# Hand this discriminant's scores (impdict also carries the "discriminant" name) to
# append_variables together with the configuration handler.
# NOTE(review): presumably only variables scoring above threshold_fscore are added to the
# configuration held by confhandler -- confirm against the append_variables definition.
append_variables(confhandler, impdict, threshold_fscore)

In [169]:
# Display the (variable name, score) tuples returned by plot_variables for D_ttHl_WHh_ML.
implist

[('ZZMass', 0.13358230393565068),
 ('PFMET', 0.11031312841137604),
 ('ExtraLepPt(ExtraLepPt|0)', 0.06664751508187303),
 ('ZZMassErr', 0.047441211474535235),
 ('D_VBF1j_ggH_ME', 0.04551237329174704),
 ('JetPt(JetPt|0)', 0.042680674682972874),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.04173677514671482),
 ('ZZPt', 0.0410391102720893),
 ('Z1Mass', 0.03389830508474576),
 ('D_VBF2j_WHh_ME', 0.02774243854393237),
 ('JetEta(JetPt|0)', 0.02585463947141626),
 ('Z2Pt', 0.024418270611893136),
 ('ZZPhi', 0.02281774531128165),
 ('JetEta(JetPt|1)', 0.02236631509828867),
 ('JetPt(JetPt|2)', 0.022079041326384045),
 ('D_WHh_ggH_ME', 0.022079041326384045),
 ('Z1Pt', 0.02175072844420733),
 ('D_WHh_ZHh_ME', 0.02166865022366315),
 ('ZZEta', 0.02010916403332376),
 ('nCleanedJetsPt30', 0.0199450075922354),
 ('JetPt(JetPt|3)', 0.01818032585053556),
 ('JetPhi(JetPt|3)', 0.01760577830672631),
 ('Z2Mass', 0.017441621865637955),
 ('JetPhi(JetPt|1)', 0.01711330898346124),
 ('JetPt(JetPt|1)', 0.014938236139040505),
 ('

In [170]:
# Save the variable configuration held by confhandler (filled by the append_variables
# calls above) to out_path.
confhandler.save_configuration(out_path)

In [171]:
# Replace every missing entry of the table with 0.0.
# NOTE(review): presumably the gaps come from discriminants whose score list lacks a
# variable that another discriminant contributed as a column -- confirm.
df = df.fillna(value = 0.0)

In [172]:
# Persist the zero-filled table as CSV. The file name is relative, so the file is written
# to the kernel's current working directory (not to out_dir, where the figure is saved).
df.to_csv("input_parameters_table_inclusive.csv")

In [173]:
# Now plot the data contained in the table to have a global picture of the relevant input variables.
# Every column except "discriminant" is a score column.
datacol_labels = [col for col in df.columns.tolist() if col != "discriminant"]
# Transpose so that rows correspond to variables and columns to discriminants.
# .values replaces DataFrame.as_matrix(), deprecated in pandas 0.23 and removed in 1.0.
variable_data = df[datacol_labels].values.transpose()
# Prepend an empty placeholder label (consumed by the tick labelling in the plotting cell).
datacol_labels = np.concatenate([[''], np.array(datacol_labels)])

In [174]:
# Discriminant names in table order, preceded by an empty placeholder label (consumed by the
# tick labelling in the plotting cell).
# .values replaces Series.as_matrix(), deprecated in pandas 0.23 and removed in 1.0.
discriminant_labels = np.concatenate([[''], df["discriminant"].values])

In [175]:
# Heat map of the score table: one row per input variable, one column per discriminant.
fig = plt.figure(figsize = (15, 10))
ax = fig.add_subplot(111)
cax = ax.matshow(variable_data, interpolation = 'nearest', cmap = 'Blues', vmin = np.min(variable_data), vmax = np.max(variable_data))

# Put exactly one tick on every matrix column/row and label it explicitly. Setting the labels
# first and a MultipleLocator afterwards only lines up as long as the locator happens to emit
# an off-screen tick at -1 that swallows the leading '' placeholder. Both label arrays carry
# that placeholder as their first element, so it is dropped here.
ax.set_xticks(np.arange(variable_data.shape[1]))
ax.set_yticks(np.arange(variable_data.shape[0]))
ax.set_xticklabels(discriminant_labels[1:], rotation = 'vertical')
ax.set_yticklabels(datacol_labels[1:])

In [176]:
# Lay out and save the heat map through its own figure handle. plt.tight_layout() and
# plt.savefig() act on the *current* pyplot figure; with the inline backend the figure created
# in the previous cell is closed once that cell has been rendered, so the pyplot calls
# would operate on a fresh, empty figure and write a blank PDF.
fig.tight_layout()
fig.savefig(os.path.join(out_dir, "input_variables_inclusive_fullmassrange.pdf"))