In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
from trainlib.FileCollection import FileCollection
from trainlib.config import Config
from trainlib.ConfigFileHandler import ConfigFileHandler
from trainlib.ConfigFileUtils import ConfigFileUtils
import trainlib.cuts as cuts
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd
import copy
import re
from scipy import interpolate
import scipy.integrate as integrate
import pickle
import os

Welcome to JupyROOT 6.10/09


In [3]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error



In [4]:
# Earlier variant of the scalar candidate inputs (angular variables instead of the D_*_ME entries), kept for reference:
#candidate_branches = ["PFMET", "nCleanedJetsPt30", "nCleanedJetsPt30BTagged_bTagSF", "nExtraLep", "ZZMass", "nExtraZ", "Z1Mass", "Z2Mass", "Z1Pt", "Z2Pt", "ZZMassErr", "ZZPt", "ZZEta", "ZZPhi", "Z1Flav", "Z2Flav", "costhetastar", "helphi", "helcosthetaZ1", "helcosthetaZ2", "phistarZ1", "phistarZ2", "xi", "xistar"]
# Scalar per-event branches that are always offered to the BDT (passed as `mandatory_branches` in plot_variables).
candidate_branches = ["PFMET", "nCleanedJetsPt30", "nCleanedJetsPt30BTagged_bTagSF", "nExtraLep", "ZZMass", "nExtraZ", "Z1Mass", "Z2Mass", "Z1Pt", "Z2Pt", "ZZMassErr", "ZZPt", "ZZEta", "ZZPhi", "Z1Flav", "Z2Flav", "D_VBF2j_ggH_ME", "D_VBF1j_ggH_ME", "D_WHh_ggH_ME", "D_ZHh_ggH_ME", "D_WHh_ZHh_ME", "D_VBF2j_WHh_ME", "D_VBF2j_ZHh_ME"]
# Earlier variant that also unrolled the "Lep" collection (three entries, matching the three pt_limits below):
#list_branches = ["Jet", "Lep", "ExtraLep"]
# Optional branches that are only added for categories whose name carries a matching jet tag; currently none.
MELA_branches = []
# Base names of the list-valued collections that get unrolled into Pt/Eta/Phi columns (see get_data).
list_branches = ["Jet", "ExtraLep"]
# Minimum Pt per entry of list_branches. prepare_data pairs the two lists with zip(), so with two
# list_branches only the first two limits are used and the third value is ignored.
pt_limits = [30.0, 0.0, 0.0]

In [5]:
# Full list of branches requested from the FileCollection: the list-valued Jet/Lep/ExtraLep
# kinematics, all candidate (and optional) scalar inputs, plus generator-level ids and the
# per-event "training_weight" used by the histogram helpers.
allbranches = ["JetPt", "JetEta", "JetPhi", "LepPt", "LepEta", "LepPhi", "ExtraLepPt", "ExtraLepEta", "ExtraLepPhi"] + candidate_branches + MELA_branches + ["LHEAssociatedParticleId", "GenAssocLep1Id", "GenAssocLep2Id", "training_weight"]

In [6]:
# Alternative input directory, kept for reference:
#MC_path = "/data_CMS/cms/wind/CJLST_NTuples_randomizeda/"
# Base directory of the input ROOT files. File paths in `collections` are built by plain string
# concatenation (MC_path + "<sample>/ZZ4lAnalysis.root"), so the trailing '/' is required.
MC_path = "/data_CMS/cms/wind/CJLST_NTuples/"

In [7]:
# these are the cuts without any m4l restriction imposed
def WHhadr0j_cut(row):
    """Accept a row that passes ``cuts.WHhadr_cut`` and has ``nCleanedJetsPt30 == 0``.

    The jet count is only looked up when the category cut passes; otherwise the
    (falsy) result of the category cut itself is returned.
    """
    in_category = cuts.WHhadr_cut(row)
    return in_category and row["nCleanedJetsPt30"] == 0

def WHhadr01j_cut(row):
    """Accept a row that passes ``cuts.WHhadr_cut`` and has ``nCleanedJetsPt30`` equal to 0 or 1.

    If the category cut fails, its (falsy) result is returned and the jet count
    is never read.
    """
    in_category = cuts.WHhadr_cut(row)
    if not in_category:
        return in_category
    return row["nCleanedJetsPt30"] == 0 or row["nCleanedJetsPt30"] == 1

def WHhadr1j_cut(row):
    """Accept a row that passes ``cuts.WHhadr_cut`` and has ``nCleanedJetsPt30 == 1``.

    The jet count is only looked up when the category cut passes; otherwise the
    (falsy) result of the category cut itself is returned.
    """
    in_category = cuts.WHhadr_cut(row)
    return in_category and row["nCleanedJetsPt30"] == 1

def WHhadr2j_cut(row):
    """Accept a row that passes ``cuts.WHhadr_cut`` and has ``nCleanedJetsPt30 >= 2``.

    The jet count is only looked up when the category cut passes; otherwise the
    (falsy) result of the category cut itself is returned.
    """
    in_category = cuts.WHhadr_cut(row)
    return in_category and row["nCleanedJetsPt30"] >= 2

def ZHhadr0j_cut(row):
    """Accept a row that passes ``cuts.ZHhadr_cut`` and has ``nCleanedJetsPt30 == 0``.

    The jet count is only looked up when the category cut passes; otherwise the
    (falsy) result of the category cut itself is returned.
    """
    in_category = cuts.ZHhadr_cut(row)
    return in_category and row["nCleanedJetsPt30"] == 0

def ZHhadr01j_cut(row):
    """Accept a row that passes ``cuts.ZHhadr_cut`` and has ``nCleanedJetsPt30`` equal to 0 or 1.

    If the category cut fails, its (falsy) result is returned and the jet count
    is never read.
    """
    in_category = cuts.ZHhadr_cut(row)
    if not in_category:
        return in_category
    return row["nCleanedJetsPt30"] == 0 or row["nCleanedJetsPt30"] == 1

def ZHhadr1j_cut(row):
    """Accept a row that passes ``cuts.ZHhadr_cut`` and has ``nCleanedJetsPt30 == 1``.

    The jet count is only looked up when the category cut passes; otherwise the
    (falsy) result of the category cut itself is returned.
    """
    in_category = cuts.ZHhadr_cut(row)
    return in_category and row["nCleanedJetsPt30"] == 1

def ZHhadr2j_cut(row):
    """Accept a row that passes ``cuts.ZHhadr_cut`` and has ``nCleanedJetsPt30 >= 2``.

    The jet count is only looked up when the category cut passes; otherwise the
    (falsy) result of the category cut itself is returned.
    """
    in_category = cuts.ZHhadr_cut(row)
    return in_category and row["nCleanedJetsPt30"] >= 2

def mZZ0j_cut(row):
    """Accept a row whose ``nCleanedJetsPt30`` entry equals 0 (no other requirement)."""
    n_jets = row["nCleanedJetsPt30"]
    return n_jets == 0

def mZZ01j_cut(row):
    """Accept a row whose ``nCleanedJetsPt30`` entry equals 0 or 1 (no other requirement)."""
    if row["nCleanedJetsPt30"] == 0:
        return row["nCleanedJetsPt30"] == 0
    return row["nCleanedJetsPt30"] == 1

def mZZ1j_cut(row):
    """Accept a row whose ``nCleanedJetsPt30`` entry equals 1 (no other requirement)."""
    n_jets = row["nCleanedJetsPt30"]
    return n_jets == 1

def mZZ2j_cut(row):
    """Accept a row whose ``nCleanedJetsPt30`` entry is at least 2 (no other requirement)."""
    n_jets = row["nCleanedJetsPt30"]
    return n_jets >= 2

In [8]:
# Event categories available to the studies below. Each category name maps to a dict of
# {ROOT file path: cut function}; the inner dict is handed to FileCollection(...) as-is.
# Categories with a "0j"/"1j"/"01j"/"2j" suffix use the jet-multiplicity cuts defined above,
# the un-suffixed ones use the cut functions from trainlib.cuts directly.
collections = {"VBF2j": {MC_path + "VBFH125/ZZ4lAnalysis.root": mZZ2j_cut},
            "VBF1j": {MC_path + "VBFH125/ZZ4lAnalysis.root": mZZ1j_cut},
            "VBF0j": {MC_path + "VBFH125/ZZ4lAnalysis.root": mZZ0j_cut},
            "VBF01j": {MC_path + "VBFH125/ZZ4lAnalysis.root": mZZ01j_cut},
            "VBF": {MC_path + "VBFH125/ZZ4lAnalysis.root": cuts.mZZ_cut},
            "ggH2j": {MC_path + "ggH125/ZZ4lAnalysis.root": mZZ2j_cut},
            "ggH1j": {MC_path + "ggH125/ZZ4lAnalysis.root": mZZ1j_cut},
            "ggH0j": {MC_path + "ggH125/ZZ4lAnalysis.root": mZZ0j_cut},
            "ggH01j": {MC_path + "ggH125/ZZ4lAnalysis.root": mZZ01j_cut},
            "ggH" : {MC_path + "ggH125/ZZ4lAnalysis.root": cuts.mZZ_cut},
            "WHh2j": {MC_path + "WplusH125/ZZ4lAnalysis.root": WHhadr2j_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": WHhadr2j_cut},
            "WHh1j": {MC_path + "WplusH125/ZZ4lAnalysis.root": WHhadr1j_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": WHhadr1j_cut},
            "WHh0j": {MC_path + "WplusH125/ZZ4lAnalysis.root": WHhadr0j_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": WHhadr0j_cut},
            "WHh": {MC_path + "WplusH125/ZZ4lAnalysis.root": cuts.WHhadr_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": cuts.WHhadr_cut},
            "WHh01j": {MC_path + "WplusH125/ZZ4lAnalysis.root": WHhadr01j_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": WHhadr01j_cut},
            "WHl": {MC_path + "WplusH125/ZZ4lAnalysis.root": cuts.WHlept_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": cuts.WHlept_cut},
            "ZHh2j": {MC_path + "ZH125/ZZ4lAnalysis.root": ZHhadr2j_cut},
            "ZHh1j": {MC_path + "ZH125/ZZ4lAnalysis.root": ZHhadr1j_cut},
            "ZHh01j": {MC_path + "ZH125/ZZ4lAnalysis.root": ZHhadr01j_cut},
            "ZHh0j": {MC_path + "ZH125/ZZ4lAnalysis.root": ZHhadr0j_cut},
            "ZHh": {MC_path + "ZH125/ZZ4lAnalysis.root": cuts.ZHhadr_cut},
            "ZHl": {MC_path + "ZH125/ZZ4lAnalysis.root": cuts.ZHlept_cut},
            "ttHh": {MC_path + "ttH125/ZZ4lAnalysis.root": cuts.ttHhadr_cut},
            "ttHl": {MC_path + "ttH125/ZZ4lAnalysis.root": cuts.ttHlept_cut},
            "ZHMET": {MC_path + "ZH125/ZZ4lAnalysis.root": cuts.ZHMET_cut}
          }

In [9]:
# All the model combinations for which neural networks are currently trained. Every entry is a
# pair of category names that are keys of `collections`.
# NOTE(review): this list is not referenced anywhere in the visible part of the notebook; the
# cells further down call plot_variables with hand-written pairs instead -- confirm whether
# it is still needed.
discriminant_pairs = [("VBF", "ggH"), ("WHh", "ggH"), ("ZHh", "ggH"), ("WHh", "ZHh"), ("VBF", "WHh"),
                     ("VBF", "ZHh"), ("WHl", "ggH"), ("WHl", "VBF"), ("WHl", "WHh"), ("WHl", "ZHh"),
                     ("WHl", "ZHl"), ("WHl", "ZHMET"), ("WHl", "ttHh"), ("WHl", "ttHl"), ("ZHh", "ZHl"),
                     ("ZHh", "ZHMET"), ("ZHh", "ttHh"), ("ZHh", "ttHl"), ("ZHl", "ggH"), ("ZHl", "VBF"),
                     ("ZHl", "WHh"), ("ZHl", "ZHMET"), ("ZHl", "ttHh"), ("ZHl", "ttHl"), ("ZHMET", "ggH"),
                     ("ZHMET", "VBF"), ("ZHMET", "WHh"), ("ZHMET", "ttHh"), ("ZHMET", "ttHl"), ("ttHh", "ggH"),
                      ("ttHh", "VBF"), ("ttHh", "WHh"), ("ttHh", "ttHl"), ("ttHl", "ggH"), ("ttHl", "VBF"),
                     ("ttHl", "WHh")]

In [10]:
def extract_order(df, col_basename, sorted_column, columns, order):
    """Extract, per row, the list entries that belong to the `order`-th largest value of a sorting column.

    For every row the sequence stored in column ``col_basename + sorted_column`` is ranked in
    descending order; the position of the entry with rank `order` (0 = largest) is then used to
    pick one element out of each column ``col_basename + column`` for ``column`` in `columns`.

    Args:
        df: frame whose ``col_basename + <suffix>`` cells hold per-row sequences
            (assumed from the indexing below -- TODO confirm against FileCollection.get_data).
        col_basename: common prefix of the list-valued columns, e.g. "Jet".
        sorted_column: suffix of the column that defines the ranking, e.g. "Pt".
        columns: suffixes of the columns to extract from.
        order: zero-based rank of the entry to extract.

    Returns:
        A DataFrame with one column per entry of `columns`, named
        ``<col_basename><column>(<col_basename>Pt|<order>)``. Rows whose sequence has fewer than
        ``order + 1`` entries get the value 0. Note that the column name always contains "Pt",
        independent of `sorted_column`.
    """
    def get_index(row, order, col_basename, sorted_column):
        # from here on `sorted_column` no longer holds the suffix but the row's actual sequence
        sorted_column = row[col_basename + sorted_column]
        if order >= len(sorted_column):
            # not enough entries in this row: flag with -1
            return -1
        else:
            # argsort is ascending, so flip it to obtain the descending ranking
            return np.flipud(np.argsort(sorted_column))[order]
    
    # one position (or -1) per row, stored in a helper column called "index"
    index_column = pd.DataFrame(df.transform(lambda row: get_index(row, order, col_basename, sorted_column), axis = 1, raw = True))
    index_column.columns = ["index"]
    df_temp = pd.concat([index_column, df], axis = 1)
    
    def get_element(row, column_name):
        # rows flagged with -1 have no entry of the requested rank and are filled with 0
        if row["index"] == -1:
            return 0
        else:
            return row[column_name][row["index"]]
        
    extracted_cols = pd.DataFrame()
    for column in columns:
        extracted_col = pd.DataFrame(df_temp.transform(lambda row: get_element(row, col_basename + column), axis = 1, raw = True))
        extracted_col.columns = [col_basename + column + "(" + col_basename + "Pt|" + str(order) + ")"]
        extracted_cols = pd.concat([extracted_cols, extracted_col], axis = 1)
        
    return extracted_cols

In [11]:
def prepare_data(df, col_basenames, sorted_column, columns, orders, pt_limits):
    """Unroll list-valued collections into flat, rank-ordered columns.

    For every (collection base name, Pt limit) pair -- the two lists are paired with zip(), so
    surplus entries of the longer one are ignored -- and for every rank in `orders`, the columns
    produced by extract_order are computed. Rows whose extracted Pt lies below the collection's
    limit get all extracted columns of that rank set to 0.0.

    Returns:
        A DataFrame holding the extracted columns of all collections and ranks side by side.
    """
    unrolled = pd.DataFrame()

    for basename, min_pt in zip(col_basenames, pt_limits):
        for rank in orders:
            rank_columns = extract_order(df, basename, sorted_column, columns, rank)

            # name of the Pt column that extract_order generated for this collection and rank
            pt_column_name = basename + "Pt(" + basename + "Pt|" + str(rank) + ")"
            below_limit = rank_columns[pt_column_name] < min_pt
            rank_columns[below_limit] = 0.0

            unrolled = pd.concat([unrolled, rank_columns], axis = 1)

    return unrolled

In [12]:
def get_data(H1_coll, H0_coll, read_branches, input_branches, list_branches, pt_limits):
    """Load both hypotheses and reduce them to the final set of input columns.

    Each collection is read via its ``get_data(read_branches, 0.0, 1.0)`` method, the list-valued
    collections named in `list_branches` are unrolled into Pt/Eta/Phi columns for ranks 0..3
    (see prepare_data), and both frames are restricted to `input_branches` followed by the
    unrolled columns. The unrolled column names are taken from the H1 frame and applied to both.

    Returns:
        Tuple ``(H1_df, H0_df)`` of DataFrames with identical column selection.
    """
    unrolled_suffixes = ["Pt", "Eta", "Phi"]

    H1_raw = H1_coll.get_data(read_branches, 0.0, 1.0)
    H0_raw = H0_coll.get_data(read_branches, 0.0, 1.0)

    H1_unrolled = prepare_data(H1_raw, list_branches, "Pt", unrolled_suffixes, range(4), pt_limits)
    H0_unrolled = prepare_data(H0_raw, list_branches, "Pt", unrolled_suffixes, range(4), pt_limits)

    # scalar inputs first, then the unrolled columns (names as generated for H1)
    selected_columns = np.concatenate([input_branches, H1_unrolled.columns])

    H1_full = pd.concat([H1_raw, H1_unrolled], axis = 1)
    H0_full = pd.concat([H0_raw, H0_unrolled], axis = 1)

    return H1_full[selected_columns], H0_full[selected_columns]

In [13]:
def get_data_dmatrix(H1_coll, H0_coll, read_branches, input_branches, list_branches, pt_limits):
    """Build a weighted, labelled xgboost DMatrix from the two hypotheses.

    The data is obtained through get_data. H1 events are labelled 1, H0 events are labelled 0,
    and every event carries a per-class weight of ``1 + N_other / N_own`` so that the smaller
    class is weighted up.

    Args:
        H1_coll, H0_coll: collections for the two hypotheses (objects accepted by get_data).
        read_branches: branches to read from the collections.
        input_branches: scalar branches to keep as inputs.
        list_branches: base names of the list-valued collections to unroll.
        pt_limits: minimum Pt per entry of `list_branches`.

    Returns:
        xgb.DMatrix containing the H1 rows followed by the H0 rows.

    Raises:
        ZeroDivisionError: if either of the two frames is empty.
    """
    H1_df, H0_df = get_data(H1_coll, H0_coll, read_branches, input_branches, list_branches, pt_limits)
    
    complete_input_branches = H1_df.columns
    # single-argument print(...) prints the same text on Python 2 and Python 3
    print("number of input variables: " + str(len(complete_input_branches)))
    print("final list of inputs: " + str(complete_input_branches))
    
    # try with the same weights as used later in the neural network training, to balance out some (very)
    # unbalanced datasets
    H1_class_weight = 1.0 + float(len(H0_df)) / float(len(H1_df))
    H0_class_weight = 1.0 + float(len(H1_df)) / float(len(H0_df))
    
    print("using class weights: " + str(H1_class_weight) + " (H1), " + str(H0_class_weight) + " (H0)")
    
    H1_weights = np.full(len(H1_df), H1_class_weight)
    H0_weights = np.full(len(H0_df), H0_class_weight)
    
    # `.values` replaces the deprecated `as_matrix()` and returns the same ndarray
    H1_data = H1_df.values
    H0_data = H0_df.values
    H1_target = np.ones(np.shape(H1_data)[0])
    H0_target = np.zeros(np.shape(H0_data)[0])
    
    # keep the H1-then-H0 order consistent across labels, data and weights
    target = np.concatenate([H1_target, H0_target])
    data = np.concatenate([H1_data, H0_data])
    weights = np.concatenate([H1_weights, H0_weights])
    
    dmatrix = xgb.DMatrix(data, label = target, feature_names = complete_input_branches, weight = weights)
    
    return dmatrix

In [14]:
def get_feature_correlation(source, corr_branches, mandatory_branches, optional_branches, list_branches, pt_limits):    
    """Draw the correlation matrix of selected input variables for one event category.

    The category `source` is read through ``FileCollection(collections[source], 0.0, 0.5)``.
    The inputs are all `mandatory_branches` plus every optional branch that shares a jet tag
    ("0j", "1j" or "2j") with the category name. The same collection is passed to get_data for
    both hypotheses and only the first returned frame is used.

    Args:
        corr_branches: list of column names whose pairwise correlation is plotted; it is also
            used for the tick labels.

    Returns:
        The matplotlib figure containing the matrix plot and its colour bar.
    """
    coll = FileCollection(collections[source], 0.0, 0.5)

    input_branches = list(mandatory_branches)

    # an optional branch is added once for every jet tag it shares with the category name
    for optional_branch in optional_branches:
        for jet_tag in ("0j", "1j", "2j"):
            if jet_tag in source and jet_tag in optional_branch:
                input_branches.append(optional_branch)

    source_df, _ = get_data(coll, coll, allbranches, input_branches, list_branches, pt_limits)
    correlations = source_df[corr_branches].corr()

    fig = plt.figure(figsize=(10,15))
    ax = fig.add_subplot(111)

    matrix_image = ax.matshow(correlations, vmin = -1.0, vmax = 1.0, cmap = "RdBu")
    fig.colorbar(matrix_image)

    # one tick per variable on both axes
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_major_locator(ticker.MultipleLocator(1))

    # the leading empty label accounts for the extra tick placed before the first cell
    ax.set_yticklabels([''] + corr_branches)
    ax.set_xticklabels([''] + corr_branches, rotation = 'vertical')

    return fig

In [15]:
def get_interpolating_function(data, bins):
    """Turn binned values into a callable that interpolates linearly between bin centres.

    Args:
        data: one value per bin.
        bins: bin edges (one more entry than `data`).

    Returns:
        A function of a scalar ``x`` that returns the linear interpolation of `data` for ``x``
        strictly between the first and the last bin centre, and 0 everywhere else (including
        exactly at those two centres).
    """
    bin_centers = [np.mean(edge_pair) for edge_pair in zip(bins[:-1], bins[1:])]
    intf = interpolate.interp1d(bin_centers, data, kind = "linear")

    def interpolated_function(x):
        # outside (or exactly on the border of) the covered range the function is defined as 0
        if bin_centers[0] < x < bin_centers[-1]:
            return intf(x)
        return 0

    return interpolated_function

In [16]:
def get_binned_data(df, branch):
    """Histogram one column as a weighted, unit-area density.

    The bin width is ``2 * IQR / n**0.33`` (n = number of entries), but never smaller than
    0.005. Bins start at the smallest value and extend until the largest value is covered.
    The per-event weights from the "training_weight" column are rescaled such that the
    histogram integrates to one.

    Args:
        df: DataFrame containing `branch` and a "training_weight" column.
        branch: name of the column to histogram.

    Returns:
        The ``(bin_contents, bin_edges)`` tuple produced by ``np.histogram``.
    """
    # `.values` replaces `as_matrix()`, which is deprecated and no longer exists in pandas >= 1.0
    data = df[branch].values
    weights = df["training_weight"].values
    
    # set the bin width
    q75, q25 = np.percentile(data, [75, 25])
    bin_width = max(2 * (q75 - q25) / len(data)**0.33, 0.005)

    data_max = np.max(data)
    data_min = np.min(data)
    bins = np.arange(data_min, data_max + bin_width, bin_width)
    
    # normalise to unit area: sum(weights) * bin_width == 1
    weights = weights / (np.sum(weights) * bin_width)
    
    hist = np.histogram(data, bins = bins, weights = weights)
    return hist

In [17]:
def get_feature_importance_list_BDT(disc_pair, mandatory_branches, optional_branches, list_branches, pt_limits):
    """Rank the input variables for one discriminant by how often a BDT splits on them.

    A gradient-boosted classifier (xgboost, 'binary:logistic') is trained to separate the two
    categories in `disc_pair` for every max_depth from 1 to 7. The model with the smallest
    RMSE on the validation sample provides the split counts (``get_fscore``), which are
    normalised so that they sum to one.

    Args:
        disc_pair: ``(H1_name, H0_name)``, both keys of `collections`.
        mandatory_branches: scalar branches that are always used as inputs.
        optional_branches: branches that are only used if the jet tag in their name is
            compatible with the category names (see the loop below).
        list_branches: base names of the list-valued collections to unroll.
        pt_limits: minimum Pt per entry of `list_branches`.

    Returns:
        Tuple ``(best_params, feature_names, used_variables)``: the xgboost parameters of the
        best model, the feature names of the training DMatrix, and a dict mapping each variable
        that appears in the fscore to its normalised score.
    """
    H1_name = disc_pair[0]
    H0_name = disc_pair[1]
    
    # first assemble the list of branches that can serve as input: it will *always* contain the mandatory branches,
    # and *can* contain some of the optional branches, if the name of the categories allows it
    input_branches = [branch for branch in mandatory_branches]
    
    # NOTE(review): the "2j" rule differs from the other two -- it accepts every optional branch
    # that does not contain "1j" (so also "0j" branches). Confirm that this asymmetry is intended.
    for optional_branch in optional_branches:
        if ("0j" in H1_name or "0j" in H0_name) and ("0j" in optional_branch):
            input_branches.append(optional_branch)  
        elif ("1j" in H1_name or "1j" in H0_name) and ("1j" in optional_branch):
            input_branches.append(optional_branch)
        elif ("2j" in H1_name or "2j" in H0_name) and not ("1j" in optional_branch):
            input_branches.append(optional_branch)
            
        # the fully inclusive categories (i.e. those with NO "xxj" in their name, can not use MELA, since there may
        # be events with low number of jets contained)
    
    # get the training data for the BDT ...
    # (FileCollection is given the range 0.0 - 0.5; presumably the first half of the events -- TODO confirm)
    H1_coll_train = FileCollection(collections[H1_name], 0.0, 0.5)
    H0_coll_train = FileCollection(collections[H0_name], 0.0, 0.5)
    
    dtrain = get_data_dmatrix(H1_coll_train, H0_coll_train, allbranches, input_branches, list_branches, pt_limits)
    
    # ... and the validation data as well (range 0.5 - 1.0)
    H1_coll_val = FileCollection(collections[H1_name], 0.5, 1.0)
    H0_coll_val = FileCollection(collections[H0_name], 0.5, 1.0)
    dval = get_data_dmatrix(H1_coll_val, H0_coll_val, allbranches, input_branches, list_branches, pt_limits)
    
    # the last entry of this list is the one xgboost monitors for early stopping
    evallist = [(dtrain, 'train'), (dval, 'eval')]
    
    # perform the training
    # try different tree depths and choose the one that gives the best RMSE (i.e. avoid too deep trees to start with)
        
    params = {'eta': 0.01, 'silent': 1, 'gamma': 0.5, 'objective': 'binary:logistic'}
    params['nthread'] = 4
    params['eval_metric'] = 'rmse'
    max_num_rounds = 2000
    
    best_loss = 1e6
    best_imp = None
    best_params = None
    for tree_depth in range(1,8):
        params['max_depth'] = tree_depth
        
        # stop as soon as the validation RMSE has not improved for 10 consecutive rounds
        bst = xgb.train(params, dtrain, max_num_rounds, evals = evallist, early_stopping_rounds = 10, verbose_eval = False)
    
        # NOTE(review): depending on the xgboost version, predict() without an explicit tree limit
        # may use all trained rounds rather than the best early-stopping iteration -- confirm.
        pred = bst.predict(dval)
        cur_loss = np.sqrt(mean_squared_error(pred, dval.get_label()))
        cur_imp = bst.get_fscore()

        print "for max_depth = " + str(params['max_depth']) + ": loss = " + str(cur_loss)
        
        if cur_loss < best_loss:
            best_loss = cur_loss
            # `params` is mutated on every iteration, so snapshot the winning configuration
            best_imp = copy.copy(cur_imp)
            best_params = copy.copy(params)
            
    # normalize the usage score w.r.t. the total score (i.e. sum of all individuals)
    score_sum = sum([val for key, val in best_imp.iteritems()])
    # the result is a plain dict, so callers must not rely on the sorted order used to build it
    used_variables = {key: val / float(score_sum) for key, val in sorted(best_imp.iteritems(), key = lambda x: x[1], reverse = True)}           
    return best_params, dtrain.feature_names, used_variables

In [18]:
def get_histogram(df, branch, label):
    """Draw one column as a weighted, unit-area histogram on the current matplotlib axes.

    The bin width is ``2 * IQR / n**0.33`` (n = number of entries), but never smaller than
    0.005. The per-event weights from the "training_weight" column are rescaled such that the
    histogram integrates to one.

    Args:
        df: DataFrame containing `branch` and a "training_weight" column.
        branch: name of the column to plot.
        label: legend label of the histogram.

    Returns:
        Whatever ``plt.hist`` returns for the drawn histogram.
    """
    # `.values` replaces `as_matrix()`, which is deprecated and no longer exists in pandas >= 1.0
    data = df[branch].values
    weights = df["training_weight"].values
    
    # set the bin width
    q75, q25 = np.percentile(data, [75, 25])
    bin_width = max(2 * (q75 - q25) / len(data)**0.33, 0.005)

    data_max = np.max(data)
    data_min = np.min(data)
    bins = np.arange(data_min, data_max + bin_width, bin_width)
    
    # normalise to unit area: sum(weights) * bin_width == 1
    weights = weights / (np.sum(weights) * bin_width)
    
    fig = plt.hist(data, bins = bins, weights = weights, alpha = 0.5, label = label)
    return fig

In [19]:
def plot_branch(disc_pair, branch, start_fraction = 0.0, end_fraction = 1.0):
    """Overlay the unit-area distributions of one branch for the two categories of a pair.

    Args:
        disc_pair: ``(H1_name, H0_name)``, both keys of `collections`.
        branch: name of the column to plot.
        start_fraction, end_fraction: range handed to FileCollection for both categories.

    The figure is shown directly; nothing is returned.
    """
    H1_name, H0_name = disc_pair[0], disc_pair[1]

    H1_coll = FileCollection(collections[H1_name], start_fraction, end_fraction)
    H0_coll = FileCollection(collections[H0_name], start_fraction, end_fraction)

    # all branches are kept as inputs so that `branch` and "training_weight" are available
    H1_df, H0_df = get_data(H1_coll, H0_coll, allbranches, allbranches, list_branches, pt_limits)

    plt.figure()
    for category_df, category_name in ((H1_df, H1_name), (H0_df, H0_name)):
        get_histogram(category_df, branch, category_name)

    plt.legend(loc = 'upper right')
    plt.show()

In [20]:
def plot_variables(discs):
    """Train the ranking BDT for every discriminant pair and plot the variable usage as a matrix.

    Args:
        discs: non-empty sequence of ``(H1_name, H0_name)`` pairs, both keys of `collections`.

    Returns:
        Tuple ``(fig, sorted_implist)``: the matplotlib figure (variables on the y axis,
        discriminants on the x axis) and a list of ``(variable, normalised score)`` tuples in
        descending score order. The list only covers the LAST pair in `discs`.

    Raises:
        ValueError: if `discs` is empty.
    """
    discs = list(discs)
    if not discs:
        # without this guard the function failed later with an unrelated NameError
        raise ValueError("plot_variables requires at least one discriminant pair")

    plotframe = pd.DataFrame()
    cutimplist = {}
    
    for disc in discs:
        _, _, implist = get_feature_importance_list_BDT(disc, candidate_branches, MELA_branches, list_branches, pt_limits)
        
        # keep only the variables with a non-zero normalised score
        cutimplist = {key: val for key, val in implist.items() if val > 0.00}
        curframe = pd.DataFrame(cutimplist, index = [len(plotframe)])
        
        plotframe = pd.concat([plotframe, curframe])
        
    # a variable that one discriminant does not use gets score 0 for it
    plotframe = plotframe.fillna(0.0)
    
    # single-argument print(...) prints the same text on Python 2 and Python 3
    print(plotframe)
    print("number of pre-selected input variables = " + str(len(plotframe.columns)))
    
    # start the plotting
    parameters = plotframe.columns
    # `.values` replaces the deprecated `as_matrix()`
    plotdata = np.transpose(plotframe.values)
    
    plt.close('all')
    fig = plt.figure(figsize=(10,15))
    ax = fig.add_subplot(111)
    ax.matshow(plotdata, cmap = 'Blues')
    
    # make axis labels: the jet tag is stripped from both category names and appended once;
    # "01j" is tested before "1j" because it contains the latter
    disclabels = []
    for disc in discs:
        for jet_tag in ("0j", "01j", "1j", "2j"):
            if jet_tag in disc[0] or jet_tag in disc[1]:
                disclabels.append('D_' + re.sub(jet_tag, '', disc[0]) + "_" + re.sub(jet_tag, '', disc[1]) + "_" + jet_tag)
                break
        else:
            # pairs without any jet tag are labelled "_2j" as well
            disclabels.append('D_' + disc[0] + "_" + disc[1] + "_2j")
            
    # the leading empty label accounts for the extra tick placed before the first cell
    disclabels = np.concatenate([[''], np.array(disclabels)])
    parameters = np.concatenate([[''], np.array(parameters)])
        
    ax.set_xticklabels(disclabels, rotation = 'vertical')
    ax.set_yticklabels(parameters)
    
    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))
    
    # sort the used variables (of the last pair) according to their importance
    sorted_implist = sorted(cutimplist.items(), key = lambda x: x[1], reverse = True)
    
    return fig, sorted_implist

In [21]:
def append_variables(confhandler, impdict, threshold_fscore):
    """Write the selected input variables of one discriminant into a new config section.

    Args:
        confhandler: object providing ``new_section(name)`` and ``get_section(name)``; the
            section returned by ``get_section`` must support item assignment.
        impdict: dict mapping variable names to one-element lists holding their score, plus
            the entry ``"discriminant"`` whose value is the section name.
        threshold_fscore: a variable is kept only if its score is strictly larger than this.

    Kept variables whose name contains "phi" or "Phi" are stored under "periodic_columns",
    all other kept variables under "nonperiodic_columns".
    """
    section_name = impdict["discriminant"]
    confhandler.new_section(section_name)
    cur_sec = confhandler.get_section(section_name)

    periodic_inputs = []
    nonperiodic_inputs = []
    for key, val in impdict.items():
        # The bookkeeping entry holds the section name (a string), not a score. It must be
        # skipped by value comparison (`==`, not identity) and *before* `val[0]` is compared
        # with the threshold.
        if key == "discriminant":
            continue

        if val[0] > threshold_fscore:
            if "phi" in key or "Phi" in key:
                periodic_inputs.append(key)
            else:
                nonperiodic_inputs.append(key)

    cur_sec["nonperiodic_columns"] = ConfigFileUtils.serialize_list(nonperiodic_inputs, lambda x: x)
    cur_sec["periodic_columns"] = ConfigFileUtils.serialize_list(periodic_inputs, lambda x: x)

In [22]:
def convert_varname(raw):
    """Return `raw` with every '(' replaced by '[' and every ')' replaced by ']'."""
    return raw.replace('(', '[').replace(')', ']')

In [23]:
# Accumulator for the importance rankings: the cells below append one row per discriminant.
# Re-running this cell resets the accumulated table.
df = pd.DataFrame()

In [24]:
# Location of the input-variable configuration file.
# NOTE(review): `out_path` is not used in the visible part of the notebook -- presumably the
# config handler is saved to it further down; confirm.
out_dir = "/data_CMS/cms/wind/InputConfigurations/"
out_path = os.path.join(out_dir, "inclusive_99_fullmassrange.conf")
# Minimum normalised BDT score a variable needs (strictly greater) to be written to the
# configuration by append_variables.
threshold_fscore = 0.01

In [25]:
# Configuration that receives one section per discriminant through append_variables.
confhandler = ConfigFileHandler()

In [23]:
%%capture
fig, implist = plot_variables([("VBF", "ggH")])

In [27]:
# Build the record for D_VBF_ggH_ML from the ranked (variable, score) pairs: variable names get
# their parentheses replaced by brackets (convert_varname) and map to a one-element list
# holding the score; the "discriminant" entry carries the name of the discriminant.
# The record is appended to `df`, so re-running this cell adds a duplicate row.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_ggH_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [28]:
append_variables(confhandler, impdict, threshold_fscore)

In [24]:
implist

[('ZZPt', 0.08877805486284289),
 ('JetEta(JetPt|0)', 0.083854878931703),
 ('JetPt(JetPt|0)', 0.06099267959134422),
 ('ZZEta', 0.057887539216474944),
 ('PFMET', 0.05447671144718848),
 ('Z1Pt', 0.04863647333279704),
 ('ZZPhi', 0.046561016812806694),
 ('D_VBF2j_ggH_ME', 0.04437293862118896),
 ('ZZMassErr', 0.043150189043520235),
 ('ZZMass', 0.041106910143994854),
 ('Z2Mass', 0.04060815702678787),
 ('Z2Pt', 0.040254203201673236),
 ('Z1Mass', 0.03835572359424021),
 ('JetPt(JetPt|1)', 0.033529080524495214),
 ('JetPhi(JetPt|0)', 0.03330383718124045),
 ('D_VBF1j_ggH_ME', 0.03019869680637117),
 ('JetEta(JetPt|1)', 0.02548467540825356),
 ('D_WHh_ZHh_ME', 0.022057758828734616),
 ('JetEta(JetPt|2)', 0.021961225967339716),
 ('JetPhi(JetPt|1)', 0.021269407127342932),
 ('D_WHh_ggH_ME', 0.020223634462231518),
 ('D_VBF2j_WHh_ME', 0.019773147775721985),
 ('D_ZHh_ggH_ME', 0.019563993242699702),
 ('JetPt(JetPt|2)', 0.017665513635266673),
 ('D_VBF2j_ZHh_ME', 0.01303193628831148),
 ('JetPhi(JetPt|2)', 0.008

In [30]:
%%capture
fig, implist = plot_variables([("WHh", "ggH")])

In [31]:
# Build the record for D_WHh_ggH_ML from the ranked (variable, score) pairs: variable names get
# their parentheses replaced by brackets (convert_varname) and map to a one-element list
# holding the score; the "discriminant" entry carries the name of the discriminant.
# The record is appended to `df`, so re-running this cell adds a duplicate row.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHh_ggH_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [32]:
append_variables(confhandler, impdict, threshold_fscore)

In [33]:
implist

[('ZZPt', 0.08439166983204241),
 ('ZZEta', 0.08024640385790491),
 ('JetEta(JetPt|0)', 0.07464781754223712),
 ('JetPt(JetPt|0)', 0.0632194348565672),
 ('ZZMassErr', 0.05700979339730145),
 ('PFMET', 0.056051923172201944),
 ('Z1Pt', 0.04985879671681723),
 ('ZZMass', 0.04495384056415253),
 ('ZZPhi', 0.04483823553698535),
 ('Z2Pt', 0.0421297749004971),
 ('Z1Mass', 0.03958646430281911),
 ('Z2Mass', 0.03948737427953296),
 ('D_VBF1j_ggH_ME', 0.031494112401116416),
 ('JetPhi(JetPt|0)', 0.030205942098396395),
 ('D_WHh_ggH_ME', 0.030189427094515366),
 ('JetPt(JetPt|1)', 0.027976416574457895),
 ('D_VBF2j_ggH_ME', 0.027365361430859936),
 ('JetEta(JetPt|2)', 0.023798120592558338),
 ('JetEta(JetPt|1)', 0.022064045185050618),
 ('D_WHh_ZHh_ME', 0.01862892437779723),
 ('JetPt(JetPt|2)', 0.01821604928077158),
 ('JetPhi(JetPt|1)', 0.013360638139749963),
 ('D_ZHh_ggH_ME', 0.012700037984508927),
 ('D_VBF2j_ZHh_ME', 0.01184125778269558),
 ('JetPhi(JetPt|2)', 0.010850357549834024),
 ('D_VBF2j_WHh_ME', 0.00977

In [34]:
%%capture
fig, implist = plot_variables([("ZHh", "ggH")])

In [35]:
# Build the record for D_ZHh_ggH_ML from the ranked (variable, score) pairs: variable names get
# their parentheses replaced by brackets (convert_varname) and map to a one-element list
# holding the score; the "discriminant" entry carries the name of the discriminant.
# The record is appended to `df`, so re-running this cell adds a duplicate row.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHh_ggH_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [36]:
append_variables(confhandler, impdict, threshold_fscore)

In [37]:
implist

[('ZZPt', 0.09188676261268386),
 ('ZZEta', 0.07087880225631293),
 ('Z1Pt', 0.06278665190574094),
 ('JetEta(JetPt|0)', 0.061310559333649636),
 ('JetPt(JetPt|0)', 0.05954452000632611),
 ('ZZPhi', 0.05211133955400917),
 ('Z1Mass', 0.05126785808424271),
 ('Z2Mass', 0.05081975855342928),
 ('PFMET', 0.049080078022035956),
 ('ZZMassErr', 0.043571089672623756),
 ('Z2Pt', 0.03777215456797933),
 ('ZZMass', 0.034002846749960465),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.033765617586588643),
 ('D_ZHh_ggH_ME', 0.03236860140228794),
 ('D_VBF1j_ggH_ME', 0.02983815699298856),
 ('JetPt(JetPt|1)', 0.027544941747060993),
 ('JetEta(JetPt|1)', 0.024223733459855554),
 ('D_VBF2j_ggH_ME', 0.02401286309241394),
 ('JetPhi(JetPt|0)', 0.020902525172650115),
 ('JetEta(JetPt|2)', 0.019373715008698403),
 ('JetPt(JetPt|2)', 0.01853023353893194),
 ('D_WHh_ggH_ME', 0.017713110865095683),
 ('D_WHh_ZHh_ME', 0.01478728451684327),
 ('D_VBF2j_WHh_ME', 0.012098687331962676),
 ('JetPhi(JetPt|2)', 0.011571511413358639),
 ('JetPhi

In [38]:
%%capture
fig, implist = plot_variables([("WHh", "ZHh")])

In [39]:
# Build the record for D_WHh_ZHh_ML from the ranked (variable, score) pairs: variable names get
# their parentheses replaced by brackets (convert_varname) and map to a one-element list
# holding the score; the "discriminant" entry carries the name of the discriminant.
# The record is appended to `df`, so re-running this cell adds a duplicate row.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHh_ZHh_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [40]:
append_variables(confhandler, impdict, threshold_fscore)

In [41]:
implist

[('D_WHh_ZHh_ME', 0.07646768623581648),
 ('PFMET', 0.05605574740996547),
 ('ZZMassErr', 0.0540823877651702),
 ('JetPhi(JetPt|1)', 0.053527380365071535),
 ('ZZMass', 0.04847064627528367),
 ('ZZEta', 0.04353724716329551),
 ('JetEta(JetPt|0)', 0.04279723729649729),
 ('Z1Mass', 0.042303897385298474),
 ('Z2Mass', 0.04168722249629995),
 ('JetPhi(JetPt|0)', 0.041625555007400096),
 ('JetPt(JetPt|0)', 0.041193882585101135),
 ('Z1Pt', 0.036877158362111494),
 ('JetEta(JetPt|1)', 0.03644548593981253),
 ('ZZPhi', 0.034657128761716824),
 ('ZZPt', 0.034163788850518004),
 ('JetPt(JetPt|1)', 0.03188209176122348),
 ('D_WHh_ggH_ME', 0.031018746916625557),
 ('D_ZHh_ggH_ME', 0.02818204242723236),
 ('Z2Pt', 0.02719536260483473),
 ('JetPhi(JetPt|2)', 0.025098667982239763),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.024173655648741982),
 ('JetPt(JetPt|2)', 0.023063640848544648),
 ('D_VBF2j_ggH_ME', 0.02257030093734583),
 ('D_VBF2j_WHh_ME', 0.021213616181549086),
 ('D_VBF2j_ZHh_ME', 0.01726689689195856),
 ('JetEta(

In [42]:
%%capture
fig, implist = plot_variables([("VBF", "WHh")])

In [43]:
# Build the record for D_VBF_WHh_ML from the ranked (variable, score) pairs: variable names get
# their parentheses replaced by brackets (convert_varname) and map to a one-element list
# holding the score; the "discriminant" entry carries the name of the discriminant.
# The record is appended to `df`, so re-running this cell adds a duplicate row.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_WHh_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [44]:
append_variables(confhandler, impdict, threshold_fscore)

In [45]:
implist

[('JetEta(JetPt|0)', 0.08828222229313591),
 ('ZZEta', 0.07219899798959696),
 ('D_VBF2j_ggH_ME', 0.055238216804416505),
 ('JetEta(JetPt|2)', 0.05491910521109232),
 ('JetPt(JetPt|0)', 0.05359479209879695),
 ('PFMET', 0.04909531863292593),
 ('JetEta(JetPt|1)', 0.03883588090755337),
 ('Z2Mass', 0.03787854612758081),
 ('D_WHh_ggH_ME', 0.03672974439161375),
 ('ZZMass', 0.035676676133643934),
 ('JetPhi(JetPt|0)', 0.03522991990299008),
 ('ZZPhi', 0.03518205316399145),
 ('Z1Pt', 0.034767208092670004),
 ('ZZMassErr', 0.034767208092670004),
 ('JetPt(JetPt|2)', 0.034368318601014775),
 ('Z1Mass', 0.033985384689025754),
 ('Z2Pt', 0.0335545840380381),
 ('D_WHh_ZHh_ME', 0.033442894980374636),
 ('ZZPt', 0.0302836902064652),
 ('JetPt(JetPt|1)', 0.024667326163959537),
 ('JetPhi(JetPt|1)', 0.02302390145833998),
 ('D_ZHh_ggH_ME', 0.01881162842646073),
 ('D_VBF1j_ggH_ME', 0.017567093212496408),
 ('JetPhi(JetPt|2)', 0.015843890608545807),
 ('D_VBF2j_ZHh_ME', 0.015732201550882344),
 ('D_VBF2j_WHh_ME', 0.01504

In [46]:
%%capture
fig, implist = plot_variables([("VBF", "ZHh")])

In [47]:
# Build the record for D_VBF_ZHh_ML from the ranked (variable, score) pairs: variable names get
# their parentheses replaced by brackets (convert_varname) and map to a one-element list
# holding the score; the "discriminant" entry carries the name of the discriminant.
# The record is appended to `df`, so re-running this cell adds a duplicate row.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_ZHh_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [48]:
append_variables(confhandler, impdict, threshold_fscore)

In [49]:
implist

[('JetEta(JetPt|0)', 0.08837377761680551),
 ('ZZEta', 0.08417666233461875),
 ('JetEta(JetPt|2)', 0.059057886102648234),
 ('D_VBF2j_ggH_ME', 0.054455972899844475),
 ('PFMET', 0.04825617316828941),
 ('JetPt(JetPt|0)', 0.04439994034556959),
 ('Z1Pt', 0.03628267677951296),
 ('D_ZHh_ggH_ME', 0.03615484585721286),
 ('JetEta(JetPt|1)', 0.03613354070349618),
 ('Z1Mass', 0.03329995525917719),
 ('ZZMassErr', 0.0330229882608603),
 ('Z2Mass', 0.03268210580139336),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.03153162750069242),
 ('ZZPt', 0.030168097662824637),
 ('ZZMass', 0.029251976053007223),
 ('JetPhi(JetPt|0)', 0.02886848328610691),
 ('JetPhi(JetPt|1)', 0.028676736902656753),
 ('D_VBF2j_WHh_ME', 0.0280801925985896),
 ('ZZPhi', 0.0280801925985896),
 ('JetPt(JetPt|2)', 0.027867141061422757),
 ('Z2Pt', 0.025608794767454247),
 ('D_WHh_ZHh_ME', 0.025161386539403884),
 ('JetPt(JetPt|1)', 0.02315870209003558),
 ('D_WHh_ggH_ME', 0.022349106248801584),
 ('D_VBF2j_ZHh_ME', 0.016745850821313676),
 ('D_VBF1j_ggH

In [50]:
%%capture
fig, implist = plot_variables([("WHl", "ggH")])

In [51]:
# Build the record for D_WHl_ggH_ML from the ranked (variable, score) pairs: variable names get
# their parentheses replaced by brackets (convert_varname) and map to a one-element list
# holding the score; the "discriminant" entry carries the name of the discriminant.
# The record is appended to `df`, so re-running this cell adds a duplicate row.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_ggH_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [52]:
append_variables(confhandler, impdict, threshold_fscore)

In [53]:
implist

[('PFMET', 0.1397021917349841),
 ('ZZPt', 0.12129831018905805),
 ('JetPt(JetPt|0)', 0.07866822820813117),
 ('D_VBF1j_ggH_ME', 0.06561820311192906),
 ('ZZEta', 0.05879203613853103),
 ('Z1Pt', 0.05614856951647984),
 ('ZZMassErr', 0.051363560314539064),
 ('ZZPhi', 0.050761251463945126),
 ('ZZMass', 0.04818470804751548),
 ('Z1Mass', 0.040187384975740335),
 ('Z2Mass', 0.03998661535887569),
 ('JetEta(JetPt|0)', 0.03925046009703865),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0348335285260164),
 ('Z2Pt', 0.0344319892922871),
 ('JetPhi(JetPt|0)', 0.03306006357704534),
 ('D_WHh_ZHh_ME', 0.013317717918688306),
 ('JetEta(JetPt|1)', 0.012882717082148235),
 ('ExtraLepEta(ExtraLepPt|0)', 0.012548101054040488),
 ('D_VBF2j_ggH_ME', 0.009436171992638447),
 ('JetPhi(JetPt|1)', 0.0092354023757738),
 ('JetPt(JetPt|1)', 0.00779655345491049),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.006023088505939435),
 ('JetEta(JetPt|3)', 0.004985778818805421),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.004684624393508449),
 ('D_VBF2j_WHh_ME', 0

In [54]:
%%capture
fig, implist = plot_variables([("WHl", "VBF")])

In [55]:
# Build the record for D_WHl_VBF_ML from the ranked (variable, score) pairs: variable names get
# their parentheses replaced by brackets (convert_varname) and map to a one-element list
# holding the score; the "discriminant" entry carries the name of the discriminant.
# The record is appended to `df`, so re-running this cell adds a duplicate row.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_VBF_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [56]:
append_variables(confhandler, impdict, threshold_fscore)

In [57]:
implist

[('PFMET', 0.1308762310894507),
 ('JetPt(JetPt|0)', 0.07658983991606592),
 ('ZZEta', 0.06944867499238501),
 ('JetEta(JetPt|0)', 0.06037838020780451),
 ('D_VBF1j_ggH_ME', 0.05564016651436694),
 ('ExtraLepEta(ExtraLepPt|0)', 0.05110501912207669),
 ('ExtraLepPt(ExtraLepPt|0)', 0.04112092598233323),
 ('ZZMassErr', 0.04007175009307205),
 ('ZZPt', 0.03611195722069922),
 ('D_VBF2j_ggH_ME', 0.03445358242799607),
 ('Z2Pt', 0.034318204893897857),
 ('Z1Pt', 0.0342166717433242),
 ('ZZMass', 0.03391207229160321),
 ('ZZPhi', 0.03309980708701391),
 ('Z1Mass', 0.03022303448742681),
 ('JetPhi(JetPt|0)', 0.029783057501607608),
 ('Z2Mass', 0.02656784106677497),
 ('D_WHh_ZHh_ME', 0.022066538058009275),
 ('JetEta(JetPt|1)', 0.021761938606288285),
 ('JetEta(JetPt|2)', 0.021355806003993637),
 ('JetPt(JetPt|1)', 0.018512877787931092),
 ('D_VBF2j_ZHh_ME', 0.01692219176227705),
 ('JetPhi(JetPt|1)', 0.016786814228178834),
 ('D_WHh_ggH_ME', 0.01221782245236403),
 ('D_VBF2j_WHh_ME', 0.012183978068839476),
 ('Extra

In [58]:
%%capture
fig, implist = plot_variables([("WHl", "WHh")])

In [59]:
# Build the record for D_WHl_WHh_ML from the ranked (variable, score) pairs: variable names get
# their parentheses replaced by brackets (convert_varname) and map to a one-element list
# holding the score; the "discriminant" entry carries the name of the discriminant.
# The record is appended to `df`, so re-running this cell adds a duplicate row.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_WHh_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [60]:
append_variables(confhandler, impdict, threshold_fscore)

In [61]:
implist

[('PFMET', 0.14713430282292558),
 ('ZZPt', 0.06574605890260296),
 ('JetPt(JetPt|0)', 0.06525724062079921),
 ('JetEta(JetPt|0)', 0.06302700721006965),
 ('ExtraLepPt(ExtraLepPt|0)', 0.05153977758768178),
 ('D_VBF1j_ggH_ME', 0.04243553708908713),
 ('ZZMass', 0.04200782109250886),
 ('ZZMassErr', 0.041793963094219726),
 ('ZZEta', 0.04069412196016131),
 ('Z1Pt', 0.03797507026762801),
 ('ZZPhi', 0.03791396798240254),
 ('Z2Mass', 0.03534767200293291),
 ('Z2Pt', 0.03491995600635464),
 ('JetPhi(JetPt|0)', 0.029634608334351706),
 ('Z1Mass', 0.0293596480508371),
 ('D_WHh_ggH_ME', 0.027404374923622143),
 ('ExtraLepEta(ExtraLepPt|0)', 0.022699498961261153),
 ('JetPt(JetPt|1)', 0.017689111572772823),
 ('D_WHh_ZHh_ME', 0.017139191005743615),
 ('JetPhi(JetPt|1)', 0.016986435292679947),
 ('D_ZHh_ggH_ME', 0.01643651472565074),
 ('JetEta(JetPt|1)', 0.015672736160332396),
 ('D_VBF2j_ggH_ME', 0.014328485885372113),
 ('JetPt(JetPt|2)', 0.013350849321764634),
 ('D_VBF2j_WHh_ME', 0.01307588903825003),
 ('Extra

In [62]:
%%capture
fig, implist = plot_variables([("WHl", "ZHh")])

In [63]:
# Build the record for D_WHl_ZHh_ML from the ranked (variable, score) pairs: variable names get
# their parentheses replaced by brackets (convert_varname) and map to a one-element list
# holding the score; the "discriminant" entry carries the name of the discriminant.
# The record is appended to `df`, so re-running this cell adds a duplicate row.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_ZHh_ML"
df = df.append(pd.DataFrame.from_dict(impdict))

In [64]:
append_variables(confhandler, impdict, threshold_fscore)

In [65]:
implist

[('PFMET', 0.17979078054948844),
 ('ExtraLepPt(ExtraLepPt|0)', 0.07334176342108288),
 ('JetPt(JetPt|0)', 0.06081158753879756),
 ('D_VBF1j_ggH_ME', 0.05655822508334291),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.05460397746867456),
 ('D_ZHh_ggH_ME', 0.05425911024255661),
 ('JetEta(JetPt|0)', 0.04575238533164731),
 ('ZZPhi', 0.038165306357052536),
 ('JetPhi(JetPt|0)', 0.03609610300034487),
 ('ZZPt', 0.031727784802850906),
 ('Z1Pt', 0.03115300609265433),
 ('ZZMassErr', 0.030348315898379125),
 ('D_WHh_ggH_ME', 0.028738935509828718),
 ('Z2Pt', 0.027704333831474883),
 ('nExtraLep', 0.027589378089435568),
 ('ZZMass', 0.027129555121278306),
 ('ExtraLepEta(ExtraLepPt|0)', 0.02483044028049201),
 ('ZZEta', 0.022876192665823657),
 ('Z1Mass', 0.017703184274054488),
 ('D_VBF2j_ZHh_ME', 0.01678353833773997),
 ('Z2Mass', 0.01655362685366134),
 ('JetPt(JetPt|2)', 0.015059202207150247),
 ('D_VBF2j_ggH_ME', 0.014484423496953672),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.011610529945970801),
 ('nCleanedJetsPt30', 0.0

In [66]:
%%capture
# Call plot_variables for the ("WHl", "ZHl") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("WHl", "ZHl")])

In [67]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_ZHl_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [68]:
# Hand D_WHl_ZHl_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [69]:
# Display the (variable, importance) pairs returned by plot_variables for ("WHl", "ZHl").
implist

[('PFMET', 0.0950408945025365),
 ('ZZMass', 0.09240086965524381),
 ('Z2Mass', 0.0755771819028885),
 ('Z2Pt', 0.07505952997204679),
 ('Z1Mass', 0.06486178693446526),
 ('JetPt(JetPt|0)', 0.05383580080753701),
 ('ZZMassErr', 0.052127549435759396),
 ('ExtraLepPt(ExtraLepPt|0)', 0.04974635055388757),
 ('ExtraLepEta(ExtraLepPt|0)', 0.04555336991406978),
 ('ZZPhi', 0.04332746661145046),
 ('ZZPt', 0.040014494254063566),
 ('ZZEta', 0.03592504400041412),
 ('JetEta(JetPt|0)', 0.03230148048452221),
 ('Z1Pt', 0.03028263795423957),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.027228491562273527),
 ('JetPhi(JetPt|0)', 0.024019049591054973),
 ('D_VBF1j_ggH_ME', 0.019463712599647996),
 ('nExtraZ', 0.014028367325810125),
 ('JetPhi(JetPt|1)', 0.013407185008800082),
 ('nExtraLep', 0.013096593850295062),
 ('ExtraLepEta(ExtraLepPt|1)', 0.010404803809918211),
 ('D_WHh_ZHh_ME', 0.010404803809918211),
 ('JetPt(JetPt|1)', 0.010249508230665701),
 ('D_WHh_ggH_ME', 0.009369499948234807),
 ('JetEta(JetPt|1)', 0.008955378403561

In [70]:
%%capture
# Call plot_variables for the ("WHl", "ZHMET") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("WHl", "ZHMET")])

In [71]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_ZHMET_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [72]:
# Hand D_WHl_ZHMET_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [73]:
# Display the (variable, importance) pairs returned by plot_variables for ("WHl", "ZHMET").
implist

[('PFMET', 0.13071853192039287),
 ('ZZPt', 0.08380718531920393),
 ('Z1Pt', 0.08270871026104937),
 ('ZZMass', 0.07314551563711554),
 ('Z2Mass', 0.07127164642026364),
 ('ZZMassErr', 0.05692685448436288),
 ('ZZEta', 0.05666838976479711),
 ('Z1Mass', 0.05582837942620832),
 ('ZZPhi', 0.05563453088653399),
 ('JetPt(JetPt|0)', 0.05246833807185319),
 ('Z2Pt', 0.05124063065391574),
 ('ExtraLepPt(ExtraLepPt|0)', 0.04568363918325149),
 ('JetEta(JetPt|0)', 0.03424657534246575),
 ('JetPhi(JetPt|0)', 0.02894804859136728),
 ('D_VBF1j_ggH_ME', 0.0268157146549496),
 ('nExtraLep', 0.011178599121219953),
 ('JetEta(JetPt|1)', 0.010790902041871284),
 ('ExtraLepEta(ExtraLepPt|0)', 0.008917032825019384),
 ('JetPhi(JetPt|1)', 0.007430860687516154),
 ('JetPt(JetPt|1)', 0.007366244507624709),
 ('D_VBF2j_ggH_ME', 0.00723701214784182),
 ('JetPt(JetPt|2)', 0.00523391057120703),
 ('D_ZHh_ggH_ME', 0.00523391057120703),
 ('D_WHh_ZHh_ME', 0.004975445851641251),
 ('JetEta(JetPt|2)', 0.004587748772292582),
 ('nCleanedJe

In [74]:
%%capture
# Call plot_variables for the ("WHl", "ttHh") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("WHl", "ttHh")])

In [75]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_ttHh_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [76]:
# Hand D_WHl_ttHh_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [77]:
# Display the (variable, importance) pairs returned by plot_variables for ("WHl", "ttHh").
implist

[('PFMET', 0.12138538380651945),
 ('ExtraLepPt(ExtraLepPt|0)', 0.08931388012618297),
 ('JetPt(JetPt|0)', 0.08103312302839116),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.07958727655099895),
 ('D_WHh_ggH_ME', 0.042915352260778125),
 ('JetPt(JetPt|3)', 0.03640904311251315),
 ('JetPt(JetPt|2)', 0.03371451104100946),
 ('JetPt(JetPt|1)', 0.03318874868559411),
 ('ZZEta', 0.03174290220820189),
 ('JetEta(JetPt|0)', 0.02655099894847529),
 ('ZZMassErr', 0.02648527865404837),
 ('JetEta(JetPt|2)', 0.025893796004206097),
 ('ZZMass', 0.02576235541535226),
 ('Z2Mass', 0.02444794952681388),
 ('JetPhi(JetPt|0)', 0.023396424815983176),
 ('Z2Pt', 0.022804942166140905),
 ('D_VBF1j_ggH_ME', 0.021424815983175603),
 ('D_ZHh_ggH_ME', 0.020373291272344902),
 ('D_WHh_ZHh_ME', 0.02024185068349106),
 ('Z1Pt', 0.019256046267087278),
 ('nCleanedJetsPt30', 0.019124605678233438),
 ('JetPhi(JetPt|2)', 0.018007360672975814),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.017218717139852787),
 ('ZZPhi', 0.016824395373291272),
 ('JetEta(Je

In [78]:
%%capture
# Call plot_variables for the ("WHl", "ttHl") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("WHl", "ttHl")])

In [79]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_ttHl_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [80]:
# Hand D_WHl_ttHl_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [81]:
# Display the (variable, importance) pairs returned by plot_variables for ("WHl", "ttHl").
implist

[('JetPt(JetPt|0)', 0.08105130626377086),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0683034309096632),
 ('PFMET', 0.058467107333962855),
 ('Z1Mass', 0.056185080264400375),
 ('ZZEta', 0.051621026125275415),
 ('JetPt(JetPt|1)', 0.046663519043122444),
 ('Z2Pt', 0.041942083726786275),
 ('ExtraLepEta(ExtraLepPt|0)', 0.03855838841674536),
 ('D_WHh_ggH_ME', 0.037378029587661316),
 ('ZZMass', 0.03517469310670444),
 ('JetPt(JetPt|2)', 0.035017311929493235),
 ('nExtraLep', 0.032184450739691534),
 ('ExtraLepPt(ExtraLepPt|1)', 0.030610638967579477),
 ('JetPhi(JetPt|0)', 0.02872206484104501),
 ('ExtraLepPt(ExtraLepPt|0)', 0.025889203651243312),
 ('Z1Pt', 0.025810513062637708),
 ('ZZPt', 0.02447277305634246),
 ('D_VBF1j_ggH_ME', 0.02297765187283601),
 ('D_ZHh_ggH_ME', 0.022112055398174377),
 ('ZZPhi', 0.021010387157695938),
 ('JetPhi(JetPt|1)', 0.020538243626062325),
 ('Z2Mass', 0.020223481271639913),
 ('JetPt(JetPt|3)', 0.018098835379288636),
 ('ZZMassErr', 0.017784073024866225),
 ('JetEta(JetPt|0)', 0

In [82]:
%%capture
# Call plot_variables for the ("ZHh", "ZHl") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("ZHh", "ZHl")])

In [83]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHh_ZHl_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [84]:
# Hand D_ZHh_ZHl_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [85]:
# Display the (variable, importance) pairs returned by plot_variables for ("ZHh", "ZHl").
implist

[('ZZMass', 0.09109518935516889),
 ('Z2Mass', 0.07753326509723643),
 ('Z1Mass', 0.07693619924940294),
 ('PFMET', 0.07139201637666326),
 ('Z2Pt', 0.06495223473217332),
 ('ZZMassErr', 0.06299044694643466),
 ('JetPt(JetPt|0)', 0.052882975093824634),
 ('ZZPhi', 0.04593142272262027),
 ('JetPhi(JetPt|0)', 0.043884339815762535),
 ('JetEta(JetPt|0)', 0.041538723984988056),
 ('ZZPt', 0.041069600818833166),
 ('D_VBF1j_ggH_ME', 0.03693278744455817),
 ('ZZEta', 0.036335721596724664),
 ('Z1Pt', 0.03522688502217673),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03501364721937905),
 ('D_WHh_ggH_ME', 0.019617877857386556),
 ('D_ZHh_ggH_ME', 0.01722961446605254),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.015523712043671102),
 ('JetEta(JetPt|1)', 0.014883998635278062),
 ('JetPt(JetPt|1)', 0.013903104742408735),
 ('ExtraLepEta(ExtraLepPt|0)', 0.013689866939611054),
 ('D_VBF2j_ggH_ME', 0.013263391334015695),
 ('JetPhi(JetPt|1)', 0.01287956328897987),
 ('D_WHh_ZHh_ME', 0.011131013306038894),
 ('JetEta(JetPt|2)', 0.00985158

In [86]:
%%capture
# Call plot_variables for the ("ZHh", "ZHMET") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("ZHh", "ZHMET")])

In [87]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHh_ZHMET_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [88]:
# Hand D_ZHh_ZHMET_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [89]:
# Display the (variable, importance) pairs returned by plot_variables for ("ZHh", "ZHMET").
implist

[('PFMET', 0.1385470719051149),
 ('ZZPt', 0.07209043736100815),
 ('Z1Pt', 0.06756856931060044),
 ('JetPt(JetPt|0)', 0.059191994069681245),
 ('Z1Mass', 0.05652335063009637),
 ('Z2Pt', 0.05337286879169755),
 ('ZZEta', 0.05018532246108228),
 ('JetEta(JetPt|0)', 0.04529280948851001),
 ('Z2Mass', 0.04525574499629355),
 ('ZZPhi', 0.04236471460340993),
 ('ZZMass', 0.041586360266864346),
 ('D_VBF1j_ggH_ME', 0.034655300222386956),
 ('D_WHh_ggH_ME', 0.0346182357301705),
 ('ZZMassErr', 0.031097108969607115),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.02994810971089696),
 ('D_WHh_ZHh_ME', 0.029911045218680505),
 ('JetPhi(JetPt|0)', 0.027983691623424758),
 ('D_ZHh_ggH_ME', 0.024759080800593032),
 ('D_VBF2j_ggH_ME', 0.023387694588584137),
 ('JetPt(JetPt|2)', 0.016567828020756115),
 ('JetPhi(JetPt|1)', 0.01267605633802817),
 ('JetEta(JetPt|1)', 0.01200889547813195),
 ('JetPt(JetPt|1)', 0.011452928094885101),
 ('D_VBF2j_ZHh_ME', 0.007894736842105263),
 ('JetEta(JetPt|2)', 0.006745737583395107),
 ('nCleaned

In [90]:
%%capture
# Call plot_variables for the ("ZHh", "ttHh") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("ZHh", "ttHh")])

In [91]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHh_ttHh_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [92]:
# Hand D_ZHh_ttHh_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [93]:
# Display the (variable, importance) pairs returned by plot_variables for ("ZHh", "ttHh").
implist

[('nCleanedJetsPt30BTagged_bTagSF', 0.09411896997638593),
 ('JetPt(JetPt|0)', 0.069155515574047),
 ('nCleanedJetsPt30', 0.06072191611379737),
 ('PFMET', 0.05712358034409086),
 ('JetPt(JetPt|2)', 0.05150118070392443),
 ('JetPt(JetPt|1)', 0.05048914876869448),
 ('JetPt(JetPt|3)', 0.044754301135724726),
 ('ZZEta', 0.039919037445181604),
 ('Z1Pt', 0.03969414145957495),
 ('JetEta(JetPt|2)', 0.03946924547396829),
 ('D_VBF2j_ZHh_ME', 0.037445181603508376),
 ('ZZPt', 0.03362194984819521),
 ('D_WHh_ggH_ME', 0.03339705386258855),
 ('D_ZHh_ggH_ME', 0.02991116608568537),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.025188350387945577),
 ('JetEta(JetPt|1)', 0.025188350387945577),
 ('Z1Mass', 0.02350163049589565),
 ('Z2Pt', 0.023276734510288992),
 ('D_WHh_ZHh_ME', 0.022826942539075676),
 ('D_VBF1j_ggH_ME', 0.021702462611042393),
 ('JetEta(JetPt|0)', 0.021365118632632407),
 ('D_VBF2j_WHh_ME', 0.019790846733385808),
 ('JetPhi(JetPt|2)', 0.015742718992465984),
 ('JetPhi(JetPt|3)', 0.014280895086022715),
 ('ExtraLe

In [94]:
%%capture
# Call plot_variables for the ("ZHh", "ttHl") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("ZHh", "ttHl")])

In [95]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHh_ttHl_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [96]:
# Hand D_ZHh_ttHl_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [97]:
# Display the (variable, importance) pairs returned by plot_variables for ("ZHh", "ttHl").
implist

[('PFMET', 0.174284140969163),
 ('ExtraLepPt(ExtraLepPt|0)', 0.10545154185022027),
 ('D_VBF1j_ggH_ME', 0.057681718061674006),
 ('JetPt(JetPt|0)', 0.05506607929515418),
 ('ZZMass', 0.050798458149779735),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0483204845814978),
 ('nCleanedJetsPt30', 0.038821585903083704),
 ('D_VBF2j_ZHh_ME', 0.03579295154185022),
 ('ZZPt', 0.034691629955947136),
 ('Z2Pt', 0.03303964757709251),
 ('Z1Mass', 0.03180066079295154),
 ('D_VBF2j_WHh_ME', 0.029598017621145375),
 ('JetPt(JetPt|3)', 0.027946035242290748),
 ('ZZEta', 0.026156387665198236),
 ('JetPt(JetPt|2)', 0.0246420704845815),
 ('ZZPhi', 0.024229074889867842),
 ('Z1Pt', 0.021475770925110133),
 ('JetEta(JetPt|2)', 0.019823788546255508),
 ('ZZMassErr', 0.01486784140969163),
 ('JetEta(JetPt|0)', 0.014454845814977973),
 ('nExtraLep', 0.014317180616740088),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.013215859030837005),
 ('JetEta(JetPt|1)', 0.01238986784140969),
 ('JetPhi(JetPt|3)', 0.01238986784140969),
 ('JetPt(JetPt|1)', 0.0

In [98]:
%%capture
# Call plot_variables for the ("ZHl", "ggH") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("ZHl", "ggH")])

In [99]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHl_ggH_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [100]:
# Hand D_ZHl_ggH_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [101]:
# Display the (variable, importance) pairs returned by plot_variables for ("ZHl", "ggH").
implist

[('ZZMass', 0.09064909861600531),
 ('Z1Mass', 0.07650273224043716),
 ('ZZMassErr', 0.07308104795464991),
 ('PFMET', 0.07160002042796589),
 ('ZZPt', 0.06593125989479598),
 ('Z2Pt', 0.06424595270925898),
 ('Z2Mass', 0.060007149788059856),
 ('Z1Pt', 0.05924110106736122),
 ('JetPt(JetPt|0)', 0.05362341044890455),
 ('ExtraLepPt(ExtraLepPt|0)', 0.05076349522496298),
 ('ZZPhi', 0.043000868188550125),
 ('JetEta(JetPt|0)', 0.03845564577907155),
 ('ExtraLepEta(ExtraLepPt|0)', 0.03625963944640213),
 ('D_VBF1j_ggH_ME', 0.031050508145651398),
 ('ZZEta', 0.02916092130126143),
 ('D_VBF2j_ggH_ME', 0.02507532812420203),
 ('JetPhi(JetPt|0)', 0.01950870742045861),
 ('D_ZHh_ggH_ME', 0.01685307185537),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.014861345181553546),
 ('D_WHh_ggH_ME', 0.013993156631428425),
 ('JetEta(JetPt|1)', 0.008988304989530668),
 ('D_WHh_ZHh_ME', 0.007813696951126091),
 ('JetPhi(JetPt|2)', 0.007405137633420152),
 ('JetPt(JetPt|2)', 0.006588018998008273),
 ('JetEta(JetPt|2)', 0.00556662070374

In [102]:
%%capture
# Call plot_variables for the ("ZHl", "VBF") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("ZHl", "VBF")])

In [103]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHl_VBF_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [104]:
# Hand D_ZHl_VBF_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [105]:
# Display the (variable, importance) pairs returned by plot_variables for ("ZHl", "VBF").
implist

[('ZZMass', 0.08262193975405212),
 ('PFMET', 0.07972622315821143),
 ('JetEta(JetPt|0)', 0.06190064307472453),
 ('Z2Pt', 0.06065962167650709),
 ('ZZEta', 0.059155353315031405),
 ('ZZMassErr', 0.05667331051859652),
 ('Z2Mass', 0.04531608438945508),
 ('D_VBF2j_ggH_ME', 0.04505283742619683),
 ('Z1Mass', 0.044714377044864806),
 ('JetPt(JetPt|0)', 0.04422548982738521),
 ('Z1Pt', 0.03433492535068256),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03380843142416607),
 ('ExtraLepEta(ExtraLepPt|0)', 0.0331691173705389),
 ('JetPhi(JetPt|0)', 0.03234176977172728),
 ('D_VBF1j_ggH_ME', 0.030987928246399156),
 ('ZZPhi', 0.028242638486706027),
 ('JetPt(JetPt|2)', 0.02519649505471776),
 ('ZZPt', 0.023541799857094505),
 ('D_ZHh_ggH_ME', 0.018577714264224738),
 ('JetEta(JetPt|1)', 0.018427287428077168),
 ('D_WHh_ZHh_ME', 0.01812643375578203),
 ('D_WHh_ggH_ME', 0.016622165394306346),
 ('JetEta(JetPt|2)', 0.015719604377420933),
 ('D_VBF2j_WHh_ME', 0.013387988417133616),
 ('JetPhi(JetPt|1)', 0.0115452596743259),
 ('JetPh

In [106]:
%%capture
# Call plot_variables for the ("ZHl", "WHh") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("ZHl", "WHh")])

In [107]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHl_WHh_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [108]:
# Hand D_ZHl_WHh_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [109]:
# Display the (variable, importance) pairs returned by plot_variables for ("ZHl", "WHh").
implist

[('PFMET', 0.08599895123230204),
 ('ZZMass', 0.08123579793742353),
 ('Z1Mass', 0.07096661422828177),
 ('ZZMassErr', 0.06851948959972033),
 ('Z2Mass', 0.0681699003670687),
 ('JetPt(JetPt|0)', 0.05619646914875022),
 ('Z2Pt', 0.05256948085998951),
 ('ZZPt', 0.050646740080405525),
 ('ExtraLepPt(ExtraLepPt|0)', 0.04771893025694809),
 ('Z1Pt', 0.04758783429470372),
 ('JetEta(JetPt|0)', 0.04190700926411466),
 ('ZZEta', 0.03836741828351687),
 ('JetPhi(JetPt|0)', 0.03596399230903688),
 ('D_VBF1j_ggH_ME', 0.031856318825380175),
 ('ZZPhi', 0.02879741303967838),
 ('D_WHh_ZHh_ME', 0.02608809648662821),
 ('JetPhi(JetPt|1)', 0.01914001048767698),
 ('ExtraLepEta(ExtraLepPt|0)', 0.01743576297850026),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.016955077783604264),
 ('JetEta(JetPt|1)', 0.01651809124278972),
 ('D_WHh_ggH_ME', 0.012803705645866107),
 ('D_ZHh_ggH_ME', 0.012672609683621744),
 ('D_VBF2j_ggH_ME', 0.011449047369341023),
 ('JetPhi(JetPt|2)', 0.00987589582240867),
 ('JetPt(JetPt|1)', 0.008652333508127

In [110]:
%%capture
# Call plot_variables for the ("ZHl", "ZHMET") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("ZHl", "ZHMET")])

In [111]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHl_ZHMET_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [112]:
# Hand D_ZHl_ZHMET_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [113]:
# Display the (variable, importance) pairs returned by plot_variables for ("ZHl", "ZHMET").
implist

[('PFMET', 0.15825332920408794),
 ('ExtraLepPt(ExtraLepPt|0)', 0.10684422421802416),
 ('ZZMass', 0.10034066274388355),
 ('Z2Pt', 0.09135955404149891),
 ('Z1Mass', 0.08299783214617529),
 ('JetPt(JetPt|0)', 0.06890678228553732),
 ('Z2Mass', 0.05357695881077733),
 ('Z1Pt', 0.05094456488076804),
 ('ZZMassErr', 0.03638897491483432),
 ('ZZPt', 0.03174357386187674),
 ('JetEta(JetPt|0)', 0.02322700526478786),
 ('D_ZHh_ggH_ME', 0.020749458036543822),
 ('nExtraLep', 0.020594611334778568),
 ('D_VBF1j_ggH_ME', 0.018426757510065037),
 ('ZZPhi', 0.01610405698358625),
 ('ExtraLepEta(ExtraLepPt|0)', 0.014555589965933726),
 ('ZZEta', 0.013471663053576959),
 ('D_WHh_ggH_ME', 0.010065035614741405),
 ('D_VBF2j_ggH_ME', 0.009910188912976153),
 ('JetPt(JetPt|1)', 0.009910188912976153),
 ('D_WHh_ZHh_ME', 0.008516568597088882),
 ('JetEta(JetPt|1)', 0.00836172189532363),
 ('JetPt(JetPt|2)', 0.00712294828120161),
 ('nCleanedJetsPt30', 0.006968101579436358),
 ('JetPhi(JetPt|0)', 0.006813254877671105),
 ('JetPhi(

In [114]:
%%capture
# Call plot_variables for the ("ZHl", "ttHh") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("ZHl", "ttHh")])

In [115]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHl_ttHh_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [116]:
# Hand D_ZHl_ttHh_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [117]:
# Display the (variable, importance) pairs returned by plot_variables for ("ZHl", "ttHh").
implist

[('JetPt(JetPt|0)', 0.1014554794520548),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.09653253424657535),
 ('ZZMass', 0.09310787671232877),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0819777397260274),
 ('Z2Pt', 0.06142979452054795),
 ('JetPt(JetPt|1)', 0.05950342465753425),
 ('Z2Mass', 0.0526541095890411),
 ('nCleanedJetsPt30', 0.05008561643835616),
 ('JetPhi(JetPt|2)', 0.04366438356164384),
 ('JetPt(JetPt|2)', 0.04023972602739726),
 ('D_ZHh_ggH_ME', 0.03617294520547945),
 ('D_WHh_ggH_ME', 0.03595890410958904),
 ('Z1Mass', 0.0331763698630137),
 ('JetEta(JetPt|1)', 0.030607876712328768),
 ('ZZPt', 0.021618150684931507),
 ('JetEta(JetPt|0)', 0.02011986301369863),
 ('JetPhi(JetPt|3)', 0.019263698630136987),
 ('PFMET', 0.016267123287671232),
 ('nExtraLep', 0.014982876712328766),
 ('D_VBF2j_ZHh_ME', 0.014126712328767123),
 ('ZZMassErr', 0.0136986301369863),
 ('JetPt(JetPt|3)', 0.0136986301369863),
 ('JetPhi(JetPt|0)', 0.006635273972602739),
 ('ExtraLepPt(ExtraLepPt|1)', 0.006421232876712328),
 ('ExtraLepEta

In [118]:
%%capture
# Call plot_variables for the ("ZHl", "ttHl") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("ZHl", "ttHl")])

In [119]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHl_ttHl_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [120]:
# Hand D_ZHl_ttHl_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [121]:
# Display the (variable, importance) pairs returned by plot_variables for ("ZHl", "ttHl").
implist

[('PFMET', 0.11702717692734331),
 ('JetPt(JetPt|0)', 0.06898995501324952),
 ('Z2Pt', 0.05712701053799223),
 ('ZZMass', 0.053121340974918344),
 ('Z2Mass', 0.05151907314968879),
 ('Z1Mass', 0.050132495224009366),
 ('ExtraLepPt(ExtraLepPt|0)', 0.039163123189745484),
 ('JetEta(JetPt|0)', 0.03617427743883651),
 ('ZZPt', 0.035095827941085844),
 ('JetPhi(JetPt|0)', 0.032599987674862885),
 ('ZZEta', 0.03152153817711222),
 ('Z1Pt', 0.031244222591976335),
 ('ZZPhi', 0.027885622727552843),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.027330991557281074),
 ('ZZMassErr', 0.027084488814938065),
 ('JetPt(JetPt|1)', 0.021230048684291614),
 ('D_VBF1j_ggH_ME', 0.02070623035681272),
 ('nExtraLep', 0.019504529487890554),
 ('D_WHh_ggH_ME', 0.01922721390275467),
 ('D_VBF2j_ggH_ME', 0.018210390090589758),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.017286004806803475),
 ('nExtraZ', 0.017039502064460466),
 ('JetEta(JetPt|1)', 0.015313982868059408),
 ('JetPhi(JetPt|2)', 0.01482097738337339),
 ('JetPhi(JetPt|3)', 0.0134343994576

In [122]:
%%capture
# Call plot_variables for the ("ZHMET", "ggH") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("ZHMET", "ggH")])

In [123]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHMET_ggH_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [124]:
# Hand D_ZHMET_ggH_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [125]:
# Display the (variable, importance) pairs returned by plot_variables for ("ZHMET", "ggH").
implist

[('ZZPt', 0.13906202983278168),
 ('PFMET', 0.13020676610927223),
 ('ZZPhi', 0.08017022739973348),
 ('Z2Pt', 0.06985341529467394),
 ('JetPt(JetPt|0)', 0.0665004513605296),
 ('Z1Pt', 0.06516786313029274),
 ('ZZMass', 0.06314748742638525),
 ('D_VBF1j_ggH_ME', 0.05712934703176718),
 ('ZZMassErr', 0.052701715170012466),
 ('Z2Mass', 0.05093925976873146),
 ('Z1Mass', 0.05081029961741822),
 ('ZZEta', 0.048489016893779824),
 ('JetPhi(JetPt|0)', 0.025491123242917937),
 ('JetEta(JetPt|0)', 0.02050466405880583),
 ('D_VBF2j_ggH_ME', 0.012638094828697932),
 ('JetPt(JetPt|1)', 0.01134849331556549),
 ('D_WHh_ZHh_ME', 0.009285130894553583),
 ('JetEta(JetPt|1)', 0.008124489532734385),
 ('D_VBF2j_WHh_ME', 0.007307741907750505),
 ('nCleanedJetsPt30', 0.005330352920947427),
 ('JetPhi(JetPt|1)', 0.005158406052529769),
 ('D_WHh_ggH_ME', 0.0044706185788591325),
 ('D_VBF2j_ZHh_ME', 0.004040751407814985),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0037828311051884968),
 ('Z1Flav', 0.00245024287495164),
 ('Z2Flav', 0

In [126]:
%%capture
# Call plot_variables for the ("ZHMET", "VBF") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("ZHMET", "VBF")])

In [127]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHMET_VBF_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [128]:
# Hand D_ZHMET_VBF_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [129]:
# Display the (variable, importance) pairs returned by plot_variables for ("ZHMET", "VBF").
implist

[('PFMET', 0.14772507260406584),
 ('JetPt(JetPt|0)', 0.08120038722168442),
 ('ZZPt', 0.07887705711519845),
 ('Z1Pt', 0.07546950629235237),
 ('D_VBF1j_ggH_ME', 0.06846079380445305),
 ('ZZEta', 0.06609874152952565),
 ('ZZMassErr', 0.05424975798644724),
 ('Z2Pt', 0.0461568247821878),
 ('Z2Mass', 0.044220716360116166),
 ('JetEta(JetPt|0)', 0.042478218780251695),
 ('ZZPhi', 0.039767666989351406),
 ('Z1Mass', 0.038489835430784124),
 ('D_VBF2j_ggH_ME', 0.032642787996127784),
 ('ZZMass', 0.032333010648596325),
 ('JetPhi(JetPt|1)', 0.02633107454017425),
 ('JetPhi(JetPt|0)', 0.022536302032913843),
 ('JetPt(JetPt|1)', 0.021529525653436594),
 ('D_VBF2j_ZHh_ME', 0.014908034849951598),
 ('D_WHh_ZHh_ME', 0.01339787028073572),
 ('JetEta(JetPt|1)', 0.012352371732817037),
 ('D_WHh_ggH_ME', 0.011848983543078413),
 ('D_VBF2j_WHh_ME', 0.00968054211035818),
 ('JetPhi(JetPt|2)', 0.006195546950629236),
 ('Z1Flav', 0.0027105517909002904),
 ('nCleanedJetsPt30', 0.0026718296224588576),
 ('D_ZHh_ggH_ME', 0.002323

In [130]:
%%capture
# Call plot_variables for the ("ZHMET", "WHh") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("ZHMET", "WHh")])

In [131]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHMET_WHh_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [132]:
# Hand D_ZHMET_WHh_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [133]:
# Display the (variable, importance) pairs returned by plot_variables for ("ZHMET", "WHh").
implist

[('PFMET', 0.1418257034695391),
 ('ZZPt', 0.08375287827342622),
 ('JetPt(JetPt|0)', 0.0681028763220544),
 ('Z1Pt', 0.06646372399797057),
 ('Z2Mass', 0.0490574874136518),
 ('JetEta(JetPt|0)', 0.04890137766850096),
 ('ZZPhi', 0.04843304843304843),
 ('D_WHh_ZHh_ME', 0.04636459430979979),
 ('ZZEta', 0.04476446942200367),
 ('D_VBF1j_ggH_ME', 0.04468641454942825),
 ('D_WHh_ggH_ME', 0.04300823478905671),
 ('Z1Mass', 0.04199352144557624),
 ('ZZMass', 0.04062756117550638),
 ('Z2Pt', 0.038793271669983996),
 ('ZZMassErr', 0.03590524138469344),
 ('JetPhi(JetPt|0)', 0.03141708621160676),
 ('JetPhi(JetPt|1)', 0.0182648401826484),
 ('D_ZHh_ggH_ME', 0.015610974515084104),
 ('JetPt(JetPt|1)', 0.014479178862740506),
 ('D_VBF2j_ggH_ME', 0.012957108847519806),
 ('JetPt(JetPt|2)', 0.011591148577449948),
 ('JetEta(JetPt|1)', 0.010303243179955509),
 ('JetPt(JetPt|3)', 0.007102993404363267),
 ('D_VBF2j_ZHh_ME', 0.006205362369745932),
 ('Z2Flav', 0.005619950825430277),
 ('nCleanedJetsPt30', 0.00530773133512859

In [134]:
%%capture
# Call plot_variables for the ("ZHMET", "ttHh") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("ZHMET", "ttHh")])

In [135]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHMET_ttHh_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [136]:
# Hand D_ZHMET_ttHh_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [137]:
# Display the (variable, importance) pairs returned by plot_variables for ("ZHMET", "ttHh").
implist

[('PFMET', 0.2309924550203134),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.09866511897852583),
 ('JetPt(JetPt|0)', 0.09112013929193268),
 ('JetEta(JetPt|0)', 0.06442251886244922),
 ('nCleanedJetsPt30', 0.0641323273360418),
 ('JetPt(JetPt|2)', 0.05310504933255949),
 ('JetPt(JetPt|1)', 0.04381892048752176),
 ('D_WHh_ggH_ME', 0.04323853743470691),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0351131746952989),
 ('JetPt(JetPt|3)', 0.03366221706326175),
 ('D_VBF2j_ggH_ME', 0.02959953569355775),
 ('JetEta(JetPt|2)', 0.02959953569355775),
 ('Z1Pt', 0.028728961114335462),
 ('ZZEta', 0.02640742890307603),
 ('D_ZHh_ggH_ME', 0.020023215322112594),
 ('ZZPt', 0.01857225769007545),
 ('Z2Mass', 0.013348810214741729),
 ('D_WHh_ZHh_ME', 0.012768427161926872),
 ('Z2Pt', 0.012768427161926872),
 ('ZZMassErr', 0.011607661056297156),
 ('JetPhi(JetPt|0)', 0.009576320371445153),
 ('ZZPhi', 0.009286128845037725),
 ('JetPhi(JetPt|1)', 0.006964596633778294),
 ('D_VBF2j_WHh_ME', 0.005513639001741149),
 ('ZZMass', 0.004643064422518

In [138]:
%%capture
# Call plot_variables for the ("ZHMET", "ttHl") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("ZHMET", "ttHl")])

In [139]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHMET_ttHl_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [140]:
# Hand D_ZHMET_ttHl_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [141]:
# Display the (variable, importance) pairs returned by plot_variables for ("ZHMET", "ttHl").
implist

[('ZZMass', 0.08970001477759716),
 ('PFMET', 0.07071080242352594),
 ('JetPt(JetPt|0)', 0.06849416284912073),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0538643416580464),
 ('Z1Mass', 0.050391606324811586),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.05031771833899808),
 ('JetEta(JetPt|0)', 0.04913551056598197),
 ('nExtraLep', 0.043224471700901435),
 ('Z2Mass', 0.03679621693512635),
 ('JetPt(JetPt|3)', 0.03450568937490764),
 ('JetPhi(JetPt|0)', 0.0340623614600266),
 ('D_VBF1j_ggH_ME', 0.03354514555933205),
 ('ZZEta', 0.03324959361607802),
 ('nCleanedJetsPt30', 0.032215161814688933),
 ('Z1Pt', 0.029629082311216196),
 ('ZZMassErr', 0.029185754396335157),
 ('ZZPt', 0.027264666765183982),
 ('Z2Pt', 0.025565243091473327),
 ('ZZPhi', 0.0252696911482193),
 ('D_VBF2j_ggH_ME', 0.024678587261711245),
 ('JetPt(JetPt|2)', 0.020393084084527856),
 ('JetPt(JetPt|1)', 0.019580316240579283),
 ('D_WHh_ZHh_ME', 0.01802866853849564),
 ('D_ZHh_ggH_ME', 0.017954780552682133),
 ('D_WHh_ggH_ME', 0.015516477020836411),
 ('JetPh

In [142]:
%%capture
# Call plot_variables for the ("ttHh", "ggH") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("ttHh", "ggH")])

In [143]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHh_ggH_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [144]:
# Hand D_ttHh_ggH_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [145]:
# Display the (variable, importance) pairs returned by plot_variables for ("ttHh", "ggH").
implist

[('JetPt(JetPt|0)', 0.08638075180292297),
 ('JetPt(JetPt|1)', 0.05951241304486566),
 ('PFMET', 0.059480502903822834),
 ('Z1Pt', 0.0591614014933946),
 ('ZZPt', 0.0512476865147744),
 ('JetEta(JetPt|1)', 0.047227008743378644),
 ('JetEta(JetPt|0)', 0.044674197459952776),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.04161082391984173),
 ('D_WHh_ggH_ME', 0.0380368881230455),
 ('D_VBF2j_ggH_ME', 0.03698385346863233),
 ('ZZEta', 0.03465441317250622),
 ('D_ZHh_ggH_ME', 0.03398430021060693),
 ('JetPt(JetPt|2)', 0.033633288659135875),
 ('Z2Pt', 0.029229689195226245),
 ('JetEta(JetPt|2)', 0.029006318207926478),
 ('JetPt(JetPt|3)', 0.028782947220626715),
 ('ZZPhi', 0.025974854808858257),
 ('JetPhi(JetPt|0)', 0.025209011423830495),
 ('D_VBF1j_ggH_ME', 0.024219797051502966),
 ('Z1Mass', 0.02252855957623333),
 ('JetPhi(JetPt|1)', 0.022209458165805093),
 ('Z2Mass', 0.02125215393452039),
 ('nCleanedJetsPt30', 0.019241815048822514),
 ('ZZMassErr', 0.018124960112323698),
 ('ZZMass', 0.017103835598953347),
 ('Jet

In [146]:
%%capture
# Call plot_variables for the ("ttHh", "VBF") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("ttHh", "VBF")])

In [147]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHh_VBF_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [148]:
# Hand D_ttHh_VBF_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [149]:
# Display the (variable, importance) pairs returned by plot_variables for ("ttHh", "VBF").
implist

[('JetEta(JetPt|2)', 0.08293745570982389),
 ('JetEta(JetPt|0)', 0.05734757617910291),
 ('D_VBF2j_ggH_ME', 0.054224298574840554),
 ('JetPt(JetPt|0)', 0.05222960027295871),
 ('JetPt(JetPt|1)', 0.045720584761554814),
 ('D_ZHh_ggH_ME', 0.043673394399097135),
 ('JetPt(JetPt|2)', 0.041468727854911944),
 ('JetEta(JetPt|1)', 0.03981522794677305),
 ('PFMET', 0.03881787879583213),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0355896170704181),
 ('D_WHh_ggH_ME', 0.03419857746515839),
 ('Z1Pt', 0.03301750610220204),
 ('ZZMass', 0.03167895855751818),
 ('JetPhi(JetPt|1)', 0.03128526810319939),
 ('ZZPt', 0.02981549040707593),
 ('JetPhi(JetPt|0)', 0.027190887378284033),
 ('ZZEta', 0.02679719692396525),
 ('ZZPhi', 0.026114800136479356),
 ('JetPt(JetPt|3)', 0.02448754625862838),
 ('Z2Mass', 0.021994173381276082),
 ('nCleanedJetsPt30', 0.021679221017821056),
 ('ZZMassErr', 0.021416760714941866),
 ('JetEta(JetPt|3)', 0.02086559407889557),
 ('JetPhi(JetPt|2)', 0.020419411564000945),
 ('D_VBF2j_ZHh_ME', 0.01989449

In [150]:
%%capture
# Call plot_variables for the ("ttHh", "WHh") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("ttHh", "WHh")])

In [151]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHh_WHh_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [152]:
# Hand D_ttHh_WHh_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [153]:
# Display the (variable, importance) pairs returned by plot_variables for ("ttHh", "WHh").
implist

[('PFMET', 0.06352825349120397),
 ('JetPt(JetPt|0)', 0.054252921211493124),
 ('JetPt(JetPt|1)', 0.05091069254087105),
 ('ZZEta', 0.04331942897116356),
 ('Z2Pt', 0.04142809026608286),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.040961732777158845),
 ('Z1Pt', 0.03995129155115683),
 ('ZZPt', 0.0355468041557634),
 ('ZZMass', 0.035028629168070056),
 ('D_WHh_ggH_ME', 0.034303184185299374),
 ('JetPt(JetPt|2)', 0.03414773168899137),
 ('JetEta(JetPt|0)', 0.03342228670622069),
 ('D_WHh_ZHh_ME', 0.03212684923698733),
 ('JetPhi(JetPt|1)', 0.032023214239448664),
 ('ZZMassErr', 0.03181594424437133),
 ('JetEta(JetPt|1)', 0.03181594424437133),
 ('Z2Mass', 0.03062414177267663),
 ('JetPhi(JetPt|0)', 0.029484156799751277),
 ('Z1Mass', 0.029199160556519936),
 ('ZZPhi', 0.028965981812057933),
 ('D_VBF2j_ggH_ME', 0.02886234681451926),
 ('D_VBF2j_WHh_ME', 0.028136901831748582),
 ('JetEta(JetPt|2)', 0.02715236935513123),
 ('D_VBF2j_ZHh_ME', 0.02510557815374252),
 ('JetPt(JetPt|3)', 0.0242505894240485),
 ('JetEta(Je

In [154]:
%%capture
# Call plot_variables for the ("ttHh", "ttHl") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("ttHh", "ttHl")])

In [155]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHh_ttHl_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [156]:
# Hand D_ttHh_ttHl_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [157]:
# Display the (variable, importance) pairs returned by plot_variables for ("ttHh", "ttHl").
implist

[('PFMET', 0.1410740004114944),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0708456210136479),
 ('JetPhi(JetPt|2)', 0.06316439201700844),
 ('JetPhi(JetPt|1)', 0.0426582538920513),
 ('JetPt(JetPt|2)', 0.04231534188327275),
 ('JetPt(JetPt|3)', 0.039846375420067214),
 ('Z1Mass', 0.03682874974281599),
 ('nCleanedJetsPt30', 0.03621150812701461),
 ('Z2Pt', 0.035319936904190385),
 ('JetEta(JetPt|2)', 0.03195939921816062),
 ('JetPhi(JetPt|3)', 0.031204992798847815),
 ('JetEta(JetPt|3)', 0.030656333584802138),
 ('ZZMassErr', 0.027501543104039503),
 ('ZZMass', 0.02695288388999383),
 ('JetPt(JetPt|1)', 0.02695288388999383),
 ('ZZPt', 0.026267059872436733),
 ('JetPt(JetPt|0)', 0.02372951100747548),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.023180851793429807),
 ('Z2Mass', 0.023043686989918388),
 ('Z1Pt', 0.02119196214251423),
 ('JetEta(JetPt|1)', 0.01995747891091146),
 ('JetEta(JetPt|0)', 0.019751731705644333),
 ('ZZEta', 0.019477402098621494),
 ('ZZPhi', 0.018928742884575817),
 ('JetPhi(JetPt|0)', 0.018174336465

In [158]:
%%capture
# Call plot_variables for the ("ttHl", "ggH") pair; %%capture keeps the call's output out of the notebook.
# implist receives the (variable, importance) pairs that are displayed a few cells below.
fig, implist = plot_variables([("ttHl", "ggH")])

In [159]:
# One-row table: converted variable name -> [importance], labelled with its discriminant.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHl_ggH_ML"
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [160]:
# Hand D_ttHl_ggH_ML's importances to append_variables (defined outside this excerpt);
# presumably it records the variables passing threshold_fscore via confhandler — confirm there.
append_variables(confhandler, impdict, threshold_fscore)

In [161]:
# Display the (variable, importance) pairs returned by plot_variables for ("ttHl", "ggH").
implist

[('PFMET', 0.18323446581580682),
 ('JetPt(JetPt|0)', 0.06684043381868296),
 ('ZZPt', 0.06531248127509137),
 ('D_VBF1j_ggH_ME', 0.051860506920726225),
 ('ZZMass', 0.04715680987476781),
 ('ExtraLepPt(ExtraLepPt|0)', 0.042872550781952186),
 ('Z1Pt', 0.04230331355982983),
 ('Z1Mass', 0.03960692671819761),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0375696566600755),
 ('Z2Pt', 0.03373479537419857),
 ('ZZMassErr', 0.033555036251423094),
 ('D_VBF2j_ggH_ME', 0.03304571873689256),
 ('JetPt(JetPt|1)', 0.029450536281382948),
 ('Z2Mass', 0.026334711486607945),
 ('JetEta(JetPt|1)', 0.024027802744322608),
 ('JetPt(JetPt|2)', 0.022020492539996404),
 ('D_WHh_ggH_ME', 0.020372700581221162),
 ('ZZPhi', 0.020103061897057944),
 ('ZZEta', 0.02007310204326203),
 ('nCleanedJetsPt30', 0.01878482833003775),
 ('JetEta(JetPt|0)', 0.018035831985139913),
 ('JetPhi(JetPt|0)', 0.017646353885793036),
 ('nExtraLep', 0.014979926897956738),
 ('JetPt(JetPt|3)', 0.014200970699262988),
 ('D_ZHh_ggH_ME', 0.013721613038528372),
 

In [162]:
%%capture
# Evaluate the input variables for the ttHl-vs-VBF discriminant. The call
# returns a figure handle and `implist`, shown two cells further down as
# (variable name, importance score) pairs. Output of this cell is hidden by
# the %%capture magic on the first line of the cell.
fig, implist = plot_variables([("ttHl", "VBF")])

In [163]:
# Turn the (variable, importance) pairs into one row of the summary table.
# Each score is wrapped in a one-element list so DataFrame.from_dict builds a
# single-row frame; the scalar "discriminant" label is broadcast onto that row.
impdict = {convert_varname(varname): [importance] for varname, importance in implist}
impdict["discriminant"] = "D_ttHl_VBF_ML"
# pd.concat is used instead of DataFrame.append, which is deprecated and was
# removed in pandas 2.0. The row index is deliberately left untouched so the
# resulting table (and the CSV written from it) is unchanged.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [164]:
# Hand this discriminant's importances to append_variables together with the
# configuration handler and the score threshold.
# NOTE(review): presumably this registers the variables whose score passes
# threshold_fscore in the configuration -- confirm against the definition of
# append_variables, which lives earlier in the notebook.
append_variables(confhandler, impdict, threshold_fscore)

In [165]:
# Display the (variable, importance) pairs obtained for ttHl vs. VBF.
implist

[('PFMET', 0.1434631700732742),
 ('D_VBF2j_ggH_ME', 0.05613014058829716),
 ('ExtraLepPt(ExtraLepPt|0)', 0.05017003821477404),
 ('D_VBF1j_ggH_ME', 0.044104757564071104),
 ('ExtraLepEta(ExtraLepPt|0)', 0.040844230971496684),
 ('JetEta(JetPt|2)', 0.04007292360551134),
 ('JetPt(JetPt|0)', 0.03817971461627458),
 ('JetPt(JetPt|2)', 0.034182940083441436),
 ('D_WHh_ggH_ME', 0.03355187042036251),
 ('JetEta(JetPt|0)', 0.033306454440276266),
 ('D_ZHh_ggH_ME', 0.03155348315394594),
 ('JetEta(JetPt|1)', 0.03141324545103951),
 ('ZZPt', 0.028994145075903657),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.02769694632401921),
 ('ZZEta', 0.026224450443501735),
 ('ZZMass', 0.025979034463415488),
 ('JetPt(JetPt|1)', 0.025172667671703536),
 ('ZZMassErr', 0.02461171686007783),
 ('Z1Mass', 0.024155944325631947),
 ('JetPt(JetPt|3)', 0.021596606247589665),
 ('Z1Pt', 0.021351190267503418),
 ('ZZPhi', 0.020720120604424498),
 ('Z2Mass', 0.01802054482347579),
 ('Z2Pt', 0.01802054482347579),
 ('D_VBF2j_WHh_ME', 0.017810188

In [166]:
%%capture
# Evaluate the input variables for the ttHl-vs-WHh discriminant. The call
# returns a figure handle and `implist`, shown two cells further down as
# (variable name, importance score) pairs. Output of this cell is hidden by
# the %%capture magic on the first line of the cell.
fig, implist = plot_variables([("ttHl", "WHh")])

In [167]:
# Turn the (variable, importance) pairs into one row of the summary table.
# Each score is wrapped in a one-element list so DataFrame.from_dict builds a
# single-row frame; the scalar "discriminant" label is broadcast onto that row.
impdict = {convert_varname(varname): [importance] for varname, importance in implist}
impdict["discriminant"] = "D_ttHl_WHh_ML"
# pd.concat is used instead of DataFrame.append, which is deprecated and was
# removed in pandas 2.0. The row index is deliberately left untouched so the
# resulting table (and the CSV written from it) is unchanged.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [168]:
# Hand this discriminant's importances to append_variables together with the
# configuration handler and the score threshold.
# NOTE(review): presumably this registers the variables whose score passes
# threshold_fscore in the configuration -- confirm against the definition of
# append_variables, which lives earlier in the notebook.
append_variables(confhandler, impdict, threshold_fscore)

In [169]:
# Display the (variable, importance) pairs obtained for ttHl vs. WHh.
implist

[('PFMET', 0.14684491655852974),
 ('JetPt(JetPt|0)', 0.0590880284877931),
 ('ExtraLepPt(ExtraLepPt|0)', 0.04876723903793343),
 ('ZZMassErr', 0.04261097866433292),
 ('ZZPt', 0.038114494371850195),
 ('Z1Mass', 0.03690738449467363),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.03500618643812053),
 ('D_VBF2j_WHh_ME', 0.03413103177716752),
 ('ZZMass', 0.03355765458550865),
 ('Z2Pt', 0.032350544708332075),
 ('D_VBF1j_ggH_ME', 0.032199655973685),
 ('ZZEta', 0.030509702145637807),
 ('Z1Pt', 0.030419168904849563),
 ('JetPt(JetPt|2)', 0.028880103811449436),
 ('JetEta(JetPt|0)', 0.02881974831759061),
 ('D_WHh_ggH_ME', 0.028699037329872952),
 ('JetPt(JetPt|1)', 0.026737483779461026),
 ('ZZPhi', 0.025379485167637383),
 ('Z2Mass', 0.025379485167637383),
 ('nCleanedJetsPt30', 0.022271177233907716),
 ('D_WHh_ZHh_ME', 0.021818511029966504),
 ('JetEta(JetPt|1)', 0.021607266801460603),
 ('JetPt(JetPt|3)', 0.02130548933216646),
 ('D_VBF2j_ggH_ME', 0.021275311585237048),
 ('JetPhi(JetPt|1)', 0.01931375803482512),

In [170]:
# Save the variable configuration that the append_variables calls above
# passed through `confhandler`; it is written to `out_path`.
confhandler.save_configuration(out_path)

In [171]:
# Replace missing entries in the importance table with 0.0.
# NOTE(review): presumably the gaps are variables that were not reported for a
# given discriminant (rows with differing columns were stacked above) -- confirm.
df = df.fillna(0.0)

In [172]:
# Write the filled importance table to a CSV file in the current working directory.
df.to_csv("input_parameters_table_inclusive.csv")

In [173]:
# now plot the data contained in the table to have a global picture of the relevant input variables
datacol_labels = [col for col in df.columns.tolist() if col != "discriminant"]
variable_data = df[datacol_labels].as_matrix().transpose()
datacol_labels = np.concatenate([[''], np.array(datacol_labels)])

In [174]:
# Discriminant names for the plot, with the same leading empty padding label
# as `datacol_labels`. `.values` replaces Series.as_matrix(), which was
# removed in pandas 1.0.
discriminant_labels = np.concatenate([[''], df["discriminant"].values])

In [175]:
# Heat map of the importance table: one row per input variable, one column per
# discriminant, with the colour scale spanning the full range of the data.
fig = plt.figure(figsize = (15, 10))
ax = fig.add_subplot(111)
cax = ax.matshow(variable_data, interpolation = 'nearest', cmap = 'Blues', vmin = np.min(variable_data), vmax = np.max(variable_data))
# Place exactly one tick on every cell and label it explicitly. Previously the
# labels were attached before a MultipleLocator(1) was installed, so alignment
# depended on that locator emitting an extra off-screen tick at -1 (which is
# why the label arrays carry a leading '' entry). Fixing the tick positions
# first removes that dependency; the padding entry is skipped with [1:].
ax.set_xticks(np.arange(variable_data.shape[1]))
ax.set_yticks(np.arange(variable_data.shape[0]))
ax.set_xticklabels(discriminant_labels[1:], rotation = 'vertical')
ax.set_yticklabels(datacol_labels[1:])

In [176]:
# Use the explicit figure handle rather than the pyplot "current figure":
# this runs in a separate cell from the one that drew the heat map, and with
# the inline backend the current figure at this point can be a new, empty one,
# which would be laid out and saved as a blank PDF.
fig.tight_layout()
fig.savefig(os.path.join(out_dir, "input_variables_inclusive_fullmassrange.pdf"))