In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
from trainlib.FileCollection import FileCollection
from trainlib.config import Config
from trainlib.ConfigFileHandler import ConfigFileHandler
from trainlib.ConfigFileUtils import ConfigFileUtils
import trainlib.cuts as cuts
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd
import copy
import re
from scipy import interpolate
import scipy.integrate as integrate
import pickle
import os

Welcome to JupyROOT 6.10/09


In [3]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error



In [4]:
# Alternative scalar inputs (unmasked ZZMass plus the angular variables), kept for reference:
#candidate_branches = ["PFMET", "nCleanedJetsPt30", "nCleanedJetsPt30BTagged_bTagSF", "nExtraLep", "ZZMass", "nExtraZ", "Z1Mass", "Z2Mass", "Z1Pt", "Z2Pt", "ZZMassErr", "ZZPt", "ZZEta", "ZZPhi", "Z1Flav", "Z2Flav", "costhetastar", "helphi", "helcosthetaZ1", "helcosthetaZ2", "phistarZ1", "phistarZ2", "xi", "xistar"]
# Scalar branches that are always offered as candidate inputs.
candidate_branches = ["PFMET", "nCleanedJetsPt30", "nCleanedJetsPt30BTagged_bTagSF", "nExtraLep", "ZZMass_masked", "nExtraZ", "Z1Mass", "Z2Mass", "Z1Pt", "Z2Pt", "ZZMassErr", "ZZPt", "ZZEta", "ZZPhi", "Z1Flav", "Z2Flav"]
# Matrix-element discriminants; only offered for jet-binned categories (see the importance functions).
MELA_branches = ["D_VBF2j_ggH_ME", "D_VBF1j_ggH_ME", "D_WHh_ggH_ME", "D_ZHh_ggH_ME", "D_WHh_ZHh_ME", "D_VBF2j_WHh_ME", "D_VBF2j_ZHh_ME"]
# Object collections whose list-valued branches get unrolled into per-rank scalar columns.
#list_branches = ["Jet", "Lep", "ExtraLep"]
list_branches = ["Jet", "ExtraLep"]
# Minimum Pt per collection, keyed by name. The previous positional list had three entries
# for two collections and only lined up through zip() truncation, so adding or removing a
# collection could silently attach a limit to the wrong one.
_pt_limit_by_collection = {"Jet": 30.0, "Lep": 0.0, "ExtraLep": 0.0}
pt_limits = [_pt_limit_by_collection[_collection_name] for _collection_name in list_branches]

In [5]:
# Every branch requested from the ROOT files: the list-valued object kinematics, the scalar
# candidates, the MELA discriminants, and the generator ids plus the training weight.
allbranches = (
    ["JetPt", "JetEta", "JetPhi", "LepPt", "LepEta", "LepPhi", "ExtraLepPt", "ExtraLepEta", "ExtraLepPhi"]
    + candidate_branches
    + MELA_branches
    + ["LHEAssociatedParticleId", "GenAssocLep1Id", "GenAssocLep2Id", "training_weight"]
)

In [6]:
#MC_path = "/data_CMS/cms/wind/CJLST_NTuples_randomizeda/"
# Directory holding the per-sample ZZ4lAnalysis.root files. The site default can be overridden
# through the CJLST_MC_PATH environment variable; os.path.join(..., "") guarantees the trailing
# separator that the plain string concatenations building the sample paths rely on.
MC_path = os.path.join(os.environ.get("CJLST_MC_PATH", "/data_CMS/cms/wind/CJLST_NTuples_ZZMask/"), "")

In [7]:
# these are the cuts without any m4l restriction imposed
def WHhadr0j_cut(row):
    """Pass rows accepted by cuts.WHhadr_cut whose nCleanedJetsPt30 equals 0."""
    base_decision = cuts.WHhadr_cut(row)
    if not base_decision:
        return base_decision
    return row["nCleanedJetsPt30"] == 0

def WHhadr01j_cut(row):
    """Pass rows accepted by cuts.WHhadr_cut whose nCleanedJetsPt30 equals 0 or 1."""
    base_decision = cuts.WHhadr_cut(row)
    if not base_decision:
        return base_decision
    return row["nCleanedJetsPt30"] == 0 or row["nCleanedJetsPt30"] == 1

def WHhadr1j_cut(row):
    """Pass rows accepted by cuts.WHhadr_cut whose nCleanedJetsPt30 equals 1."""
    base_decision = cuts.WHhadr_cut(row)
    if not base_decision:
        return base_decision
    return row["nCleanedJetsPt30"] == 1

def WHhadr2j_cut(row):
    """Pass rows accepted by cuts.WHhadr_cut whose nCleanedJetsPt30 is at least 2."""
    base_decision = cuts.WHhadr_cut(row)
    if not base_decision:
        return base_decision
    return row["nCleanedJetsPt30"] >= 2

def ZHhadr0j_cut(row):
    """Pass rows accepted by cuts.ZHhadr_cut whose nCleanedJetsPt30 equals 0."""
    base_decision = cuts.ZHhadr_cut(row)
    if not base_decision:
        return base_decision
    return row["nCleanedJetsPt30"] == 0

def ZHhadr01j_cut(row):
    """Pass rows accepted by cuts.ZHhadr_cut whose nCleanedJetsPt30 equals 0 or 1."""
    base_decision = cuts.ZHhadr_cut(row)
    if not base_decision:
        return base_decision
    return row["nCleanedJetsPt30"] == 0 or row["nCleanedJetsPt30"] == 1

def ZHhadr1j_cut(row):
    """Pass rows accepted by cuts.ZHhadr_cut whose nCleanedJetsPt30 equals 1."""
    base_decision = cuts.ZHhadr_cut(row)
    if not base_decision:
        return base_decision
    return row["nCleanedJetsPt30"] == 1

def ZHhadr2j_cut(row):
    """Pass rows accepted by cuts.ZHhadr_cut whose nCleanedJetsPt30 is at least 2."""
    base_decision = cuts.ZHhadr_cut(row)
    if not base_decision:
        return base_decision
    return row["nCleanedJetsPt30"] >= 2

def mZZ0j_cut(row):
    """Pass rows whose nCleanedJetsPt30 equals 0; no other requirement is applied."""
    jet_count = row["nCleanedJetsPt30"]
    return jet_count == 0

def mZZ01j_cut(row):
    """Pass rows whose nCleanedJetsPt30 equals 0 or 1; no other requirement is applied."""
    if row["nCleanedJetsPt30"] == 0:
        return row["nCleanedJetsPt30"] == 0
    return row["nCleanedJetsPt30"] == 1

def mZZ1j_cut(row):
    """Pass rows whose nCleanedJetsPt30 equals 1; no other requirement is applied."""
    jet_count = row["nCleanedJetsPt30"]
    return jet_count == 1

def mZZ2j_cut(row):
    """Pass rows whose nCleanedJetsPt30 is at least 2; no other requirement is applied."""
    jet_count = row["nCleanedJetsPt30"]
    return jet_count >= 2

In [8]:
# ROOT file of each Monte Carlo sample, spelled out once
_VBF_file = MC_path + "VBFH125/ZZ4lAnalysis.root"
_ggH_file = MC_path + "ggH125/ZZ4lAnalysis.root"
_Wplus_file = MC_path + "WplusH125/ZZ4lAnalysis.root"
_Wminus_file = MC_path + "WminusH125/ZZ4lAnalysis.root"
_ZH_file = MC_path + "ZH125/ZZ4lAnalysis.root"
_ttH_file = MC_path + "ttH125/ZZ4lAnalysis.root"
_bck_file = MC_path + "bck/ZZ4lAnalysis.root"

# category name -> {sample file: selection function applied to the events of that file}
collections = {
    # VBF, jet-binned and inclusive
    "VBF2j": {_VBF_file: mZZ2j_cut},
    "VBF1j": {_VBF_file: mZZ1j_cut},
    "VBF0j": {_VBF_file: mZZ0j_cut},
    "VBF01j": {_VBF_file: mZZ01j_cut},
    "VBF": {_VBF_file: cuts.no_cut},
    # gluon fusion, jet-binned and inclusive
    "ggH2j": {_ggH_file: mZZ2j_cut},
    "ggH1j": {_ggH_file: mZZ1j_cut},
    "ggH0j": {_ggH_file: mZZ0j_cut},
    "ggH01j": {_ggH_file: mZZ01j_cut},
    "ggH": {_ggH_file: cuts.no_cut},
    # WH: both W charges contribute to every category
    "WHh2j": {_Wplus_file: WHhadr2j_cut, _Wminus_file: WHhadr2j_cut},
    "WHh1j": {_Wplus_file: WHhadr1j_cut, _Wminus_file: WHhadr1j_cut},
    "WHh0j": {_Wplus_file: WHhadr0j_cut, _Wminus_file: WHhadr0j_cut},
    "WHh": {_Wplus_file: cuts.WHhadr_cut, _Wminus_file: cuts.WHhadr_cut},
    "WHh01j": {_Wplus_file: WHhadr01j_cut, _Wminus_file: WHhadr01j_cut},
    "WHl": {_Wplus_file: cuts.WHlept_cut, _Wminus_file: cuts.WHlept_cut},
    # ZH
    "ZHh2j": {_ZH_file: ZHhadr2j_cut},
    "ZHh1j": {_ZH_file: ZHhadr1j_cut},
    "ZHh01j": {_ZH_file: ZHhadr01j_cut},
    "ZHh0j": {_ZH_file: ZHhadr0j_cut},
    "ZHh": {_ZH_file: cuts.ZHhadr_cut},
    "ZHl": {_ZH_file: cuts.ZHlept_cut},
    # ttH
    "ttHh": {_ttH_file: cuts.ttHhadr_cut},
    "ttHl": {_ttH_file: cuts.ttHlept_cut},
    "ZHMET": {_ZH_file: cuts.ZHMET_cut},
    # background
    "bck": {_bck_file: cuts.no_cut},
}

In [9]:
# all the model combinations for which neural networks are currently trained;
# each entry is (H1 category, H0 category) and both names must be keys of `collections`
discriminant_pairs = [
    ("ggH", "bck"), ("VBF", "bck"), ("ZHh", "bck"), ("WHh", "bck"), ("ZHl", "bck"),
    ("WHl", "bck"), ("ZHMET", "bck"), ("ttHh", "bck"), ("ttHl", "bck"),
    ("VBF2j", "ggH2j"), ("VBF1j", "ggH1j"), ("VBF0j", "ggH0j"), ("WHh2j", "ggH2j"),
    ("WHh1j", "ggH1j"), ("WHh0j", "ggH0j"), ("ZHh2j", "ggH2j"), ("ZHh1j", "ggH1j"),
    ("ZHh0j", "ggH0j"), ("WHh2j", "ZHh2j"), ("WHh01j", "ZHh01j"), ("VBF2j", "WHh2j"),
    ("VBF1j", "WHh1j"), ("VBF0j", "WHh0j"), ("VBF2j", "ZHh2j"), ("VBF1j", "ZHh1j"),
    # was ("VBF0j", "ZHh0h"): a typo for the existing "ZHh0j" category
    ("VBF0j", "ZHh0j"), ("WHl", "ggH"), ("WHl", "VBF"), ("WHl", "WHh"), ("WHl", "ZHh"),
    ("WHl", "ZHl"), ("WHl", "ZHMET"), ("WHl", "ttHh"), ("WHl", "ttHl"), ("ZHh", "ZHl"),
    ("ZHh", "ZHMET"), ("ZHh", "ttHh"), ("ZHh", "ttHl"), ("ZHl", "ggH"), ("ZHl", "VBF"),
    ("ZHl", "WHh"), ("ZHl", "ZHMET"), ("ZHl", "ttHh"), ("ZHl", "ttHl"), ("ZHMET", "ggH"),
    ("ZHMET", "VBF"), ("ZHMET", "WHh"), ("ZHMET", "ttHh"), ("ZHMET", "ttHl"), ("ttHh", "ggH"),
    ("ttHh", "VBF"), ("ttHh", "WHh"), ("ttHh", "ttHl"), ("ttHl", "ggH"), ("ttHl", "VBF"),
    ("ttHl", "WHh"),
]

In [10]:
def extract_order(df, col_basename, sorted_column, columns, order):
    """Pull the `order`-th ranked entry out of list-valued columns, one scalar column per quantity.

    For every row, the entries of the list stored in column `col_basename + sorted_column`
    are ranked in descending order; the entry at rank `order` (0 = largest) is then read
    from each column `col_basename + column` for `column` in `columns`.

    Returns a DataFrame with one column per requested quantity, named
    `<col_basename><column>(<col_basename>Pt|<order>)`. Rows whose list has fewer than
    `order + 1` entries get 0.

    NOTE(review): the "Pt|" part of the output column name is hard-coded and does not
    follow `sorted_column`; callers in this file always pass "Pt".
    NOTE(review): rows are accessed by label (`row["index"]`, `row[column_name]`) although
    `transform` is called with `raw = True` -- presumably this works with the pandas
    version used here; confirm before upgrading pandas.
    """
    def get_index(row, order, col_basename, sorted_column):
        # the parameter is rebound from the column suffix to the actual list of values
        sorted_column = row[col_basename + sorted_column]
        if order >= len(sorted_column):
            # not enough entries in this row: flag it with -1
            return -1
        else:
            # argsort is ascending; flipping it gives positions ordered from largest to smallest
            return np.flipud(np.argsort(sorted_column))[order]
    
    # per-row position (within the list) of the entry with the requested rank
    index_column = pd.DataFrame(df.transform(lambda row: get_index(row, order, col_basename, sorted_column), axis = 1, raw = True))
    index_column.columns = ["index"]
    df_temp = pd.concat([index_column, df], axis = 1)
    
    def get_element(row, column_name):
        if row["index"] == -1:
            # missing entry: fill with 0
            return 0
        else:
            return row[column_name][row["index"]]
        
    extracted_cols = pd.DataFrame()
    for column in columns:
        # one scalar output column per requested quantity
        extracted_col = pd.DataFrame(df_temp.transform(lambda row: get_element(row, col_basename + column), axis = 1, raw = True))
        extracted_col.columns = [col_basename + column + "(" + col_basename + "Pt|" + str(order) + ")"]
        extracted_cols = pd.concat([extracted_cols, extracted_col], axis = 1)
        
    return extracted_cols

In [11]:
def prepare_data(df, col_basenames, sorted_column, columns, orders, pt_limits):
    """Unroll list-valued object branches into scalar columns, one set per (collection, rank).

    `col_basenames` and `pt_limits` are paired positionally. For each pair and each rank in
    `orders`, extract_order() supplies the columns; every row whose extracted Pt lies below
    the limit of its collection is overwritten with 0.0 in all columns of that set.

    Returns the column-wise concatenation of all sets.
    """
    unrolled = pd.DataFrame()

    for basename, min_pt in zip(col_basenames, pt_limits):
        for rank in orders:
            rank_cols = extract_order(df, basename, sorted_column, columns, rank)

            # the Pt column of this set decides which rows are blanked out
            pt_column = basename + "Pt(" + basename + "Pt|" + str(rank) + ")"
            too_soft = rank_cols[pt_column] < min_pt
            rank_cols[too_soft] = 0.0

            unrolled = pd.concat([unrolled, rank_cols], axis = 1)

    return unrolled

In [12]:
def get_data(H1_coll, H0_coll, read_branches, input_branches, list_branches, pt_limits):
    """Load both hypotheses and reduce them to the requested scalar inputs plus unrolled columns.

    `read_branches` are read from each collection (full range 0.0-1.0). The object collections
    named in `list_branches` are unrolled by prepare_data() for Pt, Eta and Phi at ranks 0-3.
    Both returned frames carry the columns `input_branches` followed by the unrolled columns
    (column names are taken from the H1 frame).

    Returns (H1_df, H0_df).
    """
    H1_raw = H1_coll.get_data(read_branches, 0.0, 1.0)
    H0_raw = H0_coll.get_data(read_branches, 0.0, 1.0)

    H1_unrolled = prepare_data(H1_raw, list_branches, "Pt", ["Pt", "Eta", "Phi"], range(4), pt_limits)
    H0_unrolled = prepare_data(H0_raw, list_branches, "Pt", ["Pt", "Eta", "Phi"], range(4), pt_limits)

    # final column selection: requested scalars first, then every unrolled column
    selected_columns = np.concatenate([input_branches, H1_unrolled.columns])

    H1_full = pd.concat([H1_raw, H1_unrolled], axis = 1)
    H0_full = pd.concat([H0_raw, H0_unrolled], axis = 1)

    return H1_full[selected_columns], H0_full[selected_columns]

In [13]:
def get_data_dmatrix(H1_coll, H0_coll, read_branches, input_branches, list_branches, pt_limits):
    """Build a labelled, class-weighted xgboost DMatrix from the two hypotheses.

    The data comes from get_data(); H1 rows get label 1 and H0 rows label 0. Every row of a
    class carries the same weight, 1 + (size of the other class) / (size of its own class),
    so that the smaller class is weighted up.

    Returns the xgb.DMatrix with the column names as feature names.
    Raises ZeroDivisionError if either collection is empty.
    """
    H1_df, H0_df = get_data(H1_coll, H0_coll, read_branches, input_branches, list_branches, pt_limits)

    complete_input_branches = H1_df.columns
    print("number of input variables: " + str(len(complete_input_branches)))
    print("final list of inputs: " + str(complete_input_branches))

    # try with the same weights as used later in the neural network training, to balance out some (very)
    # unbalanced datasets
    H1_class_weight = 1.0 + float(len(H0_df)) / float(len(H1_df))
    H0_class_weight = 1.0 + float(len(H1_df)) / float(len(H0_df))

    print("using class weights: " + str(H1_class_weight) + " (H1), " + str(H0_class_weight) + " (H0)")

    # .values replaces DataFrame.as_matrix(), which is deprecated and no longer exists in pandas >= 1.0
    H1_data = H1_df.values
    H0_data = H0_df.values

    data = np.concatenate([H1_data, H0_data])
    target = np.concatenate([np.ones(np.shape(H1_data)[0]), np.zeros(np.shape(H0_data)[0])])
    weights = np.concatenate([np.full(len(H1_df), H1_class_weight), np.full(len(H0_df), H0_class_weight)])

    # hand xgboost a plain list rather than a pandas Index
    return xgb.DMatrix(data, label = target, feature_names = list(complete_input_branches), weight = weights)

In [14]:
def get_feature_correlation(source, corr_branches, mandatory_branches, optional_branches, list_branches, pt_limits):
    """Draw the correlation matrix of `corr_branches` for the first half of category `source`.

    The inputs are `mandatory_branches` plus every optional branch that shares a jet tag
    ("0j", "1j" or "2j") with `source`; `corr_branches` must be among the resulting columns.

    Returns the matplotlib figure.
    """
    coll = FileCollection(collections[source], 0.0, 0.5)

    input_branches = list(mandatory_branches)
    for optional_branch in optional_branches:
        # the tags are tested independently, in this order
        for jet_tag in ("0j", "1j", "2j"):
            if jet_tag in source and jet_tag in optional_branch:
                input_branches.append(optional_branch)

    # get_data() expects an H1 and an H0 collection; the same one is passed twice
    # and only the first result is used
    source_df, _ = get_data(coll, coll, allbranches, input_branches, list_branches, pt_limits)
    correlations = source_df[corr_branches].corr()

    fig = plt.figure(figsize=(10,15))
    ax = fig.add_subplot(111)
    cax = ax.matshow(correlations, vmin = -1.0, vmax = 1.0, cmap = "RdBu")
    fig.colorbar(cax)

    # one tick per matrix row / column
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_major_locator(ticker.MultipleLocator(1))

    # leading '' entry offsets the labels by one tick
    ax.set_yticklabels([''] + corr_branches)
    ax.set_xticklabels([''] + corr_branches, rotation = 'vertical')

    return fig

In [15]:
def get_interpolating_function(data, bins):
    """Turn histogram contents into a callable defined on the whole real axis.

    `bins` are the bin edges and `data` the bin contents (one value per bin). The returned
    function interpolates linearly between bin centres and evaluates to 0 at and beyond the
    first and last bin centre.
    """
    bin_centers = [np.mean([lower, upper]) for lower, upper in zip(bins[:-1], bins[1:])]
    intf = interpolate.interp1d(bin_centers, data, kind = "linear")

    def interpolated_function(x):
        # strictly inside the range spanned by the bin centres -> interpolate
        if bin_centers[0] < x < bin_centers[-1]:
            return intf(x)
        return 0

    return interpolated_function

In [16]:
def get_binned_data(df, branch):
    """Histogram column `branch` of `df` as a weighted density of unit area.

    The bin width is 2 * IQR / n**0.33 (n = number of rows) with a floor of 0.005; bins start
    at the smallest value and extend past the largest one. Rows are weighted by the
    "training_weight" column, scaled so that the bin contents times the bin width sum to 1.

    Returns (contents, edges) as produced by np.histogram. For an empty `df`, two empty
    arrays are returned so that callers can test `len(contents) > 0`.
    """
    # .values replaces as_matrix(), which is deprecated and no longer exists in pandas >= 1.0
    data = np.asarray(df[branch].values)
    weights = np.asarray(df["training_weight"].values)

    # percentile / min / max are undefined without data
    if len(data) == 0:
        return np.zeros(0), np.zeros(0)

    # set the bin width
    q75, q25 = np.percentile(data, [75, 25])
    bin_width = max(2 * (q75 - q25) / len(data)**0.33, 0.005)

    bins = np.arange(np.min(data), np.max(data) + bin_width, bin_width)

    # normalise to unit area
    weights = weights / (np.sum(weights) * bin_width)

    return np.histogram(data, bins = bins, weights = weights)

In [17]:
def get_feature_importance_list_separation(disc_pair, mandatory_branches, optional_branches, list_branches, pt_limits):
    """Rank the inputs of a discriminant by how well their 1D densities separate H1 from H0.

    `disc_pair` is (H1 category, H0 category). For every input column the unit-area densities
    p1 and p0 are built from the first half of each collection, and the separation is the
    integral of (p1 + p0) * (p1 - p0)**2 over the joint range. The scores are normalised to
    sum to 1; if they sum to zero they are left as they are.

    Returns (None, None, implist), i.e. the same three slots as
    get_feature_importance_list_BDT, with implist mapping column name -> normalised score.
    """
    H1_name, H0_name = disc_pair[0], disc_pair[1]

    def pair_has(tag):
        return tag in H1_name or tag in H0_name

    input_branches = list(mandatory_branches)
    for optional_branch in optional_branches:
        # the first jet tag carried by the pair decides (order: 0j, 1j, 2j)
        if pair_has("0j") and "0j" in optional_branch:
            input_branches.append(optional_branch)
        elif pair_has("1j") and "1j" in optional_branch:
            input_branches.append(optional_branch)
        elif pair_has("2j") and "1j" not in optional_branch:
            input_branches.append(optional_branch)

    # needed to build the histograms
    input_branches.append("training_weight")

    H1_coll = FileCollection(collections[H1_name], 0.0, 0.5)
    H0_coll = FileCollection(collections[H0_name], 0.0, 0.5)

    H1_df, H0_df = get_data(H1_coll, H0_coll, allbranches, input_branches, list_branches, pt_limits)

    implist = {}
    for branch in H1_df.columns:
        if "training_weight" in branch:
            continue

        data_H1, bins_H1 = get_binned_data(H1_df, branch)
        data_H0, bins_H0 = get_binned_data(H0_df, branch)

        if len(data_H1) > 0:
            H1_func = get_interpolating_function(data_H1, bins_H1)
            H0_func = get_interpolating_function(data_H0, bins_H0)

            # integrate over the range covered by either histogram
            all_edges = np.concatenate([bins_H0, bins_H1])
            separation_func = lambda x: (H1_func(x) + H0_func(x)) * (H1_func(x) - H0_func(x))**2

            sep = integrate.quad(separation_func, np.min(all_edges), np.max(all_edges))[0]
        else:
            sep = 0.0

        print("separation for " + branch + " = " + str(sep))
        implist[branch] = sep

    # normalize the importance list; a zero total (e.g. identical H1 and H0 densities)
    # used to raise ZeroDivisionError here
    impsum = sum(implist.values())
    if impsum != 0:
        for key in list(implist.keys()):
            implist[key] /= impsum

    return None, None, implist

In [18]:
def get_feature_importance_list_BDT(disc_pair, mandatory_branches, optional_branches, list_branches, pt_limits):
    """Rank the inputs of a discriminant by how often a boosted decision tree splits on them.

    `disc_pair` is (H1 category, H0 category). BDTs with max_depth 1..7 are trained on the
    first half of each collection and scored by RMSE on the second half; the split counts
    (get_fscore) of the best-scoring depth are normalised to sum to 1.

    Returns (parameters of the best BDT, feature names of the training matrix,
    {variable: normalised score}).
    """
    H1_name, H0_name = disc_pair[0], disc_pair[1]

    def pair_has(tag):
        return tag in H1_name or tag in H0_name

    # the mandatory branches are *always* inputs; optional branches are added only when the
    # jet tag in the category names allows it. Fully inclusive categories (no "xxj" in their
    # name) therefore get no optional (MELA) branch, since they may contain events with too
    # few jets.
    input_branches = list(mandatory_branches)
    for optional_branch in optional_branches:
        if ((pair_has("0j") and "0j" in optional_branch)
                or (pair_has("1j") and "1j" in optional_branch)
                or (pair_has("2j") and "1j" not in optional_branch)):
            input_branches.append(optional_branch)

    # training data: first half of each collection ...
    H1_coll_train = FileCollection(collections[H1_name], 0.0, 0.5)
    H0_coll_train = FileCollection(collections[H0_name], 0.0, 0.5)
    dtrain = get_data_dmatrix(H1_coll_train, H0_coll_train, allbranches, input_branches, list_branches, pt_limits)

    # ... validation data: second half
    H1_coll_val = FileCollection(collections[H1_name], 0.5, 1.0)
    H0_coll_val = FileCollection(collections[H0_name], 0.5, 1.0)
    dval = get_data_dmatrix(H1_coll_val, H0_coll_val, allbranches, input_branches, list_branches, pt_limits)

    evallist = [(dtrain, 'train'), (dval, 'eval')]

    params = {'eta': 0.01, 'silent': 1, 'gamma': 0.5, 'objective': 'binary:logistic',
              'nthread': 4, 'eval_metric': 'rmse'}
    max_num_rounds = 2000

    # scan the tree depth and keep the one with the lowest validation RMSE
    # (i.e. avoid too deep trees to start with)
    best_loss = 1e6
    best_imp = None
    best_params = None
    for tree_depth in range(1,8):
        params['max_depth'] = tree_depth

        # NOTE(review): with early stopping, some xgboost versions return the model of the
        # last round rather than the best one -- confirm which rounds predict() and
        # get_fscore() use in the installed version.
        bst = xgb.train(params, dtrain, max_num_rounds, evals = evallist, early_stopping_rounds = 10, verbose_eval = False)

        pred = bst.predict(dval)
        cur_loss = np.sqrt(mean_squared_error(pred, dval.get_label()))
        cur_imp = bst.get_fscore()

        print("for max_depth = " + str(params['max_depth']) + ": loss = " + str(cur_loss))

        if cur_loss < best_loss:
            best_loss = cur_loss
            best_imp = copy.copy(cur_imp)
            best_params = copy.copy(params)

    # normalize the usage score w.r.t. the total score (i.e. sum of all individuals)
    score_sum = sum(best_imp.values())
    ranked = sorted(best_imp.items(), key = lambda item: item[1], reverse = True)
    used_variables = dict((name, score / float(score_sum)) for name, score in ranked)
    return best_params, dtrain.feature_names, used_variables

In [19]:
def get_histogram(df, branch, label):
    """Draw column `branch` of `df` as a semi-transparent, unit-area histogram on the current axes.

    Binning and weighting follow get_binned_data(): bin width 2 * IQR / n**0.33 with a floor
    of 0.005, rows weighted by "training_weight" and scaled to unit area.

    Returns whatever plt.hist returns.
    """
    # .values replaces as_matrix(), which is deprecated and no longer exists in pandas >= 1.0
    data = df[branch].values
    weights = df["training_weight"].values

    # set the bin width
    q75, q25 = np.percentile(data, [75, 25])
    bin_width = max(2 * (q75 - q25) / len(data)**0.33, 0.005)

    bins = np.arange(np.min(data), np.max(data) + bin_width, bin_width)

    # normalise to unit area
    weights = weights / (np.sum(weights) * bin_width)

    return plt.hist(data, bins = bins, weights = weights, alpha = 0.5, label = label)

In [20]:
def plot_branch(disc_pair, branch, start_fraction = 0.0, end_fraction = 1.0):
    """Overlay the unit-area histograms of `branch` for the two categories of `disc_pair`.

    `start_fraction` and `end_fraction` are handed to FileCollection to select the part of
    each collection that is read. The figure is shown; nothing is returned.
    """
    H1_name, H0_name = disc_pair[0], disc_pair[1]

    H1_coll = FileCollection(collections[H1_name], start_fraction, end_fraction)
    H0_coll = FileCollection(collections[H0_name], start_fraction, end_fraction)

    # every branch is both read and kept, so that any of them can be plotted
    H1_df, H0_df = get_data(H1_coll, H0_coll, allbranches, allbranches, list_branches, pt_limits)

    plt.figure()
    for frame, category in ((H1_df, H1_name), (H0_df, H0_name)):
        get_histogram(frame, branch, category)

    plt.legend(loc = 'upper right')
    plt.show()

In [21]:
def plot_variables(discs):
    """Train the BDT ranking for each discriminant pair and plot the scores as a matrix.

    `discs` is a list of (H1 category, H0 category) pairs. Each pair is one column of the
    plot, each variable with a non-zero score in at least one pair is one row.

    Returns (figure, sorted_implist), where sorted_implist holds the (variable, score)
    tuples of the *last* pair in `discs`, sorted by decreasing score.
    """
    plotframe = pd.DataFrame()

    for disc in discs:
        _, _, implist = get_feature_importance_list_BDT(disc, candidate_branches, MELA_branches, list_branches, pt_limits)

        # keep only the variables with a non-zero score (no cumulative-importance cut is applied)
        cutimplist = {key: val for key, val in implist.items() if val > 0.00}
        curframe = pd.DataFrame(cutimplist, index = [len(plotframe)])

        plotframe = pd.concat([plotframe, curframe])

    # variables unused by a given discriminant show up as NaN after the concat
    plotframe = plotframe.fillna(0.0)

    print(plotframe)
    print("number of pre-selected input variables = " + str(len(plotframe.columns)))

    # start the plotting
    parameters = plotframe.columns
    # .values replaces as_matrix(), which is deprecated and no longer exists in pandas >= 1.0
    plotdata = np.transpose(plotframe.values)

    plt.close('all')
    fig = plt.figure(figsize=(10,15))
    ax = fig.add_subplot(111)
    cax = ax.matshow(plotdata, cmap = 'Blues')

    # make axis labels: the jet tag is stripped from both names and appended once
    disclabels = []
    for disc in discs:
        for jet_tag in ("0j", "01j", "1j", "2j"):
            if jet_tag in disc[0] or jet_tag in disc[1]:
                disclabels.append('D_' + re.sub(jet_tag, '', disc[0]) + "_" + re.sub(jet_tag, '', disc[1]) + "_" + jet_tag)
                break
        else:
            # NOTE(review): inclusive pairs are also labelled "_2j" -- confirm this is intended
            disclabels.append('D_' + disc[0] + "_" + disc[1] + "_2j")

    # leading '' entry offsets the labels by one tick
    disclabels = np.concatenate([[''], np.array(disclabels)])
    parameters = np.concatenate([[''], np.array(parameters)])

    ax.set_xticklabels(disclabels, rotation = 'vertical')
    ax.set_yticklabels(parameters)

    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    # sort the used variables according to their importance
    sorted_implist = sorted(cutimplist.items(), key = lambda x: x[1], reverse = True)

    return fig, sorted_implist

In [22]:
def append_variables(confhandler, impdict, threshold_fscore):
    """Write the selected inputs of one discriminant into a new section of `confhandler`.

    `impdict` maps each variable name to a one-element list holding its score and carries the
    section name under the key "discriminant". Variables scoring above `threshold_fscore` are
    stored under "periodic_columns" if their name contains "phi" or "Phi", and under
    "nonperiodic_columns" otherwise.
    """
    section_name = impdict["discriminant"]
    confhandler.new_section(section_name)
    cur_sec = confhandler.get_section(section_name)

    periodic_inputs = []
    nonperiodic_inputs = []
    for key, val in impdict.items():
        # Skip the label entry *before* looking at its value: it is a string, not a score
        # list. Compare with ==, since `is` tests object identity and only works for
        # strings when they happen to be interned.
        if key == "discriminant":
            continue

        if val[0] > threshold_fscore:
            if "phi" in key or "Phi" in key:
                periodic_inputs.append(key)
            else:
                nonperiodic_inputs.append(key)

    cur_sec["nonperiodic_columns"] = ConfigFileUtils.serialize_list(nonperiodic_inputs, lambda x: x)
    cur_sec["periodic_columns"] = ConfigFileUtils.serialize_list(periodic_inputs, lambda x: x)

In [23]:
def convert_varname(raw):
    """Return `raw` with round brackets replaced by square ones, e.g. "JetPt(JetPt|0)" -> "JetPt[JetPt|0]"."""
    return raw.replace('(', '[').replace(')', ']')

In [24]:
df = pd.DataFrame()

In [25]:
# Directory receiving the generated input configuration; the site default can be overridden
# through the CJLST_CONFIG_DIR environment variable.
out_dir = os.environ.get("CJLST_CONFIG_DIR", "/data_CMS/cms/wind/InputConfigurations/")
out_path = os.path.join(out_dir, "background.conf")
# minimum normalised importance a variable needs in order to be written to the configuration
threshold_fscore = 0.01

In [26]:
confhandler = ConfigFileHandler()

In [27]:
%%capture
fig, implist = plot_variables([("ggH", "bck")])

In [28]:
# one-row record of this discriminant: {variable: [normalised score]} plus its label
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ggH_bkg_ML"
# pd.concat replaces DataFrame.append, which is deprecated and no longer exists in pandas >= 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [29]:
append_variables(confhandler, impdict, threshold_fscore)

In [30]:
implist

[('D_VBF2j_ggH_ME', 0.06371046625366808),
 ('JetPt(JetPt|0)', 0.0629496793826758),
 ('JetPt(JetPt|1)', 0.0549505488533855),
 ('PFMET', 0.05205955874361482),
 ('ZZPt', 0.051581349853276816),
 ('JetEta(JetPt|0)', 0.04684273448538202),
 ('ZZEta', 0.04406042821432453),
 ('JetPhi(JetPt|1)', 0.043821323769155526),
 ('JetEta(JetPt|1)', 0.04288664275622215),
 ('Z1Pt', 0.04236496033039887),
 ('ZZPhi', 0.04108249103358331),
 ('Z2Mass', 0.040778176285186395),
 ('Z2Pt', 0.04056080860776003),
 ('Z1Mass', 0.040213020323877836),
 ('JetEta(JetPt|2)', 0.03977828496902511),
 ('JetPhi(JetPt|0)', 0.038647973046408),
 ('D_WHh_ZHh_ME', 0.03851755243995218),
 ('ZZMassErr', 0.037343766981849796),
 ('D_WHh_ggH_ME', 0.03219215302684491),
 ('JetPt(JetPt|2)', 0.02932289968481687),
 ('D_ZHh_ggH_ME', 0.028170850994457125),
 ('D_VBF2j_WHh_ME', 0.0232366047168786),
 ('D_VBF2j_ZHh_ME', 0.022954026736224323),
 ('JetPhi(JetPt|2)', 0.016824258232800784),
 ('JetPhi(JetPt|3)', 0.005368981632431258),
 ('JetPt(JetPt|3)', 0.0

In [27]:
%%capture
fig, implist = plot_variables([("VBF", "bck")])

In [28]:
# one-row record of this discriminant: {variable: [normalised score]} plus its label
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_bkg_ML"
# pd.concat replaces DataFrame.append, which is deprecated and no longer exists in pandas >= 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [29]:
append_variables(confhandler, impdict, threshold_fscore)

In [30]:
implist

[('D_VBF2j_ggH_ME', 0.06371046625366808),
 ('JetPt(JetPt|0)', 0.0629496793826758),
 ('JetPt(JetPt|1)', 0.0549505488533855),
 ('PFMET', 0.05205955874361482),
 ('ZZPt', 0.051581349853276816),
 ('JetEta(JetPt|0)', 0.04684273448538202),
 ('ZZEta', 0.04406042821432453),
 ('JetPhi(JetPt|1)', 0.043821323769155526),
 ('JetEta(JetPt|1)', 0.04288664275622215),
 ('Z1Pt', 0.04236496033039887),
 ('ZZPhi', 0.04108249103358331),
 ('Z2Mass', 0.040778176285186395),
 ('Z2Pt', 0.04056080860776003),
 ('Z1Mass', 0.040213020323877836),
 ('JetEta(JetPt|2)', 0.03977828496902511),
 ('JetPhi(JetPt|0)', 0.038647973046408),
 ('D_WHh_ZHh_ME', 0.03851755243995218),
 ('ZZMassErr', 0.037343766981849796),
 ('D_WHh_ggH_ME', 0.03219215302684491),
 ('JetPt(JetPt|2)', 0.02932289968481687),
 ('D_ZHh_ggH_ME', 0.028170850994457125),
 ('D_VBF2j_WHh_ME', 0.0232366047168786),
 ('D_VBF2j_ZHh_ME', 0.022954026736224323),
 ('JetPhi(JetPt|2)', 0.016824258232800784),
 ('JetPhi(JetPt|3)', 0.005368981632431258),
 ('JetPt(JetPt|3)', 0.0

In [27]:
%%capture
fig, implist = plot_variables([("ZHl", "bck")])

In [28]:
# one-row record of this discriminant: {variable: [normalised score]} plus its label
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHl_bkg_ML"
# pd.concat replaces DataFrame.append, which is deprecated and no longer exists in pandas >= 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [29]:
append_variables(confhandler, impdict, threshold_fscore)

In [30]:
implist

[('D_VBF2j_ggH_ME', 0.06371046625366808),
 ('JetPt(JetPt|0)', 0.0629496793826758),
 ('JetPt(JetPt|1)', 0.0549505488533855),
 ('PFMET', 0.05205955874361482),
 ('ZZPt', 0.051581349853276816),
 ('JetEta(JetPt|0)', 0.04684273448538202),
 ('ZZEta', 0.04406042821432453),
 ('JetPhi(JetPt|1)', 0.043821323769155526),
 ('JetEta(JetPt|1)', 0.04288664275622215),
 ('Z1Pt', 0.04236496033039887),
 ('ZZPhi', 0.04108249103358331),
 ('Z2Mass', 0.040778176285186395),
 ('Z2Pt', 0.04056080860776003),
 ('Z1Mass', 0.040213020323877836),
 ('JetEta(JetPt|2)', 0.03977828496902511),
 ('JetPhi(JetPt|0)', 0.038647973046408),
 ('D_WHh_ZHh_ME', 0.03851755243995218),
 ('ZZMassErr', 0.037343766981849796),
 ('D_WHh_ggH_ME', 0.03219215302684491),
 ('JetPt(JetPt|2)', 0.02932289968481687),
 ('D_ZHh_ggH_ME', 0.028170850994457125),
 ('D_VBF2j_WHh_ME', 0.0232366047168786),
 ('D_VBF2j_ZHh_ME', 0.022954026736224323),
 ('JetPhi(JetPt|2)', 0.016824258232800784),
 ('JetPhi(JetPt|3)', 0.005368981632431258),
 ('JetPt(JetPt|3)', 0.0

In [27]:
%%capture
fig, implist = plot_variables([("ZHh", "bck")])

In [28]:
# one-row record of this discriminant: {variable: [normalised score]} plus its label
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHh_bkg_ML"
# pd.concat replaces DataFrame.append, which is deprecated and no longer exists in pandas >= 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [29]:
append_variables(confhandler, impdict, threshold_fscore)

In [30]:
implist

[('D_VBF2j_ggH_ME', 0.06371046625366808),
 ('JetPt(JetPt|0)', 0.0629496793826758),
 ('JetPt(JetPt|1)', 0.0549505488533855),
 ('PFMET', 0.05205955874361482),
 ('ZZPt', 0.051581349853276816),
 ('JetEta(JetPt|0)', 0.04684273448538202),
 ('ZZEta', 0.04406042821432453),
 ('JetPhi(JetPt|1)', 0.043821323769155526),
 ('JetEta(JetPt|1)', 0.04288664275622215),
 ('Z1Pt', 0.04236496033039887),
 ('ZZPhi', 0.04108249103358331),
 ('Z2Mass', 0.040778176285186395),
 ('Z2Pt', 0.04056080860776003),
 ('Z1Mass', 0.040213020323877836),
 ('JetEta(JetPt|2)', 0.03977828496902511),
 ('JetPhi(JetPt|0)', 0.038647973046408),
 ('D_WHh_ZHh_ME', 0.03851755243995218),
 ('ZZMassErr', 0.037343766981849796),
 ('D_WHh_ggH_ME', 0.03219215302684491),
 ('JetPt(JetPt|2)', 0.02932289968481687),
 ('D_ZHh_ggH_ME', 0.028170850994457125),
 ('D_VBF2j_WHh_ME', 0.0232366047168786),
 ('D_VBF2j_ZHh_ME', 0.022954026736224323),
 ('JetPhi(JetPt|2)', 0.016824258232800784),
 ('JetPhi(JetPt|3)', 0.005368981632431258),
 ('JetPt(JetPt|3)', 0.0

In [27]:
%%capture
fig, implist = plot_variables([("WHl", "bck")])

In [28]:
# one-row record of this discriminant: {variable: [normalised score]} plus its label
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_bkg_ML"
# pd.concat replaces DataFrame.append, which is deprecated and no longer exists in pandas >= 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [29]:
append_variables(confhandler, impdict, threshold_fscore)

In [30]:
implist

[('D_VBF2j_ggH_ME', 0.06371046625366808),
 ('JetPt(JetPt|0)', 0.0629496793826758),
 ('JetPt(JetPt|1)', 0.0549505488533855),
 ('PFMET', 0.05205955874361482),
 ('ZZPt', 0.051581349853276816),
 ('JetEta(JetPt|0)', 0.04684273448538202),
 ('ZZEta', 0.04406042821432453),
 ('JetPhi(JetPt|1)', 0.043821323769155526),
 ('JetEta(JetPt|1)', 0.04288664275622215),
 ('Z1Pt', 0.04236496033039887),
 ('ZZPhi', 0.04108249103358331),
 ('Z2Mass', 0.040778176285186395),
 ('Z2Pt', 0.04056080860776003),
 ('Z1Mass', 0.040213020323877836),
 ('JetEta(JetPt|2)', 0.03977828496902511),
 ('JetPhi(JetPt|0)', 0.038647973046408),
 ('D_WHh_ZHh_ME', 0.03851755243995218),
 ('ZZMassErr', 0.037343766981849796),
 ('D_WHh_ggH_ME', 0.03219215302684491),
 ('JetPt(JetPt|2)', 0.02932289968481687),
 ('D_ZHh_ggH_ME', 0.028170850994457125),
 ('D_VBF2j_WHh_ME', 0.0232366047168786),
 ('D_VBF2j_ZHh_ME', 0.022954026736224323),
 ('JetPhi(JetPt|2)', 0.016824258232800784),
 ('JetPhi(JetPt|3)', 0.005368981632431258),
 ('JetPt(JetPt|3)', 0.0

In [27]:
%%capture
fig, implist = plot_variables([("WHh", "bck")])

In [28]:
# one-row record of this discriminant: {variable: [normalised score]} plus its label
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHh_bkg_ML"
# pd.concat replaces DataFrame.append, which is deprecated and no longer exists in pandas >= 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [29]:
append_variables(confhandler, impdict, threshold_fscore)

In [30]:
implist

[('D_VBF2j_ggH_ME', 0.06371046625366808),
 ('JetPt(JetPt|0)', 0.0629496793826758),
 ('JetPt(JetPt|1)', 0.0549505488533855),
 ('PFMET', 0.05205955874361482),
 ('ZZPt', 0.051581349853276816),
 ('JetEta(JetPt|0)', 0.04684273448538202),
 ('ZZEta', 0.04406042821432453),
 ('JetPhi(JetPt|1)', 0.043821323769155526),
 ('JetEta(JetPt|1)', 0.04288664275622215),
 ('Z1Pt', 0.04236496033039887),
 ('ZZPhi', 0.04108249103358331),
 ('Z2Mass', 0.040778176285186395),
 ('Z2Pt', 0.04056080860776003),
 ('Z1Mass', 0.040213020323877836),
 ('JetEta(JetPt|2)', 0.03977828496902511),
 ('JetPhi(JetPt|0)', 0.038647973046408),
 ('D_WHh_ZHh_ME', 0.03851755243995218),
 ('ZZMassErr', 0.037343766981849796),
 ('D_WHh_ggH_ME', 0.03219215302684491),
 ('JetPt(JetPt|2)', 0.02932289968481687),
 ('D_ZHh_ggH_ME', 0.028170850994457125),
 ('D_VBF2j_WHh_ME', 0.0232366047168786),
 ('D_VBF2j_ZHh_ME', 0.022954026736224323),
 ('JetPhi(JetPt|2)', 0.016824258232800784),
 ('JetPhi(JetPt|3)', 0.005368981632431258),
 ('JetPt(JetPt|3)', 0.0

In [27]:
%%capture
fig, implist = plot_variables([("ZHMET", "bck")])

In [28]:
# one-row record of this discriminant: {variable: [normalised score]} plus its label
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHMET_bkg_ML"
# pd.concat replaces DataFrame.append, which is deprecated and no longer exists in pandas >= 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [29]:
append_variables(confhandler, impdict, threshold_fscore)

In [30]:
implist

[('D_VBF2j_ggH_ME', 0.06371046625366808),
 ('JetPt(JetPt|0)', 0.0629496793826758),
 ('JetPt(JetPt|1)', 0.0549505488533855),
 ('PFMET', 0.05205955874361482),
 ('ZZPt', 0.051581349853276816),
 ('JetEta(JetPt|0)', 0.04684273448538202),
 ('ZZEta', 0.04406042821432453),
 ('JetPhi(JetPt|1)', 0.043821323769155526),
 ('JetEta(JetPt|1)', 0.04288664275622215),
 ('Z1Pt', 0.04236496033039887),
 ('ZZPhi', 0.04108249103358331),
 ('Z2Mass', 0.040778176285186395),
 ('Z2Pt', 0.04056080860776003),
 ('Z1Mass', 0.040213020323877836),
 ('JetEta(JetPt|2)', 0.03977828496902511),
 ('JetPhi(JetPt|0)', 0.038647973046408),
 ('D_WHh_ZHh_ME', 0.03851755243995218),
 ('ZZMassErr', 0.037343766981849796),
 ('D_WHh_ggH_ME', 0.03219215302684491),
 ('JetPt(JetPt|2)', 0.02932289968481687),
 ('D_ZHh_ggH_ME', 0.028170850994457125),
 ('D_VBF2j_WHh_ME', 0.0232366047168786),
 ('D_VBF2j_ZHh_ME', 0.022954026736224323),
 ('JetPhi(JetPt|2)', 0.016824258232800784),
 ('JetPhi(JetPt|3)', 0.005368981632431258),
 ('JetPt(JetPt|3)', 0.0

In [27]:
%%capture
fig, implist = plot_variables([("ttHh", "bck")])

In [28]:
# one-row record of this discriminant: {variable: [normalised score]} plus its label
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHh_bkg_ML"
# pd.concat replaces DataFrame.append, which is deprecated and no longer exists in pandas >= 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [29]:
append_variables(confhandler, impdict, threshold_fscore)

In [30]:
implist

[('D_VBF2j_ggH_ME', 0.06371046625366808),
 ('JetPt(JetPt|0)', 0.0629496793826758),
 ('JetPt(JetPt|1)', 0.0549505488533855),
 ('PFMET', 0.05205955874361482),
 ('ZZPt', 0.051581349853276816),
 ('JetEta(JetPt|0)', 0.04684273448538202),
 ('ZZEta', 0.04406042821432453),
 ('JetPhi(JetPt|1)', 0.043821323769155526),
 ('JetEta(JetPt|1)', 0.04288664275622215),
 ('Z1Pt', 0.04236496033039887),
 ('ZZPhi', 0.04108249103358331),
 ('Z2Mass', 0.040778176285186395),
 ('Z2Pt', 0.04056080860776003),
 ('Z1Mass', 0.040213020323877836),
 ('JetEta(JetPt|2)', 0.03977828496902511),
 ('JetPhi(JetPt|0)', 0.038647973046408),
 ('D_WHh_ZHh_ME', 0.03851755243995218),
 ('ZZMassErr', 0.037343766981849796),
 ('D_WHh_ggH_ME', 0.03219215302684491),
 ('JetPt(JetPt|2)', 0.02932289968481687),
 ('D_ZHh_ggH_ME', 0.028170850994457125),
 ('D_VBF2j_WHh_ME', 0.0232366047168786),
 ('D_VBF2j_ZHh_ME', 0.022954026736224323),
 ('JetPhi(JetPt|2)', 0.016824258232800784),
 ('JetPhi(JetPt|3)', 0.005368981632431258),
 ('JetPt(JetPt|3)', 0.0

In [27]:
%%capture
fig, implist = plot_variables([("ttHl", "bck")])

In [28]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHl_bkg_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [29]:
append_variables(confhandler, impdict, threshold_fscore)

In [30]:
implist

[('D_VBF2j_ggH_ME', 0.06371046625366808),
 ('JetPt(JetPt|0)', 0.0629496793826758),
 ('JetPt(JetPt|1)', 0.0549505488533855),
 ('PFMET', 0.05205955874361482),
 ('ZZPt', 0.051581349853276816),
 ('JetEta(JetPt|0)', 0.04684273448538202),
 ('ZZEta', 0.04406042821432453),
 ('JetPhi(JetPt|1)', 0.043821323769155526),
 ('JetEta(JetPt|1)', 0.04288664275622215),
 ('Z1Pt', 0.04236496033039887),
 ('ZZPhi', 0.04108249103358331),
 ('Z2Mass', 0.040778176285186395),
 ('Z2Pt', 0.04056080860776003),
 ('Z1Mass', 0.040213020323877836),
 ('JetEta(JetPt|2)', 0.03977828496902511),
 ('JetPhi(JetPt|0)', 0.038647973046408),
 ('D_WHh_ZHh_ME', 0.03851755243995218),
 ('ZZMassErr', 0.037343766981849796),
 ('D_WHh_ggH_ME', 0.03219215302684491),
 ('JetPt(JetPt|2)', 0.02932289968481687),
 ('D_ZHh_ggH_ME', 0.028170850994457125),
 ('D_VBF2j_WHh_ME', 0.0232366047168786),
 ('D_VBF2j_ZHh_ME', 0.022954026736224323),
 ('JetPhi(JetPt|2)', 0.016824258232800784),
 ('JetPhi(JetPt|3)', 0.005368981632431258),
 ('JetPt(JetPt|3)', 0.0

In [27]:
%%capture
fig, implist = plot_variables([("VBF2j", "ggH2j")])

In [28]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_ggH_2j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [29]:
append_variables(confhandler, impdict, threshold_fscore)

In [30]:
implist

[('D_VBF2j_ggH_ME', 0.06371046625366808),
 ('JetPt(JetPt|0)', 0.0629496793826758),
 ('JetPt(JetPt|1)', 0.0549505488533855),
 ('PFMET', 0.05205955874361482),
 ('ZZPt', 0.051581349853276816),
 ('JetEta(JetPt|0)', 0.04684273448538202),
 ('ZZEta', 0.04406042821432453),
 ('JetPhi(JetPt|1)', 0.043821323769155526),
 ('JetEta(JetPt|1)', 0.04288664275622215),
 ('Z1Pt', 0.04236496033039887),
 ('ZZPhi', 0.04108249103358331),
 ('Z2Mass', 0.040778176285186395),
 ('Z2Pt', 0.04056080860776003),
 ('Z1Mass', 0.040213020323877836),
 ('JetEta(JetPt|2)', 0.03977828496902511),
 ('JetPhi(JetPt|0)', 0.038647973046408),
 ('D_WHh_ZHh_ME', 0.03851755243995218),
 ('ZZMassErr', 0.037343766981849796),
 ('D_WHh_ggH_ME', 0.03219215302684491),
 ('JetPt(JetPt|2)', 0.02932289968481687),
 ('D_ZHh_ggH_ME', 0.028170850994457125),
 ('D_VBF2j_WHh_ME', 0.0232366047168786),
 ('D_VBF2j_ZHh_ME', 0.022954026736224323),
 ('JetPhi(JetPt|2)', 0.016824258232800784),
 ('JetPhi(JetPt|3)', 0.005368981632431258),
 ('JetPt(JetPt|3)', 0.0

In [31]:
%%capture
fig, implist = plot_variables([("VBF1j", "ggH1j")])

In [32]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_ggH_1j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [33]:
append_variables(confhandler, impdict, threshold_fscore)

In [34]:
implist

[('JetEta(JetPt|0)', 0.20724951195061467),
 ('D_VBF1j_ggH_ME', 0.0985068326913945),
 ('JetPt(JetPt|0)', 0.09407481665171741),
 ('ZZPt', 0.08964280061204032),
 ('ZZEta', 0.08294201445681422),
 ('PFMET', 0.08067324434126523),
 ('Z1Pt', 0.056138869835909884),
 ('Z2Pt', 0.05117923283912837),
 ('Z1Mass', 0.04843560386218541),
 ('ZZMassErr', 0.04400358782250831),
 ('ZZPhi', 0.043423204769693455),
 ('JetPhi(JetPt|0)', 0.04083786207988181),
 ('Z2Mass', 0.04052128950561917),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.00554002004959637),
 ('Z2Flav', 0.005065161188202396),
 ('ExtraLepPt(ExtraLepPt|0)', 0.003218487838336939),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.002954677359784731),
 ('Z1Flav', 0.002374294306969873),
 ('ZZMass_masked', 0.001530100775602807),
 ('ExtraLepEta(ExtraLepPt|0)', 0.001371814488471482),
 ('nExtraLep', 0.00031657257426264974)]

In [35]:
%%capture
fig, implist = plot_variables([("VBF0j", "ggH0j")])

In [36]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_ggH_0j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [37]:
append_variables(confhandler, impdict, threshold_fscore)

In [38]:
implist

[('ZZPt', 0.17382291975125852),
 ('PFMET', 0.12652819493210374),
 ('ZZEta', 0.10647658530394687),
 ('Z1Pt', 0.10427683066119549),
 ('Z1Mass', 0.10169634925335251),
 ('ZZPhi', 0.10051186598417869),
 ('Z2Pt', 0.09302423960404417),
 ('Z2Mass', 0.09006303143110961),
 ('ZZMassErr', 0.08845551842294513),
 ('Z1Flav', 0.006345446084859765),
 ('Z2Flav', 0.0043149033377046404),
 ('ExtraLepEta(ExtraLepPt|0)', 0.0017767249037607344),
 ('ZZMass_masked', 0.0008883624518803672),
 ('nExtraLep', 0.0008883624518803672),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.000803756504082237),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0001269089216971953)]

In [39]:
%%capture
fig, implist = plot_variables([("WHh2j", "ggH2j")])

In [40]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHh_ggH_2j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [41]:
append_variables(confhandler, impdict, threshold_fscore)

In [42]:
implist

[('JetPt(JetPt|0)', 0.06072498628518378),
 ('ZZEta', 0.05551335612102798),
 ('JetPt(JetPt|1)', 0.055239059796598725),
 ('PFMET', 0.05243279740051483),
 ('D_VBF2j_ggH_ME', 0.051820905599864964),
 ('ZZPt', 0.050913617757522046),
 ('JetEta(JetPt|0)', 0.05015402793602566),
 ('D_WHh_ggH_ME', 0.04521669409629911),
 ('D_WHh_ZHh_ME', 0.04365531501877875),
 ('ZZPhi', 0.04222053424484112),
 ('Z1Pt', 0.04184073933409292),
 ('JetEta(JetPt|2)', 0.040870152339958644),
 ('JetPhi(JetPt|0)', 0.036797906908047434),
 ('Z2Pt', 0.03646031143182681),
 ('ZZMassErr', 0.03641811199729924),
 ('D_ZHh_ggH_ME', 0.03576402076212179),
 ('Z1Mass', 0.03485673291977887),
 ('JetEta(JetPt|1)', 0.034392539139975524),
 ('Z2Mass', 0.032577963455289696),
 ('JetPhi(JetPt|1)', 0.02989829936278854),
 ('JetPhi(JetPt|2)', 0.027113136683968433),
 ('JetPt(JetPt|2)', 0.025530657889184283),
 ('D_VBF2j_ZHh_ME', 0.023610583618179515),
 ('D_VBF2j_WHh_ME', 0.016689876355656833),
 ('JetEta(JetPt|3)', 0.008271089167405156),
 ('JetPt(JetPt|

In [43]:
%%capture
fig, implist = plot_variables([("WHh1j", "ggH1j")])

In [44]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHh_ggH_1j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [45]:
append_variables(confhandler, impdict, threshold_fscore)

In [46]:
implist

[('JetEta(JetPt|0)', 0.14258555133079848),
 ('ZZEta', 0.1153041825095057),
 ('JetPt(JetPt|0)', 0.0969819391634981),
 ('D_VBF1j_ggH_ME', 0.0783032319391635),
 ('ZZPt', 0.07412072243346007),
 ('PFMET', 0.07281368821292776),
 ('ZZMassErr', 0.06670627376425856),
 ('Z1Pt', 0.058911596958174904),
 ('Z2Mass', 0.0587690114068441),
 ('ZZPhi', 0.05805608365019011),
 ('Z1Mass', 0.05796102661596958),
 ('Z2Pt', 0.05432509505703422),
 ('JetPhi(JetPt|0)', 0.050071292775665396),
 ('Z2Flav', 0.003873574144486692),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0028517110266159697),
 ('Z1Flav', 0.002756653992395437),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0025427756653992393),
 ('ZZMass_masked', 0.001473384030418251),
 ('ExtraLepEta(ExtraLepPt|0)', 0.0013783269961977185),
 ('nExtraLep', 0.00021387832699619773)]

In [47]:
%%capture
fig, implist = plot_variables([("WHh0j", "ggH0j")])

In [48]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHh_ggH_0j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [49]:
append_variables(confhandler, impdict, threshold_fscore)

In [50]:
implist

[('ZZPt', 0.169536),
 ('ZZEta', 0.122752),
 ('PFMET', 0.119552),
 ('ZZMassErr', 0.113792),
 ('Z2Mass', 0.09472),
 ('Z1Pt', 0.093376),
 ('Z1Mass', 0.092416),
 ('ZZPhi', 0.09216),
 ('Z2Pt', 0.081408),
 ('ZZMass_masked', 0.006144),
 ('Z1Flav', 0.004672),
 ('Z2Flav', 0.004288),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.00416),
 ('ExtraLepPt(ExtraLepPt|0)', 0.000384),
 ('nExtraLep', 0.00032),
 ('ExtraLepEta(ExtraLepPt|0)', 0.00032)]

In [51]:
%%capture
fig, implist = plot_variables([("ZHh2j", "ggH2j")])

In [52]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHh_ggH_2j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [53]:
append_variables(confhandler, impdict, threshold_fscore)

In [54]:
implist

[('JetPt(JetPt|0)', 0.057621521208159894),
 ('D_ZHh_ggH_ME', 0.055351061268017476),
 ('D_VBF2j_ggH_ME', 0.0544566376552341),
 ('JetPt(JetPt|1)', 0.05339020950153084),
 ('ZZPt', 0.051945371357803846),
 ('JetEta(JetPt|0)', 0.048058068733014554),
 ('ZZEta', 0.047782861467542743),
 ('Z1Pt', 0.04774846055935877),
 ('Z2Mass', 0.04255392342357838),
 ('D_WHh_ZHh_ME', 0.04100588255529946),
 ('JetEta(JetPt|1)', 0.04031786439161994),
 ('ZZPhi', 0.03897622897244487),
 ('JetEta(JetPt|2)', 0.03708417902232619),
 ('JetPhi(JetPt|0)', 0.03615535450135884),
 ('PFMET', 0.03553613815404727),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.03488252089855172),
 ('D_WHh_ggH_ME', 0.03371289002029654),
 ('Z2Pt', 0.032680862774777254),
 ('Z1Mass', 0.0308920155492105),
 ('JetPhi(JetPt|1)', 0.029240771956379648),
 ('ZZMassErr', 0.027967938353572534),
 ('JetPhi(JetPt|2)', 0.026660703842581443),
 ('JetPt(JetPt|2)', 0.023874230279679384),
 ('D_VBF2j_WHh_ME', 0.02095015308404142),
 ('D_VBF2j_ZHh_ME', 0.01606522412191682),
 ('J

In [55]:
%%capture
fig, implist = plot_variables([("ZHh1j", "ggH1j")])

In [56]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHh_ggH_1j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [57]:
append_variables(confhandler, impdict, threshold_fscore)

In [58]:
implist

[('ZZEta', 0.12199506668338977),
 ('JetEta(JetPt|0)', 0.1219532589155065),
 ('D_VBF1j_ggH_ME', 0.0972866758643756),
 ('JetPt(JetPt|0)', 0.09473640202349597),
 ('ZZPhi', 0.07638279192273925),
 ('ZZPt', 0.07425059576069233),
 ('PFMET', 0.06124837994899452),
 ('Z1Mass', 0.06024499351979598),
 ('Z2Mass', 0.05819641289351562),
 ('ZZMassErr', 0.05552071574898616),
 ('Z1Pt', 0.052176094318324345),
 ('JetPhi(JetPt|0)', 0.0497930515489778),
 ('Z2Pt', 0.0465738534219658),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.01216606045403236),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0038045068773778167),
 ('Z2Flav', 0.0032610058948952716),
 ('ZZMass_masked', 0.0031355825912454532),
 ('nExtraLep', 0.002884735983945817),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.001755926251097454),
 ('ExtraLepEta(ExtraLepPt|0)', 0.0014214641080312722),
 ('Z1Flav', 0.0012124252686149086)]

In [59]:
%%capture
fig, implist = plot_variables([("ZHh0j", "ggH0j")])

In [60]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHh_ggH_0j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [61]:
append_variables(confhandler, impdict, threshold_fscore)

In [62]:
implist

[('ZZPt', 0.15282898919262555),
 ('Z2Mass', 0.13337571519389702),
 ('ZZPhi', 0.11341385886840433),
 ('Z2Pt', 0.10133502860775588),
 ('ZZEta', 0.10006357279084552),
 ('PFMET', 0.09675778766687858),
 ('ZZMassErr', 0.09548633184996821),
 ('Z1Pt', 0.09218054672600128),
 ('Z1Mass', 0.09065479974570884),
 ('Z1Flav', 0.009663064208518753),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0052129688493324855),
 ('Z2Flav', 0.003687221869040051),
 ('ZZMass_masked', 0.0024157660521296883),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.001652892561983471),
 ('ExtraLepEta(ExtraLepPt|0)', 0.0012714558169103624)]

In [63]:
%%capture
fig, implist = plot_variables([("WHh2j", "ZHh2j")])

In [64]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHh_ZHh_2j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [65]:
append_variables(confhandler, impdict, threshold_fscore)

In [66]:
implist

[('D_WHh_ZHh_ME', 0.07026611720332472),
 ('JetPhi(JetPt|1)', 0.05448068729482433),
 ('PFMET', 0.052105888105049944),
 ('Z2Mass', 0.04931200670531536),
 ('Z1Mass', 0.04791506600544807),
 ('D_WHh_ggH_ME', 0.045260878675700215),
 ('ZZMassErr', 0.045191031640706855),
 ('JetPt(JetPt|1)', 0.04330516169588601),
 ('JetEta(JetPt|0)', 0.04302577355591255),
 ('JetEta(JetPt|1)', 0.040511280296151427),
 ('JetPhi(JetPt|0)', 0.03981280994621778),
 ('Z1Pt', 0.039254033666270866),
 ('Z2Pt', 0.038485716281343854),
 ('JetPt(JetPt|0)', 0.038415869246350494),
 ('ZZEta', 0.03813648110637703),
 ('ZZPhi', 0.033177341621848155),
 ('JetPhi(JetPt|2)', 0.03219948313194105),
 ('D_ZHh_ggH_ME', 0.031221624642033947),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.028427743242299363),
 ('JetPt(JetPt|2)', 0.027519731787385624),
 ('ZZPt', 0.027310190682405533),
 ('D_VBF2j_WHh_ME', 0.025983097017531605),
 ('D_VBF2j_ggH_ME', 0.024446462247677585),
 ('D_VBF2j_ZHh_ME', 0.023049521547810297),
 ('JetEta(JetPt|2)', 0.01711252357337431

In [67]:
%%capture
fig, implist = plot_variables([("WHh1j", "ZHh1j")])

In [68]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHh_ZHh_1j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [69]:
append_variables(confhandler, impdict, threshold_fscore)

In [70]:
implist

[('ZZMassErr', 0.09783308302939284),
 ('Z2Mass', 0.09343488521776443),
 ('PFMET', 0.09246942716155332),
 ('ZZEta', 0.0836730315382965),
 ('JetPt(JetPt|0)', 0.08056211113494958),
 ('JetEta(JetPt|0)', 0.07294571980261746),
 ('Z1Mass', 0.07219480798111994),
 ('JetPhi(JetPt|0)', 0.06994207251662733),
 ('Z2Pt', 0.06747479081742115),
 ('ZZPt', 0.06672387899592362),
 ('ZZPhi', 0.06522205535292856),
 ('D_VBF1j_ggH_ME', 0.05975112636773225),
 ('Z1Pt', 0.04644925981549024),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.018450976185367948),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0037545591074876635),
 ('Z1Flav', 0.003325466638060502),
 ('Z2Flav', 0.003003647285990131),
 ('ZZMass_masked', 0.0025745548165629693),
 ('nExtraLep', 0.00010727311735679038),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.00010727311735679038)]

In [71]:
%%capture
fig, implist = plot_variables([("WHh0j", "ZHh0j")])

In [72]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHh_ZHh_0j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [73]:
append_variables(confhandler, impdict, threshold_fscore)

In [74]:
implist

[('PFMET', 0.18597560975609756),
 ('ZZPhi', 0.1524390243902439),
 ('ZZPt', 0.1402439024390244),
 ('ZZEta', 0.125),
 ('Z1Pt', 0.11890243902439024),
 ('Z2Pt', 0.07926829268292683),
 ('Z2Mass', 0.07317073170731707),
 ('Z1Mass', 0.06402439024390244),
 ('ZZMassErr', 0.04573170731707317),
 ('nExtraLep', 0.01524390243902439)]

In [75]:
%%capture
fig, implist = plot_variables([("VBF2j", "WHh2j")])

In [76]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_WHh_2j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [77]:
append_variables(confhandler, impdict, threshold_fscore)

In [78]:
implist

[('D_VBF2j_ggH_ME', 0.06844674530514393),
 ('JetEta(JetPt|2)', 0.06769973767871723),
 ('JetEta(JetPt|0)', 0.06307871375710092),
 ('PFMET', 0.05029272275593697),
 ('JetEta(JetPt|1)', 0.048034327606274865),
 ('ZZEta', 0.046019144241960984),
 ('JetPt(JetPt|2)', 0.044316661744523395),
 ('JetPt(JetPt|0)', 0.04429928947414138),
 ('D_WHh_ZHh_ME', 0.04363914319962476),
 ('D_WHh_ggH_ME', 0.04176293799836701),
 ('JetPt(JetPt|1)', 0.03887914111495231),
 ('Z2Mass', 0.03781943262164932),
 ('Z2Pt', 0.0369334468321665),
 ('ZZPhi', 0.03589111060924552),
 ('Z1Pt', 0.035647898823897295),
 ('ZZPt', 0.035005124819762694),
 ('Z1Mass', 0.03444921216753818),
 ('JetPhi(JetPt|0)', 0.03417125584142591),
 ('D_ZHh_ggH_ME', 0.030175633653562184),
 ('ZZMassErr', 0.030054027760888072),
 ('JetPhi(JetPt|1)', 0.026492712332574744),
 ('D_VBF2j_ZHh_ME', 0.024373295345968764),
 ('D_VBF2j_WHh_ME', 0.019092125149835833),
 ('JetPhi(JetPt|2)', 0.018397234334555184),
 ('JetEta(JetPt|3)', 0.010788179907232076),
 ('JetPt(JetPt|3

In [79]:
%%capture
fig, implist = plot_variables([("VBF1j", "WHh1j")])

In [80]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_WHh_1j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [81]:
append_variables(confhandler, impdict, threshold_fscore)

In [82]:
implist

[('JetEta(JetPt|0)', 0.16556324859756405),
 ('ZZEta', 0.14683883357760147),
 ('JetPt(JetPt|0)', 0.10974377116288472),
 ('PFMET', 0.0697932986304139),
 ('D_VBF1j_ggH_ME', 0.06446151513620053),
 ('JetPhi(JetPt|0)', 0.06276848435841714),
 ('ZZPt', 0.06110072269672007),
 ('ZZMassErr', 0.055768939202506695),
 ('ZZPhi', 0.05306514378127053),
 ('Z2Pt', 0.052130186486076716),
 ('Z1Pt', 0.05205437913781776),
 ('Z2Mass', 0.051346843887400816),
 ('Z1Mass', 0.048819932278768886),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.002779602769495123),
 ('Z2Flav', 0.002122605751250821),
 ('Z1Flav', 0.0014908778490928388),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0001010764643452772),
 ('nExtraLep', 5.05382321726386e-05)]

In [83]:
%%capture
fig, implist = plot_variables([("VBF0j", "WHh0j")])

In [84]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_WHh_0j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [85]:
append_variables(confhandler, impdict, threshold_fscore)

In [86]:
implist

[('ZZEta', 0.1443691786621507),
 ('ZZMassErr', 0.1339260513688964),
 ('PFMET', 0.13279706463449054),
 ('ZZPhi', 0.1100762066045724),
 ('Z1Mass', 0.10556025966694892),
 ('Z2Mass', 0.09187129551227773),
 ('Z2Pt', 0.08975444538526672),
 ('Z1Pt', 0.08947219870166526),
 ('ZZPt', 0.08763759525825572),
 ('Z2Flav', 0.006209427039232289),
 ('nExtraLep', 0.00183460344340954),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.0015523567598080723),
 ('ExtraLepEta(ExtraLepPt|0)', 0.0015523567598080723),
 ('Z1Flav', 0.0014112334180073384),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0012701100762066045),
 ('ZZMass_masked', 0.0007056167090036692)]

In [87]:
%%capture
fig, implist = plot_variables([("VBF2j", "ZHh2j")])

In [88]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_ZHh_2j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [89]:
append_variables(confhandler, impdict, threshold_fscore)

In [90]:
implist

[('JetEta(JetPt|2)', 0.07592682531973449),
 ('D_VBF2j_ggH_ME', 0.06964006259780908),
 ('JetEta(JetPt|0)', 0.06278668177648265),
 ('ZZEta', 0.061248718363822785),
 ('PFMET', 0.047784793049484645),
 ('D_ZHh_ggH_ME', 0.04454697533862177),
 ('JetEta(JetPt|1)', 0.042037666612703035),
 ('D_WHh_ZHh_ME', 0.038934757973126115),
 ('JetPt(JetPt|2)', 0.038206248988181965),
 ('Z2Mass', 0.035562031190977283),
 ('Z1Mass', 0.034131995035346176),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.03302574065080136),
 ('D_VBF2j_ZHh_ME', 0.03283686795100103),
 ('D_VBF2j_WHh_ME', 0.0317575953807134),
 ('Z2Pt', 0.03154174086665587),
 ('ZZMassErr', 0.03135286816685554),
 ('JetPhi(JetPt|0)', 0.030678322810425773),
 ('JetPhi(JetPt|1)', 0.03011170471102477),
 ('Z1Pt', 0.02892450488370838),
 ('JetPt(JetPt|1)', 0.028519777669850522),
 ('ZZPt', 0.027413523285305705),
 ('JetPt(JetPt|0)', 0.027062759699962226),
 ('ZZPhi', 0.025848578058388645),
 ('D_WHh_ggH_ME', 0.02479628730235821),
 ('JetPhi(JetPt|2)', 0.021855269548324428),


In [91]:
%%capture
fig, implist = plot_variables([("VBF1j", "ZHh1j")])

In [92]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_ZHh_1j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [93]:
append_variables(confhandler, impdict, threshold_fscore)

In [94]:
implist

[('ZZEta', 0.15461847389558234),
 ('JetEta(JetPt|0)', 0.1495547407019382),
 ('JetPt(JetPt|0)', 0.09493044642337466),
 ('PFMET', 0.07298760258425005),
 ('ZZPt', 0.06454804726150981),
 ('Z2Mass', 0.06364588789942378),
 ('Z1Mass', 0.061259530877131714),
 ('D_VBF1j_ggH_ME', 0.06102671555788371),
 ('ZZMassErr', 0.05951341598277167),
 ('Z1Pt', 0.04921133810604738),
 ('ZZPhi', 0.04909493044642337),
 ('Z2Pt', 0.04528257959373727),
 ('JetPhi(JetPt|0)', 0.03838542576101508),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.01682090681566847),
 ('Z2Flav', 0.008148536173680228),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.004277981491182119),
 ('Z1Flav', 0.0018043187241720505),
 ('ExtraLepEta(ExtraLepPt|0)', 0.0017752168092660496),
 ('ExtraLepPt(ExtraLepPt|0)', 0.001717012979454048),
 ('ZZMass_masked', 0.0009312612769920261),
 ('nExtraLep', 0.00046563063849601303)]

In [95]:
%%capture
fig, implist = plot_variables([("VBF0j", "ZHh0j")])

In [96]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_ZHh_0j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [97]:
append_variables(confhandler, impdict, threshold_fscore)

In [98]:
implist

[('ZZPhi', 0.14727272727272728),
 ('PFMET', 0.1315151515151515),
 ('ZZPt', 0.12303030303030303),
 ('ZZEta', 0.11757575757575757),
 ('Z2Mass', 0.11393939393939394),
 ('Z2Pt', 0.11333333333333333),
 ('Z1Mass', 0.07454545454545454),
 ('ZZMassErr', 0.07151515151515152),
 ('Z1Pt', 0.06303030303030303),
 ('ExtraLepPt(ExtraLepPt|0)', 0.02181818181818182),
 ('Z1Flav', 0.011515151515151515),
 ('ExtraLepEta(ExtraLepPt|0)', 0.007272727272727273),
 ('Z2Flav', 0.0036363636363636364)]

In [99]:
%%capture
fig, implist = plot_variables([("WHl", "ggH")])

In [100]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_ggH_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [101]:
append_variables(confhandler, impdict, threshold_fscore)

In [102]:
implist

[('PFMET', 0.1271917505918228),
 ('Z1Mass', 0.09278578020302532),
 ('ZZPt', 0.08861292781767845),
 ('Z2Mass', 0.07069774906712675),
 ('ZZMassErr', 0.06696625606869157),
 ('Z1Pt', 0.06389680215062392),
 ('Z2Pt', 0.06177025237732215),
 ('JetPt(JetPt|0)', 0.06160975805480881),
 ('ZZEta', 0.061328892990410465),
 ('ZZPhi', 0.06050635958752959),
 ('JetEta(JetPt|0)', 0.04582112907755888),
 ('JetPhi(JetPt|0)', 0.04086586686995947),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03488745335633752),
 ('ZZMass_masked', 0.02816675360109136),
 ('JetEta(JetPt|1)', 0.012498495365726437),
 ('JetPhi(JetPt|1)', 0.011415158688761385),
 ('ExtraLepEta(ExtraLepPt|0)', 0.011154355414677205),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.010592625285880512),
 ('JetPt(JetPt|1)', 0.009328732496087951),
 ('nCleanedJetsPt30', 0.007864221803153713),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.007763912851582875),
 ('JetPhi(JetPt|2)', 0.004132728804718533),
 ('nExtraZ', 0.003811740159691851),
 ('JetEta(JetPt|2)', 0.0030493921277534807),
 ('Z2Flav',

In [103]:
%%capture
fig, implist = plot_variables([("WHl", "VBF")])

In [104]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_VBF_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [105]:
append_variables(confhandler, impdict, threshold_fscore)

In [106]:
implist

[('JetEta(JetPt|0)', 0.1203573997652671),
 ('PFMET', 0.10733351001400826),
 ('ZZEta', 0.07163139363192367),
 ('JetPt(JetPt|0)', 0.06290463029568773),
 ('Z1Mass', 0.06176882595691516),
 ('Z2Mass', 0.05482148941808958),
 ('ZZMassErr', 0.052512020595918675),
 ('JetEta(JetPt|1)', 0.05084617423238557),
 ('Z1Pt', 0.04912353765191383),
 ('ZZPt', 0.04617044637110514),
 ('ExtraLepEta(ExtraLepPt|0)', 0.045753984780221864),
 ('Z2Pt', 0.04359595653655397),
 ('ExtraLepPt(ExtraLepPt|0)', 0.04126755764207019),
 ('ZZPhi', 0.04045356453261652),
 ('JetPhi(JetPt|0)', 0.038844508386022034),
 ('ZZMass_masked', 0.02866012948169462),
 ('JetPt(JetPt|1)', 0.020539128459470715),
 ('JetPhi(JetPt|1)', 0.014822246620982093),
 ('JetEta(JetPt|2)', 0.010411539772081929),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.009162154999432098),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.005395070609169727),
 ('JetPhi(JetPt|2)', 0.0045242872827774205),
 ('Z2Flav', 0.003956385113391133),
 ('JetPt(JetPt|2)', 0.0030477416423730736),
 ('JetPt(JetP

In [107]:
%%capture
fig, implist = plot_variables([("WHl", "WHh")])

In [108]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_WHh_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [109]:
append_variables(confhandler, impdict, threshold_fscore)

In [110]:
implist

[('PFMET', 0.12598595141859278),
 ('JetPt(JetPt|0)', 0.06857512851705058),
 ('ZZMassErr', 0.06675038260801318),
 ('Z1Mass', 0.06669151983675392),
 ('ZZPt', 0.06500412039398815),
 ('JetEta(JetPt|0)', 0.06035396146450575),
 ('Z2Mass', 0.060177373150727936),
 ('Z2Pt', 0.05805831338539418),
 ('Z1Pt', 0.055487972373739355),
 ('ZZPhi', 0.048718753678923206),
 ('ZZEta', 0.043421104265588824),
 ('JetPhi(JetPt|0)', 0.042381195306675036),
 ('ExtraLepPt(ExtraLepPt|0)', 0.04177294667032924),
 ('ZZMass_masked', 0.03390495624534003),
 ('JetPt(JetPt|1)', 0.028352234823215475),
 ('JetEta(JetPt|1)', 0.025723031040301377),
 ('JetPhi(JetPt|1)', 0.021563395204646233),
 ('ExtraLepEta(ExtraLepPt|0)', 0.02056272809323863),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.014048581407212652),
 ('nCleanedJetsPt30', 0.010222501275360044),
 ('JetPt(JetPt|2)', 0.008456618137581917),
 ('JetEta(JetPt|2)', 0.007514813797433583),
 ('JetPhi(JetPt|2)', 0.006259074677235805),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.005219165718322019),
 

In [111]:
%%capture
fig, implist = plot_variables([("WHl", "ZHh")])

In [112]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_ZHh_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [113]:
append_variables(confhandler, impdict, threshold_fscore)

In [114]:
implist

[('PFMET', 0.16196661539338006),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0636358457243776),
 ('Z2Pt', 0.06095824075656583),
 ('JetEta(JetPt|0)', 0.059875804705748305),
 ('JetPt(JetPt|0)', 0.059306101521107504),
 ('ExtraLepPt(ExtraLepPt|0)', 0.05697031846408022),
 ('Z1Mass', 0.052184811713097475),
 ('ZZMass_masked', 0.05167207884692075),
 ('ZZPhi', 0.04927932547142939),
 ('JetPhi(JetPt|0)', 0.045918076682048656),
 ('ZZEta', 0.04517746254201561),
 ('Z1Pt', 0.04500655158662337),
 ('ZZMassErr', 0.04500655158662337),
 ('ZZPt', 0.04170227311570672),
 ('Z2Mass', 0.03469492394462485),
 ('JetPt(JetPt|1)', 0.022617216430239846),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.017831709679257107),
 ('ExtraLepEta(ExtraLepPt|0)', 0.017148065857688143),
 ('JetEta(JetPt|1)', 0.014698342163732695),
 ('nExtraLep', 0.013729846749843332),
 ('nCleanedJetsPt30', 0.01333105452059477),
 ('JetPhi(JetPt|1)', 0.008830399361932433),
 ('JetEta(JetPt|2)', 0.006893408534153706),
 ('JetPhi(JetPt|2)', 0.004386714521734177),
 ('Z1Flav

In [115]:
%%capture
fig, implist = plot_variables([("WHl", "ZHl")])

In [116]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_ZHl_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [117]:
append_variables(confhandler, impdict, threshold_fscore)

In [118]:
implist

[('PFMET', 0.11522020610511007),
 ('Z1Mass', 0.10078220614058427),
 ('Z2Mass', 0.08880966317245784),
 ('ZZPt', 0.08105855016938932),
 ('Z2Pt', 0.07353801947533656),
 ('Z1Pt', 0.07174657230529098),
 ('ZZEta', 0.05624434629915394),
 ('ZZMassErr', 0.04590361659483141),
 ('JetPt(JetPt|0)', 0.04579719399067028),
 ('ZZMass_masked', 0.04427180333102751),
 ('ZZPhi', 0.038791039216729634),
 ('JetEta(JetPt|0)', 0.03123503432128984),
 ('ExtraLepPt(ExtraLepPt|0)', 0.026091275120168858),
 ('JetPhi(JetPt|0)', 0.02559463630075028),
 ('ExtraLepEta(ExtraLepPt|0)', 0.023732240727930613),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.01979460437396903),
 ('JetPhi(JetPt|1)', 0.016264921335958426),
 ('JetPt(JetPt|1)', 0.016069813228329698),
 ('JetEta(JetPt|1)', 0.012770712499334858),
 ('nExtraLep', 0.012628815693786693),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.007467319391972188),
 ('JetEta(JetPt|2)', 0.006935206371166569),
 ('nCleanedJetsPt30', 0.006207985242732223),
 ('ExtraLepPt(ExtraLepPt|1)', 0.00585324322886181),
 

In [119]:
%%capture
fig, implist = plot_variables([("WHl", "ZHMET")])

In [120]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_ZHMET_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [121]:
append_variables(confhandler, impdict, threshold_fscore)

In [122]:
implist

[('PFMET', 0.10861504907306434),
 ('Z1Mass', 0.09581970192657216),
 ('ZZPt', 0.08447837150127227),
 ('ZZPhi', 0.07858960378044348),
 ('Z1Pt', 0.0638313340603417),
 ('Z2Pt', 0.06346782988004362),
 ('Z2Mass', 0.06048709560159942),
 ('ZZEta', 0.05917848055252636),
 ('ExtraLepPt(ExtraLepPt|0)', 0.05350781533987641),
 ('JetPt(JetPt|0)', 0.047037440930570704),
 ('ZZMassErr', 0.04609233006179571),
 ('ExtraLepEta(ExtraLepPt|0)', 0.04565612504543802),
 ('JetEta(JetPt|0)', 0.03438749545619774),
 ('JetPhi(JetPt|0)', 0.032860777898945835),
 ('ZZMass_masked', 0.02697201017811705),
 ('JetPhi(JetPt|1)', 0.013231552162849873),
 ('ExtraLepPhi(ExtraLepPt|1)', 0.013158851326790257),
 ('nExtraLep', 0.013086150490730643),
 ('JetPt(JetPt|1)', 0.012940748818611413),
 ('JetEta(JetPt|1)', 0.009523809523809525),
 ('JetEta(JetPt|2)', 0.006543075245365322),
 ('JetPt(JetPt|2)', 0.0057433660487095604),
 ('nExtraZ', 0.0056706652126499455),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.005234460196292257),
 ('ExtraLepEta(ExtraLep

In [123]:
%%capture
fig, implist = plot_variables([("WHl", "ttHh")])

In [124]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_ttHh_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [125]:
append_variables(confhandler, impdict, threshold_fscore)

In [126]:
implist

[('PFMET', 0.12916529198284393),
 ('JetPt(JetPt|0)', 0.08528538436159683),
 ('ExtraLepPt(ExtraLepPt|0)', 0.07163477400197954),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0575305179808644),
 ('Z1Mass', 0.05336522599802045),
 ('ZZMassErr', 0.04981854173540086),
 ('JetEta(JetPt|1)', 0.04614813592873639),
 ('JetPt(JetPt|1)', 0.04371494556252062),
 ('Z2Mass', 0.037570108874958756),
 ('JetEta(JetPt|0)', 0.036704058066644674),
 ('ZZEta', 0.03389970306829429),
 ('ZZPt', 0.031012867040580667),
 ('JetPt(JetPt|2)', 0.030229297261629824),
 ('JetPhi(JetPt|0)', 0.029115803365225998),
 ('Z1Pt', 0.02849719564500165),
 ('Z2Pt', 0.028290993071593534),
 ('ZZMass_masked', 0.027961068954140546),
 ('JetPt(JetPt|3)', 0.026682612999010227),
 ('ZZPhi', 0.02400197954470472),
 ('JetEta(JetPt|2)', 0.022970966677664136),
 ('ExtraLepEta(ExtraLepPt|0)', 0.020125371164632134),
 ('JetPhi(JetPt|1)', 0.017980864401187728),
 ('nCleanedJetsPt30', 0.014970306829429231),
 ('JetEta(JetPt|3)', 0.011423622566809634),
 ('JetPhi(JetP

In [127]:
%%capture
fig, implist = plot_variables([("WHl", "ttHl")])

In [128]:
# One-row importance table for this discriminant: each converted variable
# name maps to a single-element list holding its score from implist.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHl_ttHl_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: re-running this cell adds the same row to df again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [129]:
append_variables(confhandler, impdict, threshold_fscore)

In [130]:
implist

[('ZZMass_masked', 0.09176644251271117),
 ('PFMET', 0.07241266196490077),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.07142857142857142),
 ('JetPt(JetPt|0)', 0.062407741512219124),
 ('ZZPt', 0.06109562079711334),
 ('Z2Pt', 0.060193537805478106),
 ('ZZEta', 0.0546170247662785),
 ('JetEta(JetPt|0)', 0.05305888141709037),
 ('JetPt(JetPt|1)', 0.05035263244218468),
 ('JetEta(JetPt|1)', 0.04198786288338527),
 ('Z1Mass', 0.03837953091684435),
 ('Z2Mass', 0.03632934229949155),
 ('JetPhi(JetPt|1)', 0.029604723634574382),
 ('ExtraLepPt(ExtraLepPt|1)', 0.02829260291946859),
 ('ExtraLepEta(ExtraLepPt|0)', 0.02616040675742168),
 ('ExtraLepPt(ExtraLepPt|0)', 0.02616040675742168),
 ('nCleanedJetsPt30', 0.02566836148925701),
 ('JetPhi(JetPt|0)', 0.025422338855174677),
 ('Z1Pt', 0.02066590126291619),
 ('JetPt(JetPt|3)', 0.01853370510086928),
 ('ZZMassErr', 0.016483516483516484),
 ('ZZPhi', 0.015663441036575364),
 ('JetPt(JetPt|2)', 0.01410529768738724),
 ('nExtraLep', 0.010496965720846317),
 ('JetEta(JetPt|2

In [131]:
%%capture
# Run plot_variables for the single category pair (ZHh, ZHl). The %%capture magic above
# suppresses this cell's output, so only the returned figure and (variable, score) list are kept.
fig, implist = plot_variables([("ZHh", "ZHl")])

In [132]:
# One-row record for this discriminant: converted variable name -> [score], plus the discriminant label.
impdict = {convert_varname(varname): [score] for varname, score in implist}
impdict["discriminant"] = "D_ZHh_ZHl_ML"
# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0); same result on older pandas.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [133]:
# Hand the D_ZHh_ZHl_ML record to append_variables together with the shared confhandler.
# NOTE(review): presumably only variables whose score passes threshold_fscore are registered - confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [134]:
# Display the (variable, score) pairs obtained for the (ZHh, ZHl) pair.
implist

[('Z1Mass', 0.13092852371409486),
 ('Z2Pt', 0.09426545398489286),
 ('ZZPt', 0.08470787729304763),
 ('Z1Pt', 0.08403987462103694),
 ('Z2Mass', 0.08003185858897281),
 ('PFMET', 0.0754586095267458),
 ('ZZMassErr', 0.05675453471044654),
 ('JetPt(JetPt|0)', 0.052900673141154105),
 ('ZZEta', 0.04606649195827552),
 ('JetPhi(JetPt|0)', 0.04480756384564),
 ('ZZPhi', 0.041236318791429014),
 ('JetEta(JetPt|0)', 0.038718462566157956),
 ('ZZMass_masked', 0.03792199784183752),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03008581265094291),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.017470839114125686),
 ('JetPhi(JetPt|1)', 0.012743435589126972),
 ('JetEta(JetPt|1)', 0.01222958737988798),
 ('JetPt(JetPt|1)', 0.009172190534915985),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.007604953496737064),
 ('nExtraLep', 0.006680026720106881),
 ('ExtraLepEta(ExtraLepPt|0)', 0.005960639227172293),
 ('JetPhi(JetPt|2)', 0.0058321771748625455),
 ('JetPt(JetPt|2)', 0.005241251734237706),
 ('JetEta(JetPt|2)', 0.0045989414726889674),
 ('Z1Flav', 

In [135]:
%%capture
# Run plot_variables for the single category pair (ZHh, ZHMET). The %%capture magic above
# suppresses this cell's output, so only the returned figure and (variable, score) list are kept.
fig, implist = plot_variables([("ZHh", "ZHMET")])

In [136]:
# One-row record for this discriminant: converted variable name -> [score], plus the discriminant label.
impdict = {convert_varname(varname): [score] for varname, score in implist}
impdict["discriminant"] = "D_ZHh_ZHMET_ML"
# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0); same result on older pandas.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [137]:
# Hand the D_ZHh_ZHMET_ML record to append_variables together with the shared confhandler.
# NOTE(review): presumably only variables whose score passes threshold_fscore are registered - confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [138]:
# Display the (variable, score) pairs obtained for the (ZHh, ZHMET) pair.
implist

[('PFMET', 0.15495905702640178),
 ('ZZPt', 0.08460323871773216),
 ('ZZEta', 0.07244886718319686),
 ('JetEta(JetPt|0)', 0.07160430360224727),
 ('Z2Pt', 0.06587595931406749),
 ('Z1Pt', 0.06370947012815334),
 ('Z1Mass', 0.062644585613043),
 ('JetPt(JetPt|0)', 0.06113905922961113),
 ('ZZPhi', 0.060588256894209234),
 ('Z2Mass', 0.051298057503763816),
 ('ZZMassErr', 0.04476186979032791),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.03954760768185657),
 ('JetPhi(JetPt|0)', 0.03499430837586751),
 ('JetPt(JetPt|1)', 0.026805713656225902),
 ('JetEta(JetPt|1)', 0.022693056218558365),
 ('JetPhi(JetPt|1)', 0.020159365475709616),
 ('nCleanedJetsPt30', 0.014431021187529836),
 ('JetPt(JetPt|2)', 0.013292696361032572),
 ('JetEta(JetPt|2)', 0.0064260272463555245),
 ('ZZMass_masked', 0.005618183821099402),
 ('JetPhi(JetPt|2)', 0.004369698527521756),
 ('Z2Flav', 0.0034516946351852533),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0033415341681048726),
 ('Z1Flav', 0.003157933389637572),
 ('JetPhi(JetPt|3)', 0.0026805713656225

In [139]:
%%capture
# Run plot_variables for the single category pair (ZHh, ttHh). The %%capture magic above
# suppresses this cell's output, so only the returned figure and (variable, score) list are kept.
fig, implist = plot_variables([("ZHh", "ttHh")])

In [140]:
# One-row record for this discriminant: converted variable name -> [score], plus the discriminant label.
impdict = {convert_varname(varname): [score] for varname, score in implist}
impdict["discriminant"] = "D_ZHh_ttHh_ML"
# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0); same result on older pandas.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [141]:
# Hand the D_ZHh_ttHh_ML record to append_variables together with the shared confhandler.
# NOTE(review): presumably only variables whose score passes threshold_fscore are registered - confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [142]:
# Display the (variable, score) pairs obtained for the (ZHh, ttHh) pair.
implist

[('nCleanedJetsPt30BTagged_bTagSF', 0.11302623550945698),
 ('JetPt(JetPt|1)', 0.08907870652837095),
 ('JetPt(JetPt|0)', 0.07718120805369127),
 ('ZZPt', 0.06558877364246492),
 ('PFMET', 0.06497864551555826),
 ('nCleanedJetsPt30', 0.0599450884685784),
 ('JetPt(JetPt|2)', 0.05292861500915192),
 ('JetEta(JetPt|2)', 0.05079316656497865),
 ('JetPt(JetPt|3)', 0.038895668090298964),
 ('Z1Mass', 0.03813300793166565),
 ('ZZEta', 0.03813300793166565),
 ('Z1Pt', 0.03737034777303234),
 ('JetEta(JetPt|1)', 0.03615009151921904),
 ('Z2Mass', 0.030048810250152533),
 ('JetEta(JetPt|0)', 0.023794996949359364),
 ('ExtraLepPt(ExtraLepPt|0)', 0.02303233679072605),
 ('Z2Pt', 0.02303233679072605),
 ('ZZMass_masked', 0.0224222086638194),
 ('JetPhi(JetPt|1)', 0.021354484441732765),
 ('JetPhi(JetPt|3)', 0.020896888346552775),
 ('JetEta(JetPt|3)', 0.013575350823672972),
 ('nExtraLep', 0.013117754728492984),
 ('JetPhi(JetPt|0)', 0.01235509456985967),
 ('ZZPhi', 0.012050030506406345),
 ('ZZMassErr', 0.0089993898718

In [143]:
%%capture
# Run plot_variables for the single category pair (ZHh, ttHl). The %%capture magic above
# suppresses this cell's output, so only the returned figure and (variable, score) list are kept.
fig, implist = plot_variables([("ZHh", "ttHl")])

In [144]:
# One-row record for this discriminant: converted variable name -> [score], plus the discriminant label.
impdict = {convert_varname(varname): [score] for varname, score in implist}
impdict["discriminant"] = "D_ZHh_ttHl_ML"
# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0); same result on older pandas.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [145]:
# Hand the D_ZHh_ttHl_ML record to append_variables together with the shared confhandler.
# NOTE(review): presumably only variables whose score passes threshold_fscore are registered - confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [146]:
# Display the (variable, score) pairs obtained for the (ZHh, ttHl) pair.
implist

[('PFMET', 0.12150537634408602),
 ('Z1Mass', 0.07159498207885305),
 ('Z2Pt', 0.058393070489844684),
 ('ZZPt', 0.056959378733572284),
 ('ExtraLepPt(ExtraLepPt|0)', 0.05328554360812425),
 ('ZZMass_masked', 0.05104540023894863),
 ('ZZEta', 0.05065710872162485),
 ('ZZPhi', 0.04991039426523298),
 ('Z2Mass', 0.041875746714456395),
 ('nCleanedJetsPt30', 0.038410991636798085),
 ('ZZMassErr', 0.03796296296296296),
 ('JetPt(JetPt|0)', 0.03643966547192354),
 ('JetEta(JetPt|0)', 0.03608124253285543),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.035483870967741936),
 ('JetPhi(JetPt|1)', 0.03181003584229391),
 ('JetPt(JetPt|1)', 0.030017921146953404),
 ('Z1Pt', 0.027270011947431304),
 ('JetPt(JetPt|2)', 0.024283154121863798),
 ('JetPhi(JetPt|0)', 0.024283154121863798),
 ('JetPt(JetPt|3)', 0.020400238948626047),
 ('JetEta(JetPt|1)', 0.018488649940262843),
 ('JetEta(JetPt|2)', 0.01544205495818399),
 ('nExtraLep', 0.013112305854241339),
 ('JetPhi(JetPt|2)', 0.013052568697729989),
 ('JetPhi(JetPt|3)', 0.010872

In [147]:
%%capture
# Run plot_variables for the single category pair (ZHl, ggH). The %%capture magic above
# suppresses this cell's output, so only the returned figure and (variable, score) list are kept.
fig, implist = plot_variables([("ZHl", "ggH")])

In [148]:
# One-row record for this discriminant: converted variable name -> [score], plus the discriminant label.
impdict = {convert_varname(varname): [score] for varname, score in implist}
impdict["discriminant"] = "D_ZHl_ggH_ML"
# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0); same result on older pandas.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [149]:
# Hand the D_ZHl_ggH_ML record to append_variables together with the shared confhandler.
# NOTE(review): presumably only variables whose score passes threshold_fscore are registered - confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [150]:
# Display the (variable, score) pairs obtained for the (ZHl, ggH) pair.
implist

[('Z1Mass', 0.10904716918953358),
 ('Z2Mass', 0.0882637256409693),
 ('Z2Pt', 0.08551749895643386),
 ('ZZPt', 0.08514401212733703),
 ('Z1Pt', 0.08036557769624536),
 ('PFMET', 0.06597534986927961),
 ('ZZEta', 0.06545905925258695),
 ('ZZMassErr', 0.06225146648504954),
 ('JetEta(JetPt|0)', 0.05914273787815542),
 ('JetPt(JetPt|0)', 0.05458400158182657),
 ('ZZPhi', 0.05336467693389283),
 ('JetPhi(JetPt|0)', 0.03428389392974054),
 ('JetEta(JetPt|1)', 0.022376255025594833),
 ('ExtraLepPt(ExtraLepPt|0)', 0.02045389634642002),
 ('JetPt(JetPt|1)', 0.018707296175055475),
 ('ZZMass_masked', 0.01814706593141024),
 ('JetPhi(JetPt|1)', 0.01702660544411978),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.008963683898323704),
 ('JetEta(JetPt|2)', 0.008073906452534217),
 ('JetPt(JetPt|2)', 0.006283366654217105),
 ('ExtraLepEta(ExtraLepPt|0)', 0.005459498648856471),
 ('JetPhi(JetPt|2)', 0.004866313684996814),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.004668585363710262),
 ('nCleanedJetsPt30', 0.00465760045697212),
 ('Z2Fla

In [151]:
%%capture
# Run plot_variables for the single category pair (ZHl, VBF). The %%capture magic above
# suppresses this cell's output, so only the returned figure and (variable, score) list are kept.
fig, implist = plot_variables([("ZHl", "VBF")])

In [152]:
# One-row record for this discriminant: converted variable name -> [score], plus the discriminant label.
impdict = {convert_varname(varname): [score] for varname, score in implist}
impdict["discriminant"] = "D_ZHl_VBF_ML"
# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0); same result on older pandas.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [153]:
# Hand the D_ZHl_VBF_ML record to append_variables together with the shared confhandler.
# NOTE(review): presumably only variables whose score passes threshold_fscore are registered - confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [154]:
# Display the (variable, score) pairs obtained for the (ZHl, VBF) pair.
implist

[('JetEta(JetPt|0)', 0.0982014297622957),
 ('Z1Mass', 0.09438704625255766),
 ('Z1Pt', 0.07060398615707177),
 ('ZZEta', 0.06920201076110845),
 ('Z2Mass', 0.06884835930987446),
 ('ZZPt', 0.06807790436254325),
 ('PFMET', 0.06440245535150428),
 ('Z2Pt', 0.06240685073382676),
 ('ZZMassErr', 0.054891757395104455),
 ('JetEta(JetPt|1)', 0.054045519993937405),
 ('JetPt(JetPt|0)', 0.05249197969030237),
 ('ZZPhi', 0.042387652512188345),
 ('JetPhi(JetPt|0)', 0.03304114987243287),
 ('ExtraLepPt(ExtraLepPt|0)', 0.02134539116376588),
 ('ZZMass_masked', 0.021067522166367748),
 ('JetPhi(JetPt|1)', 0.018718266097456235),
 ('ExtraLepEta(ExtraLepPt|0)', 0.018010963194988253),
 ('JetPt(JetPt|1)', 0.016823704751559854),
 ('JetEta(JetPt|2)', 0.012920908379013312),
 ('JetPt(JetPt|2)', 0.01180943238942077),
 ('JetPhi(JetPt|2)', 0.009005481597494126),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.008310809103998788),
 ('nExtraLep', 0.00443327354939753),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.003902796372546543),
 ('nCleanedJ

In [155]:
%%capture
# Run plot_variables for the single category pair (ZHl, WHh). The %%capture magic above
# suppresses this cell's output, so only the returned figure and (variable, score) list are kept.
fig, implist = plot_variables([("ZHl", "WHh")])

In [156]:
# One-row record for this discriminant: converted variable name -> [score], plus the discriminant label.
impdict = {convert_varname(varname): [score] for varname, score in implist}
impdict["discriminant"] = "D_ZHl_WHh_ML"
# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0); same result on older pandas.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [157]:
# Hand the D_ZHl_WHh_ML record to append_variables together with the shared confhandler.
# NOTE(review): presumably only variables whose score passes threshold_fscore are registered - confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [158]:
# Display the (variable, score) pairs obtained for the (ZHl, WHh) pair.
implist

[('Z1Mass', 0.10751977713887748),
 ('Z2Pt', 0.08982303255415275),
 ('Z2Mass', 0.0824836157788532),
 ('Z1Pt', 0.08003714352042),
 ('ZZPt', 0.07994785621171807),
 ('PFMET', 0.07894783835425633),
 ('ZZMassErr', 0.0709476954945624),
 ('JetPt(JetPt|0)', 0.05667958356399221),
 ('ZZPhi', 0.04044715084197932),
 ('ZZEta', 0.040232861301094665),
 ('JetEta(JetPt|0)', 0.03657208164431508),
 ('JetPhi(JetPt|0)', 0.033089876604939374),
 ('ZZMass_masked', 0.03216128859443919),
 ('ExtraLepPt(ExtraLepPt|0)', 0.027482633618457472),
 ('JetPhi(JetPt|1)', 0.023339702494687406),
 ('JetEta(JetPt|1)', 0.019393203450061607),
 ('JetPt(JetPt|1)', 0.016571724495080268),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.01526812978803193),
 ('JetPhi(JetPt|2)', 0.01185735459561778),
 ('ExtraLepEta(ExtraLepPt|0)', 0.007607278701405382),
 ('nExtraLep', 0.007535848854443829),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.007178699619636065),
 ('JetPhi(JetPt|3)', 0.007017982463972571),
 ('JetEta(JetPt|2)', 0.005428668369078019),
 ('JetEta(JetPt

In [159]:
%%capture
# Run plot_variables for the single category pair (ZHl, ZHMET). The %%capture magic above
# suppresses this cell's output, so only the returned figure and (variable, score) list are kept.
fig, implist = plot_variables([("ZHl", "ZHMET")])

In [160]:
# One-row record for this discriminant: converted variable name -> [score], plus the discriminant label.
impdict = {convert_varname(varname): [score] for varname, score in implist}
impdict["discriminant"] = "D_ZHl_ZHMET_ML"
# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0); same result on older pandas.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [161]:
# Hand the D_ZHl_ZHMET_ML record to append_variables together with the shared confhandler.
# NOTE(review): presumably only variables whose score passes threshold_fscore are registered - confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [162]:
# Display the (variable, score) pairs obtained for the (ZHl, ZHMET) pair.
implist

[('Z1Mass', 0.11281733746130031),
 ('PFMET', 0.10835913312693499),
 ('Z2Pt', 0.10030959752321982),
 ('Z1Pt', 0.09424148606811146),
 ('ZZPt', 0.08590299277605778),
 ('Z2Mass', 0.07777089783281733),
 ('JetPt(JetPt|0)', 0.05824561403508772),
 ('ZZMass_masked', 0.04982456140350877),
 ('ZZEta', 0.04416924664602683),
 ('ZZMassErr', 0.04346749226006192),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03847265221878225),
 ('ZZPhi', 0.03678018575851393),
 ('JetEta(JetPt|0)', 0.02959752321981424),
 ('JetPhi(JetPt|0)', 0.019896800825593395),
 ('JetPt(JetPt|1)', 0.015603715170278637),
 ('JetPhi(JetPt|1)', 0.01523219814241486),
 ('nExtraLep', 0.013787409700722394),
 ('JetEta(JetPt|1)', 0.013126934984520123),
 ('ExtraLepEta(ExtraLepPt|0)', 0.0086687306501548),
 ('JetPt(JetPt|2)', 0.008503611971104231),
 ('nCleanedJetsPt30', 0.006769865841073272),
 ('Z2Flav', 0.00652218782249742),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.004705882352941176),
 ('JetEta(JetPt|2)', 0.0025180598555211557),
 ('ExtraLepPhi(ExtraLepPt|1)', 0

In [163]:
%%capture
# Run plot_variables for the single category pair (ZHl, ttHh). The %%capture magic above
# suppresses this cell's output, so only the returned figure and (variable, score) list are kept.
fig, implist = plot_variables([("ZHl", "ttHh")])

In [164]:
# One-row record for this discriminant: converted variable name -> [score], plus the discriminant label.
impdict = {convert_varname(varname): [score] for varname, score in implist}
impdict["discriminant"] = "D_ZHl_ttHh_ML"
# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0); same result on older pandas.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [165]:
# Hand the D_ZHl_ttHh_ML record to append_variables together with the shared confhandler.
# NOTE(review): presumably only variables whose score passes threshold_fscore are registered - confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [166]:
# Display the (variable, score) pairs obtained for the (ZHl, ttHh) pair.
implist

[('Z1Mass', 0.08260994990952243),
 ('JetPt(JetPt|0)', 0.0792006503894469),
 ('Z2Mass', 0.07033647163725053),
 ('ZZMass_masked', 0.06262620964569511),
 ('Z2Pt', 0.05787941569851302),
 ('ZZMassErr', 0.05119194356298025),
 ('PFMET', 0.05014292832603393),
 ('JetPt(JetPt|1)', 0.047651517138286435),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.04696965723427133),
 ('ZZPt', 0.045501035902546484),
 ('JetPhi(JetPt|0)', 0.033961868296137),
 ('JetEta(JetPt|1)', 0.03356848758228213),
 ('Z1Pt', 0.031549133251160474),
 ('ExtraLepPt(ExtraLepPt|0)', 0.031181977918229263),
 ('ZZEta', 0.031024625632687316),
 ('JetPt(JetPt|2)', 0.02680233930397839),
 ('JetPt(JetPt|3)', 0.02564842254333744),
 ('ZZPhi', 0.02415357583068894),
 ('JetEta(JetPt|0)', 0.022186672261414596),
 ('ExtraLepEta(ExtraLepPt|0)', 0.021688390023865096),
 ('JetPhi(JetPt|2)', 0.021242558548162913),
 ('JetPhi(JetPt|1)', 0.02103275550077365),
 ('JetEta(JetPt|2)', 0.019957514882903673),
 ('JetPhi(JetPt|3)', 0.01809551283732396),
 ('nCleanedJetsPt30',

In [167]:
%%capture
# Run plot_variables for the single category pair (ZHl, ttHl). The %%capture magic above
# suppresses this cell's output, so only the returned figure and (variable, score) list are kept.
fig, implist = plot_variables([("ZHl", "ttHl")])

In [168]:
# One-row record for this discriminant: converted variable name -> [score], plus the discriminant label.
impdict = {convert_varname(varname): [score] for varname, score in implist}
impdict["discriminant"] = "D_ZHl_ttHl_ML"
# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0); same result on older pandas.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [169]:
# Hand the D_ZHl_ttHl_ML record to append_variables together with the shared confhandler.
# NOTE(review): presumably only variables whose score passes threshold_fscore are registered - confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [170]:
# Display the (variable, score) pairs obtained for the (ZHl, ttHl) pair.
implist

[('PFMET', 0.10811606722161143),
 ('Z2Mass', 0.09030940788440396),
 ('Z1Mass', 0.08858174964661536),
 ('Z1Pt', 0.07234568870739752),
 ('JetPt(JetPt|0)', 0.06547432071619287),
 ('Z2Pt', 0.06070362808229936),
 ('ZZPt', 0.05750353384639548),
 ('ZZEta', 0.04264174650541856),
 ('ZZMass_masked', 0.04079629338778074),
 ('JetPt(JetPt|1)', 0.03557405371446521),
 ('JetEta(JetPt|0)', 0.03461206219569656),
 ('ZZPhi', 0.03451389979582221),
 ('ZZMassErr', 0.02913460028270771),
 ('JetPhi(JetPt|0)', 0.02817260876393906),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.022341762211402546),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0212423433328098),
 ('JetEta(JetPt|1)', 0.019652112454845296),
 ('nExtraLep', 0.01658944557876551),
 ('JetPt(JetPt|2)', 0.01615753101931836),
 ('JetPhi(JetPt|1)', 0.01435134286163028),
 ('nExtraZ', 0.013762368462384168),
 ('JetEta(JetPt|2)', 0.011308308465525364),
 ('ExtraLepEta(ExtraLepPt|0)', 0.011151248625726402),
 ('JetPhi(JetPt|2)', 0.010483744306580807),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.00

In [171]:
%%capture
# Run plot_variables for the single category pair (ZHMET, ggH). The %%capture magic above
# suppresses this cell's output, so only the returned figure and (variable, score) list are kept.
fig, implist = plot_variables([("ZHMET", "ggH")])

In [172]:
# One-row record for this discriminant: converted variable name -> [score], plus the discriminant label.
impdict = {convert_varname(varname): [score] for varname, score in implist}
impdict["discriminant"] = "D_ZHMET_ggH_ML"
# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0); same result on older pandas.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [173]:
# Hand the D_ZHMET_ggH_ML record to append_variables together with the shared confhandler.
# NOTE(review): presumably only variables whose score passes threshold_fscore are registered - confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [174]:
# Display the (variable, score) pairs obtained for the (ZHMET, ggH) pair.
implist

[('ZZPt', 0.1614072593763071),
 ('PFMET', 0.155040200771483),
 ('ZZPhi', 0.08867407166426547),
 ('JetPt(JetPt|0)', 0.07621880373658038),
 ('Z1Pt', 0.06989822001208347),
 ('ZZEta', 0.06947994608913882),
 ('Z2Pt', 0.06673792814983501),
 ('Z1Mass', 0.05939489705814008),
 ('ZZMassErr', 0.05906957289584979),
 ('Z2Mass', 0.05762885160570712),
 ('JetEta(JetPt|0)', 0.04368638750755217),
 ('JetPhi(JetPt|0)', 0.028163777478272995),
 ('JetPt(JetPt|1)', 0.01412836361946368),
 ('JetEta(JetPt|1)', 0.012827066970302552),
 ('nCleanedJetsPt30', 0.011572245201468606),
 ('JetPhi(JetPt|1)', 0.006599433006460009),
 ('Z2Flav', 0.005344611237626063),
 ('JetPt(JetPt|2)', 0.003764465306501836),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.002602593298322257),
 ('Z1Flav', 0.0019519449737416927),
 ('JetEta(JetPt|2)', 0.00176604545243296),
 ('ZZMass_masked', 0.0015336710507970442),
 ('JetPhi(JetPt|2)', 0.0013477715294883115),
 ('JetPhi(JetPt|3)', 0.00046474880327183155),
 ('JetPt(JetPt|3)', 0.00037179904261746524),
 ('E

In [175]:
%%capture
# Run plot_variables for the single category pair (ZHMET, VBF). The %%capture magic above
# suppresses this cell's output, so only the returned figure and (variable, score) list are kept.
fig, implist = plot_variables([("ZHMET", "VBF")])

In [176]:
# One-row record for this discriminant: converted variable name -> [score], plus the discriminant label.
impdict = {convert_varname(varname): [score] for varname, score in implist}
impdict["discriminant"] = "D_ZHMET_VBF_ML"
# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0); same result on older pandas.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [177]:
# Hand the D_ZHMET_VBF_ML record to append_variables together with the shared confhandler.
# NOTE(review): presumably only variables whose score passes threshold_fscore are registered - confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [178]:
# Display the (variable, score) pairs obtained for the (ZHMET, VBF) pair.
implist

[('JetEta(JetPt|0)', 0.1456776947705443),
 ('PFMET', 0.14044823906083245),
 ('ZZPt', 0.08904304517965136),
 ('JetPt(JetPt|0)', 0.0832443970117396),
 ('JetEta(JetPt|1)', 0.06602632515119175),
 ('Z1Pt', 0.06506581287797937),
 ('ZZEta', 0.06054784774101743),
 ('ZZPhi', 0.05090715048025614),
 ('ZZMassErr', 0.04991106367840626),
 ('Z1Mass', 0.04966204197794379),
 ('Z2Mass', 0.04702952685876912),
 ('Z2Pt', 0.039736748488082535),
 ('JetPhi(JetPt|0)', 0.03635716826752045),
 ('JetPt(JetPt|1)', 0.018676627534685165),
 ('JetPhi(JetPt|1)', 0.015795090715048025),
 ('JetPhi(JetPt|2)', 0.009889718961223764),
 ('ZZMass_masked', 0.007506225542511562),
 ('JetEta(JetPt|2)', 0.005407328352899324),
 ('nCleanedJetsPt30', 0.004695837780149413),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.004340092493774457),
 ('Z1Flav', 0.0035930273923870507),
 ('Z2Flav', 0.002810387762362149),
 ('JetPt(JetPt|2)', 0.0026325151191746708),
 ('JetEta(JetPt|3)', 0.0007470651013874066),
 ('JetPt(JetPt|3)', 0.00021344717182497332),
 ('E

In [179]:
%%capture
# Run plot_variables for the single category pair (ZHMET, WHh). The %%capture magic above
# suppresses this cell's output, so only the returned figure and (variable, score) list are kept.
fig, implist = plot_variables([("ZHMET", "WHh")])

In [180]:
# One-row record for this discriminant: converted variable name -> [score], plus the discriminant label.
impdict = {convert_varname(varname): [score] for varname, score in implist}
impdict["discriminant"] = "D_ZHMET_WHh_ML"
# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0); same result on older pandas.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [181]:
# Hand the D_ZHMET_WHh_ML record to append_variables together with the shared confhandler.
# NOTE(review): presumably only variables whose score passes threshold_fscore are registered - confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [182]:
# Display the (variable, score) pairs obtained for the (ZHMET, WHh) pair.
implist

[('PFMET', 0.15419155831478662),
 ('ZZPt', 0.0901175276248487),
 ('Z1Pt', 0.0732107297645543),
 ('JetPt(JetPt|0)', 0.07282027253914333),
 ('ZZEta', 0.07235172386865019),
 ('JetEta(JetPt|0)', 0.06899379173011597),
 ('ZZPhi', 0.06532349381125298),
 ('Z2Pt', 0.0606380071063215),
 ('Z1Mass', 0.057162937800163995),
 ('Z2Mass', 0.05673343485221194),
 ('ZZMassErr', 0.0544297372222873),
 ('JetPhi(JetPt|0)', 0.04095896294560931),
 ('JetPt(JetPt|1)', 0.0339307328882121),
 ('JetPhi(JetPt|1)', 0.022060833235719026),
 ('nCleanedJetsPt30', 0.01811721525906837),
 ('JetEta(JetPt|1)', 0.01382218577954785),
 ('JetPt(JetPt|2)', 0.011049939479130062),
 ('ZZMass_masked', 0.006325407051657491),
 ('JetPt(JetPt|3)', 0.0060911327164109175),
 ('Z2Flav', 0.005895904103705439),
 ('JetPhi(JetPt|2)', 0.005661629768458866),
 ('Z1Flav', 0.0040607551442739444),
 ('JetEta(JetPt|2)', 0.003943617976650658),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0012885088438561556),
 ('JetEta(JetPt|3)', 0.0007809144508219124),
 ('JetPhi(

In [183]:
%%capture
# Run plot_variables for the single category pair (ZHMET, ttHh). The %%capture magic above
# suppresses this cell's output, so only the returned figure and (variable, score) list are kept.
fig, implist = plot_variables([("ZHMET", "ttHh")])

In [184]:
# One-row record for this discriminant: converted variable name -> [score], plus the discriminant label.
impdict = {convert_varname(varname): [score] for varname, score in implist}
impdict["discriminant"] = "D_ZHMET_ttHh_ML"
# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0); same result on older pandas.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [185]:
# Hand the D_ZHMET_ttHh_ML record to append_variables together with the shared confhandler.
# NOTE(review): presumably only variables whose score passes threshold_fscore are registered - confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [186]:
# Display the (variable, score) pairs obtained for the (ZHMET, ttHh) pair.
implist

[('PFMET', 0.2372835497835498),
 ('JetPt(JetPt|0)', 0.10849567099567099),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.08955627705627706),
 ('nCleanedJetsPt30', 0.07602813852813853),
 ('JetEta(JetPt|0)', 0.07305194805194805),
 ('JetPt(JetPt|1)', 0.060064935064935064),
 ('JetPt(JetPt|2)', 0.058982683982683984),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03706709956709957),
 ('ZZMass_masked', 0.035173160173160176),
 ('Z1Mass', 0.029491341991341992),
 ('JetEta(JetPt|2)', 0.029491341991341992),
 ('Z1Pt', 0.02867965367965368),
 ('JetPt(JetPt|3)', 0.026515151515151516),
 ('Z2Pt', 0.021374458874458876),
 ('Z2Mass', 0.019751082251082252),
 ('JetPhi(JetPt|0)', 0.01893939393939394),
 ('ZZEta', 0.017586580086580088),
 ('ZZPt', 0.015422077922077922),
 ('ZZPhi', 0.007575757575757576),
 ('nExtraLep', 0.002976190476190476),
 ('JetEta(JetPt|1)', 0.002435064935064935),
 ('JetEta(JetPt|3)', 0.0021645021645021645),
 ('JetPhi(JetPt|1)', 0.0008116883116883117),
 ('ZZMassErr', 0.0005411255411255411),
 ('JetPhi(JetPt|3)', 0.0

In [187]:
%%capture
# Run plot_variables for the single category pair (ZHMET, ttHl). The %%capture magic above
# suppresses this cell's output, so only the returned figure and (variable, score) list are kept.
fig, implist = plot_variables([("ZHMET", "ttHl")])

In [188]:
# One-row record for this discriminant: converted variable name -> [score], plus the discriminant label.
impdict = {convert_varname(varname): [score] for varname, score in implist}
impdict["discriminant"] = "D_ZHMET_ttHl_ML"
# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0); same result on older pandas.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [189]:
# Hand the D_ZHMET_ttHl_ML record to append_variables together with the shared confhandler.
# NOTE(review): presumably only variables whose score passes threshold_fscore are registered - confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [190]:
# Display the (variable, score) pairs obtained for the (ZHMET, ttHl) pair.
implist

[('Z1Mass', 0.10904103917611137),
 ('Z2Mass', 0.08611499249598924),
 ('PFMET', 0.07452258965999069),
 ('ZZMass_masked', 0.06846762925011644),
 ('JetPt(JetPt|0)', 0.060601355897117425),
 ('ExtraLepPt(ExtraLepPt|0)', 0.05066501060911867),
 ('ZZPt', 0.04983698183511877),
 ('Z2Pt', 0.04668012213424416),
 ('ZZEta', 0.04331625523986959),
 ('ZZPhi', 0.042332971070744706),
 ('JetPt(JetPt|3)', 0.041401438699994825),
 ('nCleanedJetsPt30', 0.03984888474874502),
 ('ZZMassErr', 0.037778812813745274),
 ('JetEta(JetPt|0)', 0.03720954303162035),
 ('nExtraLep', 0.03653676965274543),
 ('Z1Pt', 0.02665217616312167),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.022822543083372147),
 ('JetPt(JetPt|1)', 0.02064896755162242),
 ('JetPhi(JetPt|0)', 0.019717435180872534),
 ('JetEta(JetPt|1)', 0.01552553951249806),
 ('JetPt(JetPt|2)', 0.014438751746623195),
 ('JetPhi(JetPt|1)', 0.014283496351498214),
 ('JetEta(JetPt|2)', 0.009211820110748848),
 ('JetPhi(JetPt|2)', 0.006261967603374217),
 ('Z2Flav', 0.005951456813124256

In [191]:
%%capture
# Run plot_variables for the single category pair (ttHh, ggH). The %%capture magic above
# suppresses this cell's output, so only the returned figure and (variable, score) list are kept.
fig, implist = plot_variables([("ttHh", "ggH")])

In [192]:
# One-row record for this discriminant: converted variable name -> [score], plus the discriminant label.
impdict = {convert_varname(varname): [score] for varname, score in implist}
impdict["discriminant"] = "D_ttHh_ggH_ML"
# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0); same result on older pandas.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [193]:
# Hand the D_ttHh_ggH_ML record to append_variables together with the shared confhandler.
# NOTE(review): presumably only variables whose score passes threshold_fscore are registered - confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [194]:
# Display the (variable, score) pairs obtained for the (ttHh, ggH) pair.
implist

[('JetEta(JetPt|0)', 0.08706655017624197),
 ('JetEta(JetPt|1)', 0.08375259843943),
 ('JetPt(JetPt|0)', 0.0835718374356039),
 ('PFMET', 0.07043653782424004),
 ('JetPt(JetPt|1)', 0.058747326243485075),
 ('Z1Pt', 0.056758955201397884),
 ('ZZPt', 0.05615641852197752),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.04645557798330974),
 ('ZZPhi', 0.039707167173801704),
 ('JetPt(JetPt|2)', 0.03946615250203356),
 ('ZZEta', 0.03666435694272889),
 ('JetEta(JetPt|2)', 0.03579067875756937),
 ('JetPhi(JetPt|1)', 0.03365167354562709),
 ('JetPhi(JetPt|0)', 0.03259723435664146),
 ('JetPt(JetPt|3)', 0.03244660018678637),
 ('Z2Pt', 0.03211520501310517),
 ('Z2Mass', 0.03124152682794565),
 ('Z1Mass', 0.027897448257162654),
 ('ZZMassErr', 0.02494501852800289),
 ('nCleanedJetsPt30', 0.017112041695538215),
 ('JetEta(JetPt|3)', 0.015816587834784443),
 ('ZZMass_masked', 0.011719338414725996),
 ('JetPhi(JetPt|2)', 0.009188684361160486),
 ('JetPhi(JetPt|3)', 0.009098303859247432),
 ('ExtraLepPt(ExtraLepPt|0)', 0.00741120

In [195]:
%%capture
# Run plot_variables for the single category pair (ttHh, VBF). The %%capture magic above
# suppresses this cell's output, so only the returned figure and (variable, score) list are kept.
fig, implist = plot_variables([("ttHh", "VBF")])

In [196]:
# One-row record for this discriminant: converted variable name -> [score], plus the discriminant label.
impdict = {convert_varname(varname): [score] for varname, score in implist}
impdict["discriminant"] = "D_ttHh_VBF_ML"
# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0); same result on older pandas.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [197]:
# Hand the D_ttHh_VBF_ML record to append_variables together with the shared confhandler.
# NOTE(review): presumably only variables whose score passes threshold_fscore are registered - confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [198]:
# Display the (variable, score) pairs obtained for the (ttHh, VBF) pair.
implist

[('JetEta(JetPt|0)', 0.15780433342330627),
 ('JetEta(JetPt|1)', 0.14668858734326307),
 ('JetEta(JetPt|2)', 0.08338036463573234),
 ('JetPt(JetPt|0)', 0.049346060412730354),
 ('PFMET', 0.044536598532623366),
 ('ZZEta', 0.0418374107427674),
 ('JetPt(JetPt|1)', 0.04053689298947317),
 ('JetPt(JetPt|2)', 0.03661080165877359),
 ('Z2Mass', 0.032169410840919684),
 ('Z1Pt', 0.03204672048683532),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.030010060609034918),
 ('Z1Mass', 0.02868500478492381),
 ('JetPhi(JetPt|1)', 0.02797340073123451),
 ('ZZMassErr', 0.027605329668981424),
 ('JetPt(JetPt|3)', 0.026378426128137806),
 ('ZZPhi', 0.025519593649547274),
 ('ZZPt', 0.025225136799744804),
 ('JetEta(JetPt|3)', 0.02404730940053493),
 ('JetPhi(JetPt|0)', 0.02365470026746497),
 ('nCleanedJetsPt30', 0.021029126690059626),
 ('Z2Pt', 0.020808284052707775),
 ('JetPhi(JetPt|2)', 0.014624690206855938),
 ('ZZMass_masked', 0.008465634431820971),
 ('JetPhi(JetPt|3)', 0.007901258803032905),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0

In [199]:
%%capture
# Run plot_variables for the single category pair (ttHh, WHh). The %%capture magic above
# suppresses this cell's output, so only the returned figure and (variable, score) list are kept.
fig, implist = plot_variables([("ttHh", "WHh")])

In [200]:
# One-row record for this discriminant: converted variable name -> [score], plus the discriminant label.
impdict = {convert_varname(varname): [score] for varname, score in implist}
impdict["discriminant"] = "D_ttHh_WHh_ML"
# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0); same result on older pandas.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [201]:
# Hand the D_ttHh_WHh_ML record to append_variables together with the shared confhandler.
# NOTE(review): presumably only variables whose score passes threshold_fscore are registered - confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [202]:
# Display the (variable, score) pairs obtained for the (ttHh, WHh) pair.
implist

[('PFMET', 0.08238682241481401),
 ('JetPt(JetPt|0)', 0.07956074716046724),
 ('JetPt(JetPt|1)', 0.06909081121817301),
 ('ZZPt', 0.06351940571674651),
 ('Z1Pt', 0.05762502018625182),
 ('JetEta(JetPt|0)', 0.052161274694514724),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.044921139042902514),
 ('ZZMassErr', 0.04478656403079076),
 ('ZZEta', 0.043440813909673255),
 ('JetPhi(JetPt|1)', 0.041502933735264036),
 ('JetEta(JetPt|1)', 0.03854228346880551),
 ('JetPt(JetPt|2)', 0.03808472842762556),
 ('Z2Mass', 0.03684663831619745),
 ('Z1Mass', 0.03663131829681865),
 ('ZZPhi', 0.036146848253216346),
 ('JetPhi(JetPt|0)', 0.03461269311514238),
 ('Z2Pt', 0.034235883081229476),
 ('JetEta(JetPt|2)', 0.028879797599181783),
 ('JetPt(JetPt|3)', 0.02309307207837649),
 ('JetEta(JetPt|3)', 0.022447112020240082),
 ('nCleanedJetsPt30', 0.01945954675135921),
 ('JetPhi(JetPt|2)', 0.018948161705334553),
 ('ZZMass_masked', 0.012784626150616353),
 ('JetPhi(JetPt|3)', 0.009447165850244927),
 ('ExtraLepPt(ExtraLepPt|0)', 0.00

In [203]:
%%capture
# Run plot_variables for the single category pair (ttHh, ttHl). The %%capture magic above
# suppresses this cell's output, so only the returned figure and (variable, score) list are kept.
fig, implist = plot_variables([("ttHh", "ttHl")])

In [204]:
# One-row record for this discriminant: converted variable name -> [score], plus the discriminant label.
impdict = {convert_varname(varname): [score] for varname, score in implist}
impdict["discriminant"] = "D_ttHh_ttHl_ML"
# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0); same result on older pandas.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [205]:
# Hand the D_ttHh_ttHl_ML record to append_variables together with the shared confhandler.
# NOTE(review): presumably only variables whose score passes threshold_fscore are registered - confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [206]:
# Display the (variable, score) pairs obtained for the (ttHh, ttHl) pair.
implist

[('PFMET', 0.09847921421793655),
 ('Z1Mass', 0.07752673556792244),
 ('ZZPt', 0.05159900494960634),
 ('ZZMassErr', 0.04990639345523556),
 ('JetPt(JetPt|0)', 0.04236657861667479),
 ('JetPhi(JetPt|0)', 0.042187059215756675),
 ('Z2Pt', 0.04049444772138589),
 ('JetPt(JetPt|3)', 0.03975072448901085),
 ('ZZEta', 0.03949426820198497),
 ('Z2Mass', 0.038186341138153),
 ('JetEta(JetPt|0)', 0.034570307491088144),
 ('JetPt(JetPt|2)', 0.03439078809017003),
 ('ExtraLepPt(ExtraLepPt|0)', 0.033493191085579466),
 ('JetPhi(JetPt|2)', 0.03346754545687688),
 ('ZZPhi', 0.03313415228374324),
 ('Z1Pt', 0.03308286102633806),
 ('JetPhi(JetPt|1)', 0.031903162106019026),
 ('ZZMass_masked', 0.03146718641807504),
 ('nCleanedJetsPt30', 0.031415895160669866),
 ('JetEta(JetPt|1)', 0.029082142948734388),
 ('JetPhi(JetPt|3)', 0.028158900315441233),
 ('JetEta(JetPt|2)', 0.026081604390531633),
 ('JetPt(JetPt|1)', 0.024209473495242734),
 ('JetEta(JetPt|3)', 0.0222091144564409),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0204139

In [207]:
%%capture
# Run plot_variables for the single category pair (ttHl, ggH). The %%capture magic above
# suppresses this cell's output, so only the returned figure and (variable, score) list are kept.
fig, implist = plot_variables([("ttHl", "ggH")])

In [208]:
# One-row record for this discriminant: converted variable name -> [score], plus the discriminant label.
impdict = {convert_varname(varname): [score] for varname, score in implist}
impdict["discriminant"] = "D_ttHl_ggH_ML"
# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0); same result on older pandas.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [209]:
# Hand the D_ttHl_ggH_ML record to append_variables together with the shared confhandler.
# NOTE(review): presumably only variables whose score passes threshold_fscore are registered - confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [210]:
# Display the (variable, score) pairs obtained for the (ttHl, ggH) pair.
implist

[('PFMET', 0.13029386866610232),
 ('Z1Mass', 0.06849679525940258),
 ('ZZMass_masked', 0.06658604426170033),
 ('ZZPt', 0.06431249244164954),
 ('JetPt(JetPt|0)', 0.05623412746402225),
 ('Z2Pt', 0.04924416495344056),
 ('ExtraLepPt(ExtraLepPt|0)', 0.04752690772765752),
 ('Z2Mass', 0.04508404885717741),
 ('JetEta(JetPt|0)', 0.04329423146692466),
 ('Z1Pt', 0.04278631031563671),
 ('ZZMassErr', 0.042423509493288186),
 ('JetPt(JetPt|1)', 0.04181884145604063),
 ('ZZPhi', 0.040029024065787885),
 ('ZZEta', 0.03867456766235337),
 ('JetEta(JetPt|1)', 0.038481073890434155),
 ('JetPhi(JetPt|0)', 0.02735518200507921),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.023606240174144394),
 ('nCleanedJetsPt30', 0.023219252630305963),
 ('nExtraLep', 0.019712178014270165),
 ('JetPt(JetPt|2)', 0.018212601281896237),
 ('JetEta(JetPt|2)', 0.016229290119724273),
 ('JetPhi(JetPt|1)', 0.01373805780626436),
 ('JetPt(JetPt|3)', 0.010085862861289152),
 ('JetEta(JetPt|3)', 0.008199298585076793),
 ('ExtraLepEta(ExtraLepPt|0)', 0

In [211]:
%%capture
# Run plot_variables for the single category pair (ttHl, VBF). The %%capture magic above
# suppresses this cell's output, so only the returned figure and (variable, score) list are kept.
fig, implist = plot_variables([("ttHl", "VBF")])

In [212]:
# One-row record for this discriminant: converted variable name -> [score], plus the discriminant label.
impdict = {convert_varname(varname): [score] for varname, score in implist}
impdict["discriminant"] = "D_ttHl_VBF_ML"
# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0); same result on older pandas.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [213]:
# Hand the D_ttHl_VBF_ML record to append_variables together with the shared confhandler.
# NOTE(review): presumably only variables whose score passes threshold_fscore are registered - confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [214]:
# Display the (variable, score) pairs obtained for the (ttHl, VBF) pair.
implist

[('JetEta(JetPt|0)', 0.10585940923832063),
 ('PFMET', 0.09113419230432115),
 ('JetEta(JetPt|1)', 0.08228153212376194),
 ('Z1Mass', 0.06256025944429836),
 ('ZZMass_masked', 0.047090016653519154),
 ('ZZEta', 0.046301165746340606),
 ('ZZPt', 0.042422648786046104),
 ('ZZMassErr', 0.04191866070645981),
 ('Z2Mass', 0.04106407222368306),
 ('ExtraLepPt(ExtraLepPt|0)', 0.04045052151809975),
 ('JetPt(JetPt|0)', 0.03806205627136471),
 ('JetEta(JetPt|2)', 0.03560785344903147),
 ('JetPt(JetPt|1)', 0.035169602945043385),
 ('Z2Pt', 0.033548076080287495),
 ('JetPt(JetPt|2)', 0.032079936891927424),
 ('Z1Pt', 0.03186081163993339),
 ('ZZPhi', 0.027061968621263914),
 ('JetPhi(JetPt|0)', 0.023599789639758086),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.02329301428696643),
 ('nCleanedJetsPt30', 0.021715312472609345),
 ('JetPhi(JetPt|1)', 0.020093785607853448),
 ('ExtraLepEta(ExtraLepPt|0)', 0.019787010255061792),
 ('JetPt(JetPt|3)', 0.014659479358401261),
 ('JetEta(JetPt|3)', 0.008962222806556227),
 ('nExtraLep'

In [215]:
%%capture
# Run plot_variables for the single category pair (ttHl, WHh). The %%capture magic above
# suppresses this cell's output, so only the returned figure and (variable, score) list are kept.
fig, implist = plot_variables([("ttHl", "WHh")])

In [216]:
# One-row record for this discriminant: converted variable name -> [score], plus the discriminant label.
impdict = {convert_varname(varname): [score] for varname, score in implist}
impdict["discriminant"] = "D_ttHl_WHh_ML"
# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0); same result on older pandas.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [217]:
# Hand the D_ttHl_WHh_ML record to append_variables together with the shared confhandler.
# NOTE(review): presumably only variables whose score passes threshold_fscore are registered - confirm in append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [218]:
# Display the (variable, score) pairs obtained for the (ttHl, WHh) pair.
implist

[('PFMET', 0.14763102510138082),
 ('ZZMass_masked', 0.0859812124634259),
 ('ExtraLepPt(ExtraLepPt|0)', 0.07150556952928494),
 ('ZZPt', 0.06842564550074431),
 ('Z1Mass', 0.061136491966531495),
 ('JetPt(JetPt|0)', 0.059750526153688206),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.04660951696524819),
 ('ZZMassErr', 0.04183563472101021),
 ('Z2Mass', 0.038499050356757866),
 ('Z2Pt', 0.03629177146963708),
 ('JetPt(JetPt|1)', 0.03136389302397208),
 ('JetPt(JetPt|2)', 0.03023458754684051),
 ('JetEta(JetPt|0)', 0.025563369436887223),
 ('Z1Pt', 0.025358041168317847),
 ('JetEta(JetPt|1)', 0.025204044966890816),
 ('ZZEta', 0.02340742261690878),
 ('JetPhi(JetPt|1)', 0.022791437811200656),
 ('ZZPhi', 0.022637441609773625),
 ('nCleanedJetsPt30', 0.021456804065499716),
 ('JetPt(JetPt|3)', 0.0182742159026744),
 ('nExtraLep', 0.015296956008418459),
 ('JetPhi(JetPt|3)', 0.014732303269852677),
 ('ExtraLepEta(ExtraLepPt|0)', 0.013243673322724706),
 ('JetPhi(JetPt|2)', 0.01262768851701658),
 ('JetPhi(JetPt|0)', 0

In [219]:
# Save the variable configuration held by confhandler to out_path.
# NOTE(review): presumably this contains the entries added by the append_variables calls in the cells above - confirm.
confhandler.save_configuration(out_path)

In [220]:
# Rows were concatenated from records with differing column sets, so variables missing
# for a given discriminant are NaN; replace them with a score of 0.0 for the table and plot.
df = df.fillna(0.0)

In [221]:
# Write the per-discriminant score table to a CSV file (relative path, i.e. the current working directory).
df.to_csv("input_parameters_table_ZZMask.csv")

In [222]:
# Now plot the data contained in the table to have a global picture of the relevant input variables.
# Every column except the "discriminant" name column holds the scores of one input variable.
datacol_labels = [col for col in df.columns.tolist() if col != "discriminant"]
# Transposed so that rows = input variables and columns = discriminants.
# `.values` replaces `.as_matrix()` (deprecated in pandas 0.23, removed in 1.0) and returns the same ndarray.
variable_data = df[datacol_labels].values.transpose()
# The leading '' placeholder entry is kept for the plotting cell that consumes these labels.
datacol_labels = np.concatenate([[''], np.array(datacol_labels)])

In [223]:
# Discriminant names in the row order of df, preceded by a '' placeholder entry.
# `.values` replaces `.as_matrix()` (deprecated in pandas 0.23, removed in 1.0) and returns the same ndarray.
discriminant_labels = np.concatenate([[''], df["discriminant"].values])

In [224]:
# Heat map of the score table: one row per input variable, one column per discriminant.
fig = plt.figure(figsize = (15, 10))
ax = fig.add_subplot(111)
cax = ax.matshow(variable_data, interpolation = 'nearest', cmap = 'Blues', vmin = np.min(variable_data), vmax = np.max(variable_data))
# Pin one tick per matrix column / row before labelling. Assigning labels to whatever ticks a
# MultipleLocator happens to emit only lines up if an off-screen tick at -1 absorbs the first label.
ax.set_xticks(np.arange(variable_data.shape[1]))
ax.set_yticks(np.arange(variable_data.shape[0]))
# Both label arrays carry a leading '' placeholder at index 0; skip it so labels match the ticks one-to-one.
ax.set_xticklabels(discriminant_labels[1:], rotation = 'vertical')
ax.set_yticklabels(datacol_labels[1:])

In [225]:
# Operate on the Figure object created in the previous cell rather than on pyplot's "current figure":
# the inline backend closes figures once the creating cell has finished, so plt.tight_layout() /
# plt.savefig() here could act on a fresh empty figure and write a blank PDF.
fig.tight_layout()
fig.savefig(os.path.join(out_dir, "input_variables_exclusive_fullmassrange_ZZMask.pdf"))