In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
from trainlib.FileCollection import FileCollection
from trainlib.config import Config
from trainlib.ConfigFileHandler import ConfigFileHandler
from trainlib.ConfigFileUtils import ConfigFileUtils
import trainlib.cuts as cuts
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd
import copy
import re
from scipy import interpolate
import scipy.integrate as integrate
import pickle
import os

Welcome to JupyROOT 6.10/09


In [3]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error



In [4]:
# Scalar per-event branches offered as candidate inputs (plot_variables passes them as the mandatory branches).
# Longer variant with additional branches, currently disabled:
#candidate_branches = ["PFMET", "nCleanedJetsPt30", "nCleanedJetsPt30BTagged_bTagSF", "nExtraLep", "ZZMass", "nExtraZ", "Z1Mass", "Z2Mass", "Z1Pt", "Z2Pt", "ZZMassErr", "ZZPt", "ZZEta", "ZZPhi", "Z1Flav", "Z2Flav", "costhetastar", "helphi", "helcosthetaZ1", "helcosthetaZ2", "phistarZ1", "phistarZ2", "xi", "xistar"]
candidate_branches = ["PFMET", "nCleanedJetsPt30", "nCleanedJetsPt30BTagged_bTagSF", "nExtraLep", "ZZMass", "nExtraZ", "Z1Mass", "Z2Mass", "Z1Pt", "Z2Pt", "ZZMassErr", "ZZPt", "ZZEta", "ZZPhi", "Z1Flav", "Z2Flav"]
# Branches that plot_variables passes as the *optional* inputs; whether one is used depends on the
# jet-multiplicity tag ("0j", "1j", "2j") in the category names.
MELA_branches = ["D_VBF2j_ggH_ME", "D_VBF1j_ggH_ME", "D_WHh_ggH_ME", "D_ZHh_ggH_ME", "D_WHh_ZHh_ME", "D_VBF2j_WHh_ME", "D_VBF2j_ZHh_ME"]
# Basenames of the per-object list branches that prepare_data unrolls into fixed columns.
#list_branches = ["Jet", "Lep", "ExtraLep"]
list_branches = ["Jet", "ExtraLep"]
# Minimum Pt per entry of list_branches, in the same order: prepare_data zips the two lists and zeroes
# every object below its limit. Keep both lists the same length, otherwise a limit silently ends up on
# the wrong collection (the three-collection variant above needs [30.0, 0.0, 0.0]).
pt_limits = [30.0, 0.0]

In [5]:
# Full list of branches requested from the input files (passed as `read_branches` to get_data):
# the raw per-object list branches, the candidate and MELA inputs, and a few bookkeeping branches
# including the per-event "training_weight" used when histogramming.
allbranches = ["JetPt", "JetEta", "JetPhi", "LepPt", "LepEta", "LepPhi", "ExtraLepPt", "ExtraLepEta", "ExtraLepPhi"] + candidate_branches + MELA_branches + ["LHEAssociatedParticleId", "GenAssocLep1Id", "GenAssocLep2Id", "training_weight"]

In [6]:
# Base directory of the input ROOT files; every file path in `collections` is built by prefixing it.
# NOTE(review): absolute, site-specific path - the notebook only runs where this directory exists.
#MC_path = "/data_CMS/cms/wind/CJLST_NTuples_randomizeda/"
MC_path = "/data_CMS/cms/wind/CJLST_NTuples/"

In [7]:
def WHhadr0j_cut(row):
    """Accept rows that pass cuts.WHhadr_cut and have nCleanedJetsPt30 == 0."""
    base_decision = cuts.WHhadr_cut(row)
    if not base_decision:
        # hand the falsy base result back unchanged, exactly as `and` would
        return base_decision
    return row["nCleanedJetsPt30"] == 0

def WHhadr01j_cut(row):
    """Accept rows that pass cuts.WHhadr_cut and have nCleanedJetsPt30 equal to 0 or 1."""
    base_decision = cuts.WHhadr_cut(row)
    if not base_decision:
        # hand the falsy base result back unchanged, exactly as `and` would
        return base_decision
    return row["nCleanedJetsPt30"] == 0 or row["nCleanedJetsPt30"] == 1

def WHhadr1j_cut(row):
    """Accept rows that pass cuts.WHhadr_cut and have nCleanedJetsPt30 == 1."""
    base_decision = cuts.WHhadr_cut(row)
    if not base_decision:
        # hand the falsy base result back unchanged, exactly as `and` would
        return base_decision
    return row["nCleanedJetsPt30"] == 1

def WHhadr2j_cut(row):
    """Accept rows that pass cuts.WHhadr_cut and have nCleanedJetsPt30 >= 2."""
    base_decision = cuts.WHhadr_cut(row)
    if not base_decision:
        # hand the falsy base result back unchanged, exactly as `and` would
        return base_decision
    return row["nCleanedJetsPt30"] >= 2

def ZHhadr0j_cut(row):
    """Accept rows that pass cuts.ZHhadr_cut and have nCleanedJetsPt30 == 0."""
    base_decision = cuts.ZHhadr_cut(row)
    if not base_decision:
        # hand the falsy base result back unchanged, exactly as `and` would
        return base_decision
    return row["nCleanedJetsPt30"] == 0

def ZHhadr01j_cut(row):
    """Accept rows that pass cuts.ZHhadr_cut and have nCleanedJetsPt30 equal to 0 or 1."""
    base_decision = cuts.ZHhadr_cut(row)
    if not base_decision:
        # hand the falsy base result back unchanged, exactly as `and` would
        return base_decision
    return row["nCleanedJetsPt30"] == 0 or row["nCleanedJetsPt30"] == 1

def ZHhadr1j_cut(row):
    """Accept rows that pass cuts.ZHhadr_cut and have nCleanedJetsPt30 == 1."""
    base_decision = cuts.ZHhadr_cut(row)
    if not base_decision:
        # hand the falsy base result back unchanged, exactly as `and` would
        return base_decision
    return row["nCleanedJetsPt30"] == 1

def ZHhadr2j_cut(row):
    """Accept rows that pass cuts.ZHhadr_cut and have nCleanedJetsPt30 >= 2."""
    base_decision = cuts.ZHhadr_cut(row)
    if not base_decision:
        # hand the falsy base result back unchanged, exactly as `and` would
        return base_decision
    return row["nCleanedJetsPt30"] >= 2

def mZZ0j_cut(row):
    """Accept rows that pass cuts.mZZ_cut and have nCleanedJetsPt30 == 0."""
    base_decision = cuts.mZZ_cut(row)
    if not base_decision:
        # hand the falsy base result back unchanged, exactly as `and` would
        return base_decision
    return row["nCleanedJetsPt30"] == 0

def mZZ01j_cut(row):
    """Accept rows that pass cuts.mZZ_cut and have nCleanedJetsPt30 equal to 0 or 1."""
    base_decision = cuts.mZZ_cut(row)
    if not base_decision:
        # hand the falsy base result back unchanged, exactly as `and` would
        return base_decision
    return row["nCleanedJetsPt30"] == 0 or row["nCleanedJetsPt30"] == 1

def mZZ1j_cut(row):
    """Accept rows that pass cuts.mZZ_cut and have nCleanedJetsPt30 == 1."""
    base_decision = cuts.mZZ_cut(row)
    if not base_decision:
        # hand the falsy base result back unchanged, exactly as `and` would
        return base_decision
    return row["nCleanedJetsPt30"] == 1

def mZZ2j_cut(row):
    """Accept rows that pass cuts.mZZ_cut and have nCleanedJetsPt30 >= 2."""
    base_decision = cuts.mZZ_cut(row)
    if not base_decision:
        # hand the falsy base result back unchanged, exactly as `and` would
        return base_decision
    return row["nCleanedJetsPt30"] >= 2

In [8]:
# Category name -> {input file path: row-level cut function}.
# Each entry is handed to FileCollection(...) by the functions below; a category may combine several
# files (e.g. the W+ and W- samples). The "0j"/"1j"/"01j"/"2j" suffixes select on nCleanedJetsPt30
# through the *_cut wrappers defined above; names without such a suffix use the plain cut from `cuts`.
# NOTE(review): the name `collections` would shadow the standard-library module of the same name if
# that module were ever imported in this notebook.
collections = {"VBF2j": {MC_path + "VBFH125/ZZ4lAnalysis.root": mZZ2j_cut},
            "VBF1j": {MC_path + "VBFH125/ZZ4lAnalysis.root": mZZ1j_cut},
            "VBF0j": {MC_path + "VBFH125/ZZ4lAnalysis.root": mZZ0j_cut},
            "VBF01j": {MC_path + "VBFH125/ZZ4lAnalysis.root": mZZ01j_cut},
            "VBF": {MC_path + "VBFH125/ZZ4lAnalysis.root": cuts.mZZ_cut},
            "ggH2j": {MC_path + "ggH125/ZZ4lAnalysis.root": mZZ2j_cut},
            "ggH1j": {MC_path + "ggH125/ZZ4lAnalysis.root": mZZ1j_cut},
            "ggH0j": {MC_path + "ggH125/ZZ4lAnalysis.root": mZZ0j_cut},
            "ggH01j": {MC_path + "ggH125/ZZ4lAnalysis.root": mZZ01j_cut},
            "ggH" : {MC_path + "ggH125/ZZ4lAnalysis.root": cuts.mZZ_cut},
            "WHh2j": {MC_path + "WplusH125/ZZ4lAnalysis.root": WHhadr2j_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": WHhadr2j_cut},
            "WHh1j": {MC_path + "WplusH125/ZZ4lAnalysis.root": WHhadr1j_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": WHhadr1j_cut},
            "WHh0j": {MC_path + "WplusH125/ZZ4lAnalysis.root": WHhadr0j_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": WHhadr0j_cut},
            "WHh": {MC_path + "WplusH125/ZZ4lAnalysis.root": cuts.WHhadr_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": cuts.WHhadr_cut},
            "WHh01j": {MC_path + "WplusH125/ZZ4lAnalysis.root": WHhadr01j_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": WHhadr01j_cut},
            "WHl": {MC_path + "WplusH125/ZZ4lAnalysis.root": cuts.WHlept_cut, MC_path + "WminusH125/ZZ4lAnalysis.root": cuts.WHlept_cut},
            "ZHh2j": {MC_path + "ZH125/ZZ4lAnalysis.root": ZHhadr2j_cut},
            "ZHh1j": {MC_path + "ZH125/ZZ4lAnalysis.root": ZHhadr1j_cut},
            "ZHh01j": {MC_path + "ZH125/ZZ4lAnalysis.root": ZHhadr01j_cut},
            "ZHh0j": {MC_path + "ZH125/ZZ4lAnalysis.root": ZHhadr0j_cut},
            "ZHh": {MC_path + "ZH125/ZZ4lAnalysis.root": cuts.ZHhadr_cut},
            "ZHl": {MC_path + "ZH125/ZZ4lAnalysis.root": cuts.ZHlept_cut},
            "ttHh": {MC_path + "ttH125/ZZ4lAnalysis.root": cuts.ttHhadr_cut},
            "ttHl": {MC_path + "ttH125/ZZ4lAnalysis.root": cuts.ttHlept_cut},
            "ZHMET": {MC_path + "ZH125/ZZ4lAnalysis.root": cuts.ZHMET_cut}
          }

In [9]:
# All the (H1, H0) category combinations for which neural networks are currently trained.
# Every name used here must be a key of `collections`; the former entry ("VBF0j", "ZHh0h") was a typo
# for "ZHh0j" and would have raised a KeyError on lookup.
discriminant_pairs = [
    ("VBF2j", "ggH2j"), ("VBF1j", "ggH1j"), ("VBF0j", "ggH0j"), ("WHh2j", "ggH2j"),
    ("WHh1j", "ggH1j"), ("WHh0j", "ggH0j"), ("ZHh2j", "ggH2j"), ("ZHh1j", "ggH1j"),
    ("ZHh0j", "ggH0j"), ("WHh2j", "ZHh2j"), ("WHh01j", "ZHh01j"), ("VBF2j", "WHh2j"),
    ("VBF1j", "WHh1j"), ("VBF0j", "WHh0j"), ("VBF2j", "ZHh2j"), ("VBF1j", "ZHh1j"),
    ("VBF0j", "ZHh0j"), ("WHl", "ggH"), ("WHl", "VBF"), ("WHl", "WHh"), ("WHl", "ZHh"),
    ("WHl", "ZHl"), ("WHl", "ZHMET"), ("WHl", "ttHh"), ("WHl", "ttHl"), ("ZHh", "ZHl"),
    ("ZHh", "ZHMET"), ("ZHh", "ttHh"), ("ZHh", "ttHl"), ("ZHl", "ggH"), ("ZHl", "VBF"),
    ("ZHl", "WHh"), ("ZHl", "ZHMET"), ("ZHl", "ttHh"), ("ZHl", "ttHl"), ("ZHMET", "ggH"),
    ("ZHMET", "VBF"), ("ZHMET", "WHh"), ("ZHMET", "ttHh"), ("ZHMET", "ttHl"), ("ttHh", "ggH"),
    ("ttHh", "VBF"), ("ttHh", "WHh"), ("ttHh", "ttHl"), ("ttHl", "ggH"), ("ttHl", "VBF"),
    ("ttHl", "WHh"),
]

In [10]:
def extract_order(df, col_basename, sorted_column, columns, order):
    """Pull the `order`-th ranked object of one list-valued collection out into scalar columns.

    For every row, the entries of the list branch `col_basename + sorted_column` are ranked in
    descending order and the position of the entry with rank `order` (0 = largest) is determined.
    For each name in `columns`, the element at that position of `col_basename + <name>` is extracted.

    Rows whose list has `order` or fewer entries yield 0 in every extracted column.

    Returns a DataFrame with one column per entry of `columns`, named
    `<col_basename><name>(<col_basename>Pt|<order>)`. Note that the "Pt" inside the parentheses is
    hard-coded and does not follow `sorted_column`.
    """
    def get_index(row, order, col_basename, sorted_column):
        # the parameter `sorted_column` (a name suffix) is rebound here to the row's list of values
        sorted_column = row[col_basename + sorted_column]
        if order >= len(sorted_column):
            # not enough objects in this row: flag with -1
            return -1
        else:
            # argsort is ascending; flipping it gives positions in descending order of value
            return np.flipud(np.argsort(sorted_column))[order]
    
    # NOTE(review): rows are indexed by column label below, so this presumably relies on pandas NOT
    # handing raw ndarrays to the callback despite raw = True - confirm with the pandas version in use
    index_column = pd.DataFrame(df.transform(lambda row: get_index(row, order, col_basename, sorted_column), axis = 1, raw = True))
    index_column.columns = ["index"]
    # put the per-row position next to the original data so that get_element can see both
    df_temp = pd.concat([index_column, df], axis = 1)
    
    def get_element(row, column_name):
        if row["index"] == -1:
            # missing object: pad with zero
            return 0
        else:
            return row[column_name][row["index"]]
        
    extracted_cols = pd.DataFrame()
    for column in columns:
        extracted_col = pd.DataFrame(df_temp.transform(lambda row: get_element(row, col_basename + column), axis = 1, raw = True))
        extracted_col.columns = [col_basename + column + "(" + col_basename + "Pt|" + str(order) + ")"]
        extracted_cols = pd.concat([extracted_cols, extracted_col], axis = 1)
        
    return extracted_cols

In [11]:
def prepare_data(df, col_basenames, sorted_column, columns, orders, pt_limits):
    """Unroll several list-valued collections into one frame of scalar columns.

    For every (collection basename, Pt limit) pair - the two lists are zipped, so surplus entries of
    the longer one are ignored - and every requested order, the columns produced by extract_order
    are collected. Rows whose extracted Pt lies below the collection's limit are set to 0.0 in all
    columns of that extraction. The pieces are joined side by side and returned.
    """
    # start from an empty frame so that an empty request still yields an (empty) DataFrame
    pieces = [pd.DataFrame()]

    for col_basename, pt_limit in zip(col_basenames, pt_limits):
        for order in orders:
            piece = extract_order(df, col_basename, sorted_column, columns, order)

            # name of the Pt column that extract_order generated for this collection / order
            pt_column = col_basename + "Pt(" + col_basename + "Pt|" + str(order) + ")"
            below_limit = piece[pt_column] < pt_limit
            piece[below_limit] = 0.0

            pieces.append(piece)

    return pd.concat(pieces, axis = 1)

In [12]:
def get_data(H1_coll, H0_coll, read_branches, input_branches, list_branches, pt_limits):
    """Load both collections and return them restricted to the final input columns.

    `read_branches` are fetched in full from each collection (fractions 0.0 to 1.0 of whatever range
    the collection was built with). The list branches named by `list_branches` are unrolled with
    prepare_data (sorted by Pt; Pt, Eta and Phi of the four leading objects) and appended. The
    returned frames contain `input_branches` followed by the unrolled columns, in that order.

    Returns the tuple (H1 frame, H0 frame).
    """
    # same order of operations as before: read H1, read H0, then unroll H1, unroll H0
    raw_frames = [coll.get_data(read_branches, 0.0, 1.0) for coll in (H1_coll, H0_coll)]
    unrolled_frames = [prepare_data(raw, list_branches, "Pt", ["Pt", "Eta", "Phi"], range(4), pt_limits)
                       for raw in raw_frames]

    # the unrolled column names are taken from the H1 side and applied to both frames
    complete_input_branches = np.concatenate([input_branches, unrolled_frames[0].columns])

    selected = []
    for raw, unrolled in zip(raw_frames, unrolled_frames):
        merged = pd.concat([raw, unrolled], axis = 1)
        selected.append(merged[complete_input_branches])

    return selected[0], selected[1]

In [13]:
def get_data_dmatrix(H1_coll, H0_coll, read_branches, input_branches, list_branches, pt_limits):
    """Build a class-weighted xgboost DMatrix from the H1 and H0 collections.

    The two frames returned by get_data are stacked (H1 first, then H0). H1 rows get label 1, H0
    rows label 0. Every row of a class carries the weight 1 + N_other / N_own, so that the smaller
    class is weighted up. Feature names are the column names of the H1 frame.

    Returns the xgb.DMatrix holding data, labels, feature names and weights.
    Raises ZeroDivisionError if either collection is empty.
    """
    H1_df, H0_df = get_data(H1_coll, H0_coll, read_branches, input_branches, list_branches, pt_limits)

    complete_input_branches = H1_df.columns
    print("number of input variables: " + str(len(complete_input_branches)))
    print("final list of inputs: " + str(complete_input_branches))

    # try with the same weights as used later in the neural network training, to balance out some (very)
    # unbalanced datasets
    H1_class_weight = 1.0 + float(len(H0_df)) / float(len(H1_df))
    H0_class_weight = 1.0 + float(len(H1_df)) / float(len(H0_df))

    print("using class weights: " + str(H1_class_weight) + " (H1), " + str(H0_class_weight) + " (H0)")

    H1_weights = np.full(len(H1_df), H1_class_weight)
    H0_weights = np.full(len(H0_df), H0_class_weight)

    # DataFrame.values replaces the deprecated (and in newer pandas removed) as_matrix()
    H1_data = H1_df.values
    H0_data = H0_df.values
    H1_target = np.ones(np.shape(H1_data)[0])
    H0_target = np.zeros(np.shape(H0_data)[0])

    target = np.concatenate([H1_target, H0_target])
    data = np.concatenate([H1_data, H0_data])
    weights = np.concatenate([H1_weights, H0_weights])

    # hand xgboost a plain list of names rather than a pandas Index
    dmatrix = xgb.DMatrix(data, label = target, feature_names = list(complete_input_branches), weight = weights)

    return dmatrix

In [14]:
def get_feature_correlation(source, corr_branches, mandatory_branches, optional_branches, list_branches, pt_limits):
    """Draw the correlation matrix of `corr_branches` for one category and return the figure.

    The first half (fractions 0.0 to 0.5) of the `source` collection is loaded. Inputs are all
    `mandatory_branches` plus every optional branch whose name contains the same jet tag
    ("0j", "1j" or "2j") as `source`. `corr_branches` must be a list of column names available after
    loading; it is used both to select the columns and to label the ticks.
    """
    coll = FileCollection(collections[source], 0.0, 0.5)

    input_branches = list(mandatory_branches)
    for optional_branch in optional_branches:
        # the three tags are tested independently, one after the other
        for jet_tag in ("0j", "1j", "2j"):
            if jet_tag in source and jet_tag in optional_branch:
                input_branches.append(optional_branch)

    # get_data expects two collections; the same one is passed twice and the second result dropped
    df, _ = get_data(coll, coll, allbranches, input_branches, list_branches, pt_limits)
    correlations = df[corr_branches].corr()

    fig = plt.figure(figsize=(10,15))
    ax = fig.add_subplot(111)
    cax = ax.matshow(correlations, vmin = -1.0, vmax = 1.0, cmap = "RdBu")
    fig.colorbar(cax)

    # one tick per matrix row / column
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_major_locator(ticker.MultipleLocator(1))

    # the leading empty label is for the tick that precedes the first matrix cell
    ax.set_yticklabels([''] + corr_branches)
    ax.set_xticklabels([''] + corr_branches, rotation = 'vertical')

    return fig

In [15]:
def get_interpolating_function(data, bins):
    """Turn histogram contents into a function of a scalar x.

    `bins` holds the bin edges (one more entry than `data`). The returned callable interpolates
    linearly between the bin centres and evaluates to 0 for any x that is not strictly between the
    first and the last bin centre.
    """
    bin_centers = [0.5 * (lower + upper) for lower, upper in zip(bins[:-1], bins[1:])]
    linear_interp = interpolate.interp1d(bin_centers, data, kind = "linear")

    def interpolated_function(x):
        # outside (or exactly on the border of) the range covered by bin centres: no density
        if not (x > bin_centers[0] and x < bin_centers[-1]):
            return 0
        return linear_interp(x)

    return interpolated_function

In [16]:
def get_binned_data(df, branch):
    """Histogram one column of `df` as a weighted, unit-area density.

    Entries are weighted with the "training_weight" column. The bin width is
    2 * IQR / n**0.33 (n = number of entries), but never smaller than 0.005. Bins start at the
    smallest value and extend to at least the largest one. Weights are scaled such that the sum of
    the bin contents times the bin width equals 1.

    Returns the (contents, bin_edges) tuple produced by np.histogram. For a column with a single
    distinct value the edge array can be too short to form a bin, so `contents` may be empty.
    """
    # .values replaces the deprecated (and in newer pandas removed) as_matrix()
    data = df[branch].values
    weights = df["training_weight"].values

    # set the bin width
    q75, q25 = np.percentile(data, [75, 25])
    bin_width = max(2 * (q75 - q25) / len(data)**0.33, 0.005)

    data_max = np.max(data)
    data_min = np.min(data)
    # the extra bin_width on the upper end makes sure the maximum falls inside the last bin
    bins = np.arange(data_min, data_max + bin_width, bin_width)

    # normalise to unit area
    weights = weights / (np.sum(weights) * bin_width)

    hist = np.histogram(data, bins = bins, weights = weights)
    return hist

In [17]:
def get_feature_importance_list_separation(disc_pair, mandatory_branches, optional_branches, list_branches, pt_limits):
    """Rank the input variables of one (H1, H0) pair by a histogram-based separation measure.

    For every available input the unit-area densities of H1 and H0 (see get_binned_data) are
    interpolated and the integral of (H1 + H0) * (H1 - H0)**2 over the common range is taken as the
    separation. If either histogram has no bins, the separation of that variable is set to 0.

    The separations are normalised to sum to 1 (left untouched if their sum is not positive).

    Returns (None, None, {branch: normalised separation}); the two leading None keep the return
    shape identical to get_feature_importance_list_BDT.

    NOTE(review): the integrand multiplies by (H1 + H0); the commonly quoted separation measure
    divides by it - confirm that the product is intended.
    """
    H1_name = disc_pair[0]
    H0_name = disc_pair[1]

    input_branches = [branch for branch in mandatory_branches]

    # optional branches are only offered when the category names carry a jet tag; for "2j"
    # categories everything that is not a "1j" branch is taken
    for optional_branch in optional_branches:
        if ("0j" in H1_name or "0j" in H0_name) and ("0j" in optional_branch):
            input_branches.append(optional_branch)
        elif ("1j" in H1_name or "1j" in H0_name) and ("1j" in optional_branch):
            input_branches.append(optional_branch)
        elif ("2j" in H1_name or "2j" in H0_name) and not ("1j" in optional_branch):
            input_branches.append(optional_branch)

    # needed to build the histograms
    input_branches.append("training_weight")

    H1_coll = FileCollection(collections[H1_name], 0.0, 0.5)
    H0_coll = FileCollection(collections[H0_name], 0.0, 0.5)

    H1_df, H0_df = get_data(H1_coll, H0_coll, allbranches, input_branches, list_branches, pt_limits)

    available_branches = H1_df.columns

    implist = {}
    for branch in available_branches:
        # the weight column is not an input variable
        if "training_weight" in branch:
            continue

        data_H1, bins_H1 = get_binned_data(H1_df, branch)
        data_H0, bins_H0 = get_binned_data(H0_df, branch)

        # both densities need at least one bin; previously only H1 was checked, so an empty H0
        # histogram crashed the interpolation
        if len(data_H1) > 0 and len(data_H0) > 0:
            H1_func = get_interpolating_function(data_H1, bins_H1)
            H0_func = get_interpolating_function(data_H0, bins_H0)

            # compute the separation in this branch
            all_edges = np.concatenate([bins_H0, bins_H1])
            global_min = np.min(all_edges)
            global_max = np.max(all_edges)

            # bind the two densities as defaults so the integrand does not depend on loop state
            separation_func = lambda x, h1 = H1_func, h0 = H0_func: (h1(x) + h0(x)) * (h1(x) - h0(x))**2

            sep = integrate.quad(separation_func, global_min, global_max)[0]
        else:
            sep = 0.0

        print("separation for " + branch + " = " + str(sep))
        implist[branch] = sep

    # normalize the importance list (skipped when nothing separates, to avoid dividing by zero)
    impsum = sum(implist.values())
    if impsum > 0:
        for key in list(implist):
            implist[key] /= impsum

    return None, None, implist

In [18]:
def get_feature_importance_list_BDT(disc_pair, mandatory_branches, optional_branches, list_branches, pt_limits):
    """Rank the input variables of one (H1, H0) pair by how often a boosted decision tree splits on them.

    A BDT is trained on the first half of both collections and validated on the second half, once
    for every max_depth from 1 to 7. The depth with the smallest validation RMSE wins and its split
    counts (Booster.get_fscore) are normalised to sum to 1.

    Returns (parameters of the best training, feature names of the training DMatrix,
    {variable: normalised split count}). Variables that were never split on are absent from the dict.

    Raises RuntimeError if no depth yields a validation loss below the initial threshold
    (e.g. because the loss is NaN).

    NOTE(review): predict() and get_fscore() are called on the booster as returned after early
    stopping; depending on the xgboost version this may include the rounds trained after the best
    iteration - confirm.
    """
    H1_name = disc_pair[0]
    H0_name = disc_pair[1]

    # first assemble the list of branches that can serve as input: it will *always* contain the mandatory branches,
    # and *can* contain some of the optional branches, if the name of the categories allows it
    input_branches = [branch for branch in mandatory_branches]

    for optional_branch in optional_branches:
        if ("0j" in H1_name or "0j" in H0_name) and ("0j" in optional_branch):
            input_branches.append(optional_branch)
        elif ("1j" in H1_name or "1j" in H0_name) and ("1j" in optional_branch):
            input_branches.append(optional_branch)
        elif ("2j" in H1_name or "2j" in H0_name) and not ("1j" in optional_branch):
            input_branches.append(optional_branch)

    # the fully inclusive categories (i.e. those with NO "xxj" in their name) match none of the branches
    # above and therefore can not use MELA, since there may be events with low number of jets contained

    # get the training data for the BDT ...
    H1_coll_train = FileCollection(collections[H1_name], 0.0, 0.5)
    H0_coll_train = FileCollection(collections[H0_name], 0.0, 0.5)

    dtrain = get_data_dmatrix(H1_coll_train, H0_coll_train, allbranches, input_branches, list_branches, pt_limits)

    # ... and the validation data as well
    H1_coll_val = FileCollection(collections[H1_name], 0.5, 1.0)
    H0_coll_val = FileCollection(collections[H0_name], 0.5, 1.0)
    dval = get_data_dmatrix(H1_coll_val, H0_coll_val, allbranches, input_branches, list_branches, pt_limits)

    evallist = [(dtrain, 'train'), (dval, 'eval')]

    # perform the training
    # try different tree depths and choose the one that gives the best RMSE (i.e. avoid too deep trees to start with)

    params = {'eta': 0.01, 'silent': 1, 'gamma': 0.5, 'objective': 'binary:logistic'}
    params['nthread'] = 4
    params['eval_metric'] = 'rmse'
    max_num_rounds = 2000

    best_loss = 1e6
    best_imp = None
    best_params = None
    for tree_depth in range(1,8):
        params['max_depth'] = tree_depth

        bst = xgb.train(params, dtrain, max_num_rounds, evals = evallist, early_stopping_rounds = 10, verbose_eval = False)

        pred = bst.predict(dval)
        cur_loss = np.sqrt(mean_squared_error(pred, dval.get_label()))
        cur_imp = bst.get_fscore()

        print("for max_depth = " + str(params['max_depth']) + ": loss = " + str(cur_loss))

        if cur_loss < best_loss:
            best_loss = cur_loss
            best_imp = copy.copy(cur_imp)
            # params is mutated on every iteration, so keep a snapshot of the winning setting
            best_params = copy.copy(params)

    if best_imp is None:
        # every comparison against best_loss failed; fail with a message instead of on None below
        raise RuntimeError("no tree depth gave a validation loss below " + str(best_loss) + " for " + str(disc_pair))

    # normalize the usage score w.r.t. the total score (i.e. sum of all individuals)
    score_sum = sum(best_imp.values())
    used_variables = {key: val / float(score_sum) for key, val in best_imp.items()}
    return best_params, dtrain.feature_names, used_variables

In [19]:
def get_histogram(df, branch, label):
    """Draw one column of `df` as a weighted, unit-area histogram on the current matplotlib axes.

    Binning and normalisation follow the same recipe as get_binned_data: bin width
    2 * IQR / n**0.33 (at least 0.005), entries weighted with "training_weight" and scaled so the
    histogram integrates to 1. The histogram is drawn half-transparent with the given legend label.

    Returns whatever plt.hist returns (bin contents, bin edges and the drawn patches), not a Figure.
    """
    # .values replaces the deprecated (and in newer pandas removed) as_matrix()
    data = df[branch].values
    weights = df["training_weight"].values

    # set the bin width
    q75, q25 = np.percentile(data, [75, 25])
    bin_width = max(2 * (q75 - q25) / len(data)**0.33, 0.005)

    data_max = np.max(data)
    data_min = np.min(data)
    # the extra bin_width on the upper end makes sure the maximum falls inside the last bin
    bins = np.arange(data_min, data_max + bin_width, bin_width)

    # normalise to unit area
    weights = weights / (np.sum(weights) * bin_width)

    fig = plt.hist(data, bins = bins, weights = weights, alpha = 0.5, label = label)
    return fig

In [20]:
def plot_branch(disc_pair, branch, start_fraction = 0.0, end_fraction = 1.0):
    """Overlay the unit-area distributions of `branch` for the two categories of `disc_pair`.

    Both categories are loaded between `start_fraction` and `end_fraction` of their collection with
    all branches in `allbranches` (plus the unrolled list branches), drawn into a new figure via
    get_histogram and shown with a legend. Nothing is returned.
    """
    H1_name = disc_pair[0]
    H0_name = disc_pair[1]

    # load the requested slice of both categories
    H1_coll = FileCollection(collections[H1_name], start_fraction, end_fraction)
    H0_coll = FileCollection(collections[H0_name], start_fraction, end_fraction)

    H1_df, H0_df = get_data(H1_coll, H0_coll, allbranches, allbranches, list_branches, pt_limits)

    plt.figure()
    # get_histogram draws onto the current figure; its return value is not needed here
    get_histogram(H1_df, branch, H1_name)
    get_histogram(H0_df, branch, H0_name)

    plt.legend(loc = 'upper right')
    plt.show()

In [21]:
def plot_variables(discs):
    """Plot the BDT-based variable importances for several discriminants as one matrix.

    For every (H1, H0) pair in `discs` the normalised importances are obtained from
    get_feature_importance_list_BDT (candidate_branches mandatory, MELA_branches optional) and the
    variables with strictly positive importance are kept. The result is drawn as a matrix with one
    column per discriminant and one row per variable; a variable unused by a discriminant shows 0.

    Returns (figure, ranked), where `ranked` is the list of (variable, importance) tuples of the
    LAST pair in `discs`, sorted by decreasing importance. `discs` must not be empty.

    Side effects: prints the importance table and closes all open matplotlib figures.
    """
    plotframe = pd.DataFrame()

    for disc in discs:
        _, _, implist = get_feature_importance_list_BDT(disc, candidate_branches, MELA_branches, list_branches, pt_limits)

        # keep only the variables that were actually used, i.e. have a strictly positive importance
        cutimplist = {key: val for key, val in implist.items() if val > 0.00}
        curframe = pd.DataFrame(cutimplist, index = [len(plotframe)])

        plotframe = pd.concat([plotframe, curframe])

    # a variable that one discriminant did not use has no entry in its row
    plotframe = plotframe.fillna(0.0)

    print(plotframe)
    print("number of pre-selected input variables = " + str(len(plotframe.columns)))

    # start the plotting
    parameters = plotframe.columns
    # .values replaces the deprecated (and in newer pandas removed) as_matrix()
    plotdata = np.transpose(plotframe.values)

    plt.close('all')
    fig = plt.figure(figsize=(10,15))
    ax = fig.add_subplot(111)
    cax = ax.matshow(plotdata, cmap = 'Blues')

    # make axis labels: strip the jet tag from both category names and append it once at the end
    disclabels = []
    for disc in discs:
        for jet_tag in ("0j", "01j", "1j", "2j"):
            if jet_tag in disc[0] or jet_tag in disc[1]:
                disclabels.append('D_' + re.sub(jet_tag, '', disc[0]) + "_" + re.sub(jet_tag, '', disc[1]) + "_" + jet_tag)
                break
        else:
            # inclusive categories carry no jet tag, so the label gets none either
            # (this branch used to append a misleading "_2j")
            disclabels.append('D_' + disc[0] + "_" + disc[1])

    # the leading empty label is for the tick that precedes the first matrix cell
    disclabels = np.concatenate([[''], np.array(disclabels)])
    parameters = np.concatenate([[''], np.array(parameters)])

    ax.set_xticklabels(disclabels, rotation = 'vertical')
    ax.set_yticklabels(parameters)

    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    # sort the used variables (of the last discriminant) according to their importance
    sorted_implist = sorted(cutimplist.items(), key = lambda x: x[1], reverse = True)

    return fig, sorted_implist

In [22]:
def append_variables(confhandler, impdict, threshold_fscore):
    """Write the selected inputs of one discriminant into a new section of `confhandler`.

    `impdict` maps variable names to one-element lists holding their importance and additionally
    carries the discriminant name under the key "discriminant", which also becomes the section name.
    Variables whose importance exceeds `threshold_fscore` are kept; those with "phi" or "Phi" in
    their name are stored under "periodic_columns", all others under "nonperiodic_columns".
    """
    section_name = impdict["discriminant"]
    confhandler.new_section(section_name)
    cur_sec = confhandler.get_section(section_name)

    periodic_inputs = []
    nonperiodic_inputs = []
    for key, val in impdict.items():
        # the discriminant name is metadata, not a variable: skip it *before* touching val[0].
        # (compare by value; the former `is not` tested object identity and only worked by accident)
        if key == "discriminant":
            continue

        if val[0] > threshold_fscore:
            if "phi" in key or "Phi" in key:
                periodic_inputs.append(key)
            else:
                nonperiodic_inputs.append(key)

    cur_sec["nonperiodic_columns"] = ConfigFileUtils.serialize_list(nonperiodic_inputs, lambda x: x)
    cur_sec["periodic_columns"] = ConfigFileUtils.serialize_list(periodic_inputs, lambda x: x)

In [23]:
def convert_varname(raw):
    """Return `raw` with every round bracket replaced by the matching square bracket."""
    return raw.replace('(', '[').replace(')', ']')

In [24]:
# Running table of importances: the cells below add one row per discriminant.
df = pd.DataFrame()

In [25]:
# Location of the generated input configuration.
# NOTE(review): out_path is not used in the cells visible here - presumably confhandler is saved
# to it further down; confirm.
out_dir = "/data_CMS/cms/wind/InputConfigurations/"
out_path = os.path.join(out_dir, "exclusive_99.conf")
# append_variables keeps only variables whose normalised importance is strictly above this value
threshold_fscore = 0.01

In [26]:
# Collects one section per discriminant (filled by append_variables in the cells below).
confhandler = ConfigFileHandler()

In [27]:
%%capture
fig, implist = plot_variables([("VBF2j", "ggH2j")])

In [28]:
# Re-key the ranked (name, importance) pairs with bracketed names, one single-element list per variable.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_ggH_2j_ML"
# pd.concat instead of DataFrame.append (removed in pandas 2.0). Re-running this cell adds the row again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [29]:
append_variables(confhandler, impdict, threshold_fscore)

In [30]:
implist

[('JetPt(JetPt|0)', 0.06575969358569403),
 ('D_VBF2j_ggH_ME', 0.06441664386798318),
 ('JetPt(JetPt|1)', 0.05613450394209964),
 ('ZZPt', 0.04817569080010943),
 ('PFMET', 0.04673315591812371),
 ('Z1Pt', 0.04290297709354093),
 ('JetEta(JetPt|1)', 0.0422314522346855),
 ('ZZEta', 0.041758897704379835),
 ('ZZMass', 0.041709155122242395),
 ('JetEta(JetPt|0)', 0.0415847986668988),
 ('Z2Pt', 0.04019200636705051),
 ('JetEta(JetPt|2)', 0.03937125376178277),
 ('D_WHh_ZHh_ME', 0.0391474121421643),
 ('JetPhi(JetPt|1)', 0.037630263386972417),
 ('ZZPhi', 0.0367100256174298),
 ('ZZMassErr', 0.036187728504986694),
 ('JetPhi(JetPt|0)', 0.035391847190787674),
 ('Z1Mass', 0.03491929266048201),
 ('Z2Mass', 0.03389956972666451),
 ('D_WHh_ggH_ME', 0.03074091576093715),
 ('D_ZHh_ggH_ME', 0.028975054095058076),
 ('JetPt(JetPt|2)', 0.02755739050414107),
 ('D_VBF2j_WHh_ME', 0.027283806302385156),
 ('D_VBF2j_ZHh_ME', 0.019349864451463677),
 ('JetPhi(JetPt|2)', 0.01629069565001119),
 ('JetPt(JetPt|3)', 0.0063173079

In [31]:
%%capture
fig, implist = plot_variables([("VBF1j", "ggH1j")])

In [32]:
# Re-key the ranked (name, importance) pairs with bracketed names, one single-element list per variable.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_ggH_1j_ML"
# pd.concat instead of DataFrame.append (removed in pandas 2.0). Re-running this cell adds the row again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [33]:
append_variables(confhandler, impdict, threshold_fscore)

In [34]:
implist

[('JetEta(JetPt|0)', 0.21606407057048344),
 ('ZZPt', 0.09453891242528002),
 ('D_VBF1j_ggH_ME', 0.08919969821832743),
 ('JetPt(JetPt|0)', 0.08542742731124137),
 ('ZZEta', 0.07886947942661482),
 ('PFMET', 0.07260170622714875),
 ('Z1Pt', 0.05414659625094307),
 ('ZZMassErr', 0.053334107132493765),
 ('JetPhi(JetPt|0)', 0.044977076199872325),
 ('Z1Mass', 0.04370030758516627),
 ('Z2Mass', 0.041030700481689976),
 ('Z2Pt', 0.039870001741048114),
 ('ZZPhi', 0.03621380070802623),
 ('ZZMass', 0.035691486274737394),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.007196332191979572),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.002727642040508386),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0021472926701874527),
 ('Z2Flav', 0.001450873425802333),
 ('Z1Flav', 0.0004642794962567466),
 ('ExtraLepEta(ExtraLepPt|0)', 0.0002901746851604666),
 ('nExtraLep', 5.803493703209332e-05)]

In [35]:
%%capture
fig, implist = plot_variables([("VBF0j", "ggH0j")])

In [36]:
# Re-key the ranked (name, importance) pairs with bracketed names, one single-element list per variable.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_ggH_0j_ML"
# pd.concat instead of DataFrame.append (removed in pandas 2.0). Re-running this cell adds the row again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [37]:
append_variables(confhandler, impdict, threshold_fscore)

In [38]:
implist

[('ZZPt', 0.1732883612948443),
 ('PFMET', 0.11778877743221342),
 ('ZZEta', 0.10263261640895352),
 ('ZZPhi', 0.09338998642078059),
 ('ZZMass', 0.09150641727627142),
 ('Z2Pt', 0.08721363178413422),
 ('Z1Mass', 0.08165053221779316),
 ('ZZMassErr', 0.08125629681545403),
 ('Z1Pt', 0.0800297866748434),
 ('Z2Mass', 0.07783958999518156),
 ('Z2Flav', 0.005475491699154584),
 ('Z1Flav', 0.0038547461562048273),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0027596478163739104),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.0008760786718647334),
 ('nExtraLep', 0.0004380393359323667)]

In [39]:
%%capture
fig, implist = plot_variables([("WHh2j", "ggH2j")])

In [40]:
# Re-key the ranked (name, importance) pairs with bracketed names, one single-element list per variable.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHh_ggH_2j_ML"
# pd.concat instead of DataFrame.append (removed in pandas 2.0). Re-running this cell adds the row again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [41]:
append_variables(confhandler, impdict, threshold_fscore)

In [42]:
implist

[('JetPt(JetPt|0)', 0.06315862612038817),
 ('D_WHh_ggH_ME', 0.055145100400676286),
 ('ZZEta', 0.05160154711999444),
 ('ZZPt', 0.05081409083539848),
 ('D_VBF2j_ggH_ME', 0.05014243694559604),
 ('JetPt(JetPt|1)', 0.049378141139958774),
 ('JetEta(JetPt|0)', 0.048011672881395184),
 ('Z1Pt', 0.04551034115385506),
 ('PFMET', 0.044120712416332766),
 ('JetEta(JetPt|2)', 0.0399981471616833),
 ('Z2Pt', 0.03925701183500475),
 ('JetPhi(JetPt|0)', 0.03823795076082173),
 ('Z2Mass', 0.0380989878870695),
 ('ZZMass', 0.03789054357644116),
 ('ZZPhi', 0.037496815434143176),
 ('ZZMassErr', 0.032239386710517175),
 ('D_WHh_ZHh_ME', 0.03221622623155847),
 ('JetPhi(JetPt|1)', 0.030548671746531718),
 ('Z1Mass', 0.030479190309655603),
 ('JetEta(JetPt|1)', 0.030247585520068555),
 ('D_ZHh_ggH_ME', 0.027028278944808578),
 ('JetPt(JetPt|2)', 0.026773513676262826),
 ('JetPhi(JetPt|2)', 0.022859392732241704),
 ('D_VBF2j_WHh_ME', 0.02079811010491697),
 ('D_VBF2j_ZHh_ME', 0.020381221483660283),
 ('JetEta(JetPt|3)', 0.00

In [43]:
%%capture
fig, implist = plot_variables([("WHh1j", "ggH1j")])

In [44]:
# Re-key the ranked (name, importance) pairs with bracketed names, one single-element list per variable.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHh_ggH_1j_ML"
# pd.concat instead of DataFrame.append (removed in pandas 2.0). Re-running this cell adds the row again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [45]:
append_variables(confhandler, impdict, threshold_fscore)

In [46]:
implist

[('JetEta(JetPt|0)', 0.13239822100581594),
 ('ZZEta', 0.11549207435283385),
 ('JetPt(JetPt|0)', 0.09761660394571788),
 ('D_VBF1j_ggH_ME', 0.08341886189987456),
 ('ZZPt', 0.07212909111643288),
 ('PFMET', 0.06674079142433573),
 ('ZZMassErr', 0.06243585357509408),
 ('Z2Pt', 0.05311323982210058),
 ('JetPhi(JetPt|0)', 0.052229444634507925),
 ('ZZMass', 0.051944349412703844),
 ('Z1Mass', 0.05000570190443608),
 ('ZZPhi', 0.049920173337894856),
 ('Z2Mass', 0.049720606682632),
 ('Z1Pt', 0.04709773064203444),
 ('Z2Flav', 0.004475994982324096),
 ('Z1Flav', 0.004133880716159197),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0038202759721747063),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0023947998631542937),
 ('ExtraLepEta(ExtraLepPt|0)', 0.0007412475766906146),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.00017105713308244953)]

In [47]:
%%capture
fig, implist = plot_variables([("WHh0j", "ggH0j")])

In [48]:
# Re-key the ranked (name, importance) pairs with bracketed names, one single-element list per variable.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHh_ggH_0j_ML"
# pd.concat instead of DataFrame.append (removed in pandas 2.0). Re-running this cell adds the row again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [49]:
append_variables(confhandler, impdict, threshold_fscore)

In [50]:
implist

[('ZZPt', 0.15967845659163987),
 ('ZZEta', 0.11935691318327975),
 ('PFMET', 0.11022508038585209),
 ('ZZMassErr', 0.102508038585209),
 ('ZZPhi', 0.08758842443729904),
 ('Z1Pt', 0.08662379421221865),
 ('Z1Mass', 0.08604501607717042),
 ('Z2Mass', 0.08430868167202572),
 ('Z2Pt', 0.07858520900321543),
 ('ZZMass', 0.0777491961414791),
 ('Z2Flav', 0.0045659163987138264),
 ('Z1Flav', 0.0019292604501607716),
 ('ExtraLepEta(ExtraLepPt|0)', 0.0008360128617363344)]

In [51]:
%%capture
fig, implist = plot_variables([("ZHh2j", "ggH2j")])

In [52]:
# Re-key the ranked (name, importance) pairs with bracketed names, one single-element list per variable.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHh_ggH_2j_ML"
# pd.concat instead of DataFrame.append (removed in pandas 2.0). Re-running this cell adds the row again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [53]:
append_variables(confhandler, impdict, threshold_fscore)

In [54]:
implist

[('JetPt(JetPt|1)', 0.059286107245720206),
 ('JetPt(JetPt|0)', 0.057020808388078055),
 ('Z1Pt', 0.05384938998737905),
 ('ZZEta', 0.05058088734992395),
 ('D_VBF2j_ggH_ME', 0.04708585482670464),
 ('ZZPt', 0.045888482573379505),
 ('D_WHh_ggH_ME', 0.0418756674541277),
 ('D_ZHh_ggH_ME', 0.0418756674541277),
 ('JetEta(JetPt|0)', 0.03935147729846931),
 ('JetEta(JetPt|1)', 0.0381864664573962),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.037959936571631984),
 ('Z1Mass', 0.03663311866929873),
 ('JetEta(JetPt|2)', 0.03647131160803858),
 ('PFMET', 0.036374227371282486),
 ('Z2Mass', 0.03595352901200608),
 ('ZZPhi', 0.03300864049707129),
 ('Z2Pt', 0.032943917672567234),
 ('ZZMassErr', 0.03245849648878677),
 ('ZZMass', 0.0321672437785185),
 ('JetPhi(JetPt|0)', 0.030872787288437266),
 ('D_WHh_ZHh_ME', 0.02857512701854309),
 ('JetPhi(JetPt|1)', 0.02854276560629106),
 ('JetPt(JetPt|2)', 0.025953852626128603),
 ('JetPhi(JetPt|2)', 0.02381799941749458),
 ('D_VBF2j_WHh_ME', 0.022750072813177567),
 ('D_VBF2j_ZHh_

In [55]:
%%capture
fig, implist = plot_variables([("ZHh1j", "ggH1j")])

In [56]:
# Re-key the ranked (name, importance) pairs with bracketed names, one single-element list per variable.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHh_ggH_1j_ML"
# pd.concat instead of DataFrame.append (removed in pandas 2.0). Re-running this cell adds the row again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [57]:
append_variables(confhandler, impdict, threshold_fscore)

In [58]:
implist

[('JetEta(JetPt|0)', 0.12320038910505837),
 ('JetPt(JetPt|0)', 0.10802529182879378),
 ('ZZEta', 0.10423151750972763),
 ('D_VBF1j_ggH_ME', 0.09528210116731517),
 ('ZZPhi', 0.07446498054474708),
 ('ZZPt', 0.061867704280155644),
 ('PFMET', 0.061284046692607),
 ('Z2Mass', 0.059484435797665366),
 ('Z1Mass', 0.058122568093385216),
 ('ZZMass', 0.05413424124513619),
 ('Z1Pt', 0.047714007782101164),
 ('ZZMassErr', 0.04547665369649805),
 ('Z2Pt', 0.04430933852140078),
 ('JetPhi(JetPt|0)', 0.04085603112840467),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.013715953307392996),
 ('Z2Flav', 0.003404669260700389),
 ('nExtraLep', 0.0020914396887159532),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0009727626459143969),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.0006809338521400778),
 ('Z1Flav', 0.0006809338521400778)]

In [59]:
%%capture
fig, implist = plot_variables([("ZHh0j", "ggH0j")])

In [60]:
# Re-key the ranked (name, importance) pairs with bracketed names, one single-element list per variable.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHh_ggH_0j_ML"
# pd.concat instead of DataFrame.append (removed in pandas 2.0). Re-running this cell adds the row again.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [61]:
append_variables(confhandler, impdict, threshold_fscore)

In [62]:
implist

[('ZZPt', 0.170250606305578),
 ('ZZEta', 0.12627324171382376),
 ('Z1Pt', 0.10299110751818917),
 ('Z1Mass', 0.09409862570735651),
 ('Z2Mass', 0.09264349232012935),
 ('ZZMassErr', 0.0889248181083266),
 ('PFMET', 0.08714632174616006),
 ('ZZPhi', 0.0763136620856912),
 ('Z2Pt', 0.07518189167340339),
 ('ZZMass', 0.0719482619240097),
 ('ExtraLepPt(ExtraLepPt|0)', 0.006790622473726758),
 ('Z2Flav', 0.00582053354890865),
 ('Z1Flav', 0.0016168148746968471)]

In [63]:
%%capture
# Run plot_variables for the WHh2j vs. ZHh2j pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("WHh2j", "ZHh2j")])

In [64]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHh_ZHh_2j_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [65]:
# Pass the D_WHh_ZHh_2j_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [66]:
# Display the (variable, score) pairs obtained for WHh2j vs. ZHh2j.
implist

[('D_WHh_ZHh_ME', 0.08077594810379242),
 ('PFMET', 0.06081586826347305),
 ('JetPhi(JetPt|1)', 0.053330838323353294),
 ('ZZMass', 0.04590818363273453),
 ('ZZMassErr', 0.04572105788423154),
 ('JetPt(JetPt|1)', 0.04123003992015968),
 ('JetEta(JetPt|0)', 0.0406686626746507),
 ('JetPt(JetPt|0)', 0.03904690618762475),
 ('JetPhi(JetPt|0)', 0.037050898203592815),
 ('JetPhi(JetPt|2)', 0.036988522954091815),
 ('Z2Mass', 0.036926147704590816),
 ('Z1Pt', 0.036926147704590816),
 ('JetEta(JetPt|1)', 0.03617764471057884),
 ('ZZPhi', 0.03493013972055888),
 ('ZZEta', 0.03305888223552894),
 ('D_ZHh_ggH_ME', 0.032123253493013974),
 ('Z1Mass', 0.03181137724550898),
 ('D_WHh_ggH_ME', 0.031374750499002),
 ('Z2Pt', 0.030501497005988025),
 ('D_VBF2j_ggH_ME', 0.02944111776447106),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.02900449101796407),
 ('ZZPt', 0.027382734530938122),
 ('D_VBF2j_WHh_ME', 0.025137225548902197),
 ('D_VBF2j_ZHh_ME', 0.02370259481037924),
 ('JetPt(JetPt|2)', 0.022392714570858282),
 ('JetEta(JetP

In [67]:
%%capture
# Run plot_variables for the WHh1j vs. ZHh1j pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("WHh1j", "ZHh1j")])

In [68]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHh_ZHh_1j_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [69]:
# Pass the D_WHh_ZHh_1j_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [70]:
# Display the (variable, score) pairs obtained for WHh1j vs. ZHh1j.
implist

[('ZZMass', 0.11130284728213978),
 ('JetPt(JetPt|0)', 0.09174575783721599),
 ('PFMET', 0.08484325568018407),
 ('ZZMassErr', 0.07204486626402071),
 ('Z2Pt', 0.07161345987920621),
 ('D_VBF1j_ggH_ME', 0.07017543859649122),
 ('JetEta(JetPt|0)', 0.06888121944204774),
 ('ZZEta', 0.06801840667241875),
 ('ZZPt', 0.0638481449525453),
 ('Z2Mass', 0.058958872591314354),
 ('ZZPhi', 0.0547886108714409),
 ('Z1Mass', 0.05162496404946793),
 ('Z1Pt', 0.04946793212539546),
 ('JetPhi(JetPt|0)', 0.048461317227494964),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.028041415012942193),
 ('Z1Flav', 0.004170261719873454),
 ('Z2Flav', 0.0011504170261719873),
 ('ExtraLepEta(ExtraLepPt|0)', 0.0008628127696289905)]

In [71]:
%%capture
# Run plot_variables for the WHh0j vs. ZHh0j pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("WHh0j", "ZHh0j")])

In [72]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHh_ZHh_0j_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [73]:
# Pass the D_WHh_ZHh_0j_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [74]:
# Display the (variable, score) pairs obtained for WHh0j vs. ZHh0j.
implist

[('Z1Pt', 0.18808777429467086),
 ('Z1Mass', 0.13166144200626959),
 ('ZZPhi', 0.11285266457680251),
 ('ZZPt', 0.10658307210031348),
 ('ZZEta', 0.09717868338557993),
 ('PFMET', 0.09090909090909091),
 ('Z2Mass', 0.07836990595611286),
 ('ZZMass', 0.07210031347962383),
 ('ZZMassErr', 0.06269592476489028),
 ('Z2Pt', 0.05642633228840126),
 ('Z1Flav', 0.003134796238244514)]

In [75]:
%%capture
# Run plot_variables for the VBF2j vs. WHh2j pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("VBF2j", "WHh2j")])

In [76]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_VBF_WHh_2j_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [77]:
# Pass the D_VBF_WHh_2j_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [78]:
# Display the (variable, score) pairs obtained for VBF2j vs. WHh2j.
implist

[('JetEta(JetPt|2)', 0.09120774617446183),
 ('D_VBF2j_ggH_ME', 0.09055934987464338),
 ('D_WHh_ggH_ME', 0.05891761044350307),
 ('JetEta(JetPt|0)', 0.05718855364398721),
 ('JetPt(JetPt|2)', 0.05329817584507651),
 ('JetPt(JetPt|0)', 0.04569032592720671),
 ('JetEta(JetPt|1)', 0.04530128814731564),
 ('D_WHh_ZHh_ME', 0.041281231088441256),
 ('ZZEta', 0.040287023428719634),
 ('ZZPt', 0.03635341920982104),
 ('PFMET', 0.033889513270510936),
 ('JetPt(JetPt|1)', 0.029437192011757585),
 ('Z2Mass', 0.029221059911818104),
 ('ZZPhi', 0.027621682372265928),
 ('ZZMass', 0.027578455952278032),
 ('Z2Pt', 0.02658424829255641),
 ('Z1Mass', 0.02619521051266534),
 ('D_VBF2j_ZHh_ME', 0.02559004063283479),
 ('D_VBF2j_WHh_ME', 0.02481196507305265),
 ('JetPhi(JetPt|0)', 0.023731304573355233),
 ('JetPhi(JetPt|1)', 0.023731304573355233),
 ('Z1Pt', 0.023385493213452062),
 ('D_ZHh_ggH_ME', 0.02126739863404513),
 ('ZZMassErr', 0.02100804011411775),
 ('JetPhi(JetPt|2)', 0.017895737874989194),
 ('JetPt(JetPt|3)', 0.017

In [79]:
%%capture
# Run plot_variables for the VBF1j vs. WHh1j pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("VBF1j", "WHh1j")])

In [80]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_VBF_WHh_1j_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [81]:
# Pass the D_VBF_WHh_1j_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [82]:
# Display the (variable, score) pairs obtained for VBF1j vs. WHh1j.
implist

[('JetEta(JetPt|0)', 0.16489028213166143),
 ('ZZEta', 0.13941065830721003),
 ('JetPt(JetPt|0)', 0.11147335423197492),
 ('PFMET', 0.06507836990595611),
 ('D_VBF1j_ggH_ME', 0.0644012539184953),
 ('ZZPt', 0.06036363636363636),
 ('JetPhi(JetPt|0)', 0.05399373040752351),
 ('Z1Pt', 0.05078369905956113),
 ('Z2Mass', 0.0500564263322884),
 ('Z2Pt', 0.049579937304075235),
 ('ZZMassErr', 0.04922884012539185),
 ('ZZPhi', 0.04714733542319749),
 ('ZZMass', 0.045141065830721),
 ('Z1Mass', 0.0404012539184953),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.002909090909090909),
 ('Z2Flav', 0.002608150470219436),
 ('Z1Flav', 0.0022570532915360503),
 ('ExtraLepPt(ExtraLepPt|0)', 0.00027586206896551725)]

In [83]:
%%capture
# Run plot_variables for the VBF0j vs. WHh0j pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("VBF0j", "WHh0j")])

In [84]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_VBF_WHh_0j_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [85]:
# Pass the D_VBF_WHh_0j_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [86]:
# Display the (variable, score) pairs obtained for VBF0j vs. WHh0j.
implist

[('ZZEta', 0.12379713914174252),
 ('PFMET', 0.11469440832249675),
 ('ZZMassErr', 0.1081924577373212),
 ('Z2Pt', 0.10143042912873862),
 ('ZZMass', 0.10091027308192457),
 ('Z1Pt', 0.10013003901170352),
 ('Z1Mass', 0.08816644993498049),
 ('ZZPhi', 0.0858257477243173),
 ('Z2Mass', 0.0858257477243173),
 ('ZZPt', 0.07906371911573472),
 ('Z1Flav', 0.007542262678803641),
 ('Z2Flav', 0.0033810143042912874),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0010403120936280884)]

In [87]:
%%capture
# Run plot_variables for the VBF2j vs. ZHh2j pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("VBF2j", "ZHh2j")])

In [88]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_VBF_ZHh_2j_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [89]:
# Pass the D_VBF_ZHh_2j_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [90]:
# Display the (variable, score) pairs obtained for VBF2j vs. ZHh2j.
implist

[('JetEta(JetPt|2)', 0.07289013065987344),
 ('D_VBF2j_ggH_ME', 0.06954830580436629),
 ('JetEta(JetPt|0)', 0.05311310159695401),
 ('ZZEta', 0.05108609307803983),
 ('JetEta(JetPt|1)', 0.04495028350727258),
 ('D_VBF2j_WHh_ME', 0.04295066699537075),
 ('PFMET', 0.04062234639932068),
 ('JetPt(JetPt|2)', 0.04015668228011066),
 ('D_ZHh_ggH_ME', 0.03810228175418413),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.03640397731941819),
 ('JetPt(JetPt|0)', 0.035034376968800504),
 ('JetPhi(JetPt|1)', 0.034815240912701675),
 ('Z2Mass', 0.033966088695318704),
 ('Z1Pt', 0.03385652066726929),
 ('D_WHh_ggH_ME', 0.03240474429561454),
 ('ZZMassErr', 0.03202125619744159),
 ('JetPt(JetPt|1)', 0.03133645602213274),
 ('ZZMass', 0.030706439860848603),
 ('D_WHh_ZHh_ME', 0.029473799545292684),
 ('JetPhi(JetPt|0)', 0.029282055496206206),
 ('Z1Mass', 0.02720026296326732),
 ('ZZPt', 0.027035910921193194),
 ('ZZPhi', 0.026104582682773166),
 ('JetPhi(JetPt|2)', 0.024351494233982526),
 ('D_VBF2j_ZHh_ME', 0.023118853918426604),


In [91]:
%%capture
# Run plot_variables for the VBF1j vs. ZHh1j pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("VBF1j", "ZHh1j")])

In [92]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_VBF_ZHh_1j_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [93]:
# Pass the D_VBF_ZHh_1j_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [94]:
# Display the (variable, score) pairs obtained for VBF1j vs. ZHh1j.
implist

[('JetEta(JetPt|0)', 0.16268454461165394),
 ('ZZEta', 0.14959703302189573),
 ('JetPt(JetPt|0)', 0.09799586334783539),
 ('PFMET', 0.07652806504528921),
 ('D_VBF1j_ggH_ME', 0.06162185293488339),
 ('Z2Mass', 0.05666500249625562),
 ('ZZPt', 0.05249268953712289),
 ('Z1Mass', 0.05152984808501533),
 ('ZZMassErr', 0.05070964981099779),
 ('Z2Pt', 0.04867698452321518),
 ('ZZMass', 0.045503173810712505),
 ('ZZPhi', 0.04154482561871479),
 ('JetPhi(JetPt|0)', 0.0400114114542472),
 ('Z1Pt', 0.036302688823907),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.019007203480493545),
 ('Z2Flav', 0.006204978246915341),
 ('Z1Flav', 0.0013907709863775766),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.0008201982740175452),
 ('nExtraLep', 0.0004992511233150275),
 ('ExtraLepPt(ExtraLepPt|0)', 0.00021396476713501176)]

In [95]:
%%capture
# Run plot_variables for the VBF0j vs. ZHh0j pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("VBF0j", "ZHh0j")])

In [96]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_VBF_ZHh_0j_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [97]:
# Pass the D_VBF_ZHh_0j_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [98]:
# Display the (variable, score) pairs obtained for VBF0j vs. ZHh0j.
implist

[('PFMET', 0.14429530201342283),
 ('Z1Mass', 0.1319910514541387),
 ('ZZPhi', 0.12639821029082773),
 ('ZZMass', 0.11073825503355705),
 ('Z2Pt', 0.1029082774049217),
 ('ZZEta', 0.08501118568232663),
 ('Z1Pt', 0.07606263982102908),
 ('ZZPt', 0.06599552572706935),
 ('ZZMassErr', 0.06487695749440715),
 ('Z2Mass', 0.06263982102908278),
 ('Z2Flav', 0.013422818791946308),
 ('ExtraLepPt(ExtraLepPt|0)', 0.011185682326621925),
 ('ExtraLepEta(ExtraLepPt|0)', 0.0044742729306487695)]

In [99]:
%%capture
# Run plot_variables for the WHl vs. ggH pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("WHl", "ggH")])

In [100]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHl_ggH_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [101]:
# Pass the D_WHl_ggH_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [102]:
# Display the (variable, score) pairs obtained for WHl vs. ggH.
implist

[('PFMET', 0.1500443626565016),
 ('ZZPt', 0.12648286287009958),
 ('JetPt(JetPt|0)', 0.0884624231868818),
 ('ZZEta', 0.06582103775754987),
 ('Z1Pt', 0.06145049456146692),
 ('ZZPhi', 0.05783575958726299),
 ('JetEta(JetPt|0)', 0.05425388584009727),
 ('ZZMassErr', 0.05313660412079787),
 ('Z1Mass', 0.04804311392987414),
 ('ZZMass', 0.04663008116723078),
 ('Z2Mass', 0.043902599323058726),
 ('Z2Pt', 0.040879366435542706),
 ('JetPhi(JetPt|0)', 0.03874338667805856),
 ('ExtraLepPt(ExtraLepPt|0)', 0.035391541520160365),
 ('JetEta(JetPt|1)', 0.015313331799809406),
 ('ExtraLepEta(ExtraLepPt|0)', 0.013243074496401696),
 ('JetPt(JetPt|1)', 0.011205678420032204),
 ('JetPhi(JetPt|1)', 0.010219841608885677),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.006079327002070257),
 ('JetEta(JetPt|3)', 0.005947882093917387),
 ('nCleanedJetsPt30', 0.005257796326114817),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.004797739147579771),
 ('JetPhi(JetPt|2)', 0.003910486017547895),
 ('Z2Flav', 0.003187539022707108),
 ('nExtraLep', 0.00

In [103]:
%%capture
# Run plot_variables for the WHl vs. VBF pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("WHl", "VBF")])

In [104]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHl_VBF_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [105]:
# Pass the D_WHl_VBF_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [106]:
# Display the (variable, score) pairs obtained for WHl vs. VBF.
implist

[('JetEta(JetPt|0)', 0.13490456521152236),
 ('PFMET', 0.12750735671283173),
 ('ZZEta', 0.08072136281417888),
 ('JetEta(JetPt|1)', 0.07270322075537917),
 ('JetPt(JetPt|0)', 0.06768175805188845),
 ('ZZPt', 0.05167247104559812),
 ('ExtraLepEta(ExtraLepPt|0)', 0.04840582057719824),
 ('ZZMassErr', 0.045004184552252906),
 ('ZZPhi', 0.04087362652196215),
 ('Z2Pt', 0.04027969007316217),
 ('JetPhi(JetPt|0)', 0.03790394427796226),
 ('Z1Pt', 0.03722901649523501),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03712102804999865),
 ('ZZMass', 0.03218055668043519),
 ('Z2Mass', 0.0294808455495262),
 ('Z1Mass', 0.029291865770362573),
 ('JetPhi(JetPt|1)', 0.0219756486055992),
 ('JetPt(JetPt|1)', 0.019437920142544748),
 ('JetEta(JetPt|2)', 0.011797737642072299),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.009556977403417835),
 ('JetPhi(JetPt|2)', 0.005885370265381604),
 ('nCleanedJetsPt30', 0.004940471369563456),
 ('JetPt(JetPt|3)', 0.00399557247374531),
 ('JetEta(JetPt|3)', 0.002159768904727194),
 ('JetPt(JetPt|2)', 0.001997786

In [107]:
%%capture
# Run plot_variables for the WHl vs. WHh pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("WHl", "WHh")])

In [108]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHl_WHh_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [109]:
# Pass the D_WHl_WHh_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [110]:
# Display the (variable, score) pairs obtained for WHl vs. WHh.
implist

[('PFMET', 0.15051472133017196),
 ('JetPt(JetPt|0)', 0.075452444551157),
 ('JetEta(JetPt|0)', 0.07509228968456436),
 ('ZZPt', 0.07074041837990336),
 ('Z2Mass', 0.0534229718779075),
 ('ExtraLepPt(ExtraLepPt|0)', 0.052132416939283895),
 ('ZZPhi', 0.05117200396170353),
 ('ZZMass', 0.05075182328401213),
 ('ZZMassErr', 0.04988144902307992),
 ('ZZEta', 0.04868093280110448),
 ('Z1Pt', 0.04808067469011675),
 ('Z1Mass', 0.03865662234760947),
 ('JetPhi(JetPt|0)', 0.03607551247036225),
 ('JetPt(JetPt|1)', 0.03577538341486839),
 ('JetEta(JetPt|1)', 0.035445241453825146),
 ('Z2Pt', 0.034664905909541105),
 ('ExtraLepEta(ExtraLepPt|0)', 0.021879408145502568),
 ('JetPhi(JetPt|1)', 0.021639304901107475),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.010954710525525976),
 ('JetPt(JetPt|2)', 0.007863381253939193),
 ('JetPhi(JetPt|2)', 0.007053032804105765),
 ('JetEta(JetPt|2)', 0.005432335904438909),
 ('nCleanedJetsPt30', 0.004171793871364687),
 ('JetPt(JetPt|3)', 0.0033314325159818723),
 ('Z1Flav', 0.002941264743839

In [111]:
%%capture
# Run plot_variables for the WHl vs. ZHh pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("WHl", "ZHh")])

In [112]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHl_ZHh_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [113]:
# Pass the D_WHl_ZHh_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [114]:
# Display the (variable, score) pairs obtained for WHl vs. ZHh.
implist

[('PFMET', 0.2012478336221837),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.06994800693240902),
 ('JetPhi(JetPt|0)', 0.0641941074523397),
 ('ExtraLepPt(ExtraLepPt|0)', 0.06232235701906413),
 ('JetPt(JetPt|0)', 0.05837088388214905),
 ('JetEta(JetPt|0)', 0.05691507798960139),
 ('ZZPt', 0.046655112651646444),
 ('ZZEta', 0.0432582322357019),
 ('Z1Pt', 0.04048526863084922),
 ('ZZPhi', 0.039376083188908145),
 ('ZZMassErr', 0.03785095320623917),
 ('Z2Pt', 0.03438474870017331),
 ('Z1Mass', 0.03396880415944541),
 ('JetPt(JetPt|1)', 0.028838821490467938),
 ('ExtraLepEta(ExtraLepPt|0)', 0.02772963604852686),
 ('ZZMass', 0.026412478336221838),
 ('nCleanedJetsPt30', 0.023362218370883883),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.022599653379549393),
 ('Z2Mass', 0.021074523396880415),
 ('nExtraLep', 0.019965337954939343),
 ('JetEta(JetPt|1)', 0.019480069324090122),
 ('JetEta(JetPt|2)', 0.006793760831889081),
 ('JetPhi(JetPt|1)', 0.005545927209705373),
 ('JetPt(JetPt|2)', 0.0036741767764298093),
 ('JetPhi(JetPt|2)

In [115]:
%%capture
# Run plot_variables for the WHl vs. ZHl pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("WHl", "ZHl")])

In [116]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHl_ZHl_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [117]:
# Pass the D_WHl_ZHl_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [118]:
# Display the (variable, score) pairs obtained for WHl vs. ZHl.
implist

[('PFMET', 0.09852266424645792),
 ('ZZMass', 0.09282508949730248),
 ('Z2Mass', 0.08400141178843341),
 ('Z2Pt', 0.07603489134271164),
 ('Z1Mass', 0.06887510714465789),
 ('ZZMassErr', 0.05828669389401502),
 ('JetPt(JetPt|0)', 0.05697574749155448),
 ('ExtraLepPt(ExtraLepPt|0)', 0.053900065547320124),
 ('ZZPhi', 0.04593354510159835),
 ('ExtraLepEta(ExtraLepPt|0)', 0.045832703070639844),
 ('ZZPt', 0.04386628346694903),
 ('JetEta(JetPt|0)', 0.03766449856300106),
 ('ZZEta', 0.033882922402057175),
 ('Z1Pt', 0.03277366006151364),
 ('JetPhi(JetPt|0)', 0.028034084606463974),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.027479453436192205),
 ('JetPhi(JetPt|1)', 0.017697776433217367),
 ('JetPt(JetPt|1)', 0.01613472495336056),
 ('JetEta(JetPt|1)', 0.014369989411586749),
 ('nExtraZ', 0.013311148086522463),
 ('nExtraLep', 0.013059043009126204),
 ('ExtraLepEta(ExtraLepPt|1)', 0.009630413956537085),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.006453889981344225),
 ('JetEta(JetPt|2)', 0.005042101547925175),
 ('Z2Flav', 0.

In [119]:
%%capture
# Run plot_variables for the WHl vs. ZHMET pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("WHl", "ZHMET")])

In [120]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHl_ZHMET_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [121]:
# Pass the D_WHl_ZHMET_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [122]:
# Display the (variable, score) pairs obtained for WHl vs. ZHMET.
implist

[('PFMET', 0.13662027310924368),
 ('Z1Pt', 0.0859375),
 ('ZZPt', 0.08042279411764706),
 ('ZZMass', 0.08022584033613446),
 ('Z2Mass', 0.07293855042016807),
 ('ZZEta', 0.060267857142857144),
 ('ZZMassErr', 0.05948004201680672),
 ('JetPt(JetPt|0)', 0.058298319327731093),
 ('Z1Mass', 0.05757615546218487),
 ('ZZPhi', 0.05573792016806723),
 ('Z2Pt', 0.05219275210084034),
 ('ExtraLepPt(ExtraLepPt|0)', 0.04575892857142857),
 ('JetEta(JetPt|0)', 0.042804621848739496),
 ('JetPhi(JetPt|0)', 0.034992121848739496),
 ('JetEta(JetPt|1)', 0.011817226890756302),
 ('nExtraLep', 0.011292016806722689),
 ('JetPt(JetPt|1)', 0.010307247899159664),
 ('JetPhi(JetPt|1)', 0.010241596638655462),
 ('ExtraLepEta(ExtraLepPt|0)', 0.008797268907563025),
 ('JetPt(JetPt|2)', 0.005317752100840336),
 ('JetEta(JetPt|2)', 0.005120798319327731),
 ('Z2Flav', 0.0030199579831932774),
 ('Z1Flav', 0.0029543067226890755),
 ('nCleanedJetsPt30', 0.002297794117647059),
 ('JetPhi(JetPt|2)', 0.0019695378151260504),
 ('JetPhi(JetPt|3)',

In [123]:
%%capture
# Run plot_variables for the WHl vs. ttHh pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("WHl", "ttHh")])

In [124]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHl_ttHh_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [125]:
# Pass the D_WHl_ttHh_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [126]:
# Display the (variable, score) pairs obtained for WHl vs. ttHh.
implist

[('PFMET', 0.11465309402625234),
 ('ExtraLepPt(ExtraLepPt|0)', 0.11224216447897134),
 ('JetPt(JetPt|0)', 0.09710688454326279),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.08652558264130726),
 ('JetPt(JetPt|1)', 0.05973747656040718),
 ('JetEta(JetPt|0)', 0.051968925796946154),
 ('JetEta(JetPt|1)', 0.050629520492901155),
 ('JetPt(JetPt|2)', 0.04995981784087865),
 ('JetPhi(JetPt|0)', 0.03616394320921511),
 ('ZZEta', 0.0326814894186981),
 ('JetPhi(JetPt|2)', 0.03160996517546209),
 ('ZZMass', 0.027725689793731582),
 ('nCleanedJetsPt30', 0.02638628448968658),
 ('Z1Pt', 0.025850522368068576),
 ('JetPt(JetPt|3)', 0.025180819716046077),
 ('ZZPt', 0.02317171175997857),
 ('JetPhi(JetPt|1)', 0.022368068577551567),
 ('ZZPhi', 0.018081971604607554),
 ('Z2Pt', 0.01620680417894455),
 ('Z2Mass', 0.015671042057326546),
 ('ZZMassErr', 0.01339405304045004),
 ('JetEta(JetPt|2)', 0.012724350388427539),
 ('Z1Mass', 0.010983123493169033),
 ('JetEta(JetPt|3)', 0.009777658719528529),
 ('nExtraLep', 0.0085721939458880

In [127]:
%%capture
# Run plot_variables for the WHl vs. ttHl pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("WHl", "ttHl")])

In [128]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHl_ttHl_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [129]:
# Pass the D_WHl_ttHl_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [130]:
# Display the (variable, score) pairs obtained for WHl vs. ttHl.
implist

[('nCleanedJetsPt30BTagged_bTagSF', 0.12115786332438078),
 ('JetPt(JetPt|0)', 0.09176365264100268),
 ('JetPt(JetPt|1)', 0.08325872873769025),
 ('PFMET', 0.05580423754103253),
 ('ExtraLepPt(ExtraLepPt|1)', 0.052820053715308866),
 ('ZZEta', 0.051178752611160845),
 ('Z1Mass', 0.047000895255147716),
 ('JetPt(JetPt|2)', 0.04416592062071024),
 ('Z2Pt', 0.04297224709042077),
 ('ZZMass', 0.03849597135183527),
 ('ZZPt', 0.030587884213667562),
 ('ExtraLepEta(ExtraLepPt|0)', 0.028946583109519548),
 ('nExtraLep', 0.028797373918233362),
 ('JetEta(JetPt|0)', 0.027454491196657715),
 ('JetPt(JetPt|3)', 0.027156072814085348),
 ('ExtraLepPt(ExtraLepPt|0)', 0.025962399283795883),
 ('JetEta(JetPt|1)', 0.024619516562220233),
 ('JetPhi(JetPt|0)', 0.02447030737093405),
 ('nCleanedJetsPt30', 0.023873470605789315),
 ('ZZPhi', 0.019695613249776187),
 ('JetPhi(JetPt|1)', 0.015965383467621604),
 ('ZZMassErr', 0.014622500746045956),
 ('Z2Mass', 0.013279618024470308),
 ('Z1Pt', 0.011936735302894658),
 ('JetEta(JetP

In [131]:
%%capture
# Run plot_variables for the ZHh vs. ZHl pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("ZHh", "ZHl")])

In [132]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHh_ZHl_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [133]:
# Pass the D_ZHh_ZHl_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [134]:
# Display the (variable, score) pairs obtained for ZHh vs. ZHl.
implist

[('ZZMass', 0.10154661953159523),
 ('PFMET', 0.08775961113566062),
 ('Z1Mass', 0.0798497569597879),
 ('Z2Mass', 0.07370746796288113),
 ('ZZMassErr', 0.06999558108705259),
 ('Z2Pt', 0.06319045514803358),
 ('JetPt(JetPt|0)', 0.06252761820592134),
 ('JetEta(JetPt|0)', 0.05236411842686699),
 ('ZZPt', 0.05201060539107379),
 ('ZZPhi', 0.05112682280159081),
 ('JetPhi(JetPt|0)', 0.04516129032258064),
 ('Z1Pt', 0.04078656650463986),
 ('ZZEta', 0.03928413610251878),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0344675209898365),
 ('JetEta(JetPt|1)', 0.0240830755634114),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.020150243040212106),
 ('JetPhi(JetPt|1)', 0.019133893062306673),
 ('JetPt(JetPt|1)', 0.01802916482545294),
 ('ExtraLepEta(ExtraLepPt|0)', 0.01643835616438356),
 ('JetEta(JetPt|2)', 0.008572691117984976),
 ('nCleanedJetsPt30', 0.008440123729562527),
 ('JetPhi(JetPt|2)', 0.007423773751657092),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.007335395492708794),
 ('Z2Flav', 0.007202828104286346),
 ('JetPt(JetPt|2)', 0.0031

In [135]:
%%capture
# Run plot_variables for the ZHh vs. ZHMET pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("ZHh", "ZHMET")])

In [136]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHh_ZHMET_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [137]:
# Pass the D_ZHh_ZHMET_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [138]:
# Display the (variable, score) pairs obtained for ZHh vs. ZHMET.
implist

[('PFMET', 0.14685998208031278),
 ('ZZPt', 0.08267492058320437),
 ('JetEta(JetPt|0)', 0.0716380223181559),
 ('Z1Pt', 0.06915370204447341),
 ('ZZEta', 0.06780972550297304),
 ('Z2Pt', 0.06536613179115419),
 ('JetPt(JetPt|0)', 0.06210800684206239),
 ('ZZPhi', 0.056569194428606334),
 ('Z1Mass', 0.05314816323205995),
 ('Z2Mass', 0.04846460861774049),
 ('ZZMass', 0.046306100838967176),
 ('JetPhi(JetPt|0)', 0.03885314001791969),
 ('ZZMassErr', 0.0379164290950558),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.035513561945100594),
 ('JetPt(JetPt|1)', 0.029078765170644293),
 ('JetPhi(JetPt|1)', 0.020078194998778203),
 ('JetEta(JetPt|1)', 0.018734218457277838),
 ('JetPt(JetPt|2)', 0.012462327930276126),
 ('nCleanedJetsPt30', 0.012177241997230593),
 ('JetEta(JetPt|2)', 0.008878390486275149),
 ('nExtraLep', 0.0032988515109554453),
 ('Z1Flav', 0.0032581249490917976),
 ('JetEta(JetPt|3)', 0.0029730390160462652),
 ('Z2Flav', 0.0022399609025006107),
 ('JetPhi(JetPt|2)', 0.001995601531318726),
 ('JetPhi(JetPt|

In [139]:
%%capture
# Run plot_variables for the ZHh vs. ttHh pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("ZHh", "ttHh")])

In [140]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHh_ttHh_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [141]:
# Pass the D_ZHh_ttHh_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [142]:
# Display the (variable, score) pairs obtained for ZHh vs. ttHh.
implist

[('nCleanedJetsPt30BTagged_bTagSF', 0.15102481121898598),
 ('JetPt(JetPt|1)', 0.1128371089536138),
 ('nCleanedJetsPt30', 0.08284789644012945),
 ('JetPt(JetPt|2)', 0.07529665587918015),
 ('JetPt(JetPt|0)', 0.07357065803667745),
 ('PFMET', 0.07357065803667745),
 ('ZZPt', 0.06472491909385113),
 ('JetPt(JetPt|3)', 0.05825242718446602),
 ('Z1Pt', 0.04919093851132686),
 ('JetEta(JetPt|2)', 0.03214670981661273),
 ('JetEta(JetPt|1)', 0.029557713052858686),
 ('ZZEta', 0.02740021574973031),
 ('JetPhi(JetPt|3)', 0.020280474649406688),
 ('Z2Pt', 0.020064724919093852),
 ('JetEta(JetPt|0)', 0.017259978425026967),
 ('ExtraLepPt(ExtraLepPt|0)', 0.015318230852211435),
 ('ZZPhi', 0.014670981661272923),
 ('nExtraLep', 0.013160733549083063),
 ('Z1Mass', 0.011866235167206042),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.010355987055016181),
 ('Z2Mass', 0.009708737864077669),
 ('ZZMass', 0.009492988133764833),
 ('JetPhi(JetPt|1)', 0.008414239482200648),
 ('ZZMassErr', 0.00819848975188781),
 ('JetEta(JetPt|3)', 0.00517

In [143]:
%%capture
# Run plot_variables for the ZHh vs. ttHl pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("ZHh", "ttHl")])

In [144]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHh_ttHl_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [145]:
# Pass the D_ZHh_ttHl_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [146]:
# Display the (variable, score) pairs obtained for ZHh vs. ttHl.
implist

[('PFMET', 0.1920114122681883),
 ('ExtraLepPt(ExtraLepPt|0)', 0.1202567760342368),
 ('nCleanedJetsPt30', 0.05991440798858773),
 ('JetPt(JetPt|0)', 0.057203994293865905),
 ('ZZMass', 0.050784593437945794),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.050356633380884454),
 ('ZZPt', 0.04893009985734665),
 ('Z1Mass', 0.038801711840228244),
 ('JetPt(JetPt|1)', 0.037089871611982884),
 ('Z1Pt', 0.03209700427960057),
 ('JetPt(JetPt|3)', 0.03195435092724679),
 ('Z2Pt', 0.029101283880171185),
 ('JetPt(JetPt|2)', 0.028530670470756064),
 ('ZZPhi', 0.02767475035663338),
 ('JetEta(JetPt|2)', 0.023395149786019972),
 ('ZZEta', 0.019543509272467903),
 ('ZZMassErr', 0.019115549215406563),
 ('JetPhi(JetPt|3)', 0.016262482168330955),
 ('JetEta(JetPt|0)', 0.016119828815977174),
 ('nExtraLep', 0.015121255349500713),
 ('Z2Mass', 0.013980028530670471),
 ('JetEta(JetPt|1)', 0.012981455064194009),
 ('JetPhi(JetPt|0)', 0.012553495007132667),
 ('JetPhi(JetPt|1)', 0.011554921540656205),
 ('JetPhi(JetPt|2)', 0.01027104136

In [147]:
%%capture
# Run plot_variables for the ZHl vs. ggH pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("ZHl", "ggH")])

In [148]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHl_ggH_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [149]:
# Pass the D_ZHl_ggH_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [150]:
# Display the (variable, score) pairs obtained for ZHl vs. ggH.
implist

[('ZZMass', 0.10157216039568981),
 ('PFMET', 0.08549726196785021),
 ('Z1Mass', 0.08284755343578873),
 ('Z1Pt', 0.07485426603073662),
 ('ZZPt', 0.07476594241300123),
 ('ZZMassErr', 0.07366189719130896),
 ('Z2Mass', 0.07216039568980745),
 ('JetEta(JetPt|0)', 0.06822999470058294),
 ('Z2Pt', 0.06452040275569687),
 ('ZZEta', 0.04447094152976506),
 ('JetPt(JetPt|0)', 0.04438261791202968),
 ('ZZPhi', 0.042042042042042045),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03758169934640523),
 ('ExtraLepEta(ExtraLepPt|0)', 0.026629570747217807),
 ('JetPhi(JetPt|0)', 0.018327150680091858),
 ('JetEta(JetPt|1)', 0.017443914502738033),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.016119060236707297),
 ('JetPt(JetPt|1)', 0.011393746687864335),
 ('JetPhi(JetPt|1)', 0.009583112524288995),
 ('JetEta(JetPt|2)', 0.007463345698639817),
 ('JetPt(JetPt|2)', 0.007065889418830595),
 ('Z2Flav', 0.0068450803744921395),
 ('JetPhi(JetPt|2)', 0.004813637166578343),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.0027821939586645467),
 ('nCleanedJetsPt3

In [151]:
%%capture
# Run plot_variables for the ZHl vs. VBF pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("ZHl", "VBF")])

In [152]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHl_VBF_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [153]:
# Pass the D_ZHl_VBF_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [154]:
# Display the (variable, score) pairs obtained for ZHl vs. VBF.
implist

[('JetEta(JetPt|0)', 0.1217993774475349),
 ('ZZMass', 0.08287311309703116),
 ('PFMET', 0.08213676071894768),
 ('JetEta(JetPt|1)', 0.07105800448505539),
 ('ZZEta', 0.0685811828496837),
 ('JetPt(JetPt|0)', 0.05800448505539378),
 ('Z2Mass', 0.05579542792114336),
 ('Z1Mass', 0.055126016668340196),
 ('ZZMassErr', 0.05174548984168424),
 ('Z2Pt', 0.04967031495799444),
 ('Z1Pt', 0.03849114703618168),
 ('ExtraLepEta(ExtraLepPt|0)', 0.0335375037654383),
 ('ZZPt', 0.03340362151487766),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03270073969943435),
 ('JetPhi(JetPt|0)', 0.031596211132309135),
 ('ZZPhi', 0.02768015530341065),
 ('JetPhi(JetPt|1)', 0.019914984770893998),
 ('JetPt(JetPt|2)', 0.011848579174615926),
 ('JetPt(JetPt|1)', 0.01161428523613482),
 ('JetEta(JetPt|2)', 0.010877932858051343),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.010509756669009607),
 ('JetPhi(JetPt|2)', 0.009405228101884393),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.009204404726043445),
 ('nExtraLep', 0.007196170967633966),
 ('JetPhi(JetPt|3)', 0.

In [155]:
%%capture
# Run plot_variables for the ZHl vs. WHh pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("ZHl", "WHh")])

In [156]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHl_WHh_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [157]:
# Pass the D_ZHl_WHh_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [158]:
# Display the (variable, score) pairs obtained for ZHl vs. WHh.
implist

[('PFMET', 0.09906403079076277),
 ('ZZMass', 0.09123512946116165),
 ('Z2Mass', 0.07706438068579426),
 ('ZZMassErr', 0.07352169349195241),
 ('Z1Mass', 0.07242827151854443),
 ('JetPt(JetPt|0)', 0.06621763470958712),
 ('Z2Pt', 0.06153778866340098),
 ('ZZPt', 0.05777641707487754),
 ('JetEta(JetPt|0)', 0.05178446466060182),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0489853044086774),
 ('Z1Pt', 0.047673198040587825),
 ('JetPhi(JetPt|0)', 0.04071903428971309),
 ('ZZPhi', 0.034071028691392585),
 ('ZZEta', 0.03372113365990203),
 ('JetEta(JetPt|1)', 0.02829776067179846),
 ('JetPhi(JetPt|1)', 0.02296186144156753),
 ('ExtraLepEta(ExtraLepPt|0)', 0.015745276417074877),
 ('JetPhi(JetPt|2)', 0.012071378586424073),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.011371588523442968),
 ('JetPt(JetPt|1)', 0.010890482855143458),
 ('JetEta(JetPt|2)', 0.01071553533939818),
 ('nExtraLep', 0.0056420573827851645),
 ('nCleanedJetsPt30', 0.005423372988103569),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.005248425472358292),
 ('Z2Flav', 0.0048

In [159]:
%%capture
# Run plot_variables for the ZHl vs. ZHMET pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("ZHl", "ZHMET")])

In [160]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHl_ZHMET_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [161]:
# Pass the D_ZHl_ZHMET_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [162]:
# Display the (variable, score) pairs obtained for ZHl vs. ZHMET.
implist

[('PFMET', 0.14208163691408288),
 ('ZZMass', 0.11159828792149494),
 ('Z2Pt', 0.08529074016076835),
 ('ExtraLepPt(ExtraLepPt|0)', 0.08059296377492431),
 ('JetPt(JetPt|0)', 0.07850506315899364),
 ('Z1Mass', 0.07746111285102829),
 ('Z2Mass', 0.05773045203048335),
 ('Z1Pt', 0.05418102098340119),
 ('ZZPt', 0.050527194905522495),
 ('JetEta(JetPt|0)', 0.041236037164630965),
 ('ZZMassErr', 0.037269025994362666),
 ('ZZEta', 0.03622507568639733),
 ('ZZPhi', 0.031109719177367157),
 ('JetPt(JetPt|1)', 0.02421964714479591),
 ('JetPhi(JetPt|0)', 0.01680759995824199),
 ('nExtraLep', 0.012318613633991022),
 ('JetPhi(JetPt|1)', 0.011483453387618749),
 ('JetEta(JetPt|1)', 0.010439503079653408),
 ('ExtraLepEta(ExtraLepPt|0)', 0.009499947802484601),
 ('JetPt(JetPt|2)', 0.007934022340536591),
 ('JetPhi(JetPt|2)', 0.0057417266938093745),
 ('JetEta(JetPt|2)', 0.005324146570623238),
 ('Z2Flav', 0.004802171416640568),
 ('nCleanedJetsPt30', 0.0039670111702682954),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.001983505

In [163]:
%%capture
# Run plot_variables for the ZHl vs. ttHh pair; it returns a figure and the list of
# (variable, score) pairs shown below. %%capture suppresses this cell's output.
fig, implist = plot_variables([("ZHl", "ttHh")])

In [164]:
# One-row record for this discriminant: each converted variable name maps to a
# single-element list holding its score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHl_ttHh_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4 and
# removed in 2.0, while concat yields the same frame on old and new versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [165]:
# Pass the D_ZHl_ttHh_ML record (impdict) to append_variables with confhandler and threshold_fscore.
# NOTE(review): presumably stores the variables passing the threshold in the config — confirm.
append_variables(confhandler, impdict, threshold_fscore)

In [166]:
# Display the (variable, score) pairs obtained for ZHl vs. ttHh.
implist

[('JetPt(JetPt|0)', 0.12239583333333333),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0967548076923077),
 ('ZZMass', 0.09455128205128205),
 ('JetPt(JetPt|1)', 0.08493589743589744),
 ('ExtraLepPt(ExtraLepPt|0)', 0.08273237179487179),
 ('Z2Pt', 0.06290064102564102),
 ('JetPhi(JetPt|2)', 0.055288461538461536),
 ('Z2Mass', 0.05348557692307692),
 ('nCleanedJetsPt30', 0.04827724358974359),
 ('JetEta(JetPt|1)', 0.04046474358974359),
 ('JetPt(JetPt|2)', 0.03886217948717949),
 ('Z1Mass', 0.037459935897435896),
 ('PFMET', 0.029246794871794872),
 ('JetEta(JetPt|0)', 0.027844551282051284),
 ('JetPt(JetPt|3)', 0.021834935897435896),
 ('nExtraLep', 0.015625),
 ('ZZPt', 0.013421474358974358),
 ('JetPhi(JetPt|3)', 0.013221153846153846),
 ('ZZMassErr', 0.01201923076923077),
 ('ExtraLepEta(ExtraLepPt|0)', 0.00921474358974359),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.007011217948717949),
 ('ExtraLepPt(ExtraLepPt|1)', 0.006209935897435897),
 ('JetPhi(JetPt|0)', 0.005408653846153846),
 ('Z1Pt', 0.005208333333333333),
 

In [167]:
%%capture
# %%capture hides this cell's output. plot_variables is called for the single
# (ZHl, ttHl) pair and returns a figure plus a list of (variable, score) tuples,
# which is displayed when `implist` is evaluated a few cells further down.
fig, implist = plot_variables([("ZHl", "ttHl")])

In [168]:
# One-row table for this discriminant: a column per variable (named via convert_varname)
# holding its score, plus the "discriminant" label column.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHl_ttHl_ML"
# pd.concat instead of DataFrame.append (removed in pandas 2.0); unmatched columns become NaN.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [169]:
# NOTE(review): presumably records in confhandler the variables of this discriminant whose
# score passes threshold_fscore -- confirm against the definition of append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [170]:
# Display the (variable, score) tuples returned by plot_variables for (ZHl, ttHl).
implist

[('PFMET', 0.127208480565371),
 ('JetPt(JetPt|0)', 0.07684448039507855),
 ('Z2Pt', 0.0610498531227383),
 ('Z1Mass', 0.054961897058197456),
 ('ZZMass', 0.05359955723955894),
 ('Z2Mass', 0.05193920558559326),
 ('ExtraLepPt(ExtraLepPt|0)', 0.04972540338030568),
 ('JetEta(JetPt|0)', 0.03882668483119758),
 ('ZZEta', 0.03550598152326621),
 ('JetPhi(JetPt|0)', 0.03401592234663034),
 ('ZZPt', 0.031546681425348035),
 ('JetPt(JetPt|1)', 0.030312060964706886),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.028396270594746476),
 ('ZZMassErr', 0.026395333986121164),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.0262676146281238),
 ('Z1Pt', 0.02537357912214228),
 ('nCleanedJetsPt30', 0.025118140406147557),
 ('ZZPhi', 0.023883519945506408),
 ('nExtraZ', 0.023117203797522246),
 ('JetEta(JetPt|1)', 0.02120141342756184),
 ('JetPhi(JetPt|1)', 0.018306441312955002),
 ('JetPhi(JetPt|2)', 0.01796585635829537),
 ('nExtraLep', 0.01758269828430329),
 ('ExtraLepEta(ExtraLepPt|0)', 0.015283749840350802),
 ('JetPt(JetPt|2)', 0.0144748

In [171]:
%%capture
# %%capture hides this cell's output. plot_variables is called for the single
# (ZHMET, ggH) pair and returns a figure plus a list of (variable, score) tuples,
# which is displayed when `implist` is evaluated a few cells further down.
fig, implist = plot_variables([("ZHMET", "ggH")])

In [172]:
# One-row table for this discriminant: a column per variable (named via convert_varname)
# holding its score, plus the "discriminant" label column.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHMET_ggH_ML"
# pd.concat instead of DataFrame.append (removed in pandas 2.0); unmatched columns become NaN.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [173]:
# NOTE(review): presumably records in confhandler the variables of this discriminant whose
# score passes threshold_fscore -- confirm against the definition of append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [174]:
# Display the (variable, score) tuples returned by plot_variables for (ZHMET, ggH).
implist

[('ZZPt', 0.15188784725208565),
 ('PFMET', 0.14668444138642814),
 ('ZZPhi', 0.08286746366216564),
 ('Z2Pt', 0.07357873914165305),
 ('JetPt(JetPt|0)', 0.07026748086350736),
 ('Z1Pt', 0.06747226283650125),
 ('ZZMass', 0.06618216220865228),
 ('ZZMassErr', 0.06321493076459964),
 ('Z1Mass', 0.05895759869269803),
 ('Z2Mass', 0.0525500989077148),
 ('ZZEta', 0.04648662595682463),
 ('JetEta(JetPt|0)', 0.03496172701470715),
 ('JetPhi(JetPt|0)', 0.029586307732003096),
 ('JetPt(JetPt|1)', 0.013073019695536252),
 ('JetEta(JetPt|1)', 0.010449815085576675),
 ('nCleanedJetsPt30', 0.009374731229035866),
 ('JetPhi(JetPt|1)', 0.00679452997333792),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0052894125741807865),
 ('Z1Flav', 0.0028812247355293713),
 ('Z2Flav', 0.002236174421604885),
 ('JetPt(JetPt|2)', 0.002107164358819988),
 ('JetEta(JetPt|2)', 0.001806140878988561),
 ('JetPhi(JetPt|2)', 0.0007740603767093833),
 ('JetPt(JetPt|3)', 0.0005160402511395889)]

In [175]:
%%capture
# %%capture hides this cell's output. plot_variables is called for the single
# (ZHMET, VBF) pair and returns a figure plus a list of (variable, score) tuples,
# which is displayed when `implist` is evaluated a few cells further down.
fig, implist = plot_variables([("ZHMET", "VBF")])

In [176]:
# One-row table for this discriminant: a column per variable (named via convert_varname)
# holding its score, plus the "discriminant" label column.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHMET_VBF_ML"
# pd.concat instead of DataFrame.append (removed in pandas 2.0); unmatched columns become NaN.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [177]:
# NOTE(review): presumably records in confhandler the variables of this discriminant whose
# score passes threshold_fscore -- confirm against the definition of append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [178]:
# Display the (variable, score) tuples returned by plot_variables for (ZHMET, VBF).
implist

[('PFMET', 0.13630745461568172),
 ('JetEta(JetPt|0)', 0.13174971031286212),
 ('ZZPt', 0.08636539204325995),
 ('JetPt(JetPt|0)', 0.07833140208574739),
 ('Z1Pt', 0.06628041714947856),
 ('ZZMass', 0.058323677095403634),
 ('JetEta(JetPt|1)', 0.05828505214368482),
 ('ZZEta', 0.05399768250289687),
 ('ZZPhi', 0.049710312862108925),
 ('ZZMassErr', 0.04955581305523368),
 ('Z2Mass', 0.0485129393588258),
 ('Z1Mass', 0.04159907300115875),
 ('Z2Pt', 0.03908845113943608),
 ('JetPhi(JetPt|0)', 0.03789107763615295),
 ('JetPhi(JetPt|1)', 0.0152954808806489),
 ('JetPt(JetPt|1)', 0.012630359212050985),
 ('JetPhi(JetPt|2)', 0.010467361915797606),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.006179992275009656),
 ('JetEta(JetPt|2)', 0.004905368868288915),
 ('nCleanedJetsPt30', 0.00436461954422557),
 ('Z2Flav', 0.003823870220162225),
 ('Z1Flav', 0.0036693704132869836),
 ('JetPt(JetPt|2)', 0.0017767477790652762),
 ('JetEta(JetPt|3)', 0.0008111239860950173),
 ('JetPhi(JetPt|3)', 7.72499034376207e-05)]

In [179]:
%%capture
# %%capture hides this cell's output. plot_variables is called for the single
# (ZHMET, WHh) pair and returns a figure plus a list of (variable, score) tuples,
# which is displayed when `implist` is evaluated a few cells further down.
fig, implist = plot_variables([("ZHMET", "WHh")])

In [180]:
# One-row table for this discriminant: a column per variable (named via convert_varname)
# holding its score, plus the "discriminant" label column.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHMET_WHh_ML"
# pd.concat instead of DataFrame.append (removed in pandas 2.0); unmatched columns become NaN.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [181]:
# NOTE(review): presumably records in confhandler the variables of this discriminant whose
# score passes threshold_fscore -- confirm against the definition of append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [182]:
# Display the (variable, score) tuples returned by plot_variables for (ZHMET, WHh).
implist

[('PFMET', 0.15605976757055895),
 ('ZZPt', 0.09601549529607084),
 ('JetEta(JetPt|0)', 0.07214009012570163),
 ('JetPt(JetPt|0)', 0.06510396078741403),
 ('Z1Pt', 0.06486678788837062),
 ('Z2Pt', 0.06316704877855957),
 ('ZZPhi', 0.05573563127519962),
 ('Z2Mass', 0.05510317021108388),
 ('ZZEta', 0.05439165151395367),
 ('ZZMass', 0.05328484465175112),
 ('ZZMassErr', 0.05304767175270773),
 ('Z1Mass', 0.050952644477824334),
 ('JetPhi(JetPt|0)', 0.04114949798403036),
 ('JetPt(JetPt|1)', 0.030437188710570006),
 ('JetPhi(JetPt|1)', 0.02427069333544154),
 ('JetEta(JetPt|1)', 0.015060479089256068),
 ('nCleanedJetsPt30', 0.012332990750256937),
 ('JetPt(JetPt|2)', 0.009131156613171001),
 ('Z2Flav', 0.006166495375128468),
 ('JetPt(JetPt|3)', 0.005494505494505495),
 ('Z1Flav', 0.004980630879911455),
 ('JetPhi(JetPt|2)', 0.004980630879911455),
 ('JetEta(JetPt|2)', 0.004585342714839118),
 ('JetEta(JetPt|3)', 0.0011068068622025456),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0004348169815795715)]

In [183]:
%%capture
# %%capture hides this cell's output. plot_variables is called for the single
# (ZHMET, ttHh) pair and returns a figure plus a list of (variable, score) tuples,
# which is displayed when `implist` is evaluated a few cells further down.
fig, implist = plot_variables([("ZHMET", "ttHh")])

In [184]:
# One-row table for this discriminant: a column per variable (named via convert_varname)
# holding its score, plus the "discriminant" label column.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHMET_ttHh_ML"
# pd.concat instead of DataFrame.append (removed in pandas 2.0); unmatched columns become NaN.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [185]:
# NOTE(review): presumably records in confhandler the variables of this discriminant whose
# score passes threshold_fscore -- confirm against the definition of append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [186]:
# Display the (variable, score) tuples returned by plot_variables for (ZHMET, ttHh).
implist

[('PFMET', 0.2645),
 ('JetPt(JetPt|0)', 0.156),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.1015),
 ('JetPt(JetPt|1)', 0.0695),
 ('JetPt(JetPt|3)', 0.0595),
 ('JetPt(JetPt|2)', 0.0565),
 ('nCleanedJetsPt30', 0.0535),
 ('JetEta(JetPt|0)', 0.0525),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0465),
 ('Z1Pt', 0.0355),
 ('JetEta(JetPt|2)', 0.0295),
 ('ZZPt', 0.028),
 ('ZZEta', 0.0235),
 ('Z2Mass', 0.0105),
 ('Z2Pt', 0.0085),
 ('nExtraLep', 0.0045)]

In [187]:
%%capture
# %%capture hides this cell's output. plot_variables is called for the single
# (ZHMET, ttHl) pair and returns a figure plus a list of (variable, score) tuples,
# which is displayed when `implist` is evaluated a few cells further down.
fig, implist = plot_variables([("ZHMET", "ttHl")])

In [188]:
# One-row table for this discriminant: a column per variable (named via convert_varname)
# holding its score, plus the "discriminant" label column.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHMET_ttHl_ML"
# pd.concat instead of DataFrame.append (removed in pandas 2.0); unmatched columns become NaN.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [189]:
# NOTE(review): presumably records in confhandler the variables of this discriminant whose
# score passes threshold_fscore -- confirm against the definition of append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [190]:
# Display the (variable, score) tuples returned by plot_variables for (ZHMET, ttHl).
implist

[('ZZMass', 0.09087021595039556),
 ('JetPt(JetPt|0)', 0.08044686765020312),
 ('JetEta(JetPt|0)', 0.07761385503527903),
 ('PFMET', 0.07237545435107975),
 ('Z1Mass', 0.06312807355142185),
 ('JetPhi(JetPt|0)', 0.05254436604661108),
 ('ZZEta', 0.05045969638657259),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.04367115672439598),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03870002138122728),
 ('Z2Mass', 0.038272396835578364),
 ('Z2Pt', 0.03747060081248663),
 ('ZZMassErr', 0.03714988240324995),
 ('ZZPt', 0.036401539448364335),
 ('nExtraLep', 0.03388924524267693),
 ('nCleanedJetsPt30', 0.030842420354928374),
 ('ZZPhi', 0.03073551421851614),
 ('Z1Pt', 0.029719905922599956),
 ('JetPt(JetPt|1)', 0.026085097284584136),
 ('JetPt(JetPt|3)', 0.025978191148171906),
 ('JetEta(JetPt|1)', 0.024534958306606798),
 ('JetPt(JetPt|2)', 0.018441308531109684),
 ('JetEta(JetPt|2)', 0.01624973273465897),
 ('JetPhi(JetPt|1)', 0.015554842847979474),
 ('Z2Flav', 0.014539234552063289),
 ('JetPhi(JetPt|2)', 0.009835364549925165),
 ('J

In [191]:
%%capture
# %%capture hides this cell's output. plot_variables is called for the single
# (ttHh, ggH) pair and returns a figure plus a list of (variable, score) tuples,
# which is displayed when `implist` is evaluated a few cells further down.
fig, implist = plot_variables([("ttHh", "ggH")])

In [192]:
# One-row table for this discriminant: a column per variable (named via convert_varname)
# holding its score, plus the "discriminant" label column.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHh_ggH_ML"
# pd.concat instead of DataFrame.append (removed in pandas 2.0); unmatched columns become NaN.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [193]:
# NOTE(review): presumably records in confhandler the variables of this discriminant whose
# score passes threshold_fscore -- confirm against the definition of append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [194]:
# Display the (variable, score) tuples returned by plot_variables for (ttHh, ggH).
implist

[('JetPt(JetPt|0)', 0.1008040448017977),
 ('JetEta(JetPt|0)', 0.08781292791685685),
 ('JetEta(JetPt|1)', 0.0830378146834732),
 ('ZZPt', 0.0802991467996208),
 ('PFMET', 0.0583546926020856),
 ('JetPt(JetPt|1)', 0.05786313682806081),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.04926091078262702),
 ('Z1Pt', 0.047470243320108144),
 ('JetPt(JetPt|2)', 0.038868017274674344),
 ('JetEta(JetPt|2)', 0.03802535023348899),
 ('JetPhi(JetPt|0)', 0.03451423756188336),
 ('Z2Pt', 0.03370668164741406),
 ('JetPhi(JetPt|1)', 0.0316702362978828),
 ('ZZEta', 0.030932902636845616),
 ('JetPt(JetPt|3)', 0.030581791369685054),
 ('ZZPhi', 0.029247568554474912),
 ('Z2Mass', 0.029072012920894633),
 ('ZZMassErr', 0.027316456585091815),
 ('Z1Mass', 0.02355956602647379),
 ('JetEta(JetPt|3)', 0.017696007864892383),
 ('ZZMass', 0.017660896738176328),
 ('nCleanedJetsPt30', 0.016642674063410696),
 ('JetPhi(JetPt|2)', 0.009234226326322812),
 ('ExtraLepPt(ExtraLepPt|0)', 0.007303114356939714),
 ('JetPhi(JetPt|3)', 0.0058986692882

In [195]:
%%capture
# %%capture hides this cell's output. plot_variables is called for the single
# (ttHh, VBF) pair and returns a figure plus a list of (variable, score) tuples,
# which is displayed when `implist` is evaluated a few cells further down.
fig, implist = plot_variables([("ttHh", "VBF")])

In [196]:
# One-row table for this discriminant: a column per variable (named via convert_varname)
# holding its score, plus the "discriminant" label column.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHh_VBF_ML"
# pd.concat instead of DataFrame.append (removed in pandas 2.0); unmatched columns become NaN.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [197]:
# NOTE(review): presumably records in confhandler the variables of this discriminant whose
# score passes threshold_fscore -- confirm against the definition of append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [198]:
# Display the (variable, score) tuples returned by plot_variables for (ttHh, VBF).
implist

[('JetEta(JetPt|0)', 0.1591576260370134),
 ('JetEta(JetPt|1)', 0.1530312699425654),
 ('JetEta(JetPt|2)', 0.08544990427568602),
 ('JetPt(JetPt|0)', 0.04594767070835992),
 ('ZZEta', 0.04151244416081685),
 ('JetPt(JetPt|1)', 0.04071474154435226),
 ('JetPt(JetPt|2)', 0.03956604977664327),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.03899170389278877),
 ('Z1Pt', 0.03560944479897894),
 ('JetPhi(JetPt|1)', 0.032035737077217616),
 ('ZZPt', 0.030887045309508615),
 ('JetPt(JetPt|3)', 0.029610721123165283),
 ('PFMET', 0.02820676451818762),
 ('JetPhi(JetPt|0)', 0.026419910657306957),
 ('ZZPhi', 0.024441608168474793),
 ('nCleanedJetsPt30', 0.023931078493937462),
 ('Z2Mass', 0.02217613273771538),
 ('JetEta(JetPt|3)', 0.02032546266751755),
 ('Z2Pt', 0.02013401403956605),
 ('Z1Mass', 0.01876196553924697),
 ('ZZMass', 0.01723037651563497),
 ('JetPhi(JetPt|2)', 0.015283982131461392),
 ('ZZMassErr', 0.014167198468410976),
 ('ExtraLepPt(ExtraLepPt|0)', 0.01253988513082323),
 ('JetPhi(JetPt|3)', 0.01174218251435

In [199]:
%%capture
# %%capture hides this cell's output. plot_variables is called for the single
# (ttHh, WHh) pair and returns a figure plus a list of (variable, score) tuples,
# which is displayed when `implist` is evaluated a few cells further down.
fig, implist = plot_variables([("ttHh", "WHh")])

In [200]:
# One-row table for this discriminant: a column per variable (named via convert_varname)
# holding its score, plus the "discriminant" label column.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHh_WHh_ML"
# pd.concat instead of DataFrame.append (removed in pandas 2.0); unmatched columns become NaN.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [201]:
# NOTE(review): presumably records in confhandler the variables of this discriminant whose
# score passes threshold_fscore -- confirm against the definition of append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [202]:
# Display the (variable, score) tuples returned by plot_variables for (ttHh, WHh).
implist

[('JetPt(JetPt|0)', 0.08357637101847086),
 ('ZZPt', 0.06970892663121175),
 ('PFMET', 0.06782181048779093),
 ('JetPt(JetPt|1)', 0.06724995711099674),
 ('Z1Pt', 0.06333276147995653),
 ('JetEta(JetPt|0)', 0.04849316635214731),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.046863384228283865),
 ('ZZMass', 0.04657745753988677),
 ('ZZEta', 0.044861897409504206),
 ('Z2Pt', 0.040744553096586034),
 ('Z2Mass', 0.03931491965460056),
 ('JetPhi(JetPt|1)', 0.03757076685537828),
 ('ZZPhi', 0.035569280036598615),
 ('JetPhi(JetPt|0)', 0.03496883399096472),
 ('ZZMassErr', 0.034597129296048496),
 ('JetPt(JetPt|2)', 0.03268142048378796),
 ('JetEta(JetPt|1)', 0.031423343054840736),
 ('JetEta(JetPt|2)', 0.026905701378166638),
 ('Z1Mass', 0.02490421455938697),
 ('JetPhi(JetPt|2)', 0.024103619831875107),
 ('JetPt(JetPt|3)', 0.022988505747126436),
 ('JetEta(JetPt|3)', 0.020472350889232),
 ('nCleanedJetsPt30', 0.01932864413564362),
 ('JetPhi(JetPt|3)', 0.00909246869102762),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0069766111968

In [203]:
%%capture
# %%capture hides this cell's output. plot_variables is called for the single
# (ttHh, ttHl) pair and returns a figure plus a list of (variable, score) tuples,
# which is displayed when `implist` is evaluated a few cells further down.
fig, implist = plot_variables([("ttHh", "ttHl")])

In [204]:
# One-row table for this discriminant: a column per variable (named via convert_varname)
# holding its score, plus the "discriminant" label column.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHh_ttHl_ML"
# pd.concat instead of DataFrame.append (removed in pandas 2.0); unmatched columns become NaN.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [205]:
# NOTE(review): presumably records in confhandler the variables of this discriminant whose
# score passes threshold_fscore -- confirm against the definition of append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [206]:
# Display the (variable, score) tuples returned by plot_variables for (ttHh, ttHl).
implist

[('PFMET', 0.21333333333333335),
 ('ExtraLepPt(ExtraLepPt|0)', 0.15857142857142856),
 ('Z1Mass', 0.05452380952380952),
 ('JetPhi(JetPt|2)', 0.04690476190476191),
 ('JetPt(JetPt|2)', 0.04380952380952381),
 ('ZZMass', 0.04357142857142857),
 ('nCleanedJetsPt30', 0.04357142857142857),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.04285714285714286),
 ('ZZPt', 0.03785714285714286),
 ('JetEta(JetPt|0)', 0.030476190476190476),
 ('JetEta(JetPt|3)', 0.030238095238095238),
 ('Z2Pt', 0.030238095238095238),
 ('JetPt(JetPt|0)', 0.03),
 ('JetPhi(JetPt|1)', 0.027142857142857142),
 ('JetPt(JetPt|1)', 0.02666666666666667),
 ('JetPt(JetPt|3)', 0.024761904761904763),
 ('ZZMassErr', 0.023333333333333334),
 ('JetPhi(JetPt|3)', 0.019523809523809523),
 ('JetEta(JetPt|1)', 0.012380952380952381),
 ('JetPhi(JetPt|0)', 0.010714285714285714),
 ('JetEta(JetPt|2)', 0.010476190476190476),
 ('Z1Pt', 0.009523809523809525),
 ('ExtraLepEta(ExtraLepPt|0)', 0.009047619047619047),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.00738095238095238

In [207]:
%%capture
# %%capture hides this cell's output. plot_variables is called for the single
# (ttHl, ggH) pair and returns a figure plus a list of (variable, score) tuples,
# which is displayed when `implist` is evaluated a few cells further down.
fig, implist = plot_variables([("ttHl", "ggH")])

In [208]:
# One-row table for this discriminant: a column per variable (named via convert_varname)
# holding its score, plus the "discriminant" label column.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHl_ggH_ML"
# pd.concat instead of DataFrame.append (removed in pandas 2.0); unmatched columns become NaN.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [209]:
# NOTE(review): presumably records in confhandler the variables of this discriminant whose
# score passes threshold_fscore -- confirm against the definition of append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [210]:
# Display the (variable, score) tuples returned by plot_variables for (ttHl, ggH).
implist

[('PFMET', 0.1780918313493227),
 ('JetPt(JetPt|0)', 0.08212631208585),
 ('ZZPt', 0.06884419163783498),
 ('JetEta(JetPt|0)', 0.04981528176860377),
 ('Z1Mass', 0.04837858441329971),
 ('ZZMassErr', 0.04717644989151469),
 ('ZZMass', 0.045563830411071364),
 ('ExtraLepPt(ExtraLepPt|0)', 0.044977423327273794),
 ('Z1Pt', 0.044713540139564883),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.04271975605465314),
 ('JetEta(JetPt|1)', 0.04142966047029848),
 ('JetPt(JetPt|1)', 0.04013956488594382),
 ('Z2Mass', 0.03659180202896851),
 ('Z2Pt', 0.032428311734005746),
 ('ZZEta', 0.029730839148536914),
 ('ZZPhi', 0.022254148830117868),
 ('JetPt(JetPt|2)', 0.021902304579839323),
 ('JetPhi(JetPt|0)', 0.020993373599953086),
 ('nCleanedJetsPt30', 0.01756289215973729),
 ('JetPt(JetPt|3)', 0.015979593033483844),
 ('JetEta(JetPt|2)', 0.014894739928458335),
 ('nExtraLep', 0.01313551867706562),
 ('JetEta(JetPt|3)', 0.009851639007799215),
 ('ExtraLepEta(ExtraLepPt|0)', 0.009617076174280184),
 ('JetPhi(JetPt|1)', 0.00929455

In [211]:
%%capture
# %%capture hides this cell's output. plot_variables is called for the single
# (ttHl, VBF) pair and returns a figure plus a list of (variable, score) tuples,
# which is displayed when `implist` is evaluated a few cells further down.
fig, implist = plot_variables([("ttHl", "VBF")])

In [212]:
# One-row table for this discriminant: a column per variable (named via convert_varname)
# holding its score, plus the "discriminant" label column.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHl_VBF_ML"
# pd.concat instead of DataFrame.append (removed in pandas 2.0); unmatched columns become NaN.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [213]:
# NOTE(review): presumably records in confhandler the variables of this discriminant whose
# score passes threshold_fscore -- confirm against the definition of append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [214]:
# Display the (variable, score) tuples returned by plot_variables for (ttHl, VBF).
implist

[('PFMET', 0.12862396713951377),
 ('JetEta(JetPt|0)', 0.11778191718011176),
 ('JetEta(JetPt|1)', 0.10247408893346707),
 ('JetEta(JetPt|2)', 0.04671156326121221),
 ('ZZMass', 0.044108516024263265),
 ('JetPt(JetPt|0)', 0.04150546878731432),
 ('ZZEta', 0.03964273773702059),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03868749104456226),
 ('JetPt(JetPt|1)', 0.03560682046138415),
 ('JetPt(JetPt|2)', 0.03434111859387687),
 ('ExtraLepEta(ExtraLepPt|0)', 0.031069398672207097),
 ('Z1Mass', 0.029517122796962315),
 ('ZZPt', 0.029111142952667528),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.026770788556144624),
 ('Z2Mass', 0.02657973921765296),
 ('JetPhi(JetPt|0)', 0.02605435353680088),
 ('ZZMassErr', 0.025433443186702966),
 ('Z1Pt', 0.024549839996179015),
 ('ZZPhi', 0.023451306299851937),
 ('JetPhi(JetPt|1)', 0.02340354396522902),
 ('Z2Pt', 0.02166021875149257),
 ('JetPt(JetPt|3)', 0.019510913693461335),
 ('nCleanedJetsPt30', 0.015164541242775947),
 ('JetPhi(JetPt|2)', 0.011462960309499929),
 ('JetEta(JetPt|3)', 0

In [215]:
%%capture
# %%capture hides this cell's output. plot_variables is called for the single
# (ttHl, WHh) pair and returns a figure plus a list of (variable, score) tuples,
# which is displayed when `implist` is evaluated a few cells further down.
fig, implist = plot_variables([("ttHl", "WHh")])

In [216]:
# One-row table for this discriminant: a column per variable (named via convert_varname)
# holding its score, plus the "discriminant" label column.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHl_WHh_ML"
# pd.concat instead of DataFrame.append (removed in pandas 2.0); unmatched columns become NaN.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [217]:
# NOTE(review): presumably records in confhandler the variables of this discriminant whose
# score passes threshold_fscore -- confirm against the definition of append_variables.
append_variables(confhandler, impdict, threshold_fscore)

In [218]:
# Display the (variable, score) tuples returned by plot_variables for (ttHl, WHh).
implist

[('PFMET', 0.15972287239022565),
 ('JetPt(JetPt|0)', 0.06531847829479137),
 ('ZZMassErr', 0.052710420372624286),
 ('ZZPt', 0.04759229784976438),
 ('ExtraLepPt(ExtraLepPt|0)', 0.047467465593109257),
 ('JetPt(JetPt|1)', 0.046936928502325004),
 ('Z1Pt', 0.04160034953031863),
 ('Z1Mass', 0.041506725337827295),
 ('JetEta(JetPt|0)', 0.04066410760540524),
 ('Z2Pt', 0.040632899541241455),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.038791623755578444),
 ('ZZEta', 0.03592048185251069),
 ('ZZPhi', 0.03392316574602877),
 ('JetPt(JetPt|2)', 0.03370470929688232),
 ('ZZMass', 0.03120806416377992),
 ('JetPhi(JetPt|1)', 0.029366788378116904),
 ('Z2Mass', 0.029335580313953124),
 ('JetEta(JetPt|1)', 0.029054707736479106),
 ('JetPhi(JetPt|0)', 0.02805604968323815),
 ('nCleanedJetsPt30', 0.021845644914645943),
 ('JetPt(JetPt|3)', 0.02003557719314671),
 ('ExtraLepEta(ExtraLepPt|0)', 0.015916112723527758),
 ('JetPhi(JetPt|3)', 0.014542957900321443),
 ('JetPhi(JetPt|2)', 0.014293293387011203),
 ('JetEta(JetPt|2)',

In [219]:
# Save the variable configuration held by confhandler (the object passed to every
# append_variables call above) to out_path, which is defined earlier in the notebook.
confhandler.save_configuration(out_path)

In [220]:
# Each discriminant reported a different subset of variables, so the stacked one-row tables
# contain NaN wherever a variable was absent for a discriminant; treat those as score 0.
df = df.fillna(0.0)

In [221]:
# Write the full discriminant-by-variable score table to a CSV file. The path is relative,
# so the file lands in the current working directory (not in out_dir, where the figure goes).
df.to_csv("input_parameters_table.csv")

In [222]:
# Prepare the overview plot of all input variables: every column of df except the
# "discriminant" label is a variable. After the transpose, variable_data has one row
# per variable and one column per discriminant.
datacol_names = [col for col in df.columns.tolist() if col != "discriminant"]
# .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
variable_data = df[datacol_names].values.transpose()
# The leading '' is a dummy first entry kept for the plotting cell that consumes these labels.
datacol_labels = np.concatenate([[''], np.array(datacol_names)])

In [223]:
# Labels for the discriminant axis, with the same dummy '' first entry as datacol_labels.
# .values replaces Series.as_matrix(), which was removed in pandas 1.0.
discriminant_labels = np.concatenate([[''], df["discriminant"].values])

In [224]:
# Overview heat map: rows are input variables, columns are discriminants, and the colour
# encodes the score stored in variable_data.
fig = plt.figure(figsize = (15, 10))
ax = fig.add_subplot(111)
cax = ax.matshow(variable_data, interpolation = 'nearest', cmap = 'Blues', vmin = np.min(variable_data), vmax = np.max(variable_data))
# Put exactly one tick on every cell and label it explicitly. Binding labels positionally
# to MultipleLocator ticks only works because that locator emits an off-screen tick at -1.
# Both label arrays carry a dummy '' as their first element, which is skipped here.
n_variables, n_discriminants = variable_data.shape
ax.set_xticks(np.arange(n_discriminants))
ax.set_xticklabels(discriminant_labels[1:], rotation = 'vertical')
ax.set_yticks(np.arange(n_variables))
ax.set_yticklabels(datacol_labels[1:]);

In [225]:
# Use the explicit figure handle instead of pyplot's implicit "current figure": with the
# inline backend the figure from the previous cell is closed once it has been displayed,
# so plt.tight_layout()/plt.savefig() here could act on a new, empty figure.
fig.tight_layout()
fig.savefig(os.path.join(out_dir, "input_variables_exclusive.pdf"))