In [1]:
import warnings
# Suppress every warning category for the rest of the session; note that this
# also hides any deprecation warnings raised by the libraries imported below.
warnings.filterwarnings('ignore')

In [2]:
from trainlib.FileCollection import FileCollection
from trainlib.config import Config
from trainlib.ConfigFileHandler import ConfigFileHandler
from trainlib.ConfigFileUtils import ConfigFileUtils
import trainlib.cuts as cuts
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd
import copy
import re
from scipy import interpolate
import scipy.integrate as integrate
import pickle
import os

Welcome to JupyROOT 6.10/09


In [3]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error



In [4]:
# Scalar (one value per event) branches that every discriminant may use as input.
# The commented-out variant additionally contains the angular variables.
#candidate_branches = ["PFMET", "nCleanedJetsPt30", "nCleanedJetsPt30BTagged_bTagSF", "nExtraLep", "ZZMass", "nExtraZ", "Z1Mass", "Z2Mass", "Z1Pt", "Z2Pt", "ZZMassErr", "ZZPt", "ZZEta", "ZZPhi", "Z1Flav", "Z2Flav", "costhetastar", "helphi", "helcosthetaZ1", "helcosthetaZ2", "phistarZ1", "phistarZ2", "xi", "xistar"]
candidate_branches = ["PFMET", "nCleanedJetsPt30", "nCleanedJetsPt30BTagged_bTagSF", "nExtraLep", "ZZMass", "nExtraZ", "Z1Mass", "Z2Mass", "Z1Pt", "Z2Pt", "ZZMassErr", "ZZPt", "ZZEta", "ZZPhi", "Z1Flav", "Z2Flav"]
# Optional "_ME" discriminant branches; the importance functions below only add
# those whose name matches the jet category of the discriminant being studied.
MELA_branches = ["D_VBF2j_ggH_ME", "D_VBF1j_ggH_ME", "D_WHh_ggH_ME", "D_ZHh_ggH_ME", "D_WHh_ZHh_ME", "D_VBF2j_WHh_ME", "D_VBF2j_ZHh_ME"]
# Basenames of the list-valued branches (<basename>Pt / Eta / Phi) that are
# unrolled into scalar columns by prepare_data.
#list_branches = ["Jet", "Lep", "ExtraLep"]
list_branches = ["Jet", "ExtraLep"]
# Minimum Pt per entry of list_branches (paired position-by-position via zip in
# prepare_data). It must have the same length as list_branches; the previous
# three-entry list still carried the value for the disabled "Lep" basename.
# For list_branches = ["Jet", "Lep", "ExtraLep"] use [30.0, 0.0, 0.0].
pt_limits = [30.0, 0.0]

In [5]:
# Complete list of branches read from the ntuples: the list-valued kinematic
# branches, the scalar candidates, the "_ME" discriminants and a few extra
# bookkeeping branches (including the per-event training weight).
allbranches = (
    ["JetPt", "JetEta", "JetPhi",
     "LepPt", "LepEta", "LepPhi",
     "ExtraLepPt", "ExtraLepEta", "ExtraLepPhi"]
    + candidate_branches
    + MELA_branches
    + ["LHEAssociatedParticleId", "GenAssocLep1Id", "GenAssocLep2Id", "training_weight"]
)

In [6]:
# Base directory of the ntuples; the per-sample file paths in `collections`
# are built by appending "<sample>/ZZ4lAnalysis.root" to it.
# NOTE(review): absolute, site-specific path - adjust when running elsewhere.
#MC_path = "/data_CMS/cms/wind/CJLST_NTuples_randomizeda/"
MC_path = "/data_CMS/cms/wind/CJLST_NTuples/"

In [7]:
# these are the cuts without any m4l restriction imposed
def WHhadr0j_cut(row):
    """Accept rows passing ``cuts.WHhadr_cut`` whose ``nCleanedJetsPt30`` is 0.

    A falsy result of ``cuts.WHhadr_cut`` is returned as-is; otherwise the
    outcome of the jet-count comparison is returned.
    """
    base_decision = cuts.WHhadr_cut(row)
    if not base_decision:
        return base_decision
    return row["nCleanedJetsPt30"] == 0

def WHhadr01j_cut(row):
    """Accept rows passing ``cuts.WHhadr_cut`` whose ``nCleanedJetsPt30`` is 0 or 1.

    A falsy result of ``cuts.WHhadr_cut`` is returned as-is; otherwise the
    outcome of the jet-count comparison is returned.
    """
    base_decision = cuts.WHhadr_cut(row)
    if not base_decision:
        return base_decision
    n_jets = row["nCleanedJetsPt30"]
    return n_jets == 0 or n_jets == 1

def WHhadr1j_cut(row):
    """Accept rows passing ``cuts.WHhadr_cut`` whose ``nCleanedJetsPt30`` is 1.

    A falsy result of ``cuts.WHhadr_cut`` is returned as-is; otherwise the
    outcome of the jet-count comparison is returned.
    """
    base_decision = cuts.WHhadr_cut(row)
    if not base_decision:
        return base_decision
    return row["nCleanedJetsPt30"] == 1

def WHhadr2j_cut(row):
    """Accept rows passing ``cuts.WHhadr_cut`` whose ``nCleanedJetsPt30`` is at least 2.

    A falsy result of ``cuts.WHhadr_cut`` is returned as-is; otherwise the
    outcome of the jet-count comparison is returned.
    """
    base_decision = cuts.WHhadr_cut(row)
    if not base_decision:
        return base_decision
    return row["nCleanedJetsPt30"] >= 2

def ZHhadr0j_cut(row):
    """Accept rows passing ``cuts.ZHhadr_cut`` whose ``nCleanedJetsPt30`` is 0.

    A falsy result of ``cuts.ZHhadr_cut`` is returned as-is; otherwise the
    outcome of the jet-count comparison is returned.
    """
    base_decision = cuts.ZHhadr_cut(row)
    if not base_decision:
        return base_decision
    return row["nCleanedJetsPt30"] == 0

def ZHhadr01j_cut(row):
    """Accept rows passing ``cuts.ZHhadr_cut`` whose ``nCleanedJetsPt30`` is 0 or 1.

    A falsy result of ``cuts.ZHhadr_cut`` is returned as-is; otherwise the
    outcome of the jet-count comparison is returned.
    """
    base_decision = cuts.ZHhadr_cut(row)
    if not base_decision:
        return base_decision
    n_jets = row["nCleanedJetsPt30"]
    return n_jets == 0 or n_jets == 1

def ZHhadr1j_cut(row):
    """Accept rows passing ``cuts.ZHhadr_cut`` whose ``nCleanedJetsPt30`` is 1.

    A falsy result of ``cuts.ZHhadr_cut`` is returned as-is; otherwise the
    outcome of the jet-count comparison is returned.
    """
    base_decision = cuts.ZHhadr_cut(row)
    if not base_decision:
        return base_decision
    return row["nCleanedJetsPt30"] == 1

def ZHhadr2j_cut(row):
    """Accept rows passing ``cuts.ZHhadr_cut`` whose ``nCleanedJetsPt30`` is at least 2.

    A falsy result of ``cuts.ZHhadr_cut`` is returned as-is; otherwise the
    outcome of the jet-count comparison is returned.
    """
    base_decision = cuts.ZHhadr_cut(row)
    if not base_decision:
        return base_decision
    return row["nCleanedJetsPt30"] >= 2

def mZZ0j_cut(row):
    """Accept rows whose ``nCleanedJetsPt30`` equals 0 (no further requirement)."""
    n_jets = row["nCleanedJetsPt30"]
    return n_jets == 0

def mZZ01j_cut(row):
    """Accept rows whose ``nCleanedJetsPt30`` equals 0 or 1 (no further requirement)."""
    n_jets = row["nCleanedJetsPt30"]
    return n_jets == 0 or n_jets == 1

def mZZ1j_cut(row):
    """Accept rows whose ``nCleanedJetsPt30`` equals 1 (no further requirement)."""
    n_jets = row["nCleanedJetsPt30"]
    return n_jets == 1

def mZZ2j_cut(row):
    """Accept rows whose ``nCleanedJetsPt30`` is at least 2 (no further requirement)."""
    n_jets = row["nCleanedJetsPt30"]
    return n_jets >= 2

In [8]:
def _ntuple_path(sample):
    """Return the path of the ZZ4lAnalysis ntuple stored under `sample` in MC_path."""
    return MC_path + sample + "/ZZ4lAnalysis.root"


def _cut_on_samples(cut, *samples):
    """Build a {ntuple path: cut} mapping that applies the same `cut` to every sample."""
    return {_ntuple_path(sample): cut for sample in samples}


# Category name -> {ntuple path: cut function selecting the events of that category}.
collections = {
    # VBF sample, split by jet multiplicity
    "VBF2j": _cut_on_samples(mZZ2j_cut, "VBFH125"),
    "VBF1j": _cut_on_samples(mZZ1j_cut, "VBFH125"),
    "VBF0j": _cut_on_samples(mZZ0j_cut, "VBFH125"),
    "VBF01j": _cut_on_samples(mZZ01j_cut, "VBFH125"),
    "VBF": _cut_on_samples(cuts.no_cut, "VBFH125"),
    # ggH sample, split by jet multiplicity
    "ggH2j": _cut_on_samples(mZZ2j_cut, "ggH125"),
    "ggH1j": _cut_on_samples(mZZ1j_cut, "ggH125"),
    "ggH0j": _cut_on_samples(mZZ0j_cut, "ggH125"),
    "ggH01j": _cut_on_samples(mZZ01j_cut, "ggH125"),
    "ggH": _cut_on_samples(cuts.no_cut, "ggH125"),
    # WH categories read both the W+ and the W- sample
    "WHh2j": _cut_on_samples(WHhadr2j_cut, "WplusH125", "WminusH125"),
    "WHh1j": _cut_on_samples(WHhadr1j_cut, "WplusH125", "WminusH125"),
    "WHh0j": _cut_on_samples(WHhadr0j_cut, "WplusH125", "WminusH125"),
    "WHh": _cut_on_samples(cuts.WHhadr_cut, "WplusH125", "WminusH125"),
    "WHh01j": _cut_on_samples(WHhadr01j_cut, "WplusH125", "WminusH125"),
    "WHl": _cut_on_samples(cuts.WHlept_cut, "WplusH125", "WminusH125"),
    # ZH categories
    "ZHh2j": _cut_on_samples(ZHhadr2j_cut, "ZH125"),
    "ZHh1j": _cut_on_samples(ZHhadr1j_cut, "ZH125"),
    "ZHh01j": _cut_on_samples(ZHhadr01j_cut, "ZH125"),
    "ZHh0j": _cut_on_samples(ZHhadr0j_cut, "ZH125"),
    "ZHh": _cut_on_samples(cuts.ZHhadr_cut, "ZH125"),
    "ZHl": _cut_on_samples(cuts.ZHlept_cut, "ZH125"),
    # ttH categories
    "ttHh": _cut_on_samples(cuts.ttHhadr_cut, "ttH125"),
    "ttHl": _cut_on_samples(cuts.ttHlept_cut, "ttH125"),
    "ZHMET": _cut_on_samples(cuts.ZHMET_cut, "ZH125"),
}

In [9]:
# all the model combinations for which neural networks are currently trained;
# each entry is a (H1 category, H0 category) pair and both names must be keys
# of `collections`
discriminant_pairs = [("VBF2j", "ggH2j"), ("VBF1j", "ggH1j"), ("VBF0j", "ggH0j"), ("WHh2j", "ggH2j"),
                      ("WHh1j", "ggH1j"), ("WHh0j", "ggH0j"), ("ZHh2j", "ggH2j"), ("ZHh1j", "ggH1j"),
                      ("ZHh0j", "ggH0j"), ("WHh2j", "ZHh2j"), ("WHh01j", "ZHh01j"), ("VBF2j", "WHh2j"),
                      ("VBF1j", "WHh1j"), ("VBF0j", "WHh0j"), ("VBF2j", "ZHh2j"), ("VBF1j", "ZHh1j"),
                      # fixed: the second name used to read "ZHh0h", which is not a known category
                      ("VBF0j", "ZHh0j"), ("WHl", "ggH"), ("WHl", "VBF"), ("WHl", "WHh"), ("WHl", "ZHh"),
                      ("WHl", "ZHl"), ("WHl", "ZHMET"), ("WHl", "ttHh"), ("WHl", "ttHl"), ("ZHh", "ZHl"),
                      ("ZHh", "ZHMET"), ("ZHh", "ttHh"), ("ZHh", "ttHl"), ("ZHl", "ggH"), ("ZHl", "VBF"),
                      ("ZHl", "WHh"), ("ZHl", "ZHMET"), ("ZHl", "ttHh"), ("ZHl", "ttHl"), ("ZHMET", "ggH"),
                      ("ZHMET", "VBF"), ("ZHMET", "WHh"), ("ZHMET", "ttHh"), ("ZHMET", "ttHl"), ("ttHh", "ggH"),
                      ("ttHh", "VBF"), ("ttHh", "WHh"), ("ttHh", "ttHl"), ("ttHl", "ggH"), ("ttHl", "VBF"),
                      ("ttHl", "WHh")]

In [10]:
def extract_order(df, col_basename, sorted_column, columns, order):
    """Pick, per row, the entries of list-valued columns that belong to one rank.

    For every row the list stored in column ``col_basename + sorted_column`` is
    ranked in descending order and the position of its ``order``-th largest
    entry is determined. That position is then used to read one element from
    each list column ``col_basename + column`` for ``column`` in ``columns``.

    Parameters:
        df: DataFrame whose cells in the relevant columns hold sequences.
        col_basename: common prefix of the list columns (e.g. "Jet").
        sorted_column: suffix of the column that defines the ranking.
        columns: suffixes of the columns to read the selected element from.
        order: zero-based rank to extract (0 = largest entry).

    Returns:
        DataFrame with one column per entry of ``columns``, named
        ``<col_basename><column>(<col_basename>Pt|<order>)``. Rows whose list
        has no entry at the requested rank get the value 0. Note that the
        column label always contains "Pt", independent of ``sorted_column``.
    """
    def get_index(row, order, col_basename, sorted_column):
        # the parameter is rebound here: from now on it holds the row's list of values
        sorted_column = row[col_basename + sorted_column]
        if order >= len(sorted_column):
            # fewer than order + 1 entries available in this row
            return -1
        else:
            # argsort is ascending; flipping it yields positions in descending order
            return np.flipud(np.argsort(sorted_column))[order]
    
    # NOTE(review): relies on transform forwarding axis / raw to a row-wise apply
    # that still hands label-indexable rows to the callback - confirm when
    # changing the pandas version
    index_column = pd.DataFrame(df.transform(lambda row: get_index(row, order, col_basename, sorted_column), axis = 1, raw = True))
    index_column.columns = ["index"]
    df_temp = pd.concat([index_column, df], axis = 1)
    
    def get_element(row, column_name):
        # -1 marks rows without an entry at the requested rank
        if row["index"] == -1:
            return 0
        else:
            return row[column_name][row["index"]]
        
    extracted_cols = pd.DataFrame()
    for column in columns:
        extracted_col = pd.DataFrame(df_temp.transform(lambda row: get_element(row, col_basename + column), axis = 1, raw = True))
        extracted_col.columns = [col_basename + column + "(" + col_basename + "Pt|" + str(order) + ")"]
        extracted_cols = pd.concat([extracted_cols, extracted_col], axis = 1)
        
    return extracted_cols

In [11]:
def prepare_data(df, col_basenames, sorted_column, columns, orders, pt_limits):
    """Unroll list-valued branches into scalar columns, one group per rank.

    For each basename (paired position-by-position with its minimum Pt from
    ``pt_limits``) and each rank in ``orders`` the columns produced by
    ``extract_order`` are collected. Rows whose extracted Pt lies below the
    limit have all values of that group set to 0.0.

    Returns:
        DataFrame holding all extracted columns side by side (empty when there
        is nothing to extract).
    """
    unrolled_all = pd.DataFrame()

    for basename, min_pt in zip(col_basenames, pt_limits):
        for rank in orders:
            unrolled = extract_order(df, basename, sorted_column, columns, rank)

            # name of the Pt column created by extract_order for this rank
            pt_label = basename + "Pt(" + basename + "Pt|" + str(rank) + ")"
            too_soft = unrolled[pt_label] < min_pt
            unrolled[too_soft] = 0.0

            unrolled_all = pd.concat([unrolled_all, unrolled], axis = 1)

    return unrolled_all

In [12]:
def get_data(H1_coll, H0_coll, read_branches, input_branches, list_branches, pt_limits):
    """Load both categories and return them restricted to the input columns.

    Parameters:
        H1_coll, H0_coll: objects providing ``get_data(branches, start, end)``;
            it is called with (read_branches, 0.0, 1.0).
        read_branches: branches requested from the collections.
        input_branches: scalar columns to keep in the result.
        list_branches: basenames of the list-valued branches to unroll
            (Pt, Eta and Phi of the four leading entries, ranked by Pt).
        pt_limits: minimum Pt per basename, forwarded to ``prepare_data``.

    Returns:
        Tuple (H1_df, H0_df) containing ``input_branches`` followed by the
        unrolled list columns.
    """
    kinematics = ["Pt", "Eta", "Phi"]

    H1_df = H1_coll.get_data(read_branches, 0.0, 1.0)
    H0_df = H0_coll.get_data(read_branches, 0.0, 1.0)

    H1_unrolled = prepare_data(H1_df, list_branches, "Pt", kinematics, range(4), pt_limits)
    H0_unrolled = prepare_data(H0_df, list_branches, "Pt", kinematics, range(4), pt_limits)

    # the column names of the unrolled frame are taken from the H1 sample
    selected_columns = list(input_branches) + list(H1_unrolled.columns)

    H1_full = pd.concat([H1_df, H1_unrolled], axis = 1)
    H0_full = pd.concat([H0_df, H0_unrolled], axis = 1)

    return H1_full[selected_columns], H0_full[selected_columns]

In [13]:
def get_data_dmatrix(H1_coll, H0_coll, read_branches, input_branches, list_branches, pt_limits):
    """Assemble a weighted, labelled ``xgb.DMatrix`` from two categories.

    The H1 rows are labelled 1 and the H0 rows 0. Each class receives a
    constant per-event weight of ``1 + N_other / N_own`` so that strongly
    unbalanced samples are evened out.

    Parameters:
        H1_coll, H0_coll, read_branches, input_branches, list_branches,
        pt_limits: forwarded unchanged to ``get_data``.

    Returns:
        xgb.DMatrix holding the H1 rows followed by the H0 rows.

    Raises:
        ValueError: if either category contains no events (the class weights
            would otherwise require a division by zero).
    """
    H1_df, H0_df = get_data(H1_coll, H0_coll, read_branches, input_branches, list_branches, pt_limits)

    complete_input_branches = H1_df.columns
    # single-argument print(...) behaves identically under Python 2 and 3
    print("number of input variables: " + str(len(complete_input_branches)))
    print("final list of inputs: " + str(complete_input_branches))

    num_H1 = len(H1_df)
    num_H0 = len(H0_df)
    if num_H1 == 0 or num_H0 == 0:
        raise ValueError("cannot build a DMatrix from an empty category: "
                         + str(num_H1) + " H1 events, " + str(num_H0) + " H0 events")

    # try with the same weights as used later in the neural network training, to balance out some (very)
    # unbalanced datasets
    H1_class_weight = 1.0 + float(num_H0) / float(num_H1)
    H0_class_weight = 1.0 + float(num_H1) / float(num_H0)

    print("using class weights: " + str(H1_class_weight) + " (H1), " + str(H0_class_weight) + " (H0)")

    H1_weights = np.full(num_H1, H1_class_weight)
    H0_weights = np.full(num_H0, H0_class_weight)

    # .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0
    H1_data = H1_df.values
    H0_data = H0_df.values
    H1_target = np.ones(np.shape(H1_data)[0])
    H0_target = np.zeros(np.shape(H0_data)[0])

    target = np.concatenate([H1_target, H0_target])
    data = np.concatenate([H1_data, H0_data])
    weights = np.concatenate([H1_weights, H0_weights])

    # hand over the feature names as a plain list rather than a pandas Index
    dmatrix = xgb.DMatrix(data, label = target, feature_names = list(complete_input_branches), weight = weights)

    return dmatrix

In [14]:
def get_feature_correlation(source, corr_branches, mandatory_branches, optional_branches, list_branches, pt_limits):
    """Draw the correlation matrix of ``corr_branches`` for one category.

    Parameters:
        source: key of ``collections`` naming the category to load.
        corr_branches: list of columns whose pairwise correlation is shown.
        mandatory_branches: columns that are always loaded.
        optional_branches: columns that are loaded only if they share one of
            the tags "0j", "1j" or "2j" with ``source``.
        list_branches, pt_limits: forwarded to ``get_data``.

    Returns:
        The matplotlib figure containing the correlation matrix.
    """
    coll = FileCollection(collections[source], 0.0, 0.5)

    input_branches = list(mandatory_branches)
    for optional_branch in optional_branches:
        # one append per tag shared between the category name and the branch name
        for jet_tag in ("0j", "1j", "2j"):
            if jet_tag in source and jet_tag in optional_branch:
                input_branches.append(optional_branch)

    # the same collection is passed twice; only the first frame is used
    df, _ = get_data(coll, coll, allbranches, input_branches, list_branches, pt_limits)
    df = df[corr_branches]

    fig = plt.figure(figsize=(10,15))
    ax = fig.add_subplot(111)

    image = ax.matshow(df.corr(), vmin = -1.0, vmax = 1.0, cmap = "RdBu")
    fig.colorbar(image)

    for axis in (ax.xaxis, ax.yaxis):
        axis.set_major_locator(ticker.MultipleLocator(1))

    # the leading empty label is consumed by the tick in front of the first row / column
    ax.set_yticklabels([''] + corr_branches)
    ax.set_xticklabels([''] + corr_branches, rotation = 'vertical')

    return fig

In [15]:
def get_interpolating_function(data, bins):
    """Turn histogram contents into a function of a scalar argument.

    The values in ``data`` are placed at the centres of the bins delimited by
    ``bins`` and interpolated linearly in between.

    Returns:
        Callable taking a scalar ``x``: the interpolated value strictly between
        the first and the last bin centre, and 0 everywhere else (the two
        outermost centres themselves included).
    """
    bin_centers = [np.mean([lower, upper]) for lower, upper in zip(bins[:-1], bins[1:])]
    intf = interpolate.interp1d(bin_centers, data, kind = "linear")

    def interpolated_function(x):
        if bin_centers[0] < x < bin_centers[-1]:
            return intf(x)
        return 0

    return interpolated_function

In [16]:
def get_binned_data(df, branch):
    """Histogram one column as a weighted, unit-area density.

    The bin width is ``2 * IQR / n**0.33`` (resembling the Freedman-Diaconis
    rule) with a lower bound of 0.005; the bins start at the smallest value and
    extend past the largest one.

    Parameters:
        df: DataFrame containing ``branch`` and a "training_weight" column.
        branch: name of the column to histogram.

    Returns:
        Tuple (hist, bin_edges) as produced by ``np.histogram``; the event
        weights are scaled such that ``sum(hist) * bin_width`` equals 1.
    """
    # .values replaces as_matrix(), which was removed in pandas 1.0
    data = df[branch].values
    weights = df["training_weight"].values

    # set the bin width
    q75, q25 = np.percentile(data, [75, 25])
    bin_width = max(2 * (q75 - q25) / len(data)**0.33, 0.005)

    data_max = np.max(data)
    data_min = np.min(data)
    bins = np.arange(data_min, data_max + bin_width, bin_width)

    # normalise to unit area (creates a new array, the frame is left untouched)
    weights = weights / (np.sum(weights) * bin_width)

    hist = np.histogram(data, bins = bins, weights = weights)
    return hist

In [17]:
def get_feature_importance_list_separation(disc_pair, mandatory_branches, optional_branches, list_branches, pt_limits):
    """Rank the input variables by the separation of their H1 / H0 densities.

    For every available column the unit-area densities of both categories are
    built, interpolated linearly and compared through the integral of
    ``(p1 + p0) * (p1 - p0)**2`` over the common range. The resulting values
    are normalised to sum to one.

    Parameters:
        disc_pair: (H1 name, H0 name); both must be keys of ``collections``.
        mandatory_branches: columns that are always considered.
        optional_branches: columns considered only if they match the jet
            category encoded in the names of the pair.
        list_branches, pt_limits: forwarded to ``get_data``.

    Returns:
        Tuple (None, None, implist) where implist maps column name to its
        normalised separation. The two leading ``None`` entries keep the return
        shape aligned with ``get_feature_importance_list_BDT``.
    """
    H1_name = disc_pair[0]
    H0_name = disc_pair[1]

    input_branches = [branch for branch in mandatory_branches]

    for optional_branch in optional_branches:
        if ("0j" in H1_name or "0j" in H0_name) and ("0j" in optional_branch):
            input_branches.append(optional_branch)
        elif ("1j" in H1_name or "1j" in H0_name) and ("1j" in optional_branch):
            input_branches.append(optional_branch)
        elif ("2j" in H1_name or "2j" in H0_name) and not ("1j" in optional_branch):
            input_branches.append(optional_branch)

    # needed to build the histograms
    input_branches.append("training_weight")

    H1_coll = FileCollection(collections[H1_name], 0.0, 0.5)
    H0_coll = FileCollection(collections[H0_name], 0.0, 0.5)

    H1_df, H0_df = get_data(H1_coll, H0_coll, allbranches, input_branches, list_branches, pt_limits)

    available_branches = H1_df.columns

    implist = {}
    for branch in available_branches:

        if "training_weight" not in branch:
            data_H1, bins_H1 = get_binned_data(H1_df, branch)
            data_H0, bins_H0 = get_binned_data(H0_df, branch)

            # both histograms are interpolated below, so both must be non-empty
            # (previously only the H1 histogram was checked)
            if len(data_H1) > 0 and len(data_H0) > 0:
                H1_func = get_interpolating_function(data_H1, bins_H1)
                H0_func = get_interpolating_function(data_H0, bins_H0)

                # compute the separation in this branch
                global_min = np.min(np.concatenate([bins_H0, bins_H1]))
                global_max = np.max(np.concatenate([bins_H0, bins_H1]))

                separation_func = lambda x: (H1_func(x) + H0_func(x)) * (H1_func(x) - H0_func(x))**2

                sep = integrate.quad(separation_func, global_min, global_max)[0]
            else:
                sep = 0.0

            print("separation for " + branch + " = " + str(sep))
            implist[branch] = sep

    # normalize the importance list; when every separation is zero there is
    # nothing to normalise and the zeros are returned unchanged
    impsum = sum(implist.values())
    if impsum != 0:
        for key in list(implist.keys()):
            implist[key] /= impsum

    return None, None, implist

In [18]:
def get_feature_importance_list_BDT(disc_pair, mandatory_branches, optional_branches, list_branches, pt_limits):
    """Rank the input variables by how often a boosted decision tree uses them.

    BDTs with maximum depths 1 to 7 are trained on one part of both categories
    and evaluated on another (the 0.0-0.5 and 0.5-1.0 ranges handed to
    FileCollection). The feature scores of the depth with the smallest
    validation RMSE are kept and normalised to sum to one.

    Parameters:
        disc_pair: (H1 name, H0 name); both must be keys of ``collections``.
        mandatory_branches: columns that are always used as input.
        optional_branches: columns used only if they match the jet category
            encoded in the names of the pair.
        list_branches, pt_limits: forwarded to ``get_data_dmatrix``.

    Returns:
        Tuple (best_params, feature_names, used_variables): the xgboost
        parameters of the best depth, the feature names of the training matrix
        and a dict mapping each used feature to its normalised score.
    """
    H1_name, H0_name = disc_pair[0], disc_pair[1]

    def category_has(tag):
        return tag in H1_name or tag in H0_name

    # the mandatory branches are always part of the input; an optional branch is
    # added only when its name fits the jet category of the pair. Fully inclusive
    # categories (no "xxj" in their name) therefore never receive optional branches.
    input_branches = list(mandatory_branches)
    for optional_branch in optional_branches:
        if ((category_has("0j") and "0j" in optional_branch)
                or (category_has("1j") and "1j" in optional_branch)
                or (category_has("2j") and "1j" not in optional_branch)):
            input_branches.append(optional_branch)

    # training sample ...
    H1_coll_train = FileCollection(collections[H1_name], 0.0, 0.5)
    H0_coll_train = FileCollection(collections[H0_name], 0.0, 0.5)
    dtrain = get_data_dmatrix(H1_coll_train, H0_coll_train, allbranches, input_branches, list_branches, pt_limits)

    # ... and validation sample
    H1_coll_val = FileCollection(collections[H1_name], 0.5, 1.0)
    H0_coll_val = FileCollection(collections[H0_name], 0.5, 1.0)
    dval = get_data_dmatrix(H1_coll_val, H0_coll_val, allbranches, input_branches, list_branches, pt_limits)

    evallist = [(dtrain, 'train'), (dval, 'eval')]

    params = {'eta': 0.01, 'silent': 1, 'gamma': 0.5, 'objective': 'binary:logistic',
              'nthread': 4, 'eval_metric': 'rmse'}
    max_num_rounds = 2000

    # scan the tree depth and remember the setting with the lowest validation RMSE
    best_loss = 1e6
    best_imp = None
    best_params = None
    for tree_depth in range(1,8):
        params['max_depth'] = tree_depth

        bst = xgb.train(params, dtrain, max_num_rounds, evals = evallist, early_stopping_rounds = 10, verbose_eval = False)

        pred = bst.predict(dval)
        cur_loss = np.sqrt(mean_squared_error(pred, dval.get_label()))
        cur_imp = bst.get_fscore()

        print("for max_depth = " + str(params['max_depth']) + ": loss = " + str(cur_loss))

        if cur_loss < best_loss:
            best_loss = cur_loss
            best_imp = copy.copy(cur_imp)
            best_params = copy.copy(params)

    # express every score as a fraction of the summed score of all features
    score_sum = sum(best_imp.values())
    ranked_scores = sorted(best_imp.items(), key = lambda item: item[1], reverse = True)
    used_variables = {name: score / float(score_sum) for name, score in ranked_scores}
    return best_params, dtrain.feature_names, used_variables

In [19]:
def get_histogram(df, branch, label):
    """Draw one column as a weighted, unit-area histogram on the current axes.

    The binning is the same as in ``get_binned_data``: width
    ``2 * IQR / n**0.33`` with a lower bound of 0.005, starting at the smallest
    value.

    Parameters:
        df: DataFrame containing ``branch`` and a "training_weight" column.
        branch: name of the column to draw.
        label: legend label of the histogram.

    Returns:
        The return value of ``plt.hist``.
    """
    # .values replaces as_matrix(), which was removed in pandas 1.0
    data = df[branch].values
    weights = df["training_weight"].values

    # set the bin width
    q75, q25 = np.percentile(data, [75, 25])
    bin_width = max(2 * (q75 - q25) / len(data)**0.33, 0.005)

    data_max = np.max(data)
    data_min = np.min(data)
    bins = np.arange(data_min, data_max + bin_width, bin_width)

    # normalise to unit area
    weights = weights / (np.sum(weights) * bin_width)

    fig = plt.hist(data, bins = bins, weights = weights, alpha = 0.5, label = label)
    return fig

In [20]:
def plot_branch(disc_pair, branch, start_fraction = 0.0, end_fraction = 1.0):
    """Overlay the distributions of one branch for both categories of a pair.

    Parameters:
        disc_pair: (H1 name, H0 name); both must be keys of ``collections``.
        branch: name of the column to draw.
        start_fraction, end_fraction: range handed to FileCollection for both
            categories.

    The figure is shown directly; nothing is returned.
    """
    category_names = (disc_pair[0], disc_pair[1])

    H1_coll = FileCollection(collections[category_names[0]], start_fraction, end_fraction)
    H0_coll = FileCollection(collections[category_names[1]], start_fraction, end_fraction)

    frames = get_data(H1_coll, H0_coll, allbranches, allbranches, list_branches, pt_limits)

    plt.figure()
    # H1 is drawn first, then H0, each labelled with its category name
    for frame, category in zip(frames, category_names):
        get_histogram(frame, branch, category)

    plt.legend(loc = 'upper right')
    plt.show()
In [21]:
def plot_variables(discs):
    """Plot the BDT feature importances of several discriminants as a matrix.

    Parameters:
        discs: non-empty sequence of (H1 name, H0 name) pairs.

    Returns:
        Tuple (fig, sorted_implist): the matplotlib figure (rows are variables,
        columns are discriminants) and the list of (variable, importance)
        tuples, sorted by decreasing importance, of the LAST pair in ``discs``.

    Raises:
        ValueError: if ``discs`` is empty.
    """
    if len(discs) == 0:
        raise ValueError("plot_variables needs at least one discriminant pair")

    plotframe = pd.DataFrame()

    for disc in discs:
        _, _, implist = get_feature_importance_list_BDT(disc, candidate_branches, MELA_branches, list_branches, pt_limits)

        # keep every variable with a non-zero importance (no further pruning is done here)
        cutimplist = {key: val for key, val in implist.items() if val > 0.00}
        curframe = pd.DataFrame(cutimplist, index = [len(plotframe)])

        plotframe = pd.concat([plotframe, curframe])

    # variables not used by a discriminant show up as NaN after the concat
    plotframe = plotframe.fillna(0.0)

    print(plotframe)
    print("number of pre-selected input variables = " + str(len(plotframe.columns)))

    # start the plotting
    parameters = plotframe.columns
    # .values replaces as_matrix(), which was removed in pandas 1.0
    plotdata = np.transpose(plotframe.values)

    plt.close('all')
    fig = plt.figure(figsize=(10,15))
    ax = fig.add_subplot(111)
    cax = ax.matshow(plotdata, cmap = 'Blues')

    # make axis labels: the first jet tag found in either name is stripped from
    # both names and appended as a suffix
    disclabels = []
    for disc in discs:
        for tag in ("0j", "01j", "1j", "2j"):
            if tag in disc[0] or tag in disc[1]:
                disclabels.append('D_' + re.sub(tag, '', disc[0]) + "_" + re.sub(tag, '', disc[1]) + "_" + tag)
                break
        else:
            # NOTE(review): pairs without any jet tag are also labelled "_2j" - confirm this is intended
            disclabels.append('D_' + disc[0] + "_" + disc[1] + "_2j")

    # the leading empty label is consumed by the tick in front of the first row / column
    disclabels = np.concatenate([[''], np.array(disclabels)])
    parameters = np.concatenate([[''], np.array(parameters)])

    ax.set_xticklabels(disclabels, rotation = 'vertical')
    ax.set_yticklabels(parameters)

    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    # sort the used variables (of the last discriminant) according to their importance
    sorted_implist = sorted(cutimplist.items(), key = lambda x: x[1], reverse = True)

    return fig, sorted_implist

In [22]:
def append_variables(confhandler, impdict, threshold_fscore):
    """Write the selected input variables of one discriminant into the configuration.

    Parameters:
        confhandler: object providing ``new_section(name)`` and
            ``get_section(name)``; the section is named after
            ``impdict["discriminant"]``.
        impdict: dict mapping variable name to a one-element sequence holding
            its importance, plus the key "discriminant" holding the section
            name.
        threshold_fscore: a variable is kept only if its importance is strictly
            larger than this value.

    Kept variables whose name contains "phi" or "Phi" are stored under
    "periodic_columns", all others under "nonperiodic_columns".
    """
    section_name = impdict["discriminant"]
    confhandler.new_section(section_name)
    cur_sec = confhandler.get_section(section_name)

    periodic_inputs = []
    nonperiodic_inputs = []
    for key, val in impdict.items():
        # The label entry holds a string, not an importance, so it has to be
        # skipped BEFORE the numeric comparison; compare by value (==), since
        # identity (is) of equal strings is not guaranteed.
        if key == "discriminant":
            continue

        if val[0] > threshold_fscore:
            if "phi" in key or "Phi" in key:
                periodic_inputs.append(key)
            else:
                nonperiodic_inputs.append(key)

    cur_sec["nonperiodic_columns"] = ConfigFileUtils.serialize_list(nonperiodic_inputs, lambda x: x)
    cur_sec["periodic_columns"] = ConfigFileUtils.serialize_list(periodic_inputs, lambda x: x)

In [23]:
def convert_varname(raw):
    """Return ``raw`` with round brackets replaced by square brackets."""
    return raw.replace('(', '[').replace(')', ']')

In [24]:
# Accumulates the importance tables: the cells below append one row per
# discriminant (columns are the converted variable names plus "discriminant").
df = pd.DataFrame()

In [25]:
# Location of the configuration file for the selected input variables.
# NOTE(review): absolute, site-specific path; presumably the file that
# confhandler is saved to later on - confirm.
out_dir = "/data_CMS/cms/wind/InputConfigurations/"
out_path = os.path.join(out_dir, "exclusive_99_fullmassrange.conf")
# A variable is written to the configuration only if its normalised importance
# is strictly larger than this value (see append_variables).
threshold_fscore = 0.01

In [26]:
# Receives one section per discriminant through append_variables.
confhandler = ConfigFileHandler()

In [27]:
%%capture
fig, implist = plot_variables([("VBF2j", "ggH2j")])

In [28]:
# Store the importances under bracket-converted variable names, tag them with
# the discriminant name and add them as a new row to df.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_ggH_2j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [29]:
append_variables(confhandler, impdict, threshold_fscore)

In [30]:
implist

[('D_VBF2j_ggH_ME', 0.06387977760718357),
 ('JetPt(JetPt|0)', 0.06278642380254496),
 ('JetPt(JetPt|1)', 0.05280666247935422),
 ('PFMET', 0.051317840277293136),
 ('ZZPt', 0.051061948961313885),
 ('JetEta(JetPt|1)', 0.04326889524740038),
 ('JetEta(JetPt|0)', 0.042803638309256285),
 ('JetPhi(JetPt|1)', 0.042780375462349085),
 ('ZZEta', 0.041873124432968103),
 ('Z1Pt', 0.04101239909740154),
 ('JetEta(JetPt|2)', 0.04066345639379347),
 ('Z2Mass', 0.0398027310582269),
 ('ZZPhi', 0.03694140088864075),
 ('JetPhi(JetPt|0)', 0.036452881103589456),
 ('ZZMass', 0.03638309256286784),
 ('D_WHh_ZHh_ME', 0.03570847000255891),
 ('ZZMassErr', 0.0356386814618373),
 ('Z2Pt', 0.0356386814618373),
 ('Z1Mass', 0.03501058459534278),
 ('D_WHh_ggH_ME', 0.02970665550050015),
 ('D_ZHh_ggH_ME', 0.029404238490706492),
 ('JetPt(JetPt|2)', 0.029055295787098426),
 ('D_VBF2j_WHh_ME', 0.02289064135668923),
 ('D_VBF2j_ZHh_ME', 0.022774327122153208),
 ('JetPhi(JetPt|2)', 0.01544653034638379),
 ('JetPhi(JetPt|3)', 0.0048386

In [31]:
%%capture
fig, implist = plot_variables([("VBF1j", "ggH1j")])

In [32]:
# Store the importances under bracket-converted variable names, tag them with
# the discriminant name and add them as a new row to df.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_ggH_1j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [33]:
append_variables(confhandler, impdict, threshold_fscore)

In [34]:
implist

[('JetEta(JetPt|0)', 0.14584154513204572),
 ('ZZEta', 0.0856873822975518),
 ('D_VBF1j_ggH_ME', 0.08505233653046029),
 ('JetPt(JetPt|0)', 0.08100118249901458),
 ('ZZPt', 0.07684053781807033),
 ('PFMET', 0.07561424254368677),
 ('Z1Pt', 0.0657162878290194),
 ('JetPhi(JetPt|0)', 0.05505189856786231),
 ('ZZMass', 0.05496430604826348),
 ('ZZMassErr', 0.05467963035956729),
 ('Z1Mass', 0.05386939955327815),
 ('ZZPhi', 0.0536942145140805),
 ('Z2Pt', 0.04979634739193273),
 ('Z2Mass', 0.04907370910524241),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0044672184995401395),
 ('Z1Flav', 0.003021941926159506),
 ('Z2Flav', 0.002364998029168309),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0016204616125782857),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.0011168046248850349),
 ('ExtraLepEta(ExtraLepPt|0)', 0.0003722682082950116),
 ('nExtraLep', 0.00015328690929794595)]

In [35]:
%%capture
fig, implist = plot_variables([("VBF0j", "ggH0j")])

In [36]:
# Store the importances under bracket-converted variable names, tag them with
# the discriminant name and add them as a new row to df.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_VBF_ggH_0j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [37]:
append_variables(confhandler, impdict, threshold_fscore)

In [38]:
implist

[('ZZPt', 0.1606582032113947),
 ('PFMET', 0.11248728270004865),
 ('ZZMass', 0.10903702393064095),
 ('ZZEta', 0.09970363161852523),
 ('Z1Pt', 0.09545715928694652),
 ('Z2Pt', 0.08966249391781307),
 ('ZZPhi', 0.08276197637899765),
 ('Z1Mass', 0.0816561242093157),
 ('Z2Mass', 0.08130225151501748),
 ('ZZMassErr', 0.07670190648914053),
 ('Z1Flav', 0.004644579112664219),
 ('Z2Flav', 0.0030963860751094794),
 ('nExtraLep', 0.0009731499093201221),
 ('ExtraLepEta(ExtraLepPt|0)', 0.0008846817357455655),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.0005308090414473393),
 ('ExtraLepPt(ExtraLepPt|0)', 0.00044234086787278275)]

In [39]:
%%capture
fig, implist = plot_variables([("WHh2j", "ggH2j")])

In [40]:
# Store the importances under bracket-converted variable names, tag them with
# the discriminant name and add them as a new row to df.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHh_ggH_2j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [41]:
append_variables(confhandler, impdict, threshold_fscore)

In [42]:
implist

[('JetPt(JetPt|0)', 0.05882962195753495),
 ('JetPt(JetPt|1)', 0.05427239772138788),
 ('D_VBF2j_ggH_ME', 0.0524287933713102),
 ('ZZEta', 0.05178663904712584),
 ('PFMET', 0.04996374935266701),
 ('ZZMass', 0.04967374417400311),
 ('JetEta(JetPt|0)', 0.04859658208182289),
 ('ZZPt', 0.048513723459347485),
 ('D_WHh_ggH_ME', 0.04493008803728638),
 ('Z1Pt', 0.040932159502848266),
 ('JetEta(JetPt|2)', 0.03950284826514759),
 ('ZZPhi', 0.03554634904194718),
 ('JetEta(JetPt|1)', 0.035380631796996374),
 ('JetPhi(JetPt|0)', 0.03469704816157431),
 ('Z2Pt', 0.034137752459865355),
 ('ZZMassErr', 0.03393060590367685),
 ('D_WHh_ZHh_ME', 0.033516312791299845),
 ('Z1Mass', 0.032749870533402385),
 ('Z2Mass', 0.03250129466597618),
 ('D_ZHh_ggH_ME', 0.032128430864836875),
 ('JetPhi(JetPt|1)', 0.03132055929570171),
 ('JetPhi(JetPt|2)', 0.024919730709476954),
 ('JetPt(JetPt|2)', 0.02392542723977214),
 ('D_VBF2j_ZHh_ME', 0.022931123770067324),
 ('D_VBF2j_WHh_ME', 0.01615743138270326),
 ('JetEta(JetPt|3)', 0.00901

In [43]:
%%capture
fig, implist = plot_variables([("WHh1j", "ggH1j")])

In [44]:
# Store the importances under bracket-converted variable names, tag them with
# the discriminant name and add them as a new row to df.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHh_ggH_1j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [45]:
append_variables(confhandler, impdict, threshold_fscore)

In [46]:
implist

[('JetEta(JetPt|0)', 0.13845729267283013),
 ('ZZEta', 0.11585483648406238),
 ('JetPt(JetPt|0)', 0.09405271146681385),
 ('D_VBF1j_ggH_ME', 0.07870843107492756),
 ('ZZPt', 0.07639023044018214),
 ('PFMET', 0.0639161032151235),
 ('ZZMassErr', 0.061018352421691735),
 ('ZZMass', 0.05900372567959156),
 ('Z1Pt', 0.05362218849178971),
 ('ZZPhi', 0.053456602732165036),
 ('Z2Mass', 0.051717952256105974),
 ('Z2Pt', 0.051717952256105974),
 ('Z1Mass', 0.04570166965640955),
 ('JetPhi(JetPt|0)', 0.044404581206016286),
 ('Z2Flav', 0.004139643990616807),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0025665792741824205),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0020146267421001793),
 ('Z1Flav', 0.0016282599696426107),
 ('ExtraLepEta(ExtraLepPt|0)', 0.0008831240513315855),
 ('nExtraLep', 0.0007175382917069132),
 ('ExtraLepPhi(ExtraLepPt|0)', 2.7597626604112046e-05)]

In [47]:
%%capture
fig, implist = plot_variables([("WHh0j", "ggH0j")])

In [48]:
# Store the importances under bracket-converted variable names, tag them with
# the discriminant name and add them as a new row to df.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_WHh_ggH_0j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [49]:
append_variables(confhandler, impdict, threshold_fscore)

In [50]:
implist

[('ZZPt', 0.16273490603758498),
 ('PFMET', 0.11168865787018525),
 ('ZZMass', 0.10655737704918032),
 ('ZZEta', 0.10582433693189391),
 ('ZZMassErr', 0.09429561508729842),
 ('ZZPhi', 0.08869785419165667),
 ('Z2Mass', 0.08483273357323737),
 ('Z2Pt', 0.07963481274157004),
 ('Z1Pt', 0.07730241236838598),
 ('Z1Mass', 0.07570305211248834),
 ('Z1Flav', 0.004998000799680128),
 ('Z2Flav', 0.0035319205651072904),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.0032653605224576836),
 ('nExtraLep', 0.0008663201386112222),
 ('ExtraLepEta(ExtraLepPt|0)', 6.66400106624017e-05)]

In [51]:
%%capture
fig, implist = plot_variables([("ZHh2j", "ggH2j")])

In [52]:
# Store the importances under bracket-converted variable names, tag them with
# the discriminant name and add them as a new row to df.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHh_ggH_2j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [53]:
append_variables(confhandler, impdict, threshold_fscore)

In [54]:
implist

[('JetPt(JetPt|0)', 0.0652191591445068),
 ('JetPt(JetPt|1)', 0.05354964625518419),
 ('D_ZHh_ggH_ME', 0.05139464910140685),
 ('ZZPt', 0.051110026835813616),
 ('JetEta(JetPt|0)', 0.05037814101000244),
 ('D_VBF2j_ggH_ME', 0.0499308774497845),
 ('Z1Pt', 0.04826380417988127),
 ('ZZEta', 0.046840692851915104),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.04106692689273807),
 ('ZZPhi', 0.04086362527445719),
 ('JetEta(JetPt|2)', 0.03956249491745954),
 ('JetEta(JetPt|1)', 0.03874928844433602),
 ('Z2Mass', 0.03854598682605514),
 ('JetPhi(JetPt|0)', 0.03427665284215663),
 ('PFMET', 0.03374806863462633),
 ('ZZMass', 0.033260144750752216),
 ('D_WHh_ggH_ME', 0.030617223713100758),
 ('Z1Mass', 0.03045458241847605),
 ('Z2Pt', 0.02935675367975929),
 ('D_WHh_ZHh_ME', 0.026876473936732537),
 ('D_VBF2j_WHh_ME', 0.025006099048548426),
 ('JetPhi(JetPt|2)', 0.02427421322273725),
 ('ZZMassErr', 0.023461006749613728),
 ('JetPt(JetPt|2)', 0.023379686102301376),
 ('JetPhi(JetPt|1)', 0.021712612832398146),
 ('D_VBF2j_ZH

In [55]:
%%capture
fig, implist = plot_variables([("ZHh1j", "ggH1j")])

In [56]:
# Store the importances under bracket-converted variable names, tag them with
# the discriminant name and add them as a new row to df.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHh_ggH_1j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [57]:
append_variables(confhandler, impdict, threshold_fscore)

In [58]:
implist

[('JetEta(JetPt|0)', 0.11710013282488539),
 ('ZZEta', 0.11401516774497622),
 ('JetPt(JetPt|0)', 0.09486267620720683),
 ('D_VBF1j_ggH_ME', 0.08800719825185312),
 ('ZZPhi', 0.0754102575088907),
 ('ZZPt', 0.07112558378679464),
 ('ZZMass', 0.06559835468529071),
 ('Z2Mass', 0.056729080080551865),
 ('Z1Pt', 0.05437250953339903),
 ('PFMET', 0.05253009983289773),
 ('JetPhi(JetPt|0)', 0.048502506534127424),
 ('Z1Mass', 0.046960023994172845),
 ('ZZMassErr', 0.0463601696730794),
 ('Z2Pt', 0.04408929260036848),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.012468400531299542),
 ('ExtraLepPt(ExtraLepPt|0)', 0.003084965079909165),
 ('nExtraLep', 0.0026993444449205195),
 ('Z2Flav', 0.002185183598268992),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.0016281760143965038),
 ('Z1Flav', 0.001242555379407858),
 ('ExtraLepEta(ExtraLepPt|0)', 0.001028321693303055)]

In [59]:
%%capture
fig, implist = plot_variables([("ZHh0j", "ggH0j")])

In [60]:
# Store the importances under bracket-converted variable names, tag them with
# the discriminant name and add them as a new row to df.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHh_ggH_0j_ML"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [61]:
append_variables(confhandler, impdict, threshold_fscore)

In [62]:
implist

[('ZZPt', 0.14670854600183222),
 ('Z2Mass', 0.11870174060986782),
 ('ZZMassErr', 0.10914801727522576),
 ('Z2Pt', 0.10522182960345504),
 ('ZZPhi', 0.10208087946603847),
 ('Z1Mass', 0.08663787462374035),
 ('Z1Pt', 0.08624525585656327),
 ('ZZEta', 0.08297343279675436),
 ('PFMET', 0.07420494699646643),
 ('ZZMass', 0.07302709069493522),
 ('Z1Flav', 0.007852375343541421),
 ('ExtraLepPt(ExtraLepPt|0)', 0.004973171050909567),
 ('Z2Flav', 0.0014396021463159272),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.0007852375343541421)]

In [63]:
%%capture
fig, implist = plot_variables([("WHh2j", "ZHh2j")])

In [64]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHh_ZHh_2j_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [65]:
append_variables(confhandler, impdict, threshold_fscore)

In [66]:
implist

[('D_WHh_ZHh_ME', 0.0660503845882436),
 ('JetPhi(JetPt|1)', 0.05440688730505963),
 ('PFMET', 0.051937054548020604),
 ('ZZMass', 0.0467151224331381),
 ('Z2Mass', 0.04495095617811023),
 ('ZZMassErr', 0.04219885682026674),
 ('JetEta(JetPt|0)', 0.04219885682026674),
 ('D_WHh_ggH_ME', 0.041704890268858937),
 ('JetPt(JetPt|1)', 0.04001129066403218),
 ('Z1Mass', 0.03987015736362995),
 ('JetPt(JetPt|0)', 0.03923505751181992),
 ('Z2Pt', 0.03768259120739539),
 ('ZZEta', 0.037259191306188696),
 ('JetPhi(JetPt|0)', 0.03690635805518312),
 ('Z1Pt', 0.03669465810457977),
 ('JetEta(JetPt|1)', 0.035565591701361934),
 ('ZZPhi', 0.03168442594030062),
 ('JetPhi(JetPt|2)', 0.030978759438289465),
 ('D_ZHh_ggH_ME', 0.03076705948768612),
 ('ZZPt', 0.02780326017923929),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.027732693529038177),
 ('JetPt(JetPt|2)', 0.026533060475619222),
 ('D_VBF2j_ggH_ME', 0.025403994072401383),
 ('D_VBF2j_WHh_ME', 0.02505116082139581),
 ('D_VBF2j_ZHh_ME', 0.02194622821254675),
 ('JetEta(JetPt

In [67]:
%%capture
fig, implist = plot_variables([("WHh1j", "ZHh1j")])

In [68]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHh_ZHh_1j_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [69]:
append_variables(confhandler, impdict, threshold_fscore)

In [70]:
implist

[('ZZMass', 0.11099712761592122),
 ('Z2Mass', 0.0883258104226508),
 ('Z2Pt', 0.08627410750923266),
 ('ZZMassErr', 0.08165777595404186),
 ('ZZPhi', 0.0720147722609766),
 ('PFMET', 0.07129667624128026),
 ('Z1Mass', 0.06955272876487485),
 ('JetPt(JetPt|0)', 0.0613459171112023),
 ('ZZPt', 0.05970455478046779),
 ('D_VBF1j_ggH_ME', 0.05857611817808781),
 ('JetPhi(JetPt|0)', 0.05642183011899877),
 ('ZZEta', 0.05365203118588428),
 ('JetEta(JetPt|0)', 0.05313910545752975),
 ('Z1Pt', 0.050984817398440706),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.018362741075092327),
 ('Z2Flav', 0.0029749692244562987),
 ('Z1Flav', 0.0025646286417726713),
 ('ExtraLepPt(ExtraLepPt|0)', 0.002154288059089044)]

In [71]:
%%capture
fig, implist = plot_variables([("WHh0j", "ZHh0j")])

In [72]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHh_ZHh_0j_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [73]:
append_variables(confhandler, impdict, threshold_fscore)

In [74]:
implist

[('PFMET', 0.18597560975609756),
 ('ZZPhi', 0.1524390243902439),
 ('ZZPt', 0.1402439024390244),
 ('ZZEta', 0.125),
 ('Z1Pt', 0.11890243902439024),
 ('Z2Pt', 0.07926829268292683),
 ('Z2Mass', 0.07317073170731707),
 ('Z1Mass', 0.06402439024390244),
 ('ZZMassErr', 0.04573170731707317),
 ('nExtraLep', 0.01524390243902439)]

In [75]:
%%capture
fig, implist = plot_variables([("VBF2j", "WHh2j")])

In [76]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_VBF_WHh_2j_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [77]:
append_variables(confhandler, impdict, threshold_fscore)

In [78]:
implist

[('JetEta(JetPt|2)', 0.08227812000912617),
 ('D_VBF2j_ggH_ME', 0.07805726671229751),
 ('JetEta(JetPt|0)', 0.06202943189596167),
 ('JetPt(JetPt|2)', 0.053416609628108604),
 ('ZZMass', 0.047256445357061375),
 ('JetEta(JetPt|1)', 0.04637234770704997),
 ('ZZEta', 0.0453171343828428),
 ('D_WHh_ggH_ME', 0.04200889801505818),
 ('JetPt(JetPt|0)', 0.04121035820214465),
 ('PFMET', 0.039812913529545975),
 ('D_WHh_ZHh_ME', 0.03684690851015286),
 ('ZZPt', 0.035706137348847825),
 ('JetPt(JetPt|1)', 0.03553502167465206),
 ('Z2Mass', 0.03259753593429158),
 ('ZZPhi', 0.03171343828428017),
 ('Z1Pt', 0.031399726214921285),
 ('D_ZHh_ggH_ME', 0.03094341775039927),
 ('Z1Mass', 0.02803445128907141),
 ('Z2Pt', 0.027663700661647275),
 ('D_VBF2j_ZHh_ME', 0.02726443075519051),
 ('JetPhi(JetPt|0)', 0.022872461784166097),
 ('ZZMassErr', 0.02255874971480721),
 ('JetPhi(JetPt|1)', 0.019507186858316223),
 ('D_VBF2j_WHh_ME', 0.01691193246634725),
 ('JetPhi(JetPt|2)', 0.01676933607118412),
 ('JetEta(JetPt|3)', 0.011464

In [79]:
%%capture
fig, implist = plot_variables([("VBF1j", "WHh1j")])

In [80]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_VBF_WHh_1j_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [81]:
append_variables(confhandler, impdict, threshold_fscore)

In [82]:
implist

[('JetEta(JetPt|0)', 0.1595338788268824),
 ('ZZEta', 0.1399282890502896),
 ('JetPt(JetPt|0)', 0.10434862554013055),
 ('PFMET', 0.06925163188379149),
 ('D_VBF1j_ggH_ME', 0.06123011859887837),
 ('ZZPt', 0.05950629769237841),
 ('JetPhi(JetPt|0)', 0.05860991082099844),
 ('ZZMassErr', 0.05334651098648525),
 ('Z2Pt', 0.050979130274891975),
 ('Z1Pt', 0.049783947779718674),
 ('ZZPhi', 0.04971499494345868),
 ('Z2Mass', 0.04971499494345868),
 ('Z1Mass', 0.04688792865679875),
 ('ZZMass', 0.03992369219453894),
 ('Z2Flav', 0.0030109405166865865),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0025742392203732646),
 ('Z1Flav', 0.0014480095614599613),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0001149213937666636),
 ('ExtraLepEta(ExtraLepPt|0)', 9.193711501333089e-05)]

In [83]:
%%capture
fig, implist = plot_variables([("VBF0j", "WHh0j")])

In [84]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_VBF_WHh_0j_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [85]:
append_variables(confhandler, impdict, threshold_fscore)

In [86]:
implist

[('ZZMassErr', 0.13749382512761402),
 ('ZZEta', 0.12646138646467972),
 ('PFMET', 0.11509962127449366),
 ('Z2Pt', 0.09550469290301333),
 ('ZZMass', 0.0936933970031286),
 ('Z1Mass', 0.09188210110324387),
 ('Z1Pt', 0.09089412152148856),
 ('ZZPhi', 0.08200230528569076),
 ('ZZPt', 0.07689774411328833),
 ('Z2Mass', 0.07327515231351886),
 ('Z2Flav', 0.004775234645150667),
 ('Z1Flav', 0.004610571381524782),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.002140622427136506),
 ('ExtraLepPt(ExtraLepPt|0)', 0.002140622427136506),
 ('ExtraLepEta(ExtraLepPt|0)', 0.0016466326362588506),
 ('nExtraLep', 0.0014819693726329656)]

In [87]:
%%capture
fig, implist = plot_variables([("VBF2j", "ZHh2j")])

In [88]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_VBF_ZHh_2j_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [89]:
append_variables(confhandler, impdict, threshold_fscore)

In [90]:
implist

[('JetEta(JetPt|2)', 0.07775507751472178),
 ('D_VBF2j_ggH_ME', 0.07063453911789448),
 ('JetEta(JetPt|0)', 0.06204182189640668),
 ('ZZEta', 0.06002884268717702),
 ('D_ZHh_ggH_ME', 0.04828145655570244),
 ('PFMET', 0.04566758803028482),
 ('JetEta(JetPt|1)', 0.04173176300925369),
 ('JetPt(JetPt|2)', 0.037825982454031966),
 ('ZZMass', 0.03662420382165605),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.03473140247566398),
 ('D_WHh_ZHh_ME', 0.03358971277490686),
 ('D_VBF2j_ZHh_ME', 0.03346953491166927),
 ('ZZMassErr', 0.031005888715298642),
 ('Z2Pt', 0.03001442134358851),
 ('JetPhi(JetPt|0)', 0.02989424348035092),
 ('Z1Pt', 0.02974402115130393),
 ('Z2Mass', 0.029083042903497176),
 ('JetPhi(JetPt|1)', 0.02887273164283139),
 ('ZZPt', 0.027130152625886313),
 ('D_VBF2j_WHh_ME', 0.02700997476264872),
 ('JetPt(JetPt|1)', 0.02700997476264872),
 ('JetPt(JetPt|0)', 0.026108640788366784),
 ('Z1Mass', 0.02475663982694388),
 ('ZZPhi', 0.02337459439971157),
 ('D_WHh_ggH_ME', 0.021902415575051074),
 ('JetPhi(JetPt

In [91]:
%%capture
fig, implist = plot_variables([("VBF1j", "ZHh1j")])

In [92]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_VBF_ZHh_1j_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [93]:
append_variables(confhandler, impdict, threshold_fscore)

In [94]:
implist

[('JetEta(JetPt|0)', 0.15612302306682502),
 ('ZZEta', 0.15040320060011253),
 ('JetPt(JetPt|0)', 0.09007938988560354),
 ('PFMET', 0.06576233043695692),
 ('ZZMass', 0.06069888104019504),
 ('ZZMassErr', 0.05919859973745077),
 ('ZZPt', 0.05788585359754954),
 ('Z1Mass', 0.05763580671375883),
 ('Z2Mass', 0.05447896480590111),
 ('D_VBF1j_ggH_ME', 0.05307245108457836),
 ('Z1Pt', 0.04360192536100519),
 ('ZZPhi', 0.042101644058260924),
 ('Z2Pt', 0.038726011127086325),
 ('JetPhi(JetPt|0)', 0.03566293680065012),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.018534725260986436),
 ('Z2Flav', 0.006594986559979996),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.005532287303869476),
 ('ExtraLepEta(ExtraLepPt|0)', 0.0017190723260611365),
 ('Z1Flav', 0.0010001875351628431),
 ('nExtraLep', 0.0007501406513721323),
 ('ExtraLepPt(ExtraLepPt|0)', 0.00043758204663374385)]

In [95]:
%%capture
fig, implist = plot_variables([("VBF0j", "ZHh0j")])

In [96]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_VBF_ZHh_0j_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [97]:
append_variables(confhandler, impdict, threshold_fscore)

In [98]:
implist

[('ZZPhi', 0.14472727272727273),
 ('PFMET', 0.12290909090909091),
 ('Z2Mass', 0.11927272727272727),
 ('ZZEta', 0.10763636363636364),
 ('ZZPt', 0.10109090909090909),
 ('Z2Pt', 0.09454545454545454),
 ('ZZMass', 0.09236363636363637),
 ('Z1Pt', 0.06836363636363636),
 ('ZZMassErr', 0.06327272727272727),
 ('Z1Mass', 0.04872727272727273),
 ('ExtraLepPt(ExtraLepPt|0)', 0.01890909090909091),
 ('Z1Flav', 0.009454545454545455),
 ('ExtraLepEta(ExtraLepPt|0)', 0.006545454545454545),
 ('Z2Flav', 0.002181818181818182)]

In [99]:
%%capture
fig, implist = plot_variables([("WHl", "ggH")])

In [100]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHl_ggH_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [101]:
append_variables(confhandler, impdict, threshold_fscore)

In [102]:
implist

[('PFMET', 0.1252445394590888),
 ('ZZMass', 0.11310328136659742),
 ('ZZPt', 0.09087792186838231),
 ('Z1Mass', 0.06790633886614364),
 ('ZZMassErr', 0.06439707157695178),
 ('JetPt(JetPt|0)', 0.057862573866042794),
 ('ZZEta', 0.05671298630579029),
 ('ZZPhi', 0.054857511647137126),
 ('Z2Pt', 0.05469616602464555),
 ('Z1Pt', 0.05223564528164895),
 ('Z2Mass', 0.045902829598854444),
 ('JetEta(JetPt|0)', 0.04080027428755824),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03922715446826533),
 ('JetPhi(JetPt|0)', 0.03275316136579069),
 ('ExtraLepEta(ExtraLepPt|0)', 0.016174898654780872),
 ('JetEta(JetPt|1)', 0.013855555331464414),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.01228243551217151),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.011374866385656374),
 ('JetPhi(JetPt|1)', 0.01085049311255874),
 ('JetPt(JetPt|1)', 0.007684085271161487),
 ('nCleanedJetsPt30', 0.005062218905673315),
 ('ExtraLepPhi(ExtraLepPt|1)', 0.005001714297238973),
 ('JetPhi(JetPt|2)', 0.0045378456325756815),
 ('Z1Flav', 0.0035697718976262023),
 ('nExtr

In [103]:
%%capture
fig, implist = plot_variables([("WHl", "VBF")])

In [104]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHl_VBF_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [105]:
append_variables(confhandler, impdict, threshold_fscore)

In [106]:
implist

[('JetEta(JetPt|0)', 0.12277066671830522),
 ('PFMET', 0.10083073526848822),
 ('ZZMass', 0.09420809047074999),
 ('ZZEta', 0.06938285470846808),
 ('JetPt(JetPt|0)', 0.05755117058151469),
 ('JetEta(JetPt|1)', 0.05121899266087024),
 ('ZZMassErr', 0.0486047907670262),
 ('Z1Mass', 0.047326736507813556),
 ('Z2Mass', 0.04655215816889681),
 ('ZZPt', 0.04542901957746752),
 ('Z1Pt', 0.043976685191998606),
 ('ExtraLepEta(ExtraLepPt|0)', 0.0418078658430317),
 ('ZZPhi', 0.03803179644081253),
 ('JetPhi(JetPt|0)', 0.03760577835440832),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03745086268662497),
 ('Z2Pt', 0.037218489184949945),
 ('JetPt(JetPt|1)', 0.020158401270308476),
 ('JetPhi(JetPt|1)', 0.01409732576828489),
 ('JetEta(JetPt|2)', 0.010727909993997017),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.007823241223059198),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.004937936910594295),
 ('JetPhi(JetPt|2)', 0.004395732073352569),
 ('JetPt(JetPt|2)', 0.003989078445421274),
 ('JetPt(JetPt|3)', 0.0031176778141399275),
 ('Z2Flav', 0.0

In [107]:
%%capture
fig, implist = plot_variables([("WHl", "WHh")])

In [108]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHl_WHh_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [109]:
append_variables(confhandler, impdict, threshold_fscore)

In [110]:
implist

[('PFMET', 0.17002667599597673),
 ('ZZMass', 0.1086281541085407),
 ('JetPt(JetPt|0)', 0.08007171907115057),
 ('ExtraLepPt(ExtraLepPt|0)', 0.062491800411072725),
 ('JetEta(JetPt|0)', 0.058293610880307864),
 ('ZZPt', 0.05348318537630647),
 ('Z1Mass', 0.048541566449468665),
 ('ZZMassErr', 0.04429964577775834),
 ('Z1Pt', 0.04049503651550269),
 ('ZZEta', 0.037958630340665586),
 ('Z2Mass', 0.037215200944592644),
 ('JetPhi(JetPt|0)', 0.0359469978571741),
 ('ZZPhi', 0.03507237503826475),
 ('Z2Pt', 0.034066558796519),
 ('JetEta(JetPt|1)', 0.028119123627935453),
 ('JetPt(JetPt|1)', 0.023614816110552325),
 ('ExtraLepEta(ExtraLepPt|0)', 0.020728560808151483),
 ('JetPhi(JetPt|1)', 0.0169676826868413),
 ('JetPt(JetPt|2)', 0.012200988323785368),
 ('JetEta(JetPt|2)', 0.010626667249748546),
 ('nExtraLep', 0.008658765907202519),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.00848384134342065),
 ('nCleanedJetsPt30', 0.007740411947347706),
 ('JetPhi(JetPt|2)', 0.004722963222110465),
 ('JetPhi(JetPt|3)', 0.002711330738

In [111]:
%%capture
fig, implist = plot_variables([("WHl", "ZHh")])

In [112]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHl_ZHh_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [113]:
append_variables(confhandler, impdict, threshold_fscore)

In [114]:
implist

[('PFMET', 0.1596031357177854),
 ('ZZMass', 0.13602400783929447),
 ('JetPt(JetPt|0)', 0.06106075453209211),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0589171974522293),
 ('ExtraLepPt(ExtraLepPt|0)', 0.0583047525722685),
 ('JetEta(JetPt|0)', 0.05597746202841744),
 ('JetPhi(JetPt|0)', 0.05377266046055855),
 ('Z2Pt', 0.05101665850073493),
 ('ZZEta', 0.0408500734933856),
 ('ZZPt', 0.0383390494855463),
 ('Z1Pt', 0.03791033806957374),
 ('ZZPhi', 0.03307202351788339),
 ('Z1Mass', 0.03288829005389515),
 ('ZZMassErr', 0.030254777070063694),
 ('JetPt(JetPt|1)', 0.02449779519843214),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.02235423811856933),
 ('Z2Mass', 0.020088192062714356),
 ('ExtraLepEta(ExtraLepPt|0)', 0.018312101910828025),
 ('nExtraLep', 0.014453699167074964),
 ('JetEta(JetPt|1)', 0.012922586967172954),
 ('nCleanedJetsPt30', 0.012616364527192553),
 ('JetPhi(JetPt|1)', 0.007839294463498285),
 ('JetEta(JetPt|2)', 0.006308182263596277),
 ('JetPt(JetPt|2)', 0.004164625183733464),
 ('JetPhi(JetPt|2)', 0.0

In [115]:
%%capture
fig, implist = plot_variables([("WHl", "ZHl")])

In [116]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHl_ZHl_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [117]:
append_variables(confhandler, impdict, threshold_fscore)

In [118]:
implist

[('PFMET', 0.1050065731814198),
 ('ZZMass', 0.10283377154542798),
 ('Z2Mass', 0.09547546012269939),
 ('Z1Mass', 0.09350350569675724),
 ('Z2Pt', 0.07228673678060181),
 ('Z1Pt', 0.05793529068068946),
 ('ZZPt', 0.05786225533158049),
 ('ZZEta', 0.05092389716622846),
 ('ZZMassErr', 0.04582968156587788),
 ('JetPt(JetPt|0)', 0.04121019573473561),
 ('ZZPhi', 0.03953038270522933),
 ('ExtraLepPt(ExtraLepPt|0)', 0.030693105463044112),
 ('JetEta(JetPt|0)', 0.029981010809231667),
 ('JetPhi(JetPt|0)', 0.025325007303534913),
 ('ExtraLepEta(ExtraLepPt|0)', 0.02216622845457201),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.016579024247735902),
 ('JetEta(JetPt|1)', 0.015191352614665497),
 ('JetPhi(JetPt|1)', 0.014607069821793748),
 ('JetPt(JetPt|1)', 0.013694127957931639),
 ('nExtraLep', 0.01219690330119778),
 ('ExtraLepPt(ExtraLepPt|1)', 0.008015629564709319),
 ('nExtraZ', 0.007449605609114811),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.007413087934560327),
 ('Z2Flav', 0.006007157464212679),
 ('JetEta(JetPt|2)', 0.005

In [119]:
%%capture
fig, implist = plot_variables([("WHl", "ZHMET")])

In [120]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHl_ZHMET_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [121]:
append_variables(confhandler, impdict, threshold_fscore)

In [122]:
implist

[('ZZMass', 0.14070988772183993),
 ('PFMET', 0.12205722564288302),
 ('ZZPt', 0.08167330677290836),
 ('Z1Mass', 0.06996257394663769),
 ('Z1Pt', 0.06416757213569962),
 ('Z2Mass', 0.05396595436436074),
 ('ZZMassErr', 0.050887359652299895),
 ('Z2Pt', 0.050344078232524445),
 ('ZZEta', 0.047446577327055414),
 ('JetEta(JetPt|0)', 0.0447301702281782),
 ('ExtraLepPt(ExtraLepPt|0)', 0.044186888808402755),
 ('ZZPhi', 0.043764336593021855),
 ('JetPt(JetPt|0)', 0.040323554267777374),
 ('JetPhi(JetPt|0)', 0.02933719666787396),
 ('ExtraLepEta(ExtraLepPt|0)', 0.025232403718459494),
 ('JetEta(JetPt|1)', 0.01774719304599783),
 ('JetPhi(JetPt|1)', 0.014547869129542436),
 ('ExtraLepPhi(ExtraLepPt|1)', 0.012495472654835204),
 ('nExtraLep', 0.01032234697573343),
 ('JetPt(JetPt|1)', 0.008632138114209827),
 ('JetEta(JetPt|2)', 0.00597609561752988),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.004285886756006278),
 ('JetPt(JetPt|2)', 0.0030785947120608474),
 ('ExtraLepEta(ExtraLepPt|1)', 0.0028371363032717615),
 ('nExtraZ

In [123]:
%%capture
fig, implist = plot_variables([("WHl", "ttHh")])

In [124]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHl_ttHh_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [125]:
append_variables(confhandler, impdict, threshold_fscore)

In [126]:
implist

[('PFMET', 0.12519230769230769),
 ('JetPt(JetPt|0)', 0.08007692307692307),
 ('ExtraLepPt(ExtraLepPt|0)', 0.06984615384615385),
 ('ZZMass', 0.05815384615384615),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.05434615384615385),
 ('JetEta(JetPt|1)', 0.04634615384615385),
 ('ZZMassErr', 0.04592307692307692),
 ('Z1Mass', 0.04553846153846154),
 ('JetPt(JetPt|1)', 0.04296153846153846),
 ('JetEta(JetPt|0)', 0.03619230769230769),
 ('ZZEta', 0.035653846153846154),
 ('Z2Mass', 0.03519230769230769),
 ('JetPhi(JetPt|0)', 0.03023076923076923),
 ('JetPt(JetPt|2)', 0.029846153846153845),
 ('ZZPt', 0.029576923076923077),
 ('Z2Pt', 0.02880769230769231),
 ('Z1Pt', 0.026115384615384617),
 ('JetPt(JetPt|3)', 0.0255),
 ('ZZPhi', 0.024384615384615383),
 ('JetEta(JetPt|2)', 0.02423076923076923),
 ('ExtraLepEta(ExtraLepPt|0)', 0.019269230769230768),
 ('JetPhi(JetPt|1)', 0.018846153846153846),
 ('nCleanedJetsPt30', 0.01603846153846154),
 ('JetEta(JetPt|3)', 0.011384615384615385),
 ('JetPhi(JetPt|2)', 0.011038461538461

In [127]:
%%capture
fig, implist = plot_variables([("WHl", "ttHl")])

In [128]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_WHl_ttHl_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [129]:
append_variables(confhandler, impdict, threshold_fscore)

In [130]:
implist

[('ZZMass', 0.1277819565505951),
 ('PFMET', 0.08030618114242732),
 ('JetPt(JetPt|0)', 0.07309411088004955),
 ('ZZEta', 0.05504181230918986),
 ('ZZPt', 0.05322773328613778),
 ('JetPt(JetPt|1)', 0.04871465864342286),
 ('Z2Pt', 0.04690057962037078),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.04548471306579355),
 ('JetEta(JetPt|0)', 0.044644042299013316),
 ('Z1Mass', 0.041237113402061855),
 ('Z2Mass', 0.03482146807663378),
 ('ExtraLepEta(ExtraLepPt|0)', 0.029865935135613468),
 ('JetPhi(JetPt|0)', 0.02951196849696916),
 ('JetEta(JetPt|1)', 0.0284058227512057),
 ('Z1Pt', 0.025662581301712315),
 ('ZZPhi', 0.025087385513915313),
 ('ExtraLepPt(ExtraLepPt|0)', 0.024998893854254238),
 ('ZZMassErr', 0.02318481483120216),
 ('JetPhi(JetPt|1)', 0.02318481483120216),
 ('ExtraLepPt(ExtraLepPt|1)', 0.02260961904340516),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.01676916950577408),
 ('nCleanedJetsPt30', 0.01654794035662139),
 ('nExtraLep', 0.014601123844077695),
 ('JetPt(JetPt|2)', 0.01446838635458608),
 ('JetPt(JetPt

In [131]:
%%capture
fig, implist = plot_variables([("ZHh", "ZHl")])

In [132]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHh_ZHl_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [133]:
append_variables(confhandler, impdict, threshold_fscore)

In [134]:
implist

[('ZZMass', 0.14744227157220197),
 ('Z1Mass', 0.11103745442492542),
 ('ZZMassErr', 0.08087504143188598),
 ('PFMET', 0.0774776267815711),
 ('Z2Pt', 0.07543365373991824),
 ('Z2Mass', 0.06717489780134792),
 ('JetPt(JetPt|0)', 0.05463484697823445),
 ('ZZPt', 0.053060435311015355),
 ('ZZPhi', 0.04913821677162745),
 ('Z1Pt', 0.0440282841674953),
 ('JetPhi(JetPt|0)', 0.04121091592089272),
 ('ZZEta', 0.03488564799469672),
 ('JetEta(JetPt|0)', 0.03477516296541819),
 ('ExtraLepPt(ExtraLepPt|0)', 0.027676499834272457),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.015302176555076787),
 ('JetEta(JetPt|1)', 0.013976356203734394),
 ('JetPhi(JetPt|1)', 0.009943652635067949),
 ('ExtraLepEta(ExtraLepPt|0)', 0.00961219754723235),
 ('nCleanedJetsPt30', 0.009280742459396751),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.00908739365815932),
 ('Z2Flav', 0.006546237984753066),
 ('nExtraLep', 0.0064909954701138),
 ('JetPt(JetPt|1)', 0.006297646668876367),
 ('JetPhi(JetPt|2)', 0.004806098773616175),
 ('JetPt(JetPt|2)', 0.00309358

In [135]:
%%capture
fig, implist = plot_variables([("ZHh", "ZHMET")])

In [136]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHh_ZHMET_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [137]:
append_variables(confhandler, impdict, threshold_fscore)

In [138]:
implist

[('PFMET', 0.15654504225932797),
 ('ZZMass', 0.07862296433725005),
 ('ZZPt', 0.07388167388167388),
 ('JetEta(JetPt|0)', 0.0655534941249227),
 ('ZZEta', 0.06444032158317872),
 ('Z1Pt', 0.061513090084518655),
 ('Z2Pt', 0.05974025974025974),
 ('JetPt(JetPt|0)', 0.05821480107194393),
 ('ZZPhi', 0.05660688517831375),
 ('Z1Mass', 0.05042259327973614),
 ('Z2Mass', 0.043331271902700474),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.03871366728509586),
 ('ZZMassErr', 0.036611008039579465),
 ('JetPhi(JetPt|0)', 0.03331271902700474),
 ('JetPt(JetPt|1)', 0.025479282622139766),
 ('JetEta(JetPt|1)', 0.020861678004535148),
 ('JetPhi(JetPt|1)', 0.020449391877963307),
 ('nCleanedJetsPt30', 0.01673881673881674),
 ('JetPt(JetPt|2)', 0.014017728303442589),
 ('JetEta(JetPt|2)', 0.006596578025149453),
 ('JetPhi(JetPt|2)', 0.00404040404040404),
 ('Z1Flav', 0.002968460111317254),
 ('JetPhi(JetPt|3)', 0.002886002886002886),
 ('ExtraLepPt(ExtraLepPt|0)', 0.002391259534116677),
 ('Z2Flav', 0.002350030921459493),
 ('Jet

In [139]:
%%capture
fig, implist = plot_variables([("ZHh", "ttHh")])

In [140]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHh_ttHh_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [141]:
append_variables(confhandler, impdict, threshold_fscore)

In [142]:
implist

[('nCleanedJetsPt30BTagged_bTagSF', 0.12467214547997901),
 ('JetPt(JetPt|1)', 0.09984262983038993),
 ('JetPt(JetPt|0)', 0.07623710438887918),
 ('PFMET', 0.07361426822871131),
 ('ZZPt', 0.06557090400419653),
 ('nCleanedJetsPt30', 0.06487148102815178),
 ('JetPt(JetPt|2)', 0.06224864486798391),
 ('ZZMass', 0.05175730022731247),
 ('JetPt(JetPt|3)', 0.044763070466864834),
 ('Z1Pt', 0.044238503234831264),
 ('JetEta(JetPt|2)', 0.041440811330652215),
 ('ZZEta', 0.03724427347438363),
 ('JetEta(JetPt|1)', 0.030599755201958383),
 ('ExtraLepPt(ExtraLepPt|0)', 0.022031823745410038),
 ('Z2Pt', 0.02063297779332051),
 ('Z1Mass', 0.01818499737716384),
 ('JetPhi(JetPt|3)', 0.01818499737716384),
 ('Z2Mass', 0.017835285889141458),
 ('JetEta(JetPt|0)', 0.017135862913096694),
 ('nExtraLep', 0.015212449728973596),
 ('JetPhi(JetPt|1)', 0.012065046336772163),
 ('ZZPhi', 0.010141633152649064),
 ('JetEta(JetPt|3)', 0.008742787200559538),
 ('ZZMassErr', 0.0073439412484700125),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.005

In [143]:
%%capture
fig, implist = plot_variables([("ZHh", "ttHl")])

In [144]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHh_ttHl_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [145]:
append_variables(confhandler, impdict, threshold_fscore)

In [146]:
implist

[('PFMET', 0.1270892147800235),
 ('ZZMass', 0.12231743573088165),
 ('Z1Mass', 0.0596347465460814),
 ('Z2Pt', 0.05938491518225198),
 ('ZZPhi', 0.050690783720988335),
 ('ZZEta', 0.04836735203737477),
 ('ExtraLepPt(ExtraLepPt|0)', 0.047842706173333),
 ('ZZPt', 0.04669348189971769),
 ('JetPt(JetPt|0)', 0.04382042121567942),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.03964823743972818),
 ('ZZMassErr', 0.03714992380143403),
 ('JetPt(JetPt|1)', 0.03350238588952457),
 ('nCleanedJetsPt30', 0.030579358932720412),
 ('JetEta(JetPt|0)', 0.028880505658680392),
 ('Z2Mass', 0.02820596097634097),
 ('JetPhi(JetPt|0)', 0.025857546156344467),
 ('Z1Pt', 0.02438354110975092),
 ('JetPt(JetPt|2)', 0.021085767107202637),
 ('JetPt(JetPt|3)', 0.017588128013590828),
 ('JetPhi(JetPt|1)', 0.015464561421040798),
 ('JetEta(JetPt|1)', 0.014989881829764908),
 ('ExtraLepEta(ExtraLepPt|0)', 0.01319109601019312),
 ('JetEta(JetPt|2)', 0.013041197191895471),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.010567866689984261),
 ('JetPhi(JetPt|2

In [147]:
%%capture
fig, implist = plot_variables([("ZHl", "ggH")])

In [148]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHl_ggH_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [149]:
append_variables(confhandler, impdict, threshold_fscore)

In [150]:
implist

[('ZZMass', 0.13349310460813993),
 ('Z1Mass', 0.09911425047651082),
 ('ZZMassErr', 0.07325653100123332),
 ('Z2Pt', 0.07256979482004709),
 ('ZZPt', 0.06980883507119633),
 ('PFMET', 0.06675355981612288),
 ('Z1Pt', 0.06553425271891468),
 ('Z2Mass', 0.05856878573831147),
 ('ZZEta', 0.05629835183316515),
 ('ZZPhi', 0.050566206973875996),
 ('JetEta(JetPt|0)', 0.046627985200134545),
 ('JetPt(JetPt|0)', 0.044413611391411596),
 ('JetPhi(JetPt|0)', 0.027959973091153716),
 ('ExtraLepPt(ExtraLepPt|0)', 0.02337706020854356),
 ('JetEta(JetPt|1)', 0.015122211010202937),
 ('JetPt(JetPt|1)', 0.012893822177374146),
 ('JetPhi(JetPt|1)', 0.011716560152483462),
 ('nCleanedJetsPt30', 0.01024498262137011),
 ('ExtraLepEta(ExtraLepPt|0)', 0.009712411705348133),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.00766621818589528),
 ('Z2Flav', 0.00738591770377845),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.005970400269088463),
 ('JetEta(JetPt|2)', 0.00594237022087678),
 ('JetPhi(JetPt|2)', 0.005591994618230743),
 ('JetPt(JetPt|2)', 

In [151]:
%%capture
fig, implist = plot_variables([("ZHl", "VBF")])

In [152]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHl_VBF_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [153]:
append_variables(confhandler, impdict, threshold_fscore)

In [154]:
implist

[('ZZMass', 0.12303642450427767),
 ('JetEta(JetPt|0)', 0.11270708747031388),
 ('Z1Mass', 0.09659789979684684),
 ('ZZEta', 0.06606769864659932),
 ('Z2Pt', 0.05814186385876563),
 ('PFMET', 0.056911499613722855),
 ('JetPt(JetPt|0)', 0.05639646341812355),
 ('JetEta(JetPt|1)', 0.05330624624452774),
 ('ZZPt', 0.046982746287447426),
 ('Z2Mass', 0.045552090188560476),
 ('ZZMassErr', 0.04412143408967353),
 ('Z1Pt', 0.04068785945234484),
 ('ExtraLepPt(ExtraLepPt|0)', 0.029957938710692723),
 ('ZZPhi', 0.02360582563163467),
 ('JetPhi(JetPt|0)', 0.02211794328879224),
 ('ExtraLepEta(ExtraLepPt|0)', 0.01902772611519643),
 ('JetPt(JetPt|1)', 0.01671006323499957),
 ('JetEta(JetPt|2)', 0.014907436550402014),
 ('nExtraLep', 0.012646999914160633),
 ('JetPt(JetPt|2)', 0.011388022547140119),
 ('JetPhi(JetPt|1)', 0.011216343815273684),
 ('JetPhi(JetPt|2)', 0.009413717130676128),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.007324959226301182),
 ('JetEta(JetPt|3)', 0.005579558785659103),
 ('Z2Flav', 0.00437780766259

In [155]:
%%capture
fig, implist = plot_variables([("ZHl", "WHh")])

In [156]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHl_WHh_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [157]:
append_variables(confhandler, impdict, threshold_fscore)

In [158]:
implist

[('ZZMass', 0.16506285218899003),
 ('Z1Mass', 0.12102297355873429),
 ('Z2Mass', 0.07984395318595579),
 ('PFMET', 0.07728651928912007),
 ('Z2Pt', 0.07581274382314694),
 ('ZZMassErr', 0.06970091027308192),
 ('ZZPt', 0.05739055049848288),
 ('JetPt(JetPt|0)', 0.047074122236671),
 ('ExtraLepPt(ExtraLepPt|0)', 0.04312960554833117),
 ('Z1Pt', 0.033810143042912875),
 ('JetEta(JetPt|0)', 0.032162982228001735),
 ('ZZEta', 0.024013870827915042),
 ('JetPhi(JetPt|1)', 0.023450368443866495),
 ('JetPhi(JetPt|0)', 0.018595578673602082),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.01660164716081491),
 ('ZZPhi', 0.016558300823580407),
 ('JetPt(JetPt|1)', 0.014477676636324231),
 ('nExtraLep', 0.013567403554399653),
 ('JetPhi(JetPt|2)', 0.011356740355439965),
 ('ExtraLepEta(ExtraLepPt|0)', 0.008799306458604248),
 ('JetEta(JetPt|1)', 0.008322496749024707),
 ('JetEta(JetPt|2)', 0.007715648027741656),
 ('JetPhi(JetPt|3)', 0.006718682271348071),
 ('ExtraLepPt(ExtraLepPt|1)', 0.005461638491547465),
 ('JetEta(JetPt|3

In [159]:
%%capture
fig, implist = plot_variables([("ZHl", "ZHMET")])

In [160]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHl_ZHMET_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [161]:
append_variables(confhandler, impdict, threshold_fscore)

In [162]:
implist

[('ZZMass', 0.16701819140522015),
 ('PFMET', 0.1255822128482292),
 ('Z1Mass', 0.1125318569294314),
 ('Z2Pt', 0.09662536250988664),
 ('Z2Mass', 0.0732489673960805),
 ('ZZPt', 0.06059407680815537),
 ('Z1Pt', 0.04728007733544248),
 ('JetPt(JetPt|0)', 0.04635732489673961),
 ('ZZMassErr', 0.04508304771948326),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03752526584058353),
 ('JetEta(JetPt|1)', 0.03181298883908955),
 ('ZZEta', 0.02412338518323227),
 ('JetEta(JetPt|0)', 0.019729325951313825),
 ('JetPhi(JetPt|0)', 0.019026276474206873),
 ('JetPhi(JetPt|1)', 0.016521662712013357),
 ('nCleanedJetsPt30', 0.015291326127076193),
 ('ZZPhi', 0.015247385534757008),
 ('JetPt(JetPt|1)', 0.009886633271816504),
 ('JetPt(JetPt|2)', 0.007294138324984621),
 ('nExtraLep', 0.006547148255558485),
 ('ExtraLepEta(ExtraLepPt|0)', 0.006195623517005009),
 ('Z2Flav', 0.006195623517005009),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0036470691624923106),
 ('Z1Flav', 0.002504613762193514),
 ('JetEta(JetPt|2)', 0.0020652078390016697),
 

In [163]:
%%capture
fig, implist = plot_variables([("ZHl", "ttHh")])

In [164]:
# Build a one-row table for this discriminant: every converted variable name
# becomes a column holding that variable's score from implist.
impdict = {convert_varname(name): [score] for name, score in implist}
impdict["discriminant"] = "D_ZHl_ttHh_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0, while concat yields the same frame on older versions.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [165]:
append_variables(confhandler, impdict, threshold_fscore)

In [166]:
implist

[('ZZMass', 0.12045838614279679),
 ('Z1Mass', 0.08034959864807774),
 ('JetPt(JetPt|0)', 0.07419729615547106),
 ('Z2Pt', 0.07213772708069285),
 ('Z2Mass', 0.06582699619771863),
 ('JetPt(JetPt|1)', 0.04892796789184622),
 ('PFMET', 0.0470532319391635),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.04517849598648078),
 ('ZZMassErr', 0.04401668779045205),
 ('ZZPt', 0.03165927334178285),
 ('JetPhi(JetPt|0)', 0.030022179974651456),
 ('JetEta(JetPt|1)', 0.029256442754541614),
 ('JetPhi(JetPt|1)', 0.02764575411913815),
 ('JetPt(JetPt|2)', 0.027328897338403043),
 ('ExtraLepPt(ExtraLepPt|0)', 0.026563160118293197),
 ('Z1Pt', 0.025797422898183354),
 ('JetEta(JetPt|0)', 0.024926066751161807),
 ('JetPt(JetPt|3)', 0.024688424165610477),
 ('ZZPhi', 0.02315694972539079),
 ('JetPhi(JetPt|2)', 0.022470426700464723),
 ('ZZEta', 0.020146810308407266),
 ('ExtraLepEta(ExtraLepPt|0)', 0.0194602872834812),
 ('JetEta(JetPt|2)', 0.013994507815800592),
 ('JetPhi(JetPt|3)', 0.013070342205323195),
 ('ExtraLepPhi(ExtraLepPt

In [167]:
%%capture
# Run plot_variables for the single pair ZHl vs ttHl; implist receives the
# (variable name, score) tuples that are displayed a few cells below.
fig, implist = plot_variables([("ZHl", "ttHl")])

In [168]:
# Turn the (variable name, score) tuples in implist into a one-row table:
# each name (mapped through convert_varname) becomes a column holding its
# score, and "discriminant" labels the row.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHl_ttHl_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0. Row labels are kept unchanged, as append did.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [169]:
# Pass the D_ZHl_ttHl_ML scores and the shared threshold_fscore to append_variables
# together with confhandler -- presumably it records the variables that pass the
# threshold; confirm against its definition.
append_variables(confhandler, impdict, threshold_fscore)

In [170]:
# Display the (variable, score) list behind D_ZHl_ttHl_ML.
implist

[('PFMET', 0.10034639338083529),
 ('Z2Mass', 0.0868675628623221),
 ('Z1Mass', 0.08629323186819103),
 ('Z1Pt', 0.06929662401062513),
 ('JetPt(JetPt|0)', 0.06707109140836728),
 ('Z2Pt', 0.06209953874042034),
 ('ZZMass', 0.05888687474199975),
 ('ZZPt', 0.053628156576987274),
 ('ZZEta', 0.0440081124252921),
 ('ZZPhi', 0.03810327189188219),
 ('JetEta(JetPt|0)', 0.03655975734515498),
 ('JetPt(JetPt|1)', 0.032306118419871854),
 ('ZZMassErr', 0.02898576736005169),
 ('JetPhi(JetPt|0)', 0.02763967909255703),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.021322038157115423),
 ('JetEta(JetPt|1)', 0.020245167543119693),
 ('ExtraLepPt(ExtraLepPt|0)', 0.018934974962758224),
 ('JetPhi(JetPt|1)', 0.016601755299100813),
 ('JetPt(JetPt|2)', 0.014124952886910638),
 ('nExtraZ', 0.012994238742215123),
 ('JetEta(JetPt|2)', 0.012473751278783854),
 ('ExtraLepEta(ExtraLepPt|0)', 0.012258377155984709),
 ('nExtraLep', 0.011989159502485776),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.01137893282122153),
 ('JetPhi(JetPt|2)', 0.01066

In [171]:
%%capture
# Run plot_variables for the single pair ZHMET vs ggH; implist receives the
# (variable name, score) tuples that are displayed a few cells below.
fig, implist = plot_variables([("ZHMET", "ggH")])

In [172]:
# Turn the (variable name, score) tuples in implist into a one-row table:
# each name (mapped through convert_varname) becomes a column holding its
# score, and "discriminant" labels the row.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHMET_ggH_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0. Row labels are kept unchanged, as append did.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [173]:
# Pass the D_ZHMET_ggH_ML scores and the shared threshold_fscore to append_variables
# together with confhandler -- presumably it records the variables that pass the
# threshold; confirm against its definition.
append_variables(confhandler, impdict, threshold_fscore)

In [174]:
# Display the (variable, score) list behind D_ZHMET_ggH_ML.
implist

[('ZZPt', 0.15190525231719876),
 ('PFMET', 0.14830072090628219),
 ('ZZPhi', 0.07695908622788128),
 ('JetPt(JetPt|0)', 0.07283962175826234),
 ('ZZMass', 0.06965639921355678),
 ('Z1Pt', 0.06726898230502762),
 ('Z2Pt', 0.06436663233779609),
 ('ZZEta', 0.058374684018350345),
 ('Z2Mass', 0.056970321130980245),
 ('ZZMassErr', 0.0548637767999251),
 ('Z1Mass', 0.05102518490778017),
 ('JetEta(JetPt|0)', 0.03890085198015167),
 ('JetPhi(JetPt|0)', 0.028227694036138937),
 ('JetPt(JetPt|1)', 0.013950004681209625),
 ('nCleanedJetsPt30', 0.011468963580189121),
 ('JetEta(JetPt|1)', 0.010813594232749742),
 ('JetPhi(JetPt|1)', 0.007302687014324501),
 ('Z2Flav', 0.004681209624566988),
 ('JetPt(JetPt|2)', 0.00383859189214493),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.002481041101020504),
 ('Z1Flav', 0.0020597322348094747),
 ('JetEta(JetPt|2)', 0.0018724838498267952),
 ('JetPhi(JetPt|2)', 0.0008426177324220578),
 ('JetPhi(JetPt|3)', 0.0005149330587023687),
 ('JetPt(JetPt|3)', 0.0004213088662110289),
 ('nExtra

In [175]:
%%capture
# Run plot_variables for the single pair ZHMET vs VBF; implist receives the
# (variable name, score) tuples that are displayed a few cells below.
fig, implist = plot_variables([("ZHMET", "VBF")])

In [176]:
# Turn the (variable name, score) tuples in implist into a one-row table:
# each name (mapped through convert_varname) becomes a column holding its
# score, and "discriminant" labels the row.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHMET_VBF_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0. Row labels are kept unchanged, as append did.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [177]:
# Pass the D_ZHMET_VBF_ML scores and the shared threshold_fscore to append_variables
# together with confhandler -- presumably it records the variables that pass the
# threshold; confirm against its definition.
append_variables(confhandler, impdict, threshold_fscore)

In [178]:
# Display the (variable, score) list behind D_ZHMET_VBF_ML.
implist

[('JetEta(JetPt|0)', 0.14048227347495887),
 ('PFMET', 0.13936233507157106),
 ('ZZPt', 0.08567528785916774),
 ('JetPt(JetPt|0)', 0.07850068246246457),
 ('ZZMass', 0.06884121373324467),
 ('Z1Pt', 0.06082665453400063),
 ('JetEta(JetPt|1)', 0.05960172190529521),
 ('ZZEta', 0.05659188744619046),
 ('ZZPhi', 0.04766737829419382),
 ('Z2Mass', 0.043677597732124734),
 ('ZZMassErr', 0.04213768242746649),
 ('Z1Mass', 0.0386028768417737),
 ('Z2Pt', 0.03629300388478634),
 ('JetPhi(JetPt|0)', 0.03296818674972876),
 ('JetPt(JetPt|1)', 0.019388933608651526),
 ('JetPhi(JetPt|1)', 0.01648409337486438),
 ('JetPhi(JetPt|2)', 0.008644524551149687),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.00636964966926819),
 ('JetEta(JetPt|2)', 0.005704686242256676),
 ('nCleanedJetsPt30', 0.004619745913974732),
 ('Z2Flav', 0.0024498652574108425),
 ('JetPt(JetPt|2)', 0.0022398768067756272),
 ('Z1Flav', 0.0020648864312462815),
 ('JetEta(JetPt|3)', 0.000804955727434991)]

In [179]:
%%capture
# Run plot_variables for the single pair ZHMET vs WHh; implist receives the
# (variable name, score) tuples that are displayed a few cells below.
fig, implist = plot_variables([("ZHMET", "WHh")])

In [180]:
# Turn the (variable name, score) tuples in implist into a one-row table:
# each name (mapped through convert_varname) becomes a column holding its
# score, and "discriminant" labels the row.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHMET_WHh_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0. Row labels are kept unchanged, as append did.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [181]:
# Pass the D_ZHMET_WHh_ML scores and the shared threshold_fscore to append_variables
# together with confhandler -- presumably it records the variables that pass the
# threshold; confirm against its definition.
append_variables(confhandler, impdict, threshold_fscore)

In [182]:
# Display the (variable, score) list behind D_ZHMET_WHh_ML.
implist

[('PFMET', 0.1514353016329301),
 ('ZZPt', 0.09003074220465525),
 ('ZZEta', 0.07066714576595999),
 ('ZZMass', 0.06847127400487084),
 ('JetPt(JetPt|0)', 0.06639517706711383),
 ('JetEta(JetPt|0)', 0.06515750389268175),
 ('Z1Pt', 0.06419930530602468),
 ('ZZPhi', 0.05876951331496786),
 ('Z2Pt', 0.05493671896833952),
 ('ZZMassErr', 0.05333972132391105),
 ('Z2Mass', 0.04771030462730067),
 ('Z1Mass', 0.0461133069828722),
 ('JetPhi(JetPt|0)', 0.03880704275961193),
 ('JetPt(JetPt|1)', 0.03281830159300515),
 ('JetPhi(JetPt|1)', 0.02171916796422725),
 ('nCleanedJetsPt30', 0.017886373617598914),
 ('JetEta(JetPt|1)', 0.01373417974208488),
 ('JetPt(JetPt|2)', 0.010699884217670779),
 ('JetPhi(JetPt|2)', 0.006028666107717491),
 ('JetPt(JetPt|3)', 0.005988741166606779),
 ('Z2Flav', 0.004910767756617559),
 ('Z1Flav', 0.003912644228849763),
 ('JetEta(JetPt|2)', 0.003792869405517627),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0009581985866570847),
 ('JetEta(JetPt|3)', 0.0008384237633249491),
 ('JetPhi(JetPt|3)

In [183]:
%%capture
# Run plot_variables for the single pair ZHMET vs ttHh; implist receives the
# (variable name, score) tuples that are displayed a few cells below.
fig, implist = plot_variables([("ZHMET", "ttHh")])

In [184]:
# Turn the (variable name, score) tuples in implist into a one-row table:
# each name (mapped through convert_varname) becomes a column holding its
# score, and "discriminant" labels the row.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHMET_ttHh_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0. Row labels are kept unchanged, as append did.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [185]:
# Pass the D_ZHMET_ttHh_ML scores and the shared threshold_fscore to append_variables
# together with confhandler -- presumably it records the variables that pass the
# threshold; confirm against its definition.
append_variables(confhandler, impdict, threshold_fscore)

In [186]:
# Display the (variable, score) list behind D_ZHMET_ttHh_ML.
implist

[('PFMET', 0.21936131081758903),
 ('JetPt(JetPt|0)', 0.09530178899849523),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0877779635512456),
 ('ZZMass', 0.06520648720949673),
 ('JetPt(JetPt|1)', 0.05952181909379702),
 ('JetEta(JetPt|0)', 0.0540043470991473),
 ('nCleanedJetsPt30', 0.05300117037284735),
 ('JetPt(JetPt|2)', 0.050660424678147464),
 ('JetPt(JetPt|3)', 0.035779969904698214),
 ('ExtraLepPt(ExtraLepPt|0)', 0.030931282394248454),
 ('ZZPt', 0.024076241431198797),
 ('ZZEta', 0.023909045310148806),
 ('Z2Pt', 0.022738672462798863),
 ('Z2Mass', 0.021735495736498914),
 ('JetEta(JetPt|1)', 0.02123390737334894),
 ('Z1Pt', 0.02023073064704899),
 ('Z1Mass', 0.019227553920749037),
 ('ZZPhi', 0.01822437719444909),
 ('JetPhi(JetPt|0)', 0.018057181073399098),
 ('ZZMassErr', 0.017555592710249122),
 ('JetEta(JetPt|2)', 0.016886808226049155),
 ('JetPhi(JetPt|1)', 0.007691021568299616),
 ('Z2Flav', 0.007356629326199632),
 ('ExtraLepPhi(ExtraLepPt|0)', 0.0051830797525497406),
 ('JetPhi(JetPt|3)', 0.002173

In [187]:
%%capture
# Run plot_variables for the single pair ZHMET vs ttHl; implist receives the
# (variable name, score) tuples that are displayed a few cells below.
fig, implist = plot_variables([("ZHMET", "ttHl")])

In [188]:
# Turn the (variable name, score) tuples in implist into a one-row table:
# each name (mapped through convert_varname) becomes a column holding its
# score, and "discriminant" labels the row.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ZHMET_ttHl_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0. Row labels are kept unchanged, as append did.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [189]:
# Pass the D_ZHMET_ttHl_ML scores and the shared threshold_fscore to append_variables
# together with confhandler -- presumably it records the variables that pass the
# threshold; confirm against its definition.
append_variables(confhandler, impdict, threshold_fscore)

In [190]:
# Display the (variable, score) list behind D_ZHMET_ttHl_ML.
implist

[('ZZMass', 0.2171469111547697),
 ('PFMET', 0.08366077275703995),
 ('Z1Mass', 0.07023575638506876),
 ('JetPt(JetPt|0)', 0.0504256712508186),
 ('Z2Mass', 0.04927963326784545),
 ('nCleanedJetsPt30', 0.04911591355599214),
 ('ExtraLepPt(ExtraLepPt|0)', 0.04595066579349487),
 ('ZZEta', 0.04480462781052172),
 ('nExtraLep', 0.041803099759877754),
 ('JetPt(JetPt|3)', 0.04060248853962017),
 ('JetEta(JetPt|0)', 0.03623662955686531),
 ('Z2Pt', 0.03487229862475442),
 ('Z1Pt', 0.03137961143855054),
 ('ZZMassErr', 0.02941497489631085),
 ('JetPt(JetPt|1)', 0.025267408862693734),
 ('ZZPt', 0.022975332896747434),
 ('JetPhi(JetPt|0)', 0.02281161318489413),
 ('ZZPhi', 0.021283562540929928),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.020792403405370006),
 ('JetPt(JetPt|2)', 0.01413446845666885),
 ('JetEta(JetPt|2)', 0.009495743287491814),
 ('JetEta(JetPt|1)', 0.009277450338354072),
 ('ExtraLepEta(ExtraLepPt|0)', 0.008185985592665358),
 ('JetPhi(JetPt|1)', 0.008185985592665358),
 ('Z2Flav', 0.004365858982754857

In [191]:
%%capture
# Run plot_variables for the single pair ttHh vs ggH; implist receives the
# (variable name, score) tuples that are displayed a few cells below.
fig, implist = plot_variables([("ttHh", "ggH")])

In [192]:
# Turn the (variable name, score) tuples in implist into a one-row table:
# each name (mapped through convert_varname) becomes a column holding its
# score, and "discriminant" labels the row.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHh_ggH_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0. Row labels are kept unchanged, as append did.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [193]:
# Pass the D_ttHh_ggH_ML scores and the shared threshold_fscore to append_variables
# together with confhandler -- presumably it records the variables that pass the
# threshold; confirm against its definition.
append_variables(confhandler, impdict, threshold_fscore)

In [194]:
# Display the (variable, score) list behind D_ttHh_ggH_ML.
implist

[('JetEta(JetPt|0)', 0.08755695263659803),
 ('JetEta(JetPt|1)', 0.08335393635432486),
 ('JetPt(JetPt|0)', 0.07982198989863314),
 ('PFMET', 0.06689506587080139),
 ('ZZMass', 0.061809062974605304),
 ('ZZPt', 0.06149118779359305),
 ('JetPt(JetPt|1)', 0.06014904814043019),
 ('Z1Pt', 0.05068343163917635),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.049800445025253416),
 ('JetPt(JetPt|2)', 0.04121781513792251),
 ('JetPhi(JetPt|0)', 0.03782714654045845),
 ('ZZEta', 0.037120757249320104),
 ('JetEta(JetPt|2)', 0.031787518101225584),
 ('JetPt(JetPt|3)', 0.029633030763253628),
 ('ZZPhi', 0.028997280401229116),
 ('Z2Pt', 0.028326210574647688),
 ('Z1Mass', 0.025182778229082048),
 ('JetPhi(JetPt|1)', 0.024335111079716032),
 ('Z2Mass', 0.021121039805036556),
 ('ZZMassErr', 0.021015081411365804),
 ('nCleanedJetsPt30', 0.016423551018966552),
 ('JetEta(JetPt|3)', 0.014657577791120686),
 ('JetPhi(JetPt|2)', 0.010454561508847526),
 ('ExtraLepPt(ExtraLepPt|0)', 0.006357503620245117),
 ('JetPhi(JetPt|3)', 0.00550

In [195]:
%%capture
# Run plot_variables for the single pair ttHh vs VBF; implist receives the
# (variable name, score) tuples that are displayed a few cells below.
fig, implist = plot_variables([("ttHh", "VBF")])

In [196]:
# Turn the (variable name, score) tuples in implist into a one-row table:
# each name (mapped through convert_varname) becomes a column holding its
# score, and "discriminant" labels the row.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHh_VBF_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0. Row labels are kept unchanged, as append did.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [197]:
# Pass the D_ttHh_VBF_ML scores and the shared threshold_fscore to append_variables
# together with confhandler -- presumably it records the variables that pass the
# threshold; confirm against its definition.
append_variables(confhandler, impdict, threshold_fscore)

In [198]:
# Display the (variable, score) list behind D_ttHh_VBF_ML.
implist

[('JetEta(JetPt|0)', 0.15609767868346122),
 ('JetEta(JetPt|1)', 0.1453597799334009),
 ('JetEta(JetPt|2)', 0.08155977028135708),
 ('JetPt(JetPt|0)', 0.04965976545533517),
 ('PFMET', 0.04314463587664688),
 ('ZZMass', 0.04268616379518363),
 ('ZZEta', 0.04082814535977993),
 ('JetPt(JetPt|1)', 0.0383910042951595),
 ('JetPt(JetPt|2)', 0.033975194247381886),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.029848945514212636),
 ('Z1Pt', 0.02963177452825636),
 ('JetPhi(JetPt|1)', 0.028497659379373583),
 ('Z2Mass', 0.028449399160272188),
 ('ZZMassErr', 0.02524009459002944),
 ('JetPt(JetPt|3)', 0.025071183823174557),
 ('JetPhi(JetPt|0)', 0.024347280536653637),
 ('Z1Mass', 0.023719897688335503),
 ('JetEta(JetPt|3)', 0.023695767578784808),
 ('ZZPt', 0.023237295497321557),
 ('ZZPhi', 0.021934269581583902),
 ('nCleanedJetsPt30', 0.021258626514164373),
 ('Z2Pt', 0.020076251146180203),
 ('JetPhi(JetPt|2)', 0.013175039814680758),
 ('ExtraLepPt(ExtraLepPt|0)', 0.007914675932628734),
 ('JetPhi(JetPt|3)', 0.00733555

In [199]:
%%capture
# Run plot_variables for the single pair ttHh vs WHh; implist receives the
# (variable name, score) tuples that are displayed a few cells below.
fig, implist = plot_variables([("ttHh", "WHh")])

In [200]:
# Turn the (variable name, score) tuples in implist into a one-row table:
# each name (mapped through convert_varname) becomes a column holding its
# score, and "discriminant" labels the row.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHh_WHh_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0. Row labels are kept unchanged, as append did.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [201]:
# Pass the D_ttHh_WHh_ML scores and the shared threshold_fscore to append_variables
# together with confhandler -- presumably it records the variables that pass the
# threshold; confirm against its definition.
append_variables(confhandler, impdict, threshold_fscore)

In [202]:
# Display the (variable, score) list behind D_ttHh_WHh_ML.
implist

[('PFMET', 0.07829557157569517),
 ('JetPt(JetPt|0)', 0.0782183316168898),
 ('JetPt(JetPt|1)', 0.06954170957775489),
 ('ZZPt', 0.06220391349124614),
 ('ZZMass', 0.059500514933058704),
 ('Z1Pt', 0.05682286302780638),
 ('JetEta(JetPt|0)', 0.0493048403707518),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.04338311019567456),
 ('JetPhi(JetPt|1)', 0.04111740473738414),
 ('ZZEta', 0.04026776519052523),
 ('JetEta(JetPt|1)', 0.038568486096807415),
 ('ZZMassErr', 0.038233779608650875),
 ('Z2Pt', 0.03748712667353244),
 ('JetPhi(JetPt|0)', 0.03532440782698249),
 ('JetPt(JetPt|2)', 0.0349124613800206),
 ('ZZPhi', 0.033522142121524205),
 ('Z2Mass', 0.030844490216271885),
 ('JetEta(JetPt|2)', 0.026390319258496397),
 ('Z1Mass', 0.026261585993820804),
 ('JetPt(JetPt|3)', 0.02219361483007209),
 ('JetEta(JetPt|3)', 0.021652935118434604),
 ('nCleanedJetsPt30', 0.019773429454170956),
 ('JetPhi(JetPt|2)', 0.018202883625128733),
 ('JetPhi(JetPt|3)', 0.009732234809474768),
 ('ExtraLepPt(ExtraLepPt|0)', 0.008547888774

In [203]:
%%capture
# Run plot_variables for the single pair ttHh vs ttHl; implist receives the
# (variable name, score) tuples that are displayed a few cells below.
fig, implist = plot_variables([("ttHh", "ttHl")])

In [204]:
# Turn the (variable name, score) tuples in implist into a one-row table:
# each name (mapped through convert_varname) becomes a column holding its
# score, and "discriminant" labels the row.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHh_ttHl_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0. Row labels are kept unchanged, as append did.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [205]:
# Pass the D_ttHh_ttHl_ML scores and the shared threshold_fscore to append_variables
# together with confhandler -- presumably it records the variables that pass the
# threshold; confirm against its definition.
append_variables(confhandler, impdict, threshold_fscore)

In [206]:
# Display the (variable, score) list behind D_ttHh_ttHl_ML.
implist

[('PFMET', 0.10745045138528588),
 ('ZZMass', 0.08166441838746498),
 ('Z1Mass', 0.05997717131887517),
 ('ZZMassErr', 0.04498287848915638),
 ('ZZPt', 0.04353014423575802),
 ('JetPt(JetPt|3)', 0.04197364324997406),
 ('JetPt(JetPt|0)', 0.039016291376984535),
 ('JetPhi(JetPt|2)', 0.0369150150461762),
 ('nCleanedJetsPt30', 0.03587734772232022),
 ('Z2Pt', 0.03468403029988586),
 ('ExtraLepPt(ExtraLepPt|0)', 0.03390577980699388),
 ('JetEta(JetPt|1)', 0.03367230465912628),
 ('JetPhi(JetPt|1)', 0.033542596243644286),
 ('JetPhi(JetPt|0)', 0.03336100446196949),
 ('Z1Pt', 0.03237522050430632),
 ('ZZEta', 0.03227145377192072),
 ('JetPt(JetPt|2)', 0.030066410708726784),
 ('JetPt(JetPt|1)', 0.029884818927051988),
 ('JetEta(JetPt|0)', 0.0293919269482204),
 ('Z2Mass', 0.029158451800352807),
 ('ZZPhi', 0.028042959427207637),
 ('JetPhi(JetPt|3)', 0.025500674483760505),
 ('JetEta(JetPt|2)', 0.024463007159904536),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.020364221230673447),
 ('JetEta(JetPt|3)', 0.0182629448998

In [207]:
%%capture
# Run plot_variables for the single pair ttHl vs ggH; implist receives the
# (variable name, score) tuples that are displayed a few cells below.
fig, implist = plot_variables([("ttHl", "ggH")])

In [208]:
# Turn the (variable name, score) tuples in implist into a one-row table:
# each name (mapped through convert_varname) becomes a column holding its
# score, and "discriminant" labels the row.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHl_ggH_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0. Row labels are kept unchanged, as append did.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [209]:
# Pass the D_ttHl_ggH_ML scores and the shared threshold_fscore to append_variables
# together with confhandler -- presumably it records the variables that pass the
# threshold; confirm against its definition.
append_variables(confhandler, impdict, threshold_fscore)

In [210]:
# Display the (variable, score) list behind D_ttHl_ggH_ML.
implist

[('PFMET', 0.14102265673494724),
 ('ZZMass', 0.14003336436995656),
 ('JetPt(JetPt|0)', 0.06021104903786468),
 ('ZZPt', 0.059027777777777776),
 ('Z1Mass', 0.05619568590937306),
 ('Z1Pt', 0.045837212911235256),
 ('ExtraLepPt(ExtraLepPt|0)', 0.044964307883302294),
 ('Z2Pt', 0.04422718808193669),
 ('ZZMassErr', 0.04281114214773433),
 ('JetPt(JetPt|1)', 0.038524208566108005),
 ('JetEta(JetPt|0)', 0.03454764121663563),
 ('ZZPhi', 0.031133612662942272),
 ('ZZEta', 0.030609869646182494),
 ('Z2Mass', 0.028553693358162633),
 ('JetEta(JetPt|1)', 0.028456703910614525),
 ('nCleanedJetsPt30', 0.02281191806331471),
 ('JetPhi(JetPt|0)', 0.02193901303538175),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.02067815021725636),
 ('nExtraLep', 0.01864137181874612),
 ('JetPt(JetPt|2)', 0.015169149596523898),
 ('JetPhi(JetPt|1)', 0.012666821849782744),
 ('JetEta(JetPt|2)', 0.011386561142147734),
 ('JetPt(JetPt|3)', 0.011153786468032278),
 ('ExtraLepEta(ExtraLepPt|0)', 0.010824022346368716),
 ('JetEta(JetPt|3)', 0.010

In [211]:
%%capture
# Run plot_variables for the single pair ttHl vs VBF; implist receives the
# (variable name, score) tuples that are displayed a few cells below.
fig, implist = plot_variables([("ttHl", "VBF")])

In [212]:
# Turn the (variable name, score) tuples in implist into a one-row table:
# each name (mapped through convert_varname) becomes a column holding its
# score, and "discriminant" labels the row.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHl_VBF_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0. Row labels are kept unchanged, as append did.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [213]:
# Pass the D_ttHl_VBF_ML scores and the shared threshold_fscore to append_variables
# together with confhandler -- presumably it records the variables that pass the
# threshold; confirm against its definition.
append_variables(confhandler, impdict, threshold_fscore)

In [214]:
# Display the (variable, score) list behind D_ttHl_VBF_ML.
implist

[('ZZMass', 0.10692982272546517),
 ('JetEta(JetPt|0)', 0.10672052575399234),
 ('PFMET', 0.099206764478118),
 ('JetEta(JetPt|1)', 0.07777475459930094),
 ('Z1Mass', 0.0449988488666569),
 ('ZZMassErr', 0.042738441574750416),
 ('ExtraLepPt(ExtraLepPt|0)', 0.041105925197262395),
 ('JetEta(JetPt|2)', 0.03867808032817765),
 ('JetPt(JetPt|0)', 0.03746415789363528),
 ('ZZEta', 0.03581071181899998),
 ('JetPt(JetPt|1)', 0.03482701605307771),
 ('ZZPt', 0.034680508173046735),
 ('JetPt(JetPt|2)', 0.032378241486845684),
 ('Z2Pt', 0.028903911760396826),
 ('Z2Mass', 0.026224910525544695),
 ('JetPhi(JetPt|0)', 0.025973754159777308),
 ('ZZPhi', 0.02589003537118818),
 ('Z1Pt', 0.024864480210971346),
 ('nCleanedJetsPt30', 0.020908767450134997),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.020908767450134997),
 ('ExtraLepEta(ExtraLepPt|0)', 0.020385525021452938),
 ('JetPhi(JetPt|1)', 0.019422758952677956),
 ('JetPt(JetPt|3)', 0.014169404968710103),
 ('JetPhi(JetPt|2)', 0.009606730990602566),
 ('JetEta(JetPt|3)', 0

In [215]:
%%capture
# Run plot_variables for the single pair ttHl vs WHh; implist receives the
# (variable name, score) tuples that are displayed a few cells below.
fig, implist = plot_variables([("ttHl", "WHh")])

In [216]:
# Turn the (variable name, score) tuples in implist into a one-row table:
# each name (mapped through convert_varname) becomes a column holding its
# score, and "discriminant" labels the row.
impdict = {convert_varname(entry[0]): [entry[1]] for entry in implist}
impdict["discriminant"] = "D_ttHl_WHh_ML"
# pd.concat instead of DataFrame.append: append was deprecated in pandas 1.4
# and removed in 2.0. Row labels are kept unchanged, as append did.
df = pd.concat([df, pd.DataFrame.from_dict(impdict)])

In [217]:
# Pass the D_ttHl_WHh_ML scores and the shared threshold_fscore to append_variables
# together with confhandler -- presumably it records the variables that pass the
# threshold; confirm against its definition.
append_variables(confhandler, impdict, threshold_fscore)

In [218]:
# Display the (variable, score) list behind D_ttHl_WHh_ML.
implist

[('ZZMass', 0.17309507754551584),
 ('PFMET', 0.1463924477410654),
 ('ExtraLepPt(ExtraLepPt|0)', 0.07835468644639244),
 ('ZZPt', 0.062306136210384354),
 ('nCleanedJetsPt30BTagged_bTagSF', 0.0602832097100472),
 ('JetPt(JetPt|0)', 0.052663519892110586),
 ('ZZMassErr', 0.045853000674308836),
 ('Z1Mass', 0.03742414025623736),
 ('JetPt(JetPt|1)', 0.027983816587997302),
 ('ZZEta', 0.026163182737693864),
 ('JetEta(JetPt|0)', 0.025354012137559),
 ('Z1Pt', 0.022184760620364126),
 ('Z2Mass', 0.021780175320296694),
 ('Z2Pt', 0.02164531355360755),
 ('nCleanedJetsPt30', 0.02157788267026298),
 ('JetEta(JetPt|1)', 0.021375590020229266),
 ('JetPt(JetPt|2)', 0.020903573836817263),
 ('JetPhi(JetPt|3)', 0.01861092380310182),
 ('JetPhi(JetPt|1)', 0.01638570465273095),
 ('JetPt(JetPt|3)', 0.016115981119352665),
 ('JetPhi(JetPt|2)', 0.01409305461901551),
 ('JetPhi(JetPt|0)', 0.012879298718813216),
 ('ZZPhi', 0.011867835468644639),
 ('nExtraLep', 0.010654079568442347),
 ('Z1Flav', 0.007552258934592044),
 ('Ex

In [219]:
# Save the variable configuration held by confhandler (the object handed to
# every append_variables call above) to out_path.
confhandler.save_configuration(out_path)

In [220]:
# The rows were stacked from one-row frames whose column sets differ, so a
# variable missing from a discriminant's list is NaN in that row; record it
# as 0.0 instead.
df = df.fillna(0.0)

In [221]:
# Write the discriminant-by-variable table to the current working directory.
# NOTE(review): the rows were stacked without re-indexing, so the index column
# written here is presumably a repeated 0 -- confirm whether consumers need it.
df.to_csv("input_parameters_table.csv")

In [222]:
# now plot the data contained in the table to have a global picture of the relevant input variables
# Every column except the "discriminant" label holds the scores of one input variable.
datacol_labels = [col for col in df.columns.tolist() if col != "discriminant"]
# .values replaces DataFrame.as_matrix(), which was deprecated in pandas 0.23
# and removed in 1.0. Transposing gives one row per variable and one column
# per discriminant.
variable_data = df[datacol_labels].values.transpose()
# Prepend an empty label: the plotting cell below passes this array to
# set_yticklabels, presumably so the first real label lines up with the first
# matrix row -- verify on the rendered figure.
datacol_labels = np.concatenate([[''], np.array(datacol_labels)])

In [223]:
# Column labels for the heat map, with the same leading empty entry as
# datacol_labels. .values replaces Series.as_matrix(), which was deprecated in
# pandas 0.23 and removed in 1.0.
discriminant_labels = np.concatenate([[''], df["discriminant"].values])

In [224]:
# Draw the table as a heat map: rows are input variables, columns are
# discriminants (variable_data was transposed when it was built).
fig = plt.figure(figsize = (15, 10))
ax = fig.add_subplot(111)
# vmin/vmax pin the colour scale to the smallest and largest value in the table.
cax = ax.matshow(variable_data, interpolation = 'nearest', cmap = 'Blues', vmin = np.min(variable_data), vmax = np.max(variable_data))
# Both label arrays start with an empty string. NOTE(review): presumably this
# absorbs a tick that MultipleLocator(1) places before the first cell, so the
# real labels line up with the data -- confirm on the rendered figure.
ax.set_xticklabels(discriminant_labels, rotation = 'vertical')
ax.set_yticklabels(datacol_labels)
# Major ticks at every integer position on both axes.
ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

In [225]:
# Lay out and save the heat map built in the previous cell. The calls go
# through `fig` rather than the pyplot state machine: when the inline backend
# closes figures at the end of each cell, pyplot's "current figure" in this
# cell is a fresh empty one, and plt.savefig would write a blank PDF.
fig.tight_layout()
fig.savefig(os.path.join(out_dir, "input_variables_exclusive_fullmassrange.pdf"))