In [1]:
import pandas as pd
import numpy as np
from bff_processor.data_tools import regex_select, get_files, make_df
from bff_processor.utils import time_func, hist2unc, linear, parabola, heaviside, significance, power_func, apply_multiple_filters
from bff_processor.bff_meta import preselection, band_cut, isin, identity, all_reg, weights, jet_variations
from bff_processor.plotting_utils import produce_bff_hists, boost_plot, boost_plot2d, unc_plot
from bff_processor.bff_cuts import bff_1, bff_2, reg_filter
from bff_processor.sig_op_func import *
import boost_histogram as bh
import matplotlib.pyplot as plt
import mplhep as hep
hep.set_style(hep.style.CMS)
import dill as pickle

In [2]:
# Route all figure text through LaTeX (matplotlib's "text.usetex" option).
plt.rcParams["text.usetex"] = True

In [3]:
# Select the input files for one era.
# `era` is turned into a selector by regex_select, and get_files uses that
# selector to build `file_dict`, which later cells index by sample group
# ('DY', 'TT', 'ST', 'VB', 'BFF').
era = 2016
file_re = regex_select(era)
file_dict = get_files(file_re)

In [4]:
# Build the background DataFrame: join the DY, TT, ST and VB file collections,
# load them with make_df, then pass the result through the callable returned
# by preselection().
backgrounds = file_dict['DY']+file_dict['TT']+file_dict['ST']+file_dict['VB']
background_df = preselection()(make_df(backgrounds))

In [5]:
# Build one preselected DataFrame per BFF signal point.
import re

# Step 1: group the signal files by a "<mass> <dbs>" label parsed from the
# file name, e.g. "..._M_500_dbs0p04.csv" -> "500 0.04".
bff_dict = {}
for fname in file_dict['BFF']:
    # Raw string: '\d' inside a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    match = re.search(r'_M_([0-9]+)_dbs(\d)p(\d+)', fname)
    if match is None:
        # Fail with a message that names the offending file instead of the
        # bare IndexError the previous findall(...)[0] produced.
        raise ValueError(
            "cannot parse mass/dbs from BFF file name: {!r}".format(fname))
    name = "{} {}.{}".format(*match.groups())
    bff_dict.setdefault(name, []).append(fname)

# Step 2: replace each single-file list with its preselected DataFrame.
# Labels that collected more than one file are skipped and keep their raw
# list of file names. Iterating over a snapshot makes the in-loop
# reassignment of values explicit.
for name, item in list(bff_dict.items()):
    if len(item) != 1:
        continue
    print(item)
    bff_dict[name] = preselection()(make_df(item))
bff_dict.keys()

['data/tw_2016_BFFZprimeToMuMu_M_500_dbs0p04.csv']
['data/tw_2016_BFFZprimeToMuMu_M_250_dbs0p04.csv']
['data/tw_2016_BFFZprimeToMuMu_M_200_dbs0p04.csv']
['data/tw_2016_BFFZprimeToMuMu_M_500_dbs1p0.csv']
['data/tw_2016_BFFZprimeToMuMu_M_200_dbs0p5.csv']
['data/tw_2016_BFFZprimeToMuMu_M_175_dbs0p04.csv']
['data/tw_2016_BFFZprimeToMuMu_M_350_dbs0p04.csv']
['data/tw_2016_BFFZprimeToMuMu_M_500_dbs0p5.csv']
['data/tw_2016_BFFZprimeToMuMu_M_125_dbs0p04.csv']
['data/tw_2016_BFFZprimeToMuMu_M_200_dbs1p0.csv']
['data/tw_2016_BFFZprimeToMuMu_M_350_dbs1p0.csv']
['data/tw_2016_BFFZprimeToMuMu_M_350_dbs0p5.csv']
['data/tw_2016_BFFZprimeToMuMu_M_150_dbs0p04.csv']
['data/tw_2016_BFFZprimeToMuMu_M_400_dbs0p04.csv']
['data/tw_2016_BFFZprimeToMuMu_M_300_dbs0p04.csv']


dict_keys(['500 0.04', '250 0.04', '200 0.04', '500 1.0', '200 0.5', '175 0.04', '350 0.04', '500 0.5', '125 0.04', '200 1.0', '350 1.0', '350 0.5', '150 0.04', '400 0.04', '300 0.04'])

In [7]:
def reg_filter(df, reg):
    """Return the rows of ``df`` whose ``reg`` column equals 1.

    Note: defining this here rebinds the ``reg_filter`` name that the import
    cell pulls in from ``bff_processor.bff_cuts``.
    """
    in_region = df[reg] == 1
    return df[in_region]

In [13]:
# Show the region names, the up/down weight-variation pairs and the jet
# variations imported from bff_processor.bff_meta.
all_reg, weights, jet_variations

(['SR1', 'SR2', 'CR10', 'CR20', 'CR13', 'CR23', 'CR14', 'CR24'],
 [['Weight_PuUp', 'Weight_PuDown'],
  ['Weight_BTagUp', 'Weight_BTagDown'],
  ['Weight_PUIDUp', 'Weight_PUIDDown'],
  ['Weight_PDF_ISRFSR_Up', 'Weight_PDF_ISRFSR_Down'],
  ['Weight_MuonSFUp', 'Weight_MuonSFDown'],
  ['Weight_ElectronSFUp', 'Weight_ElectronSFDown']],
 [['jerUp', 'jerDown'], ['jesUp', 'jesDown']])

In [15]:
# Quick check: build the BFF histograms for the background sample in the first
# region only (the outer loop breaks after one pass), nominal jet variation.
for reg in all_reg:
    for jv in ['nom']:
        # Region flag columns are named "<region>_<jet variation>".
        reg_nom = '{}_{}'.format(reg, jv)
        df = bff_1(reg_filter(background_df, reg_nom), jv)
        # NOTE(review): `columns` is not defined in the visible cells; confirm
        # it is provided elsewhere (e.g. by the star import) before running
        # on a fresh kernel.
        hist_1d_dict, hist_2d_dict = produce_bff_hists(df, 'test', columns, weight='Weight')
        print(reg)
        print(hist_1d_dict['DiLepMass'])
    # The dangling `fix = ` statement that used to sit here was an incomplete
    # assignment and made the whole cell a SyntaxError; it has been removed.
    break

SR1
{'DiLepMass': Histogram(Regular(139, 105, 800, metadata='DiLepMass test'), storage=Weight()) # Sum: WeightedSum(value=5984.71, variance=33728.7), 'TMB_nom': Histogram(Regular(80, 0, 800, metadata='TMB_nom test'), storage=Weight()) # Sum: WeightedSum(value=5984.65, variance=33728.7) (WeightedSum(value=5984.71, variance=33728.7) with flow), 'HTLT_nom': Histogram(Regular(100, -500, 500, metadata='HTLT_nom test'), storage=Weight()) # Sum: WeightedSum(value=5974.43, variance=33712.6) (WeightedSum(value=5984.71, variance=33728.7) with flow), 'RelMET_nom': Histogram(Regular(100, 0, 1, metadata='RelMET_nom test'), storage=Weight()) # Sum: WeightedSum(value=5984.71, variance=33728.7)}


In [None]:
def boost_plot(ax, bh, **kwargs):
    """Draw a 1D histogram as points with error bars on ``ax``.

    Note: this definition rebinds the ``boost_plot`` name imported from
    ``bff_processor.plotting_utils``, and the ``bh`` parameter hides the
    ``boost_histogram`` module alias inside this function.

    Parameters
    ----------
    ax : object with an ``errorbar`` method (e.g. a matplotlib Axes).
    bh : histogram exposing ``values()``, ``variances()`` and
        ``axes[0].centers``.
    **kwargs : forwarded unchanged to ``ax.errorbar``.

    Returns
    -------
    None
    """
    val, var = bh.values(), bh.variances()
    center = bh.axes[0].centers
    # variances() can be None when the storage carries no variance
    # information; draw the points without error bars instead of failing on
    # ``None ** .5``.
    yerr = None if var is None else var ** .5
    ax.errorbar(center, val, yerr=yerr, **kwargs)
    
def boost_plot2d(ax, h, lock_aspect=0, log=0, min_val=.1, **kwargs):
    """Draw a 2D histogram as a colour mesh on ``ax``.

    Parameters
    ----------
    ax : axes-like object to draw on.
    h : histogram whose ``to_numpy()`` returns ``(counts, x_edges, y_edges)``
        and whose first two axes carry a ``metadata`` attribute used as the
        axis labels.
    lock_aspect : truthy to set the axes aspect to "equal".
    log : truthy to draw with a logarithmic colour scale ('PuBu_r' colormap).
    min_val : lower bound applied to the colour scale minimum in log mode.
    **kwargs : accepted but currently not used by this function.

    Returns
    -------
    None
    """
    counts, x_edges, y_edges = h.to_numpy()
    # The drawing calls get the transposed count matrix.
    grid = counts.T
    if log:
        from matplotlib.colors import LogNorm
        # Clamp the lower limit so the log scale never starts below min_val.
        lowest = max(grid.min(), min_val)
        highest = grid.max()
        ax.pcolor(
            x_edges, y_edges, grid,
            norm=LogNorm(vmin=lowest, vmax=highest),
            cmap='PuBu_r',
            shading='auto',
        )
    else:
        ax.pcolormesh(x_edges, y_edges, grid)
    ax.set_xlabel(h.axes[0].metadata)
    ax.set_ylabel(h.axes[1].metadata)
    if lock_aspect:
        ax.set_aspect("equal")