In [1]:
from __future__ import division, print_function

import argparse
import json
import logging
import math
import pickle as pkl
import warnings

import numpy as np
import pandas as pd
import rhalphalib as rl
from utils import blindBins, get_template, labels, samples, shape_to_num, sigs

# --- Notebook-wide configuration ---------------------------------------------
# Logging: emit records at INFO level and above.
logging.basicConfig(level=logging.INFO)

# Warnings: silence warnings whose message starts with "Found duplicate branch ".
warnings.filterwarnings(action="ignore", message="Found duplicate branch ")

# pandas: disable the chained-assignment check.
pd.options.mode.chained_assignment = None

# rhalphalib: switch off the PreferRooParametricHist flag on ParametericSample.
rl.ParametericSample.PreferRooParametricHist = False

In [4]:
# List the files available under templates/v1.
! ls templates/v1

hists_templates_Run2_ele.pkl model_Run2_ele.pkl
hists_templates_Run2_mu.pkl


In [10]:
# Years whose luminosity is combined further down.
years = [
    "2018",
    "2017",
    "2016",
    "2016APV",
]

# Lepton channels that the per-year luminosity is averaged over.
channels = [
    "ele",
    "mu",
]

In [99]:
# Load the histogram templates used by every cell below.
# (An earlier revision loaded templates/test/hists_templates_Run2_ele.pkl instead.)
#
# NOTE(review): the directory listing recorded in cell In [4] only shows the
# `_ele` / `_mu` variants of this file under templates/v1 -- confirm that the
# combined file read here exists.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load template files that come from a trusted source.
with open("templates/v1/hists_templates_Run2.pkl", "rb") as f:
    hists_templates = pkl.load(f)

In [100]:
hists_templates["pass"].axes

(StrCategory(['DYJets', 'WJetsLNu', 'VBF', 'VH', 'TTbar', 'SingleTop', 'ggF', 'Data', 'Diboson', 'WZQQ', 'ttH', 'QCD'], growth=True, name='samples'),
 StrCategory(['nominal', 'weight_pileupUp', 'weight_pileupDown', 'weight_isolationUp', 'weight_isolationDown', 'weight_idUp', 'weight_idDown', 'weight_reco_eleUp', 'weight_reco_eleDown', 'weight_L1PrefiringUp', 'weight_L1PrefiringDown', 'weight_trigger_eleUp', 'weight_trigger_eleDown', 'weight_trigger_iso_muUp', 'weight_trigger_iso_muDown', 'weight_trigger_noniso_muUp', 'weight_trigger_noniso_muDown', 'weight_aS_weightUp', 'weight_aS_weightDown', 'weight_UEPS_FSRUp', 'weight_UEPS_FSRDown', 'weight_UEPS_ISRUp', 'weight_UEPS_ISRDown', 'weight_PDF_weightUp', 'weight_PDF_weightDown', 'weight_PDFaS_weightUp', 'weight_PDFaS_weightDown', 'weight_scalevar_3ptUp', 'weight_scalevar_3ptDown', 'weight_scalevar_7ptUp', 'weight_scalevar_7ptDown', 'weight_d1kappa_EWUp', 'weight_d1kappa_EWDown', 'weight_d1K_NLOUp', 'weight_d1K_NLODown', 'weight_d2K_NLOUp

In [75]:
    # Read the luminosity table once, instead of re-opening and re-parsing the
    # JSON file for every (year, channel) pair.
    with open("../fileset/luminosity.json") as f:
        lumi_by_channel = json.load(f)

    # Per-year luminosity, averaged over the lepton channels.
    LUMI = {}
    for year in years:
        LUMI[year] = 0.0
        for lep_ch in channels:
            LUMI[year] += lumi_by_channel[lep_ch][year]
        LUMI[year] /= len(channels)

    # get the LUMI covered in the templates (sum over all years)
    full_lumi = 0
    for year_ in years:
        full_lumi += LUMI[year_]

    # NOTE(review): the axes are addressed by position. Cell In [33] shows
    # axes[2] holding category labels such as 'ggF_pt200to300'; if that is the
    # layout of the loaded templates, `.edges` here are presumably category
    # indices rather than pT values -- confirm.
    ptbins = hists_templates["pass"].axes[2].edges
    npt = len(ptbins) - 1

    massbins = hists_templates["pass"].axes[3].edges
    mass = rl.Observable("mass_observable", massbins)

    # Evaluation points for every (pt, mass) bin, derived all at once as 2D
    # arrays (indexing="ij": first index follows ptbins, second follows massbins).
    # The pt points sit 30% of the way into each bin, the mass points at the
    # bin centre.
    ptpts, masspts = np.meshgrid(
        ptbins[:-1] + 0.3 * np.diff(ptbins),
        massbins[:-1] + 0.5 * np.diff(massbins),
        indexing="ij",
    )

    # Linear rescaling of the pt points: first bin edge -> 0, last bin edge -> 1.
    pt_scaled = (ptpts - ptbins[0]) / (ptbins[-1] - ptbins[0])
    # TODO: define mass_scaled in a similar way

    # The rho-based scaling is currently disabled in this version of the cell:
    # rhopts = 2 * np.log(masspts / ptpts)
    # rho_scaled = (rhopts - (-6)) / ((-2.1) - (-6))  # TODO: understand why
    #
    # validbins = (rho_scaled >= 0) & (rho_scaled <= 1)
    # rho_scaled[~validbins] = 1  # we will mask these out later

In [86]:
list(hists_templates["pass"].axes["categories"])

['ggF_pt200to300', 'ggF_pt300toinf', 'VBF_pt200toinf']

In [98]:
list(hists_templates["pass"].axes["categories"])

['ggF_pt200to300', 'ggF_pt300toinf', 'VBF_pt200toinf']

In [89]:
hists_templates["pass"].axes["mass_observable"].name

'mass_observable'

In [67]:
# Bound specification per category, keyed by category name.
# Presumably (low, high) pT bounds matching the name -- TODO confirm.
#   * an empty tuple means no bounds are given (the loop cell below treats
#     such a category as a single bin),
#   * the string "inf" stands for an open upper bound (the loop cell below
#     tests for this exact string).
categ = {
    "VBF_pt200toinf": (),
    "ggF_pt200to300": (200, 300),
    "ggF_pt300toinf": (300, "inf"),
}

In [70]:
# Inspect the bound specification of every category in `categ`.
for cat, bounds in categ.items():
    if not bounds:
        # No bounds given: the category is treated as a single bin.
        print(cat, "1 bin")
        npt = 1  # NOTE: this overwrites the notebook-level `npt`
    # TODO: categories with explicit bounds are not handled yet. The original
    # `else:` branch had no body, which made this cell a syntax error.

    # Report categories whose bounds contain the open-ended "inf" marker.
    if "inf" in bounds:
        print(cat)

1 bin VBF_pt200toinf
ggF_pt300toinf


In [66]:
len(categ["VBF_pt200toinf"])

0

In [None]:
# for vbf
# 

In [60]:
pt_scaled

array([[0.1       , 0.1       , 0.1       , 0.1       , 0.1       ,
        0.1       , 0.1       , 0.1       , 0.1       ],
       [0.43333333, 0.43333333, 0.43333333, 0.43333333, 0.43333333,
        0.43333333, 0.43333333, 0.43333333, 0.43333333],
       [0.76666667, 0.76666667, 0.76666667, 0.76666667, 0.76666667,
        0.76666667, 0.76666667, 0.76666667, 0.76666667]])

In [45]:
ptbins    # bin edges read from axis 2 of hists_templates["pass"]

array([ 200.,  300.,  450., 2000.])

In [46]:
npt

3

In [47]:
ptpts   # pt evaluation points: each sits 30% of the way into its bin (the 0.3 factor in the np.meshgrid call)

array([[230., 230., 230., 230., 230., 230., 230., 230., 230.],
       [345., 345., 345., 345., 345., 345., 345., 345., 345.],
       [915., 915., 915., 915., 915., 915., 915., 915., 915.]])

In [48]:
masspts

array([[ 60.,  80., 100., 120., 140., 160., 180., 200., 220.],
       [ 60.,  80., 100., 120., 140., 160., 180., 200., 220.],
       [ 60.,  80., 100., 120., 140., 160., 180., 200., 220.]])

In [49]:
massbins

array([ 50.,  70.,  90., 110., 130., 150., 170., 190., 210., 230.])

In [50]:
# NOTE(review): `rhopts` is only assigned in the In [12] version of the binning
# cell; the In [75] version has that assignment commented out, so this
# inspection relies on leftover kernel state and fails on a fresh run.
rhopts

array([[-2.68746949, -2.11210535, -1.66581825, -1.30117513, -0.99287377,
        -0.72581099, -0.49024492, -0.27952388, -0.08890353],
       [-3.49839971, -2.92303556, -2.47674846, -2.11210535, -1.80380399,
        -1.5367412 , -1.30117513, -1.0904541 , -0.89983374],
       [-5.44915901, -4.87379486, -4.42750776, -4.06286464, -3.75456329,
        -3.4875005 , -3.25193443, -3.0412134 , -2.85059304]])

array([[0.01666667, 0.01666667, 0.01666667, 0.01666667, 0.01666667,
        0.01666667, 0.01666667, 0.01666667, 0.01666667],
       [0.08055556, 0.08055556, 0.08055556, 0.08055556, 0.08055556,
        0.08055556, 0.08055556, 0.08055556, 0.08055556],
       [0.39722222, 0.39722222, 0.39722222, 0.39722222, 0.39722222,
        0.39722222, 0.39722222, 0.39722222, 0.39722222]])

In [33]:
hists_templates["pass"].axes[2][0].replace("_", "-")

'ggF-pt200to300'

In [12]:
    # Read the luminosity table once, instead of re-opening and re-parsing the
    # JSON file for every (year, channel) pair.
    with open("../fileset/luminosity.json") as f:
        lumi_by_channel = json.load(f)

    # Per-year luminosity, averaged over the lepton channels.
    LUMI = {}
    for year in years:
        LUMI[year] = 0.0
        for lep_ch in channels:
            LUMI[year] += lumi_by_channel[lep_ch][year]
        LUMI[year] /= len(channels)

    # get the LUMI covered in the templates (sum over all years)
    full_lumi = 0
    for year_ in years:
        full_lumi += LUMI[year_]

    # NOTE(review): the axes are addressed by position (2 and 3); confirm this
    # matches the axis order of the loaded templates.
    ptbins = hists_templates["pass"].axes[2].edges
    npt = len(ptbins) - 1

    massbins = hists_templates["pass"].axes[3].edges
    mass = rl.Observable("mass_observable", massbins)

    # Evaluation points for every (pt, mass) bin, derived all at once as 2D
    # arrays (indexing="ij": first index follows ptbins, second follows massbins).
    # The pt points sit 30% of the way into each bin, the mass points at the
    # bin centre.
    ptpts, masspts = np.meshgrid(
        ptbins[:-1] + 0.3 * np.diff(ptbins),
        massbins[:-1] + 0.5 * np.diff(massbins),
        indexing="ij",
    )

    rhopts = 2 * np.log(masspts / ptpts)

    # Linear rescaling of the pt points: first bin edge -> 0, last bin edge -> 1.
    pt_scaled = (ptpts - ptbins[0]) / (ptbins[-1] - ptbins[0])
    # Linear rescaling of rho: -6 -> 0 and -2.1 -> 1.
    rho_scaled = (rhopts - (-6)) / ((-2.1) - (-6))  # TODO: understand why

    # Points whose scaled rho falls outside [0, 1] are flagged as invalid and
    # pinned to 1.
    validbins = (rho_scaled >= 0) & (rho_scaled <= 1)
    rho_scaled[~validbins] = 1  # we will mask these out later


In [13]:
ptbins

array([ 200.,  300.,  450., 2000.])

In [14]:
npt

3

In [15]:
ptpts

array([[230., 230., 230., 230., 230., 230., 230., 230., 230.],
       [345., 345., 345., 345., 345., 345., 345., 345., 345.],
       [915., 915., 915., 915., 915., 915., 915., 915., 915.]])

In [16]:
masspts

array([[ 60.,  80., 100., 120., 140., 160., 180., 200., 220.],
       [ 60.,  80., 100., 120., 140., 160., 180., 200., 220.],
       [ 60.,  80., 100., 120., 140., 160., 180., 200., 220.]])

In [17]:
pt_scaled

array([[0.01666667, 0.01666667, 0.01666667, 0.01666667, 0.01666667,
        0.01666667, 0.01666667, 0.01666667, 0.01666667],
       [0.08055556, 0.08055556, 0.08055556, 0.08055556, 0.08055556,
        0.08055556, 0.08055556, 0.08055556, 0.08055556],
       [0.39722222, 0.39722222, 0.39722222, 0.39722222, 0.39722222,
        0.39722222, 0.39722222, 0.39722222, 0.39722222]])