# Make datacard


In [1]:
from typing import Dict, List, Tuple, Union

import json
import logging
from collections import OrderedDict

In [3]:
from __future__ import print_function, division
import sys
import os
import rhalphalib as rl
import numpy as np
import scipy.stats
# One-time rhalphalib setup, run right after the imports.
# NOTE(review): presumably registers helper methods on RooFit objects -- confirm in rhalphalib.util.
rl.util.install_roofit_helpers()
# Class-level switch on ParametericSample, set before any sample is created.
# NOTE(review): presumably disables the RooParametricHist rendering path -- confirm against rhalphalib.
rl.ParametericSample.PreferRooParametricHist = False

In [4]:
# Prefix prepended to the names of the analysis-specific nuisance parameters defined below.
CMS_PARAMS_LABEL = "CMS_HWW_semileptonic_boosted"

In [5]:
# Run configuration: data-taking period and lepton channel.
year = "2017"   # can give "all"
ch = "mu"

# Luminosities of the selected lepton channel, keyed by data-taking period.
with open("../fileset/luminosity.json") as f:
    LUMI = json.load(f)[ch]

# "all" sums every period found in the file; otherwise only the requested year is used.
if year == "all":
    years = ["2016", "2016APV", "2017", "2018"]
    full_lumi = np.sum(list(LUMI.values()))
else:
    years = [year]
    full_lumi = LUMI[year]

# Display the luminosity that will be used.
full_lumi

41475.26

In [6]:
# systematics that are NOT necessarily stored in the parquets
# `sys_dict` maps a systematic label to its nuisance parameter;
# `sys_dict_values` maps the same label to its (up, down) effect, with down=None
# marking a symmetric uncertainty.
sys_dict = {
    f"lumi_13TeV_{year}": rl.NuisanceParameter(f'CMS_lumi_13TeV_{year}', 'lnN'),
    "BR_hww": rl.NuisanceParameter('BR_hww', 'lnN'),
}

sys_dict_values = {
    # NOTE(review): the exponent hard-codes LUMI["2017"] although the key and the nuisance
    # name follow `year`; confirm the intended value for years other than 2017.
    f"lumi_13TeV_{year}": (1.02 ** (LUMI["2017"] / full_lumi), None),
    "BR_hww": (1.0153, 0.9848)
}

# systematics that ARE stored in the parquets
# Layout: lepton channel -> sample name (or "all_samples") -> {template systematic name: nuisance}.
# The template systematic name is the histogram "systematic" category without its Up/Down suffix.
# NOTE(review): the btagSF template keys hard-code "2017" while the nuisance names use `year`.
sys_from_parquets = {
    "mu": {
        "all_samples": {
            "weight_mu_btagSFlight_2017": rl.NuisanceParameter(f'{CMS_PARAMS_LABEL}_btagSFlight_{year}', 'lnN'),
            "weight_mu_btagSFlight_correlated": rl.NuisanceParameter(f'{CMS_PARAMS_LABEL}_btagSFlight_correlated', 'lnN'),
            "weight_mu_btagSFbc_2017": rl.NuisanceParameter(f'{CMS_PARAMS_LABEL}_btagSFbc_{year}', 'lnN'),
            "weight_mu_btagSFbc_correlated": rl.NuisanceParameter(f'{CMS_PARAMS_LABEL}_btagSFbc_correlated', 'lnN'),
            # NOTE(review): declared as 'shape' although the datacard loop reduces it to a normalisation ratio.
            "weight_mu_pileup": rl.NuisanceParameter(f'{CMS_PARAMS_LABEL}_PU_{year}', 'shape'),
            "weight_mu_isolation_muon": rl.NuisanceParameter(f'CMS_mu_iso_{year}', 'lnN'),
            "weight_mu_id_muon": rl.NuisanceParameter(f'CMS_mu_id_{year}', 'lnN'),
            "weight_mu_L1Prefiring": rl.NuisanceParameter(f'CMS_L1Prefiring_{year}', 'lnN'),
            # Was named 'CMS_btagSF' (copy-paste slip), which mislabelled the iso-trigger
            # uncertainty as a b-tagging one in the datacard.
            "weight_mu_trigger_iso_muon": rl.NuisanceParameter(f'{CMS_PARAMS_LABEL}_mu_trigger_iso_{year}', 'lnN'),
            "weight_mu_trigger_noniso_muon": rl.NuisanceParameter(f'{CMS_PARAMS_LABEL}_mu_trigger_{year}', 'lnN'),
        },
        "HWW": {
            "weight_mu_UEPS_FSR": rl.NuisanceParameter('UEPS_FSR_ggF', 'lnN'),
            "weight_mu_UEPS_ISR": rl.NuisanceParameter('UEPS_ISR_ggF', 'lnN'),
            "weight_mu_PDF_weight": rl.NuisanceParameter('pdf_Higgs_ggF', 'lnN'),
            "weight_mu_PDFaS_weight": rl.NuisanceParameter('pdfAS_Higgs_ggF', 'lnN'),
            "weight_mu_scalevar_3pt": rl.NuisanceParameter(f'{CMS_PARAMS_LABEL}_scale_pt_3_{year}', 'lnN'),
            "weight_mu_scalevar_7pt": rl.NuisanceParameter(f'{CMS_PARAMS_LABEL}_scale_pt_7_{year}', 'lnN'),
        },
        # No sample-specific systematics are defined yet for these backgrounds.
        "WJetsLNu": {
            
        },
        "TTbar": {
            
        }
    },
}

In [7]:
def shape_to_num(var, nom, clip=1.5):
    """Collapse a varied template into a single normalisation ratio.

    Parameters
    ----------
    var : array-like
        Bin yields of the systematically varied template.
    nom : array-like
        Bin yields of the nominal template.
    clip : float, optional
        Upper bound applied to the returned ratio.

    Returns
    -------
    float
        ``sum(var) / sum(nom)`` limited to the range ``[0, clip]``.
        ``1.0`` (i.e. no effect) is returned when the nominal yield is zero,
        because the ratio is undefined in that case.
    """
    nom_rate = np.sum(nom)
    var_rate = np.sum(var)

    # An empty nominal template cannot be rescaled; report "no effect" instead of nan/inf.
    if nom_rate == 0:
        return 1.0

    ratio = var_rate / nom_rate

    # Negative ratios are floored at zero. Checking the sign before clipping keeps a
    # large negative variation from being turned into +clip.
    if ratio < 0:
        return 0.0

    return float(min(ratio, clip))

def get_template(sample, ptbin):
    """Return the nominal template of `sample` in pT bin `ptbin`.

    The result is a ``(bin values, bin edges, observable name)`` tuple. The values
    are read from the ``cat1_sr`` entry of the module-level ``hists_templates`` and
    the edges are the module-level ``massbins``.
    """
    selection = {"samples": sample, "systematic": "Nominal", "fj_pt": ptbin}
    nominal_values = hists_templates["cat1_sr"][selection].values()
    return (nominal_values, massbins, "reco_higgs_m")

# Load template

In [8]:
import pickle as pkl
# Load the template histograms from the pickle file in the working directory.
# NOTE: pickle.load can execute arbitrary code -- only open files from a trusted source.
with open("hists_templates.pkl", "rb") as f:
    hists_templates = pkl.load(f)

In [9]:
# Show the loaded templates to inspect the available categories and axes.
hists_templates

{'cat1_sr': Hist(
   StrCategory(['WJetsLNu', 'TTbar', 'HWW', 'QCD', 'Data'], growth=True, name='samples'),
   StrCategory(['Nominal', 'weight_mu_btagSFlight_2017Up', 'weight_mu_btagSFlight_2017Down', 'weight_mu_btagSFlight_correlatedUp', 'weight_mu_btagSFlight_correlatedDown', 'weight_mu_btagSFbc_2017Up', 'weight_mu_btagSFbc_2017Down', 'weight_mu_btagSFbc_correlatedUp', 'weight_mu_btagSFbc_correlatedDown', 'weight_mu_pileupUp', 'weight_mu_pileupDown', 'weight_mu_isolation_muonUp', 'weight_mu_isolation_muonDown', 'weight_mu_id_muonUp', 'weight_mu_id_muonDown', 'weight_mu_L1PrefiringUp', 'weight_mu_L1PrefiringDown', 'weight_mu_trigger_iso_muonUp', 'weight_mu_trigger_iso_muonDown', 'weight_mu_trigger_noniso_muonUp', 'weight_mu_trigger_noniso_muonDown', 'weight_mu_d1K_NLOUp', 'weight_mu_d1K_NLODown', 'weight_mu_d2K_NLOUp', 'weight_mu_d2K_NLODown', 'weight_mu_d3K_NLOUp', 'weight_mu_d3K_NLODown', 'weight_mu_d1kappa_EWUp', 'weight_mu_d1kappa_EWDown', 'weight_mu_W_d2kappa_EWUp', 'weight_mu_W_

# Create datacard

In [10]:
# Bin edges of the jet-pT and mass axes of the signal-region templates.
# NOTE(review): the axes are picked by position (2 and 3); presumably these are the
# "fj_pt" and reconstructed-mass axes -- confirm against the histogram definition.
sr_axes = hists_templates["cat1_sr"].axes
ptbins = sr_axes[2].edges
massbins = sr_axes[3].edges
npt = len(ptbins) - 1

# Observable that the templates are binned in.
mass = rl.Observable('reco_higgs_m', massbins)

# One representative point per bin: 30% into each pT bin, and the centre of each mass bin.
pt_sample_points = ptbins[:-1] + 0.3 * np.diff(ptbins)
mass_sample_points = massbins[:-1] + 0.5 * np.diff(massbins)

# 2D (pT, mass) grids derived all at once; 'ij' indexing puts pT on the first axis.
ptpts, masspts = np.meshgrid(pt_sample_points, mass_sample_points, indexing='ij')
# rho = 2 * ln(m / pT), evaluated on the grid
rhopts = 2*np.log(masspts/ptpts)
# Linear rescaling of pT from [450, 1200] and of rho from [-6, -2.1] onto [0, 1].
ptscaled = (ptpts - 450.) / (1200. - 450.)
rhoscaled = (rhopts - (-6)) / ((-2.1) - (-6))

In [11]:
# build actual fit model now
# The model is the container to which the per-pT-bin channels are added.
model = rl.Model("testModel")

In [12]:
# Build one channel per pT bin, fill it with the signal and background template
# samples together with their systematic effects, and attach the observed data.
template_hist = hists_templates["cat1_sr"]

for ptbin in range(npt):
    for region in ['pass']:
        # Named `channel` rather than `ch` so that the lepton-channel string `ch`
        # set in the configuration cell is not overwritten by a Channel object.
        channel = rl.Channel("ptbin%d%s" % (ptbin, region))
        model.addChannel(channel)

        for sName in ['HWW', 'WJetsLNu', 'TTbar']:
            # nominal template: (bin values, mass bin edges, observable name)
            templ = get_template(sName, ptbin)
            stype = rl.Sample.SIGNAL if sName == 'HWW' else rl.Sample.BACKGROUND
            sample = rl.TemplateSample(channel.name + '_' + sName, stype, templ)

            ### systematics NOT from parquets
            for sys_name, sys_value in sys_dict.items():
                effect_up, effect_down = sys_dict_values[sys_name]
                # A down effect of None marks a symmetric uncertainty and is passed through
                # unchanged so that it gets symmetrised. Passing the up value as the down
                # value would declare a one-sided effect (both variations raise the yield).
                sample.setParamEffect(sys_value, effect_up, effect_down)

            ### systematics from parquets
            # The nominal yields do not depend on the systematic, so take them once per sample.
            nominal = templ[0]

            # systematics common to all samples first, then those specific to this sample
            parquet_systs = (
                list(sys_from_parquets["mu"]["all_samples"].items())
                + list(sys_from_parquets["mu"][sName].items())
            )
            for sys_name, sys_value in parquet_systs:
                syst_up = template_hist[{"samples": sName, "fj_pt": ptbin, "systematic": sys_name+"Up"}].values()
                syst_do = template_hist[{"samples": sName, "fj_pt": ptbin, "systematic": sys_name+"Down"}].values()

                # reduce each varied template to a single normalisation ratio
                eff_up = shape_to_num(syst_up, nominal)
                eff_do = shape_to_num(syst_do, nominal)

                sample.setParamEffect(sys_value, eff_up, eff_do)

            channel.addSample(sample)

        # add data
        data_obs = get_template("Data", ptbin)
        channel.setObservation(data_obs)

In [13]:
# Pickle the assembled model into the output directory.
tmpdir = "tmpdir/"
# Create the directory first: opening a file for writing fails if its folder is missing.
os.makedirs(tmpdir, exist_ok=True)
with open(os.path.join(str(tmpdir), 'testModel.pkl'), "wb") as fout:
    pkl.dump(model, fout)