In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from src.data_tools.get_data import get_data
from src.assets.lumi import lumi_dict

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [4]:
# Era label handed to get_data.
era = '2016'
# get_data returns two values: the event table (used below through .loc and
# column access, so presumably a pandas DataFrame) and a luminosity value.
# NOTE(review): `lumi` is reassigned from lumi_dict in a later cell, so the
# value returned here is not the one used for the weights — confirm intended.
wdf, lumi = get_data(era, stitch_dy=True)

In [5]:
# Distinct values of the `mass` column in the loaded events.
masses = wdf["mass"].unique()
# One-column table of those masses: missing entries are dropped first so the
# integer cast cannot fail on them.
masses_df = (
    pd.DataFrame([{"mass": value} for value in masses])
    .dropna()
    .astype('int')
)

In [6]:
# Replace the luminosity returned by get_data with the '16-18' entry of
# lumi_dict; this is the value used for the sample weights below.
# NOTE(review): `era` is '2016' while this key looks like a combined
# 2016-2018 value — confirm the mismatch is intentional.
lumi = lumi_dict['16-18']

In [7]:
# Background samples, one row each: dataset name, cross section (xsec),
# event count (nevents) and the short nickname that is later appended to the
# region column names.
# NOTE(review): the 1e3 factor on xsec looks like a unit conversion — confirm.
backgrounds = pd.DataFrame(
    [
        ('DYJetsToLL_M-50_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8', 5929.0*1e3, 79575677, "dy"),
        ('mc_ttbar', 76.7*1e3, 79140880, "tt"),
    ],
    columns=["name", "xsec", "nevents", "nickname"],
)

In [8]:
# The y3_*_deepflavour_bff samples get their `mass` filled in from the number
# embedded in the sample name, and their `dbs` set to the placeholder -1.
for y3_mass in (400, 1000, 700, 250):
    is_y3_sample = wdf.name == 'y3_{}_deepflavour_bff'.format(y3_mass)
    wdf.loc[is_y3_sample, 'mass'] = y3_mass
    wdf.loc[is_y3_sample, 'dbs'] = -1

In [10]:
# Signal samples, one row each: (name, xsec, nevents).
# The BFFZprimeToMuMu names carry the mass (M_<mass>) and the dbs value
# (dbs<value>, with 'p' in place of the decimal point); the y3_* samples are
# listed after them.
signals = pd.DataFrame(
    [
        ("BFFZprimeToMuMu_M_250_dbs0p04", 122.5, 511129),
        ("BFFZprimeToMuMu_M_300_dbs0p04", 58.86, 442196),
        ("BFFZprimeToMuMu_M_400_dbs0p04", 17.42, 378564),
        ("BFFZprimeToMuMu_M_500_dbs0p04", 6.42, 421806),
        ("BFFZprimeToMuMu_M_500_dbs0p5", 13.81, 508321),
        ("BFFZprimeToMuMu_M_500_dbs1p0", 35.56, 436956),
        ("BFFZprimeToMuMu_M_350_dbs0p04", 31.04, 470696),
        ("BFFZprimeToMuMu_M_350_dbs0p5", 65.35, 386677),
        ("BFFZprimeToMuMu_M_350_dbs1p0", 165.8, 476490),
        ("BFFZprimeToMuMu_M_200_dbs0p04", 288.9, 454692),
        ("BFFZprimeToMuMu_M_200_dbs1p0", 1486.0, 507520),
        ("BFFZprimeToMuMu_M_200_dbs0p5", 592.6, 479332),
        ("BFFZprimeToMuMu_M_125_dbs0p04", 1528.0, 337381),
        ("BFFZprimeToMuMu_M_125_dbs0p5", 3119.0, 347033),
        ("BFFZprimeToMuMu_M_125_dbs1p0", 7866.0, 349518),
        ("BFFZprimeToMuMu_M_150_dbs0p04", 815.0, 316360),
        ("BFFZprimeToMuMu_M_150_dbs0p5", 1673.0, 327249),
        ("BFFZprimeToMuMu_M_150_dbs1p0", 4187.0, 336801),
        ("BFFZprimeToMuMu_M_175_dbs0p04", 471.2, 301727),
        ("BFFZprimeToMuMu_M_175_dbs0p5", 969.8, 312508),
        ("BFFZprimeToMuMu_M_175_dbs1p0", 2438.0, 319185),
        ("BFFZprimeToMuMu_M_750_dbs0p04", 0.9116, 210705),
        ("y3_250_deepflavour_bff", 10.1, 435000),
        ("y3_400_deepflavour_bff", 2.9, 456000),
        ("y3_1000_deepflavour_bff", .2, 500000),
        ("y3_700_deepflavour_bff", 0.6, 469000),
    ],
    columns=["name", "xsec", "nevents"],
)

In [11]:
# Per-event weight for every sample: lumi * xsec / nevents.
# The same formula is applied to the signal and the background tables.
for sample_table in (signals, backgrounds):
    sample_table['weight'] = lumi * sample_table['xsec'] / sample_table['nevents']

In [12]:
def df_mass_filter(df, mass, widthper):
    """Select the rows whose DiLepMass lies strictly inside a window around ``mass``.

    Args:
        df: table with a ``DiLepMass`` column.
        mass: centre of the window.
        widthper: half-width of the window as a fraction of ``mass``.

    Returns:
        The rows of ``df`` with ``|DiLepMass - mass| < mass * widthper``;
        rows sitting exactly on either edge are excluded.
    """
    half_width = mass * widthper
    distance = (df["DiLepMass"] - mass).abs()
    return df[distance < half_width]

In [13]:
import uncertainties as unc

In [15]:
def region_counts(df, weight, postfix=""):
    """Weighted event yields for the SR1 and SR2 selections.

    Args:
        df: table with ``SR1_nom`` and ``SR2_nom`` flag columns.
        weight: factor applied to both the count and its uncertainty.
        postfix: text appended to the ``SR1`` / ``SR2`` keys of the result.

    Returns:
        Dict with keys ``"SR1<postfix>"`` and ``"SR2<postfix>"``. Each value
        is ``unc.ufloat(N * weight, sqrt(N) * weight)``, where N is the
        number of rows whose flag equals 1.
    """
    def weighted_yield(flag_column):
        # N rows pass the selection; sqrt(N) is used as the error on N.
        n_pass = len(df[flag_column == 1])
        return unc.ufloat(n_pass * weight, n_pass ** .5 * weight)

    sr1_yield = weighted_yield(df.SR1_nom)
    sr2_yield = weighted_yield(df.SR2_nom)
    return {"SR1{}".format(postfix): sr1_yield, "SR2{}".format(postfix): sr2_yield}

In [16]:
# Build one row per signal sample: the signal yields in SR1/SR2 inside a
# +/-10% DiLepMass window around the signal mass, together with the yields of
# every background in that same window.

# Selecting a background's events by name does not depend on the signal, so
# it is done once here rather than once per signal inside the loop.
bck_events = [
    (bck, wdf[wdf.name == bck['name']])
    for _bck_idx, bck in backgrounds.iterrows()
]

sample_counts = []
for _sig_idx, signal in signals.iterrows():
    stdf = wdf[wdf.name == signal['name']]
    mass = stdf.mass.mean()
    if pd.isna(mass):
        # Happens when the sample has no events in wdf or its mass is unset.
        # int(mass) below would raise the same ValueError, but with an
        # opaque "cannot convert float NaN to integer" message.
        raise ValueError(
            "no mass available for signal sample {!r}: "
            "it has no events in wdf or its 'mass' column is empty".format(signal['name'])
        )
    stdf = df_mass_filter(stdf, mass, .1)
    signal_regions = region_counts(stdf, signal.weight)

    # Background yields in the same mass window, keyed by region + nickname.
    bck_regions = {}
    for bck, all_bck_events in bck_events:
        btdf = df_mass_filter(all_bck_events, mass, .1)
        bck_regions.update(region_counts(btdf, bck.weight, postfix=bck.nickname))

    # dbs is taken as the mean over the events that survive the mass window.
    sample_dict = {"mass": int(mass), "dbs": round(stdf.dbs.mean(), 2), **signal_regions, **bck_regions}
    sample_counts.append(sample_dict)

In [17]:
# Turn the list of per-signal dicts into a table, one row per signal sample.
sample_counts_csv = pd.DataFrame(sample_counts)

In [18]:
# Save the table to 'sample_s_b.csv' in the working directory. The row index
# is written as well, so reading the file back yields an extra unnamed
# leading column.
sample_counts_csv.to_csv('sample_s_b.csv')

In [19]:
# Display the full yields table.
sample_counts_csv

Unnamed: 0,mass,dbs,SR1,SR2,SR1dy,SR2dy,SR1tt,SR2tt
0,250,0.04,1820+/-8,537+/-4,(1.53+/-0.13)e+03,(9.0+/-1.0)e+02,1237+/-13,1447+/-14
1,300,0.04,921+/-4,299.9+/-2.3,(8.6+/-0.9)e+02,(5.8+/-0.8)e+02,716+/-10,826+/-10
2,400,0.04,269.9+/-1.3,99.9+/-0.8,(3.1+/-0.6)e+02,(1.7+/-0.4)e+02,211+/-5,247+/-6
3,500,0.04,95.2+/-0.4,39.58+/-0.29,(2.5+/-0.5)e+02,103+/-32,71.3+/-3.1,78.0+/-3.2
4,500,0.5,161.9+/-0.8,65.2+/-0.5,(2.5+/-0.5)e+02,103+/-32,71.3+/-3.1,78.0+/-3.2
5,500,1.0,358.9+/-2.0,143.1+/-1.3,(2.5+/-0.5)e+02,103+/-32,71.3+/-3.1,78.0+/-3.2
6,350,0.04,491.5+/-2.1,171.7+/-1.2,(5.5+/-0.8)e+02,(3.8+/-0.6)e+02,394+/-7,444+/-8
7,350,0.5,820+/-4,277.2+/-2.5,(5.5+/-0.8)e+02,(3.8+/-0.6)e+02,394+/-7,444+/-8
8,350,1.0,1768+/-9,582+/-5,(5.5+/-0.8)e+02,(3.8+/-0.6)e+02,394+/-7,444+/-8
9,200,0.04,3634+/-18,959+/-9,(3.07+/-0.18)e+03,(1.52+/-0.12)e+03,1957+/-16,2243+/-17


In [20]:
# Read the saved table back from disk.
# NOTE(review): the next cell rebinds `sample_counts` to the in-memory
# `sample_counts_csv`, so this result is never used. Presumably the yields
# come back from CSV as plain strings rather than ufloat objects and cannot
# be used for the arithmetic below — confirm, then drop one of the two cells.
sample_counts = pd.read_csv('sample_s_b.csv')

In [21]:
# Continue with the in-memory table. This rebinds `sample_counts`, replacing
# the frame read from the CSV in the previous cell.
sample_counts = sample_counts_csv

In [22]:
# Drop any column whose name starts with 'Unnamed' (as produced by a CSV
# round trip of the index), then index and sort the rows by (mass, dbs).
is_unnamed_column = sample_counts.columns.str.contains('^Unnamed')
sample_counts = (
    sample_counts.loc[:, ~is_unnamed_column]
    .set_index(['mass', 'dbs'])
    .sort_index()
)

In [23]:
# Display the yields table ordered by (mass, dbs).
sample_counts.sort_index().round(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,SR1,SR2,SR1dy,SR2dy,SR1tt,SR2tt
mass,dbs,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
125,0.04,(4.74+/-0.05)e+03,1150+/-27,(6.87+/-0.27)e+03,(3.56+/-0.19)e+03,1437+/-14,1756+/-15
125,0.5,(7.68+/-0.10)e+03,(1.76+/-0.05)e+03,(6.87+/-0.27)e+03,(3.56+/-0.19)e+03,1437+/-14,1756+/-15
125,1.0,(1.643+/-0.023)e+04,(3.69+/-0.11)e+03,(6.87+/-0.27)e+03,(3.56+/-0.19)e+03,1437+/-14,1756+/-15
150,0.04,(6.04+/-0.05)e+03,1370+/-22,(6.04+/-0.25)e+03,(2.54+/-0.16)e+03,2081+/-17,2436+/-18
150,0.5,(9.57+/-0.08)e+03,(2.13+/-0.04)e+03,(6.04+/-0.25)e+03,(2.54+/-0.16)e+03,2081+/-17,2436+/-18
150,1.0,(2.035+/-0.019)e+04,(4.37+/-0.09)e+03,(6.04+/-0.25)e+03,(2.54+/-0.16)e+03,2081+/-17,2436+/-18
175,0.04,4938+/-33,1200+/-16,(4.44+/-0.21)e+03,(2.22+/-0.15)e+03,2191+/-17,2502+/-18
175,0.5,(8.00+/-0.06)e+03,1934+/-29,(4.44+/-0.21)e+03,(2.22+/-0.15)e+03,2191+/-17,2502+/-18
175,1.0,(1.706+/-0.013)e+04,(3.95+/-0.06)e+03,(4.44+/-0.21)e+03,(2.22+/-0.15)e+03,2191+/-17,2502+/-18
200,0.04,3634+/-18,959+/-9,(3.07+/-0.18)e+03,(1.52+/-0.12)e+03,1957+/-16,2243+/-17


In [24]:
# Print the same table as LaTeX source (print is needed here so the markup is
# emitted as plain text).
print(sample_counts.sort_index().round(2).to_latex())

\begin{tabular}{llllllll}
\toprule
     &       &                  SR1 &                SR2 &              SR1dy &              SR2dy &        SR1tt &       SR2tt \\
mass & dbs &                      &                    &                    &                    &              &             \\
\midrule
125  &  0.04 &    (4.74+/-0.05)e+03 &          1150+/-27 &  (6.87+/-0.27)e+03 &  (3.56+/-0.19)e+03 &    1437+/-14 &   1756+/-15 \\
     &  0.50 &    (7.68+/-0.10)e+03 &  (1.76+/-0.05)e+03 &  (6.87+/-0.27)e+03 &  (3.56+/-0.19)e+03 &    1437+/-14 &   1756+/-15 \\
     &  1.00 &  (1.643+/-0.023)e+04 &  (3.69+/-0.11)e+03 &  (6.87+/-0.27)e+03 &  (3.56+/-0.19)e+03 &    1437+/-14 &   1756+/-15 \\
150  &  0.04 &    (6.04+/-0.05)e+03 &          1370+/-22 &  (6.04+/-0.25)e+03 &  (2.54+/-0.16)e+03 &    2081+/-17 &   2436+/-18 \\
     &  0.50 &    (9.57+/-0.08)e+03 &  (2.13+/-0.04)e+03 &  (6.04+/-0.25)e+03 &  (2.54+/-0.16)e+03 &    2081+/-17 &   2436+/-18 \\
     &  1.00 &  (2.035+/-0.019)e+04 &  (4

In [25]:
# SR1 only: signal is the SR1 yield, background is the sum of the 'dy' and
# 'tt' background yields in SR1. These columns are overwritten in place by
# the later SR2 and SR1+SR2 cells, so cell order matters.
sample_counts['s'] = sample_counts['SR1']
sample_counts['b'] = sample_counts['SR1dy'] + sample_counts['SR1tt']

In [26]:
def make_s_b(df):
    """Add signal-over-background figures of merit to ``df`` and return it.

    Reads the ``s`` and ``b`` columns and writes three new columns into the
    same frame (``df`` is modified in place, not copied):
    ``s/b``, ``s/sqrt(b)`` and ``s/sqrt(s+b)``.
    """
    signal, background = df['s'], df['b']
    df['s/b'] = signal / background
    # Square roots are taken with ** .5, as in the rest of this notebook.
    df['s/sqrt(b)'] = signal / background ** .5
    total = signal + background
    df['s/sqrt(s+b)'] = signal / total ** .5
    return df

In [27]:
# Compute the figures of merit for the current s/b columns (SR1 at this
# point) and display only those columns. make_s_b also writes them into
# sample_counts itself.
make_s_b(sample_counts)[['s','b','s/b', 's/sqrt(b)', 's/sqrt(s+b)']].round(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,s,b,s/b,s/sqrt(b),s/sqrt(s+b)
mass,dbs,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
125,0.04,(4.74+/-0.05)e+03,(8.31+/-0.27)e+03,0.571+/-0.019,52.0+/-1.0,41.5+/-0.6
125,0.5,(7.68+/-0.10)e+03,(8.31+/-0.27)e+03,0.924+/-0.032,84.2+/-1.7,60.7+/-0.8
125,1.0,(1.643+/-0.023)e+04,(8.31+/-0.27)e+03,1.98+/-0.07,180+/-4,104.4+/-1.1
150,0.04,(6.04+/-0.05)e+03,(8.12+/-0.25)e+03,0.744+/-0.024,67.0+/-1.2,50.8+/-0.5
150,0.5,(9.57+/-0.08)e+03,(8.12+/-0.25)e+03,1.18+/-0.04,106.2+/-1.9,71.9+/-0.7
150,1.0,(2.035+/-0.019)e+04,(8.12+/-0.25)e+03,2.51+/-0.08,226+/-4,120.6+/-0.9
175,0.04,4938+/-33,(6.63+/-0.21)e+03,0.745+/-0.025,60.6+/-1.1,45.9+/-0.5
175,0.5,(8.00+/-0.06)e+03,(6.63+/-0.21)e+03,1.21+/-0.04,98.2+/-1.7,66.1+/-0.6
175,1.0,(1.706+/-0.013)e+04,(6.63+/-0.21)e+03,2.57+/-0.09,210+/-4,110.9+/-0.7
200,0.04,3634+/-18,(5.02+/-0.18)e+03,0.724+/-0.026,51.3+/-0.9,39.1+/-0.4


In [28]:
# Same SR1 figures of merit, printed as LaTeX source. make_s_b is called
# again and recomputes the columns from the current s and b.
print(make_s_b(sample_counts)[['s','b','s/b', 's/sqrt(b)', 's/sqrt(s+b)']].round(2).to_latex())

\begin{tabular}{lllllll}
\toprule
     &       &                    s &                  b &              s/b &    s/sqrt(b) &   s/sqrt(s+b) \\
mass & dbs &                      &                    &                  &              &               \\
\midrule
125  &  0.04 &    (4.74+/-0.05)e+03 &  (8.31+/-0.27)e+03 &    0.571+/-0.019 &   52.0+/-1.0 &    41.5+/-0.6 \\
     &  0.50 &    (7.68+/-0.10)e+03 &  (8.31+/-0.27)e+03 &    0.924+/-0.032 &   84.2+/-1.7 &    60.7+/-0.8 \\
     &  1.00 &  (1.643+/-0.023)e+04 &  (8.31+/-0.27)e+03 &      1.98+/-0.07 &      180+/-4 &   104.4+/-1.1 \\
150  &  0.04 &    (6.04+/-0.05)e+03 &  (8.12+/-0.25)e+03 &    0.744+/-0.024 &   67.0+/-1.2 &    50.8+/-0.5 \\
     &  0.50 &    (9.57+/-0.08)e+03 &  (8.12+/-0.25)e+03 &      1.18+/-0.04 &  106.2+/-1.9 &    71.9+/-0.7 \\
     &  1.00 &  (2.035+/-0.019)e+04 &  (8.12+/-0.25)e+03 &      2.51+/-0.08 &      226+/-4 &   120.6+/-0.9 \\
175  &  0.04 &            4938+/-33 &  (6.63+/-0.21)e+03 &    0.745+/-0.025 &  

In [29]:
# SR2 only: overwrite s and b with the SR2 signal yield and the sum of the
# 'dy' and 'tt' background yields in SR2, then recompute and display the
# figures of merit. This replaces the SR1 values stored in sample_counts.
sample_counts['s'] = sample_counts['SR2']
sample_counts['b'] = sample_counts['SR2dy'] + sample_counts['SR2tt']
make_s_b(sample_counts)[['s','b','s/b', 's/sqrt(b)', 's/sqrt(s+b)']].round(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,s,b,s/b,s/sqrt(b),s/sqrt(s+b)
mass,dbs,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
125,0.04,1150+/-27,(5.31+/-0.19)e+03,0.216+/-0.009,15.8+/-0.5,14.3+/-0.4
125,0.5,(1.76+/-0.05)e+03,(5.31+/-0.19)e+03,0.331+/-0.015,24.1+/-0.8,20.9+/-0.6
125,1.0,(3.69+/-0.11)e+03,(5.31+/-0.19)e+03,0.694+/-0.032,50.6+/-1.7,38.8+/-1.0
150,0.04,1370+/-22,(4.98+/-0.16)e+03,0.275+/-0.010,19.4+/-0.4,17.19+/-0.33
150,0.5,(2.13+/-0.04)e+03,(4.98+/-0.16)e+03,0.428+/-0.016,30.2+/-0.7,25.3+/-0.5
150,1.0,(4.37+/-0.09)e+03,(4.98+/-0.16)e+03,0.877+/-0.033,61.9+/-1.6,45.2+/-0.8
175,0.04,1200+/-16,(4.73+/-0.15)e+03,0.254+/-0.009,17.4+/-0.4,15.58+/-0.27
175,0.5,1934+/-29,(4.73+/-0.15)e+03,0.409+/-0.015,28.1+/-0.6,23.7+/-0.4
175,1.0,(3.95+/-0.06)e+03,(4.73+/-0.15)e+03,0.835+/-0.030,57.4+/-1.3,42.4+/-0.7
200,0.04,959+/-9,(3.76+/-0.13)e+03,0.255+/-0.009,15.64+/-0.30,13.96+/-0.22


In [30]:
# LaTeX source for the figures of merit of whichever s/b definition was set
# most recently (SR2 when the cells are run in order).
print(make_s_b(sample_counts)[['s','b','s/b', 's/sqrt(b)', 's/sqrt(s+b)']].round(2).to_latex())

\begin{tabular}{lllllll}
\toprule
     &       &                  s &                  b &              s/b &     s/sqrt(b) &   s/sqrt(s+b) \\
mass & dbs &                    &                    &                  &               &               \\
\midrule
125  &  0.04 &          1150+/-27 &  (5.31+/-0.19)e+03 &    0.216+/-0.009 &    15.8+/-0.5 &    14.3+/-0.4 \\
     &  0.50 &  (1.76+/-0.05)e+03 &  (5.31+/-0.19)e+03 &    0.331+/-0.015 &    24.1+/-0.8 &    20.9+/-0.6 \\
     &  1.00 &  (3.69+/-0.11)e+03 &  (5.31+/-0.19)e+03 &    0.694+/-0.032 &    50.6+/-1.7 &    38.8+/-1.0 \\
150  &  0.04 &          1370+/-22 &  (4.98+/-0.16)e+03 &    0.275+/-0.010 &    19.4+/-0.4 &  17.19+/-0.33 \\
     &  0.50 &  (2.13+/-0.04)e+03 &  (4.98+/-0.16)e+03 &    0.428+/-0.016 &    30.2+/-0.7 &    25.3+/-0.5 \\
     &  1.00 &  (4.37+/-0.09)e+03 &  (4.98+/-0.16)e+03 &    0.877+/-0.033 &    61.9+/-1.6 &    45.2+/-0.8 \\
175  &  0.04 &          1200+/-16 &  (4.73+/-0.15)e+03 &    0.254+/-0.009 &    17.4+/-0

In [31]:
# SR1 and SR2 combined: s is the sum of both signal yields, b the sum of the
# 'dy' and 'tt' background yields over both regions. This overwrites the
# SR2-only values stored in sample_counts before the figures of merit are
# recomputed and displayed.
sample_counts['s'] = sample_counts['SR2'] + sample_counts['SR1']
sample_counts['b'] = sample_counts['SR2dy'] + sample_counts['SR2tt'] + sample_counts['SR1dy'] + sample_counts['SR1tt']
make_s_b(sample_counts)[['s','b','s/b', 's/sqrt(b)', 's/sqrt(s+b)']].round(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,s,b,s/b,s/sqrt(b),s/sqrt(s+b)
mass,dbs,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
125,0.04,(5.89+/-0.06)e+03,(1.362+/-0.033)e+04,0.432+/-0.011,50.5+/-0.8,42.2+/-0.5
125,0.5,(9.44+/-0.11)e+03,(1.362+/-0.033)e+04,0.693+/-0.018,80.9+/-1.3,62.2+/-0.7
125,1.0,(2.011+/-0.025)e+04,(1.362+/-0.033)e+04,1.48+/-0.04,172.3+/-3.0,109.5+/-1.1
150,0.04,(7.41+/-0.05)e+03,(1.310+/-0.030)e+04,0.566+/-0.013,64.7+/-0.9,51.7+/-0.5
150,0.5,(1.170+/-0.009)e+04,(1.310+/-0.030)e+04,0.893+/-0.021,102.2+/-1.4,74.3+/-0.6
150,1.0,(2.471+/-0.021)e+04,(1.310+/-0.030)e+04,1.89+/-0.05,215.9+/-3.0,127.1+/-0.9
175,0.04,(6.14+/-0.04)e+03,(1.136+/-0.026)e+04,0.540+/-0.013,57.6+/-0.7,46.4+/-0.4
175,0.5,(9.93+/-0.07)e+03,(1.136+/-0.026)e+04,0.874+/-0.021,93.2+/-1.2,68.1+/-0.5
175,1.0,(2.101+/-0.015)e+04,(1.136+/-0.026)e+04,1.85+/-0.04,197.1+/-2.7,116.8+/-0.7
200,0.04,4593+/-20,(8.78+/-0.22)e+03,0.523+/-0.013,49.0+/-0.6,39.71+/-0.35


In [32]:
# LaTeX source for the figures of merit of whichever s/b definition was set
# most recently (SR1+SR2 combined when the cells are run in order).
print(make_s_b(sample_counts)[['s','b','s/b', 's/sqrt(b)', 's/sqrt(s+b)']].round(2).to_latex())

\begin{tabular}{lllllll}
\toprule
     &       &                    s &                    b &              s/b &    s/sqrt(b) &   s/sqrt(s+b) \\
mass & dbs &                      &                      &                  &              &               \\
\midrule
125  &  0.04 &    (5.89+/-0.06)e+03 &  (1.362+/-0.033)e+04 &    0.432+/-0.011 &   50.5+/-0.8 &    42.2+/-0.5 \\
     &  0.50 &    (9.44+/-0.11)e+03 &  (1.362+/-0.033)e+04 &    0.693+/-0.018 &   80.9+/-1.3 &    62.2+/-0.7 \\
     &  1.00 &  (2.011+/-0.025)e+04 &  (1.362+/-0.033)e+04 &      1.48+/-0.04 &  172.3+/-3.0 &   109.5+/-1.1 \\
150  &  0.04 &    (7.41+/-0.05)e+03 &  (1.310+/-0.030)e+04 &    0.566+/-0.013 &   64.7+/-0.9 &    51.7+/-0.5 \\
     &  0.50 &  (1.170+/-0.009)e+04 &  (1.310+/-0.030)e+04 &    0.893+/-0.021 &  102.2+/-1.4 &    74.3+/-0.6 \\
     &  1.00 &  (2.471+/-0.021)e+04 &  (1.310+/-0.030)e+04 &      1.89+/-0.05 &  215.9+/-3.0 &   127.1+/-0.9 \\
175  &  0.04 &    (6.14+/-0.04)e+03 &  (1.136+/-0.026)e+04 &   