# $m_{4\ell}$ analysis

Let's start with a simple baseline ("dumb") analysis: a binned-histogram likelihood scan over $c_6$ that uses the four-lepton invariant mass $m_{4\ell}$ as the only summary statistic.

In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import importlib

from hstar import c6
from inference import stat

In [7]:
# Load the ggZZ event table from CSV into a DataFrame.
filepath = '/u/taepa/higgs-trilinear-sensitivity/data/samples/ggZZ_all/events.csv'
events = pd.read_csv(filepath)

# Luminosity handed to normalize(); presumably in fb^-1 to match xs [fb] -- TODO confirm.
lumi = 300.0

# Wrap the events in a Sample: xs is the cross-section [fb], k the k-factor applied to it.
ggzz = c6.Sample(events=events, xs=1.4783394, k=1.83)
ggzz.normalize(lumi)

# Preview the first rows of the sample's event table.
ggzz.events.head()

Unnamed: 0,evtnum,p1_px,p1_py,p1_pz,p1_E,p2_px,p2_py,p2_pz,p2_E,p3_px,...,msq_c6_14,msq_c6_15,msq_c6_16,msq_c6_17,msq_c6_18,msq_c6_19,msq_c6_20,msq_c6_21,msq_sm,wt
0,0,0.0,0.0,-253.2592,-253.2592,0.0,0.0,43.80087,-43.80087,3.418067,...,1.31146e-08,1.311711e-08,1.31206e-08,1.312512e-08,1.313072e-08,1.313747e-08,1.314548e-08,1.315483e-08,1.311264e-08,0.000274
1,1,0.0,0.0,-105.4178,-105.4178,0.0,0.0,83.06607,-83.06607,-33.9276,...,1.624623e-08,1.624879e-08,1.625244e-08,1.62572e-08,1.626311e-08,1.627022e-08,1.627859e-08,1.628827e-08,1.624494e-08,0.000259
2,2,0.0,0.0,-176.4903,-176.4903,0.0,0.0,69.49255,-69.49255,-23.96682,...,5.646427e-09,5.647883e-09,5.649893e-09,5.652477e-09,5.655658e-09,5.659462e-09,5.663923e-09,5.669079e-09,5.645296e-09,0.000337
3,3,0.0,0.0,-73.21253,-73.21253,0.0,0.0,435.5704,-435.5704,32.08989,...,9.708751e-11,9.714519e-11,9.723117e-11,9.734593e-11,9.749014e-11,9.766467e-11,9.787056e-11,9.810907e-11,9.708228e-11,0.001838
4,4,0.0,0.0,-395.7967,-395.7967,0.0,0.0,24.94625,-24.94625,-50.25054,...,7.705212e-09,7.707123e-09,7.70984e-09,7.713385e-09,7.717782e-09,7.723063e-09,7.729268e-09,7.73644e-09,7.704206e-09,0.001209


In [8]:
# Four-lepton invariant mass per event.
# The leptons are taken to be particles p3..p6 of each event record.
ileptons = range(3, 7)

# Sum each four-momentum component over the four leptons; every result is a
# 1-D array with one entry per event.
p4l_px, p4l_py, p4l_pz, p4l_E = (
    np.sum([ggzz.events[f'p{i}_{comp}'] for i in ileptons], axis=0)
    for comp in ('px', 'py', 'pz', 'E')
)

# m^2 = E^2 - |p|^2 for the summed four-momentum.
m4l = np.sqrt(p4l_E**2 - p4l_px**2 - p4l_py**2 - p4l_pz**2)

In [9]:
# 42 bin edges from 180 to 1000 inclusive, i.e. 41 bins that are each 20 wide.
m4l_bins = np.linspace(180.0, 1000.0, 42)
# Bin centres: midpoint of each pair of neighbouring edges.
m4l_centers = (m4l_bins[:-1] + m4l_bins[1:]) / 2

In [10]:
# --- Disabled diagnostic plot (every line below is commented out) ---
# When enabled, this cell draws two stacked panels sharing the x axis:
#   top    : weighted m4l histograms for the reference (no c6 argument) and
#            for c6 = -10 and c6 = +10, on a log y scale;
#   bottom : per-bin ratio of each c6 histogram to the reference, with bins
#            where the reference is zero set to 0 by np.divide(..., where=...).
# NOTE(review): the step() calls pass only the left edges (m4l_bins[:-1]) with
# where='post', so the last bin appears to get no horizontal extent -- verify
# before re-enabling.
# NOTE(review): this cell assigns m4l_sm and c6_val at top level, so
# re-enabling it rebinds those notebook-wide names.
# fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 8), sharex=True, height_ratios=(2,1))

# m4l_sm, _ = np.histogram(m4l, bins=m4l_bins, weights=ggzz.nu(per_event=True))
# ax1.step(m4l_bins[:-1], m4l_sm, where='post', label='SM')

# c6_val = -10.0
# m4l_c6, _ = np.histogram(m4l, bins=m4l_bins, weights=ggzz.nu(c6_val, per_event=True))
# ax1.step(m4l_bins[:-1], m4l_c6, where='post', label=f'$c_6 = {c6_val}$')
# ratio = np.divide(m4l_c6, m4l_sm, out=np.zeros_like(m4l_sm), where=m4l_sm!=0)
# ax2.plot(m4l_centers, ratio, '--', label=f'$c_6 = {c6_val}$')

# c6_val = +10.0
# m4l_c6, _ = np.histogram(m4l, bins=m4l_bins, weights=ggzz.nu(c6_val, per_event=True))
# ax1.step(m4l_bins[:-1], m4l_c6, where='post', label=f'$c_6 = {c6_val}$')
# ratio = np.divide(m4l_c6, m4l_sm, out=np.zeros_like(m4l_sm), where=m4l_sm!=0)
# ax2.plot(m4l_centers, ratio, '--', label=f'$c_6 = {c6_val}$')

# ax1.set_xlim(200,1000)
# ax1.set_ylabel('Number of Events')
# ax1.set_yscale('log')
# ax1.legend()

# ax2.set_ylim(0.8,1.2)
# ax2.set_xlabel('$m_{4\\ell}$ [GeV]')
# ax2.set_ylabel('$c_6$ / SM')

# plt.tight_layout()
# plt.show()

In [18]:
# Negative log-likelihood (NLL) scan over c6 using the binned m4l histogram.
#
# Reads from earlier cells: m4l (per-event four-lepton mass), m4l_bins (bin
# edges), ggzz (the normalised sample) and stat (inference helpers).
# Defines: c6_vals (scan grid), wts_c6 (per-event weights, one column per scan
# point), m4l_sm (reference histogram) and nll (scan values shifted so that
# the smallest one is 0).
#
# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt.
# Every iteration re-bins the same m4l array, so if the loop is the slow part,
# computing the bin assignment once would help -- TODO profile.

# Scan grid: 201 evenly spaced points on [-20, 20].
c6_vals = np.linspace(-20.0, 20.0, 201)
nll = np.zeros_like(c6_vals)

# Reference histogram, weighted by nu() called without a c6 value (presumably
# the SM expectation, going by the variable name -- confirm against Sample.nu).
# Take element [0] instead of unpacking the bin edges into `_`: in a notebook,
# assigning to `_` shadows IPython's "last output" variable.
m4l_sm = np.histogram(m4l, bins=m4l_bins, weights=ggzz.nu(per_event=True))[0]

# Per-event weights for the whole grid in a single call; column i holds the
# weights used for c6_vals[i].
wts_c6 = ggzz.nu(c6_vals, per_event=True)
for i in range(len(c6_vals)):
    m4l_c6 = np.histogram(m4l, bins=m4l_bins, weights=wts_c6[:, i])[0]
    nll[i] = stat.nll(m4l_sm, m4l_c6)

# Shift the curve so that its minimum sits at zero.
nll = nll - np.min(nll)

KeyboardInterrupt: 

In [23]:
# Sanity check: element-wise ratio of the per-event weights in the first two
# columns of wts_c6, i.e. the columns the scan loop uses for c6_vals[0] and
# c6_vals[1].
print(wts_c6[:,0]/wts_c6[:,1])

[1.00029978 1.00021072 1.00033568 ... 1.00101027 1.0021568  1.00092112]


In [17]:
# Save the scan as a two-column CSV (c6, nll) with no index column.
# The grid is rounded to one decimal first, presumably so the file shows clean
# c6 values instead of floating-point noise from linspace.
c6_vals = c6_vals.round(1)
df = pd.DataFrame(data={'c6': c6_vals, 'nll': nll})
df.to_csv('c6_nll_m4l.csv', index=False)