In [1]:
print("Loading...")

import numpy as np

import matplotlib
matplotlib.rc('xtick', labelsize=10) 
matplotlib.rc('ytick', labelsize=10) 
from matplotlib.gridspec import GridSpec
import matplotlib.pyplot as plt
%matplotlib widget
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from matplotlib import rcParams
rcParams['font.family'] = 'serif'
rcParams['font.sans-serif'] = ['Times New Roman']
import matplotlib.cm as cm
from matplotlib.colors import LogNorm
import matplotlib.patches as mpatches
from matplotlib.collections import PatchCollection

from ipywidgets import interact, widgets, FloatSlider, Dropdown, Checkbox, VBox
from IPython.display import display, clear_output

from PIL import Image

from pandas import DataFrame

import warnings
warnings.filterwarnings("ignore")

from keras.models import model_from_json

from tqdm import tqdm

from uproot3_methods import TLorentzVectorArray
from consistent_plots import hist

Loading...


In [2]:
## LOAD MODEL
model_dir = f'../models/classifier/reco/model/'
json_file = open(model_dir + f'model_1.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
model = model_from_json(loaded_model_json)
model.load_weights(model_dir + f'model_1.h5')

## LOAD INPUTS
file = np.load("../inputs/reco/nn_input_MX700_MY400_classifier.npz")
nevents = len(np.concatenate((file['test'], file['train'], file['val'])))

In [3]:
x_test = file['x_test'] # normalized test examples
X_test = file['X_test'] # unmodified test examples
ntest = len(file['test'])

scores = model.predict(x_test)
scores = scores[:,0]
rscores = np.around(scores, decimals=3)

thresholds = np.arange(0.0,1.01,0.01)

In [4]:
# invariant mass of all pairings
pair_inv1 = TLorentzVectorArray.from_ptetaphim(X_test[:,0], X_test[:,1], X_test[:,2], np.repeat(4e-9,len(X_test)))
pair_inv2 = TLorentzVectorArray.from_ptetaphim(X_test[:,3], X_test[:,4], X_test[:,5], np.repeat(4e-9,len(X_test)))
pair_inv = pair_inv1 + pair_inv2

pair_p4 = np.reshape(pair_inv, (5039,15))

pair_mass = np.reshape(pair_inv.mass, (5039, 15))
true_Y = pair_p4[:,1] + pair_p4[:,2]
true_X = pair_p4[:,1] + pair_p4[:,2] +  pair_p4[:,0]

true_Y_m = np.array(())
true_X_m = np.array(())

for X, Y in zip(true_X, true_Y):
    true_Y_m = np.append(true_Y_m, Y.mass)
    true_X_m = np.append(true_X_m, X.mass)

In [5]:
## APPLY MODEL TO INPUTS
# pscores = scores binned by pair
pscores = []
pinvmass = []
for i in range(15):
    pscores.append(scores[np.arange(i, ntest*15, 15)])
    pinvmass.append(pair_inv[np.arange(i, ntest*15, 15)])
pair_scores = np.array((pscores))
pair_mass = np.array((pinvmass))
    
# escores = scores binned by event
escores = []
einvmass = []
for i in range(0, ntest*15, 15):
    escores.append(scores[i:i+15])
    einvmass.append(pair_inv[i:i+15])
event_scores = np.array((escores))
event_mass = np.array((einvmass))

In [34]:
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(6,3))
_ = hist(ax[0], true_Y_m, bins=np.linspace(200,600,60))
ax[0].set_xlabel(r'Reco $m_Y$  [GeV]')
_ = hist(ax[1], true_X_m, bins=np.linspace(400,900,60))
ax[1].set_xlabel(r'Reco $m_X$  [GeV]')
plt.tight_layout()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [7]:
HX_score = pscores[0]
H1_score = pscores[1]
H2_score = pscores[2]

In [8]:
def get_any_all_eff(cut):
    any_bool = np.any(np.column_stack((pscores[0], pscores[1], pscores[2]) > cut), axis=1)
    any_eff = np.sum(any_bool)/len(any_bool)
    all_bool = np.all(np.column_stack((pscores[0], pscores[1], pscores[2]) > cut), axis=1)
    all_eff = np.sum(all_bool)/len(all_bool)
    return any_eff, all_eff

In [9]:
HX_eff = [np.sum(HX_score > cut)/len(HX_score) for cut in thresholds]
H1_eff = [np.sum(H1_score > cut)/len(HX_score) for cut in thresholds]
H2_eff = [np.sum(H2_score > cut)/len(HX_score) for cut in thresholds]
nonH_eff = [[np.sum(nH_score > cut)/len(nH_score) for cut in thresholds] for nH_score in pscores[3:]]
any_eff = [get_any_all_eff(cut)[0] for cut in thresholds]
all_eff = [get_any_all_eff(cut)[1] for cut in thresholds]

In [10]:
from matplotlib.ticker import MultipleLocator

In [11]:
fig = plt.figure(figsize=(10,9))
grid = GridSpec(nrows=3, ncols=3, hspace=0.25, wspace=0.5)

ax = fig.add_subplot(grid[0,0])
ax.plot(thresholds, HX_eff, label='HX')
ax.legend()
ax.set_ylabel('Efficiency')
ax.set_xlabel('Score Cut')

ax = fig.add_subplot(grid[0,1])
ax.plot(thresholds, H1_eff, label='H1', color='C1')
ax.legend()
ax.set_ylabel('Efficiency')
ax.set_xlabel('Score Cut')

ax = fig.add_subplot(grid[0,2])
ax.plot(thresholds, H2_eff, label='H2', color='C2')
ax.legend()
ax.set_ylabel('Efficiency')
ax.set_xlabel('Score Cut')

ax = fig.add_subplot(grid[1,:2])
ax.plot(thresholds, any_eff, label='any')
ax.legend()
ax.set_ylabel('Efficiency')
ax.set_xlabel('Score Cut')
ax.set_aspect(1)
ax.yaxis.set_major_locator(MultipleLocator(0.2))

ax = fig.add_subplot(grid[1,1:])
ax.plot(thresholds, all_eff, label='all')
ax.legend()
ax.set_ylabel('Efficiency')
ax.set_xlabel('Score Cut')
ax.set_aspect(1)
ax.yaxis.set_major_locator(MultipleLocator(0.2))

ax = fig.add_subplot(grid[2,1])
ax.plot(thresholds, HX_eff, label='HX')
ax.plot(thresholds, H1_eff, label='H1', color='C1')
ax.plot(thresholds, H2_eff, label='H2', color='C2')
ax.legend()
ax.set_ylabel('Efficiency')
ax.set_xlabel('Score Cut')

plt.tight_layout()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [12]:
fig, ax = plt.subplots(nrows=3, ncols=2, figsize=(8,8))

ax[0][0].plot(thresholds, HX_eff, label='HX')
ax[0][1].plot(thresholds, H1_eff, label='H1')
ax[1][0].plot(thresholds, H2_eff, label='H2')
ax[1][1].plot(thresholds, any_eff, label='any')
ax[1][1].plot(thresholds, all_eff, label='all')
ax[2][0].plot(thresholds, HX_eff, label='HX')
ax[2][0].plot(thresholds, H1_eff, label='H1')
ax[2][0].plot(thresholds, H2_eff, label='H2')

for axis in ax.flat:
    axis.legend()
    axis.set_xlabel('Score Cut')
    axis.set_ylabel('Efficiency')
    axis.set_ylim(0, 1.05)
    
plt.tight_layout()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

No handles with labels found to put in legend.


In [13]:
labels = ['HX b1, H1 b1',
 'HX b1, H1 b2',
 'HX b1, H2 b1',
 'HX b1, H2 b2',
 'HX b2, H1 b1',
 'HX b2, H1 b2',
 'HX b2, H2 b1',
 'HX b2, H2 b2',
 'H1 b1, H2 b1',
 'H1 b1, H2 b2',
 'H1 b2, H2 b1',
 'H1 b2, H2 b2']

In [14]:
fig, axs = plt.subplots(nrows=4, ncols=3, figsize=(10,10))
for ax,eff,lab in zip(axs.flat, nonH_eff, labels):
    eff = np.ones_like(eff) - eff
    ax.plot(thresholds, eff, label=lab)
    ax.legend()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [15]:
#fig, axs =  plt.subplots(nrows=5, ncols=3, figsize=(10,12))
#for i,ax in enumerate(axs.flat):
#    hist(ax, pair_mass[:,i], bins=np.linspace(0,300,100))

In [16]:
events_sorted_pairs = np.argsort(event_scores, axis=1)[:,::-1]
top_three_scoring_pairs = events_sorted_pairs[:,:3]
higgs_mask_top_scoring = top_three_scoring_pairs < 3

hx_mask = higgs_mask_top_scoring[:,0]
h1_mask = higgs_mask_top_scoring[:,1]
h2_mask = higgs_mask_top_scoring[:,2]

higgs_event_top_3mask = hx_mask & h1_mask & h2_mask
higgs_event_top_2mask = (hx_mask & h1_mask) | (hx_mask & h2_mask) | (h1_mask & h2_mask)
higgs_event_top_1mask = hx_mask | h1_mask | h2_mask

higgs3 = np.sum(higgs_event_top_3mask)/len(higgs_event_top_3mask)
higgs2 = np.sum(higgs_event_top_2mask)/len(higgs_event_top_2mask)
higgs1 = np.sum(higgs_event_top_1mask)/len(higgs_event_top_1mask)

print(f"Efficiency of all three Higgs obtaining three highest scores: {higgs3*100:.1f}%")
print(f"Efficiency of two Higgs obtaining one of three highest scores: {higgs2*100:.1f}%")
print(f"Efficiency of one Higgs obtaining one of three highest scores: {higgs1*100:.1f}%")

Efficiency of all three Higgs obtaining three highest scores: 14.6%
Efficiency of two Higgs obtaining one of three highest scores: 75.1%
Efficiency of one Higgs obtaining one of three highest scores: 98.1%


In [17]:
sbe = scores.reshape(5039,15) # scores by event

def get_passing_pairs(cut):
    """Returns how many pairs pass the cut for each event, as well as indices of passing pairs."""
    pass_pairs = []
    pairs_mask = np.array((), dtype=bool)
    n = np.array(())
    higgs = np.array((), dtype=bool)
    for evt in sbe:
        evt_pass = [i for i,s in enumerate(evt) if s > cut]
        pass_pairs.append(evt_pass)
        pairs_mask =  np.append(pairs_mask, evt > cut)
        n  = np.append(n, len(evt_pass))
        higgs  = np.append(higgs, evt[0] > cut and evt[1] > cut and evt[2] > cut)
    return pass_pairs, higgs, n, pairs_mask

In [18]:
passing_pairs = []
higgs_passing = []
num_passing = []
pass_mask = []
for cut in tqdm(thresholds):
    pass_pairs, higgs, n,  pair_mask = get_passing_pairs(cut)
    passing_pairs.append(pass_pairs)
    pass_mask.append(pair_mask)
    num_passing.append(n)
    higgs_passing.append(higgs)

100%|██████████| 101/101 [00:30<00:00,  3.33it/s]


In [19]:
for i,masks in enumerate(pass_mask):
    pass_mask[i] = masks.reshape(5039,15)

In [20]:
index = 20

sanity_check = np.array(())
inv_Y_H = np.array(())
inv_X_H = np.array(())
inv_Y_cand = np.array(())
inv_X_cand = np.array(())
counter = 0
for p4, row in zip(pair_p4, pass_mask[index]):
    if counter%1000 == 0: print(counter)
    counter+=1
    sanity_check = np.append(sanity_check, (p4[0] + p4[1]).mass)
    if np.all(row[:3]):
            inv_Y_H = np.append(inv_Y_H, (p4[0] + p4[1]).mass)
            inv_X_H = np.append(inv_X_H, (p4[0] + p4[1] + p4[2]).mass)
    for i,pair1 in enumerate(p4[row][:-1]):
        for j,pair2 in enumerate(p4[row][i:]):
            m = (pair1 + pair2).mass
            inv_Y_cand  = np.append(inv_Y_cand, m)
            for pair3 in p4[row][j:]:
                m = (pair1 + pair2 + pair3).mass
                inv_X_cand = np.append(inv_X_cand, m)

0
1000
2000
3000
4000
5000


In [21]:
binsY = np.linspace(0,600,100)
n_nY, _ = np.histogram(inv_Y_cand, binsY)
n_Y, _ = np.histogram(inv_Y_H, binsY)
n_SC, _ = np.histogram(sanity_check, binsY)

binsX = np.linspace(0,1000,100)
n_nX, _ = np.histogram(inv_X_cand, binsX)
n_X, _ = np.histogram(inv_X_H, binsX)

In [22]:
fig, axs = plt.subplots(nrows=2, figsize=(8,8))
x = (binsY[:-1] +  binsY[1:])/2
hist(axs[0], x, weights=n_nY/np.max(n_nY), bins=binsY, label='0.200, non-Y')
hist(axs[0], x, weights=n_Y/np.max(n_Y), bins=binsY, label='0.200, Y')
hist(axs[0], x, weights=n_SC/np.max(n_SC), bins=binsY, label='No cut, true Y')
axs[0].legend()
axs[0].set_xlabel(r'$m_\mathrm{inv}$ [GeV]')
axs[0].ticklabel_format(style='plain')
x = (binsX[:-1] +  binsX[1:])/2
hist(axs[1], x, weights=n_nX, bins=binsX, label='0.200, non-X')
hist(axs[1], x, weights=n_X, bins=binsX, label='0.200, X')
axs[1].legend()
axs[1].set_xlabel(r'$m_\mathrm{inv}$ [GeV]')
axs[1].ticklabel_format(style='plain')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [23]:
cuts = [50, 75, 85, 90, 95]

Y_cands = []
X_cands = []
cut_count = 0
for index in tqdm(cuts):
    inv_Y_cand = np.array(())
    inv_X_cand = np.array(())
    counter = 0
    for p4, row in zip(pair_p4, pass_mask[index]):
        if counter%1000 == 0: print(counter)
        counter+=1
        for i,pair1 in enumerate(p4[row][:-1]):
            for j,pair2 in enumerate(p4[row][i:]):
                m = (pair1 + pair2).mass
                inv_Y_cand  = np.append(inv_Y_cand, m)
                for pair3 in p4[row][j:]:
                    m = (pair1 + pair2 + pair3).mass
                    inv_X_cand = np.append(inv_X_cand, m)
    Y_cands.append(inv_Y_cand)
    X_cands.append(inv_X_cand)

  0%|          | 0/5 [00:00<?, ?it/s]

0
1000
2000
3000
4000
5000


 20%|██        | 1/5 [01:05<04:20, 65.10s/it]

0
1000
2000
3000
4000
5000


 40%|████      | 2/5 [01:25<01:55, 38.61s/it]

0
1000
2000
3000
4000


 60%|██████    | 3/5 [01:33<00:49, 24.71s/it]

5000
0
1000
2000
3000
4000


 80%|████████  | 4/5 [01:37<00:16, 16.62s/it]

5000
0
1000
2000
3000
4000


100%|██████████| 5/5 [01:38<00:00, 19.77s/it]

5000





In [24]:
fig, axs = plt.subplots(nrows=2, figsize=(10,6))
for i,inv_Y in enumerate(Y_cands):
    hist(axs[0], inv_Y, bins=np.linspace(0,1000,100), label=round(thresholds[cuts[i]],3))
axs[0].legend()
axs[0].set_xlabel(r'$m_\mathrm{inv}$ [GeV]')
axs[0].ticklabel_format(style='plain')
for i,inv_X in enumerate(X_cands):
    hist(axs[1], inv_X, bins=np.linspace(0,1000,100), label=round(thresholds[cuts[i]],3))
axs[1].legend()
axs[1].set_xlabel(r'$m_\mathrm{inv}$ [GeV]')
axs[1].ticklabel_format(style='plain')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [25]:
n_cut = []
for cut in tqdm(thresholds):
    n = [np.array(()) for _ in range(16)]
    for evt in sbe:
        n_evt = np.sum(evt > cut)
        sorted_evt = np.append(np.sort(evt[:3])[::-1], np.sort(evt[3:])[::-1])
        n[n_evt] = np.append(n[n_evt], sorted_evt)
    n_cut.append(n)

100%|██████████| 101/101 [00:14<00:00,  6.84it/s]


In [26]:
fig, axs = plt.subplots(nrows=6, ncols=3, figsize=(10,10))
n_cut[50]
for i,ax in enumerate(axs.flat):
    ax.set_title(f'{i} passing pairs', fontsize=12)
    if i >= 16: continue
    if len(n_cut[50]) > 0:
        ax.hist(n_cut[50][i], bins=thresholds)
        
        
plt.tight_layout()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [27]:
x = thresholds
y = []
for n in np.arange(16):
    y_pair = np.array(())
    for i,cut in enumerate(thresholds):
        # Select only events with number of passing pairs less than or equal to n
        max_pair_mask = num_passing[i] <= n
        y_pair = np.append(y_pair, np.sum(higgs_passing[i][max_pair_mask])/np.sum(max_pair_mask))
    y.append(y_pair)

In [28]:
fig, ax = plt.subplots(figsize=(8,5))
for i,y_pair in enumerate(y):
    if i < 3: continue
    if i < 8: continue
    ax.plot(x, y_pair, label=fr'$\leq${i} passing pairs')
ax.set_xlabel('Classifier Score Cut')
ax.set_ylabel('Ratio of events with three passing Higgs')
ax.set_ylim(0,1)
ax.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc='upper left')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.legend.Legend at 0x7fdfa5046f10>

In [29]:
x_dict = {'HX':0,
 'HY1':1,
 'HY2':2,
 'X b1, Y1 b1':3,
 'X b1, Y1 b2':4,
 'X b1, Y2 b1':5,
 'X b1, Y2 b2':6,
 'X b2, Y1 b1':7,
 'X b2, Y1 b2':8,
 'X b2, Y2 b1':9,
 'X b2, Y2 b2':10,
 'Y1 b1, Y2 b1':11,
 'Y1 b1, Y2 b2':12,
 'Y1 b2, Y2 b1':13,
 'Y1 b2, Y2 b2':14}

In [30]:
x_drop = ['HX',
 'HY1',
 'HY2',
 'X b1, Y1 b1',
 'X b1, Y1 b2',
 'X b1, Y2 b1',
 'X b1, Y2 b2',
 'X b2, Y1 b1',
 'X b2, Y1 b2',
 'X b2, Y2 b1',
 'X b2, Y2 b2',
 'Y1 b1, Y2 b1',
 'Y1 b1, Y2 b2',
 'Y1 b2, Y2 b1',
 'Y1 b2, Y2 b2']