In [1]:
print("Importing numpy.")
import numpy as np

print("Importing matplotlib.")
import matplotlib
matplotlib.rc('xtick', labelsize=14) 
matplotlib.rc('ytick', labelsize=14) 
from matplotlib import gridspec
import matplotlib.pyplot as plt
%matplotlib widget
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from matplotlib import rcParams
rcParams['font.family'] = 'serif'
rcParams['font.sans-serif'] = ['Times New Roman']

print("Importing ipy.")
from ipywidgets import interact, widgets, FloatSlider, Dropdown, Checkbox, VBox
from IPython.display import display, clear_output

print("Importing PIL.")
from PIL import Image

print("Importing pandas.")
from pandas import DataFrame

import warnings
warnings.filterwarnings("ignore")

print("Importing keras.")
from keras.models import model_from_json

from tqdm import tqdm

from uproot3_methods import TLorentzVectorArray

Importing numpy.
Importing matplotlib.
Importing ipy.
Importing PIL.
Importing pandas.
Importing keras.


In [2]:
import matplotlib.patches as mpatches
from matplotlib.collections import PatchCollection

In [3]:
from consistent_plots import hist

In [4]:
from matplotlib.gridspec import GridSpec

In [5]:
## LOAD MODEL
model_dir = f'models/classifier/reco/model/'
json_file = open(model_dir + f'model_1.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
model = model_from_json(loaded_model_json)
model.load_weights(model_dir + f'model_1.h5')

## LOAD INPUTS
file = np.load("inputs/reco/nn_input_MX700_MY400_classifier.npz")
nevents = len(np.concatenate((file['test'], file['train'], file['val'])))

In [6]:
x_test = file['x_test']
X_test = file['X_test']
ntest = len(file['test'])

scores = model.predict(x_test)
scores = scores[:,0]
rscores = np.around(scores, decimals=3)

## APPLY MODEL TO INPUTS
pairings = []
score_dist = []
for i in range(15):
    pairings.append(scores[np.arange(i, ntest, 15)])

events = []
for i in range(0, ntest*15, 15):
    events.append(scores[i:i+15])

In [7]:
pair_inv1 = TLorentzVectorArray.from_ptetaphim(X_test[:,0], X_test[:,1], X_test[:,2], np.repeat(4e-9,len(X_test)))
pair_inv2 = TLorentzVectorArray.from_ptetaphim(X_test[:,3], X_test[:,4], X_test[:,5], np.repeat(4e-9,len(X_test)))
pair_inv = pair_inv1 + pair_inv2

In [8]:
pair_p4 = np.reshape(pair_inv, (5039,15))

In [43]:
pair_mass = np.reshape(pair_inv.mass, (5039, 15))
true_Y = pair_p4[:,1] + pair_p4[:,2]
true_Y_m = np.array(())
for combo in true_Y:
    true_Y_m = np.append(true_Y_m, combo.mass)

In [48]:
true_X = pair_p4[:,1] + pair_p4[:,2] +  pair_p4[:,0]
true_X_m = np.array(())
for combo in true_X:
    true_X_m = np.append(true_X_m, combo.mass)

In [71]:
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10,5))
_ = hist(ax[0], true_Y_m, bins=np.linspace(0,600,100))
ax[0].set_xlabel(r'Reco $m_Y$  [GeV]')
_ = hist(ax[1], true_X_m, bins=np.linspace(300,1000,100))
ax[1].set_xlabel(r'Reco $m_X$  [GeV]')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0, 'Reco $m_X$  [GeV]')

In [30]:
fig, axs =  plt.subplots(nrows=5, ncols=3, figsize=(10,12))
for i,ax in enumerate(axs.flat):
    hist(ax, pair_mass[:,i], bins=np.linspace(0,300,100))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [10]:
sbe = scores.reshape(5039,15) # scores by event

def get_passing_pairs(cut):
    """Returns how many pairs pass the cut for each event, as well as indices of passing pairs."""
    pass_pairs = []
    pairs_mask = np.array((), dtype=bool)
    n = np.array(())
    higgs = np.array((), dtype=bool)
    for evt in sbe:
        evt_pass = [i for i,s in enumerate(evt) if s > cut]
        pass_pairs.append(evt_pass)
        pairs_mask =  np.append(pairs_mask, evt > cut)
        n  = np.append(n, len(evt_pass))
        higgs  = np.append(higgs, evt[0] > cut and evt[1] > cut and evt[2] > cut)
    return pass_pairs, higgs, n, pairs_mask

In [11]:
thresholds = np.arange(0.0,1.01,0.01)
passing_pairs = []
higgs_passing = []
num_passing = []
pass_mask = []
for cut in tqdm(thresholds):
    pass_pairs, higgs, n,  pair_mask = get_passing_pairs(cut)
    passing_pairs.append(pass_pairs)
    pass_mask.append(pair_mask)
    num_passing.append(n)
    higgs_passing.append(higgs)

100%|██████████| 101/101 [00:40<00:00,  2.51it/s]


In [12]:
for i,masks in enumerate(pass_mask):
    pass_mask[i] = masks.reshape(5039,15)

In [67]:
index = 20

inv_Y_H = np.array(())
inv_X_H = np.array(())
inv_Y_cand = np.array(())
inv_X_cand = np.array(())
counter = 0
for p4, row in zip(pair_p4, pass_mask[index]):
    if counter%1000 == 0: print(counter)
    counter+=1
    if row[0] == True and row[1] == True and row[2] == True:
            inv_Y_H = np.append(inv_Y_H, (p4[0] + p4[1]).mass)
            inv_X_H = np.append(inv_X_H, (p4[0] + p4[1] + p4[2]).mass)
    for i,pair1 in enumerate(p4[row][:-1]):
        for j,pair2 in enumerate(p4[row][i:]):
            m = (pair1 + pair2).mass
            inv_Y_cand  = np.append(inv_Y_cand, m)
            for pair3 in p4[row][j:]:
                m = (pair1 + pair2 + pair3).mass
                inv_X_cand = np.append(inv_X_cand, m)

0
1000
2000
3000
4000
5000


In [68]:
fig, axs = plt.subplots(nrows=2, figsize=(10,6))
hist(axs[0], inv_Y_cand, bins=np.linspace(0,1000,100), label='0.200')
hist(axs[0], inv_Y_H, bins=np.linspace(0,1000,100), label='0.200')
axs[0].legend()
axs[0].set_xlabel(r'$m_\mathrm{inv}$ [GeV]')
axs[0].ticklabel_format(style='plain')
hist(axs[1], inv_X_cand, bins=np.linspace(0,1000,100), label='0.200')
hist(axs[1], inv_X_H, bins=np.linspace(0,1000,100), label='0.200')
axs[1].legend()
axs[1].set_xlabel(r'$m_\mathrm{inv}$ [GeV]')
axs[1].ticklabel_format(style='plain')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [15]:
cuts = [50, 75, 85, 90, 95]

Y_cands = []
X_cands = []
cut_count = 0
for index in tqdm(cuts):
    inv_Y_cand = np.array(())
    inv_X_cand = np.array(())
    counter = 0
    for p4, row in zip(pair_p4, pass_mask[index]):
        if counter%1000 == 0: print(counter)
        counter+=1
        for i,pair1 in enumerate(p4[row][:-1]):
            for j,pair2 in enumerate(p4[row][i:]):
                m = (pair1 + pair2).mass
                inv_Y_cand  = np.append(inv_Y_cand, m)
                for pair3 in p4[row][j:]:
                    m = (pair1 + pair2 + pair3).mass
                    inv_X_cand = np.append(inv_X_cand, m)
    Y_cands.append(inv_Y_cand)
    X_cands.append(inv_X_cand)

  0%|          | 0/5 [00:00<?, ?it/s]

0
1000
2000
3000
4000
5000


 20%|██        | 1/5 [03:27<13:51, 207.99s/it]

0
1000
2000
3000
4000
5000


 40%|████      | 2/5 [03:59<05:13, 104.35s/it]

0
1000
2000
3000
4000
5000


 60%|██████    | 3/5 [04:14<02:07, 63.58s/it] 

0
1000
2000
3000
4000


 80%|████████  | 4/5 [04:23<00:41, 41.82s/it]

5000
0
1000
2000
3000
4000


100%|██████████| 5/5 [04:26<00:00, 53.36s/it]

5000





In [60]:
fig, axs = plt.subplots(nrows=2, figsize=(10,6))
for i,inv_Y in enumerate(Y_cands):
    hist(axs[0], inv_Y, bins=np.linspace(0,1000,100), label=round(thresholds[cuts[i]],3))
axs[0].legend()
axs[0].set_xlabel(r'$m_\mathrm{inv}$ [GeV]')
axs[0].ticklabel_format(style='plain')
for i,inv_X in enumerate(X_cands):
    hist(axs[1], inv_X, bins=np.linspace(0,1000,100), label=round(thresholds[cuts[i]],3))
axs[1].legend()
axs[1].set_xlabel(r'$m_\mathrm{inv}$ [GeV]')
axs[1].ticklabel_format(style='plain')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [17]:
n_cut = []
for cut in tqdm(thresholds):
    n = [np.array(()) for _ in range(16)]
    for evt in sbe:
        n_evt = np.sum(evt > cut)
        sorted_evt = np.append(np.sort(evt[:3])[::-1], np.sort(evt[3:])[::-1])
        n[n_evt] = np.append(n[n_evt], sorted_evt)
    n_cut.append(n)

100%|██████████| 101/101 [00:19<00:00,  5.18it/s]


In [18]:
fig, axs = plt.subplots(nrows=6, ncols=3, figsize=(10,10))
n_cut[50]
for i,ax in enumerate(axs.flat):
    ax.set_title(f'{i} passing pairs', fontsize=12)
    if i >= 16: continue
    if len(n_cut[50]) > 0:
        ax.hist(n_cut[50][i], bins=thresholds)
        
        
plt.tight_layout()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [55]:
x = thresholds
y = []
for n in np.arange(16):
    y_pair = np.array(())
    for i,cut in enumerate(thresholds):
        # Select only events with number of passing pairs less than or equal to n
        max_pair_mask = num_passing[i] <= n
        y_pair = np.append(y_pair, np.sum(higgs_passing[i][max_pair_mask])/np.sum(max_pair_mask))
    y.append(y_pair)

In [58]:
fig, ax = plt.subplots(figsize=(8,5))
for i,y_pair in enumerate(y):
    if i < 3: continue
    if i < 8: continue
    ax.plot(x, y_pair, label=fr'$\leq${i} passing pairs')
ax.set_xlabel('Classifier Score Cut')
ax.set_ylabel('Ratio of events with three passing Higgs')
ax.set_ylim(0,1)
ax.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc='upper left')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.legend.Legend at 0x7f90cd963cd0>

In [37]:
x_dict = {'HX':0,
 'HY1':1,
 'HY2':2,
 'X b1, Y1 b1':3,
 'X b1, Y1 b2':4,
 'X b1, Y2 b1':5,
 'X b1, Y2 b2':6,
 'X b2, Y1 b1':7,
 'X b2, Y1 b2':8,
 'X b2, Y2 b1':9,
 'X b2, Y2 b2':10,
 'Y1 b1, Y2 b1':11,
 'Y1 b1, Y2 b2':12,
 'Y1 b2, Y2 b1':13,
 'Y1 b2, Y2 b2':14}

In [38]:
x_drop = ['HX',
 'HY1',
 'HY2',
 'X b1, Y1 b1',
 'X b1, Y1 b2',
 'X b1, Y2 b1',
 'X b1, Y2 b2',
 'X b2, Y1 b1',
 'X b2, Y1 b2',
 'X b2, Y2 b1',
 'X b2, Y2 b2',
 'Y1 b1, Y2 b1',
 'Y1 b1, Y2 b2',
 'Y1 b2, Y2 b1',
 'Y1 b2, Y2 b2']