# Evaluate Classifier Techniques

This notebook compares the efficiency of various techniques:
1. Choose the top three scoring pairs (how often are these the three Higgs?)
2. Set a cut on score and invariant mass

## Load Model, Obtain Classifier Scores

In [1]:
from keras.models import model_from_json
import numpy as np
from uproot3_methods import TLorentzVectorArray

In [2]:
## LOAD MODEL
model_dir = f'../models/classifier/reco/model/'
json_file = open(model_dir + f'model_1.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
model = model_from_json(loaded_model_json)
model.load_weights(model_dir + f'model_1.h5')

## LOAD INPUTS
file = np.load("../inputs/reco/nn_input_MX700_MY400_classifier.npz")
nevents = len(np.concatenate((file['test'], file['train'], file['val'])))

In [3]:
x_test = file['x_test'] # normalized test examples
X_test = file['X_test'] # unmodified test examples
ntest = len(file['test'])

scores = model.predict(x_test)
scores = scores[:,0]
rscores = np.around(scores, decimals=3)

thresholds = np.arange(0.0,1.01,0.01)

## Calculate invariant mass of all pairs, of Y pairs, and X triplets

In [67]:
npairs = 15

In [68]:
# invariant mass of all pairings
pair_inv1 = TLorentzVectorArray.from_ptetaphim(X_test[:,0], X_test[:,1], X_test[:,2], np.repeat(4e-9,len(X_test)))
pair_inv2 = TLorentzVectorArray.from_ptetaphim(X_test[:,3], X_test[:,4], X_test[:,5], np.repeat(4e-9,len(X_test)))
pair_inv = pair_inv1 + pair_inv2

pair_p4 = np.reshape(pair_inv, (ntest,npairs))

pair_mass = np.reshape(pair_inv.mass, (ntest, npairs))
true_Y = pair_p4[:,1] + pair_p4[:,2]
true_X = pair_p4[:,1] + pair_p4[:,2] +  pair_p4[:,0]

true_Y_m = np.array(())
true_X_m = np.array(())

for X, Y in zip(true_X, true_Y):
    true_Y_m = np.append(true_Y_m, Y.mass)
    true_X_m = np.append(true_X_m, X.mass)

## Cut on discriminator

In [69]:
import matplotlib as mpl
%matplotlib widget
import matplotlib.pyplot as plt

In [70]:
from matplotlib.gridspec import GridSpec

In [71]:
from consistent_plots import hist

In [100]:
score_cut = 0.15
# score_cut = 0.52

class_cut = scores > score_cut
np.sum(class_cut[2::npairs])

37503

In [101]:
features_class_cut = X_test[class_cut,:]

In [102]:
mass_class_cut = pair_inv.mass.ravel()[class_cut]
dR_class_cut = X_test[:,-1][class_cut]

In [124]:
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(13,5))

ax = axs[0]
_ = hist(ax, mass_class_cut, bins=np.linspace(0,300,100))
ax.set_xlabel(r'pair $m_\mathrm{inv}$ [GeV]')
ax.set_title(f'Invariant Mass of Pairs with discriminator > {score_cut}')

ax = axs[1]
_ = hist(ax, dR_class_cut, bins=np.linspace(0,4,100))
ax.set_xlabel(r'pair $\Delta R$')
ax.set_title(f'Angular Separation Between Pairs with discriminator > {score_cut}')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 1.0, 'Angular Separation Between Pairs with discriminator > 0.15')

In [122]:
plt.close()
fig, ax = plt.subplots()

s = ax.scatter(pair_inv.mass, X_test[:,-1], s=1, c=scores, cmap='rainbow')
ax.set_xlabel(r'$m_\mathrm{inv}$ [GeV]')
ax.set_ylabel(r'$\Delta R$')
ax.set_xlim(0,500)
plt.colorbar(s)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.colorbar.Colorbar at 0x7fb43f89a970>

In [105]:
H_mask = np.tile(np.concatenate((np.repeat(True, 3), np.repeat(False, 12))), ntest)

In [106]:
plt.close()
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(15,5))

ax = axs[0]
s = ax.scatter(pair_inv.mass.ravel()[~H_mask], X_test[:,-1][~H_mask], s=1)
ax.set_xlabel(r'$m_\mathrm{inv}$ [GeV]')
ax.set_ylabel(r'$\Delta R$')
ax.set_xlim(0,600)

ax = axs[1]
s = ax.scatter(pair_inv.mass.ravel()[H_mask], X_test[:,-1][H_mask], s=1)
ax.set_xlabel(r'$m_\mathrm{inv}$ [GeV]')
ax.set_ylabel(r'$\Delta R$')
ax.set_xlim(0,300)

ax = axs[2]
s = ax.scatter(pair_inv.mass.ravel()[~H_mask], X_test[:,-1][~H_mask], s=1)
s = ax.scatter(pair_inv.mass.ravel()[H_mask], X_test[:,-1][H_mask], s=1)
ax.set_xlabel(r'$m_\mathrm{inv}$ [GeV]')
ax.set_ylabel(r'$\Delta R$')
ax.set_xlim(0,300)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

(0.0, 300.0)

In [107]:
## APPLY MODEL TO INPUTS
# pscores = scores binned by pair
pscores = []
pinvmass = []
for i in range(npairs):
    pscores.append(scores[np.arange(i, ntest*npairs, npairs)])
    pinvmass.append(pair_inv[np.arange(i, ntest*npairs, npairs)])
pair_scores = np.array((pscores))
pair_mass = np.array((pinvmass))
    
# escores = scores binned by event
escores = []
einvmass = []
for i in range(0, ntest*npairs, npairs):
    escores.append(scores[i:i+npairs])
    einvmass.append(pair_inv[i:i+npairs])
event_scores = np.array((escores))
event_mass = np.array((einvmass))

In [108]:
labels = ['HX',
          'H1',
          'H2',
 'HX b1, H1 b1',
 'HX b1, H1 b2',
 'HX b1, H2 b1',
 'HX b1, H2 b2',
 'HX b2, H1 b1',
 'HX b2, H1 b2',
 'HX b2, H2 b1',
 'HX b2, H2 b2',
 'H1 b1, H2 b1',
 'H1 b1, H2 b2',
 'H1 b2, H2 b1',
 'H1 b2, H2 b2',]

In [109]:
from prettytable import PrettyTable

In [110]:
len(pscores[:3])

3

In [111]:
table = PrettyTable()
table.field_names = ['Pair', 'Efficiency']

for i,label in enumerate(labels):
    table.add_row([label, f'{np.around(np.sum(pscores[i] > score_cut)/len(pscores[i])*100, decimals=1)}%'])
    if  i == 2:
        table.add_row(['Any Higgs', f'{np.around(np.sum(np.any(np.column_stack((pscores[0], pscores[1], pscores[2])) > score_cut, axis=1))/len(pscores[i])*100, decimals=1)}%'])
        table.add_row(['All Higgs', f'{np.around(np.sum(np.all(np.column_stack((pscores[0], pscores[1], pscores[2])) > score_cut, axis=1))/len(pscores[i])*100, decimals=1)}%'])

In [112]:
print(table)

+--------------+------------+
|     Pair     | Efficiency |
+--------------+------------+
|      HX      |   96.9%    |
|      H1      |   97.8%    |
|      H2      |   93.9%    |
|  Any Higgs   |   100.0%   |
|  All Higgs   |   89.0%    |
| HX b1, H1 b1 |    6.1%    |
| HX b1, H1 b2 |   32.0%    |
| HX b1, H2 b1 |   37.5%    |
| HX b1, H2 b2 |   53.9%    |
| HX b2, H1 b1 |    7.7%    |
| HX b2, H1 b2 |   41.4%    |
| HX b2, H2 b1 |   32.1%    |
| HX b2, H2 b2 |   57.3%    |
| H1 b1, H2 b1 |   21.4%    |
| H1 b1, H2 b2 |   52.0%    |
| H1 b2, H2 b1 |   29.5%    |
| H1 b2, H2 b2 |   59.5%    |
+--------------+------------+


In [113]:
score_block = scores.reshape(ntest, npairs)
# np.around(score_block[:3], decimals=3)

In [114]:
sorted_pairs = np.flip(score_block.argsort(), axis=1)

In [115]:
table = PrettyTable()
table.add_column("Pair", ['HX', 'H1', 'H2', 'Any', 'All'])

for i in range(3,6):
    any_higgs = np.sum(np.any(np.logical_or(np.logical_or(sorted_pairs[:,:i] == 0, sorted_pairs[:,:i] == 1), sorted_pairs[:,:i] == 2), axis=1)/(3*ntest))
    all_higgs = np.sum(np.all(np.logical_and(np.logical_and(sorted_pairs[:,:i] == 0, sorted_pairs[:,:i] == 1), sorted_pairs[:,:i] == 2), axis=1)/(3*ntest))
    table.add_column(f"Top {i}", 
                     [f'{round(np.sum(sorted_pairs[:,:i] == 0)/ntest*100, 1)}%',
                      f'{round(np.sum(sorted_pairs[:,:i] == 1)/ntest*100, 1)}%',
                      f'{round(np.sum(sorted_pairs[:,:i] == 2)/ntest*100, 1)}%',
                      f'{round(any_higgs, 1)}%',
                      f'{round(all_higgs, 1)}%'])

In [116]:
print(table)

+------+-------+-------+-------+
| Pair | Top 3 | Top 4 | Top 5 |
+------+-------+-------+-------+
|  HX  | 74.9% | 85.8% | 91.6% |
|  H1  | 82.1% | 89.7% | 93.4% |
|  H2  | 37.8% | 56.5% | 72.2% |
| Any  |  0.3% |  0.3% |  0.3% |
| All  |  0.0% |  0.0% |  0.0% |
+------+-------+-------+-------+


## Collect passing masses based on a score cut

In [117]:
passing_Y_mass = np.array(()) # save mY of events in which all three Higgs pass
passing_X_mass = np.array(()) # save mX of events in which all three Higgs pass
passing_mass = np.array(()) # save mjj of all pairs that pass discriminator cut in all events
pass_p4 = []
pass_score_events = np.array([], dtype=int)
n_pairs_pass = np.array(())

for ievt,(mY, mX, mass, evt, evt_p4) in enumerate(zip(true_Y_m, true_X_m, event_mass, escores, pair_p4)):
    masked_score = evt > score_cut
    n_nH_pairs_pass = np.append(n_pairs_pass, np.sum(masked_score[3:]))
    n_pairs_pass = np.append(n_pairs_pass, np.sum(masked_score))
    passing_mass = np.append(passing_mass, mass[masked_score])
    pass_p4.append(evt_p4[masked_score])
    if np.all(masked_score[:3]):
        passing_Y_mass = np.append(passing_Y_mass, mY)
        passing_X_mass = np.append(passing_X_mass, mX)
        pass_score_events = np.append(pass_score_events, int(ievt))

In [118]:
len(masked_score)

15

In [119]:
plt.close()
fig, ax = plt.subplots()
n, bins = np.histogram(n_pairs_pass, np.arange(13))
n_3H, bins = np.histogram(n_pairs_pass[pass_score_events], np.arange(13))

width=0.3

ax.bar(bins[:-1]-width/2, n, width=width, label='All events')
ax.bar(bins[:-1]+width/2, n_3H, width=width, label='3 Higgs Passing')
ax.set_xlabel(f'Number of Pairs That Pass Discriminator Cut (> {score_cut}) Per Event')
ax.legend()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.legend.Legend at 0x7fb43f716670>

In [120]:
plt.close()
fig, ax = plt.subplots()
n, bins = np.histogram(n_pairs_pass, np.arange(17))

width=0.5

ax.bar(bins[:-1], n, label='All events')
ax.set_xlabel(f'Number of Passing Pairs Per Event')
nH_avg = np.average(n_pairs_pass)
ax.text(0.55,0.85,fr"Average = {nH_avg:.2f} Pairs", transform=ax.transAxes)
ax.text(0.55,0.9,fr"NN Score > {score_cut:.2f} Pairs", transform=ax.transAxes)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.55, 0.9, 'NN Score > 0.15 Pairs')

In [93]:
# Loop through pass_p4 (collected above) and calculate invariant dimass of passing

pass_score_dimass = np.array(())
pass_Ymass_events = np.array(())
for ievt,evt in enumerate(pass_p4):
    for i in range(len(evt)-1):
        for j in range(i+1, len(evt)):
            p4 = evt[i] + evt[j]
            m = p4.mass
            if ievt in pass_score_events and m < 450 and m > 300:
                pass_score_dimass = np.append(pass_score_dimass, m)
                pass_Ymass_events = np.append(pass_Ymass_events, ievt)

In [94]:
pass_score_trimass = np.array(())
for evt in pass_p4:
    for i in range(len(evt)-2):
        for j in range(i+1, len(evt)-1):
            for k in range(j+1, len(evt)):
                p4 = evt[i] + evt[j] + evt[k]
                m = p4.mass
                pass_score_trimass = np.append(pass_score_trimass, m)

In [95]:
plt.close()
bins = np.linspace(0,1000,100)
fig, ax = plt.subplots()
hist(ax, true_Y_m, bins=bins, label='no cut')
hist(ax, passing_Y_mass, bins=bins, label=f'score > {score_cut}')
ax.set_xlabel(r'$m_Y$ [GeV]')
ax.legend()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.legend.Legend at 0x7fb51032bd00>