# Mass Regression Notebook
In this notebook we will explore the first concepts of how to apply a neural network or deep learning to the 2-D mass plane.

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import uproot as ur
import awkward as ak

print('Numpy version: {}'.format(np.__version__))
print('Uproot version: {}'.format(ur.__version__))
print('Awkward version: {}'.format(ak.__version__))

Numpy version: 1.23.1
Uproot version: 4.3.3
Awkward version: 1.8.0


## Uproot files

In [3]:
rootfile_prefix = '/fast_scratch_1/atlas_images/XhhNTuple/'
rfile = 'user.zhenw.29137978._000001.MiniNTuple.root'

In [4]:
uprooted = ur.open(rootfile_prefix+rfile)
uprooted.keys()

['XhhMiniNtuple;1',
 'cutflow_XhhMiniNtuple;1',
 'cutflow_weighted_XhhMiniNtuple;1',
 'MetaData_EventCount_XhhMiniNtuple;1']

In [5]:
MNTuple = uprooted['XhhMiniNtuple']
# events.arrays(["px1", "py1", "pz1"])
MNTuple.show(name_width=32,
            interpretation_width=30)

name                             | typename                 | interpretation                
---------------------------------+--------------------------+-------------------------------
runNumber                        | int32_t                  | AsDtype('>i4')
eventNumber                      | int64_t                  | AsDtype('>i8')
lumiBlock                        | int32_t                  | AsDtype('>i4')
coreFlags                        | uint32_t                 | AsDtype('>u4')
bcid                             | int32_t                  | AsDtype('>i4')
mcEventNumber                    | int32_t                  | AsDtype('>i4')
mcChannelNumber                  | int32_t                  | AsDtype('>i4')
mcEventWeight                    | float                    | AsDtype('>f4')
NPV                              | int32_t                  | AsDtype('>i4')
actualInteractionsPerCrossing    | float                    | AsDtype('>f4')
averageInteractionsPerCrossing   | float    

## Load Utils

In [6]:
import os
import sys
cwd = os.getcwd()
path_head, path_tail = os.path.split(cwd)
sys.path.append(path_head+'/utils')
from ml_utils import dict_from_tree, DeltaR
from time import perf_counter as cput

## Load Data

In [7]:
branches = ['boosted_nGoodJets', "nboostedJets", "boostedJets_m",
            "boostedJets_pt", "boostedJets_phi", "boostedJets_eta",
           "truth_mHH", 'truthjet_antikt10_pt', 'truthjet_antikt10_eta',
           'truthjet_antikt10_phi', 'truthjet_antikt10_m']
np_branches = ['eventNumber']

In [8]:
t0 = cput()
hh4b_dict = dict_from_tree(MNTuple, branches, np_branches)
t1 = cput()
method_1_time = t1 - t0
print('Time to load arrays: {:8.4f} (s)'.format(method_1_time))

nEvents = len(hh4b_dict['eventNumber'])
print('{} Events'.format(nEvents))

Time to load arrays:   0.9776 (s)
339978 Events


In [9]:
gt_twoJets = np.zeros(nEvents, dtype=bool)
t0 = cput()
for i in range(nEvents):
    boostedJets_pt = ak.to_numpy(hh4b_dict['boostedJets_pt'][i])
    
    if len(boostedJets_pt) >=2:
        gt_twoJets[i] = True

n2jets = np.count_nonzero(gt_twoJets)
print(n2jets)
t1 = cput()
print(t1 - t0)

95892
28.644314205273986


# Fill arrays with inputs

In [10]:
evt_idx = np.arange(nEvents)[gt_twoJets]
matched_jets = []

t0 = cput()
for i, evt in enumerate(evt_idx):
    
    nTruthJets = ak.to_numpy(hh4b_dict['truthjet_antikt10_m'][evt]).shape[0]
    truthJet_coords = np.empty((nTruthJets, 2))
    # this can be vectorized easily (too tired)
    for j in range(nTruthJets):
        truthJet_coords[j,0] = hh4b_dict['truthjet_antikt10_eta'][evt][j]
        truthJet_coords[j,1] = hh4b_dict['truthjet_antikt10_phi'][evt][j]
    
    BoostedJet0_eta = hh4b_dict['boostedJets_eta'][evt][0]
    BoostedJet0_phi = hh4b_dict['boostedJets_phi'][evt][0]
    BoostedJet0_coords = np.array([BoostedJet0_eta, BoostedJet0_phi])
    LeadingJet_DR_arr = DeltaR(truthJet_coords, BoostedJet0_coords)
    # print(LeadingJet_DR_arr)
    LJ_DR = np.min(LeadingJet_DR_arr)
    LJ_DR_idx = np.argmin(LeadingJet_DR_arr)
    
    BoostedJet1_eta = hh4b_dict['boostedJets_eta'][evt][1]
    BoostedJet1_phi = hh4b_dict['boostedJets_phi'][evt][1]
    BoostedJet1_coords = np.array([BoostedJet1_eta, BoostedJet1_phi])
    subLeadingJet_DR_arr = DeltaR(truthJet_coords, BoostedJet1_coords)
    # print(subLeadingJet_DR_arr)
    SLJ_DR = np.min(subLeadingJet_DR_arr)
    SLJ_DR_idx = np.argmin(subLeadingJet_DR_arr)
    
    if SLJ_DR_idx != LJ_DR_idx:
        if LJ_DR < .1 and SLJ_DR < .1:
            matched_jets.append([i, LJ_DR_idx, SLJ_DR_idx])
    
    # print();print()

t1 = cput()
print('Time to complete jet matching: {:6.3f} (m)'.format((t1 - t0)/60))
print();print()
matched_jets = np.array(matched_jets)
for i in range(20):
    print(matched_jets[i])

print(matched_jets.shape)

Time to complete jet matching:  1.623 (m)


[0 0 1]
[1 0 1]
[2 0 1]
[3 0 1]
[4 0 1]
[5 0 1]
[6 0 1]
[7 0 1]
[8 0 1]
[9 0 1]
[10  0  1]
[11  1  0]
[12  0  1]
[13  0  1]
[14  0  1]
[15  0  1]
[16  0  1]
[17  0  1]
[18  0  1]
[19  1  0]
(94859, 3)


In [11]:
X = np.empty((matched_jets.shape[0],8))
Y = np.empty((matched_jets.shape[0],2))

In [12]:
for i in range(matched_jets.shape[0]):
    arr_slc = matched_jets[i]
    evt = arr_slc[0]
    ld_idx = arr_slc[1]
    sld_idx = arr_slc[2]
    
    print(evt)
    nboosturd = len(ak.to_numpy(hh4b_dict['boostedJets_pt'][evt]))
    print(nboosturd)
    
    # Leading
    X[evt,0] = hh4b_dict['boostedJets_m'][evt][0]
    X[evt,1] = hh4b_dict['boostedJets_pt'][evt][0]
    X[evt,2] = hh4b_dict['boostedJets_eta'][evt][0]
    X[evt,3] = hh4b_dict['boostedJets_phi'][evt][0]
    
    # Leading
    X[evt,4] = hh4b_dict['boostedJets_m'][evt][1]
    X[evt,5] = hh4b_dict['boostedJets_pt'][evt][1]
    X[evt,6] = hh4b_dict['boostedJets_eta'][evt][1]
    X[evt,7] = hh4b_dict['boostedJets_phi'][evt][1]
    
    # Truth Jet
    Y[evt,0] = hh4b_dict['truthjet_antikt10_m'][ld_idx]
    Y[evt,1] = hh4b_dict['truthjet_antikt10_m'][sld_idx]

0
1


ValueError: in NumpyArray attempting to get 1, index out of range

(https://github.com/scikit-hep/awkward-1.0/blob/1.8.0/src/libawkward/array/NumpyArray.cpp#L1217)

### Need to debug why this is happening???

In [13]:
X = np.random.uniform(low=-1, high=1.0, size=(matched_jets.shape[0],8))
Y = np.random.uniform(low=-1, high=1.0, size=(matched_jets.shape[0],2))

In [18]:
from ml_mass_regression_models import Dumb_Network
import tensorflow as tf
from tensorflow import keras

import os
os.environ['CUDA_VISIBLE_DEVICES'] = "1"
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

In [19]:
test_model = Dumb_Network(num_features=8)
test_model.compile(loss='mse', optimizer=keras.optimizers.Adam(
    learning_rate=.001))
test_model.summary()

Model: "dum-dum-net"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(None, 8)]               0         
_________________________________________________________________
dense_0 (Dense)              (None, 100)               900       
_________________________________________________________________
activation_0 (Activation)    (None, 100)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
activation_1 (Activation)    (None, 100)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
activation_2 (Activation)    (None, 100)               

In [21]:
history = test_model.fit(X,
              Y,
              epochs=10,
              batch_size=1000)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
