In [1]:
import numpy as np
import awkward as ak
import uproot_methods

In [2]:
import logging
logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(levelname)s: %(message)s')

In [3]:
def stack_arrays(a, keys, axis=-1):
    flat_arr = np.stack([a[k].flatten() for k in keys], axis=axis)
    return awkward.JaggedArray.fromcounts(a[keys[0]].counts, flat_arr)

In [4]:
def pad_array(a, maxlen, value=0., dtype='float32'):
    x = (np.ones((len(a), maxlen)) * value).astype(dtype)
    for idx, s in enumerate(a):
        if not len(s):
            continue
        trunc = s[:maxlen].astype(dtype)
        x[idx, :len(trunc)] = trunc
    return x

In [5]:
##and Professor suggests that we could use mass, classifacation for later application
def SetAKArr(filepath):
    with open(filepath, 'r') as file:
        lines = file.readlines()
    n_particles_ls = []
    px_ls = []
    py_ls = []
    pz_ls = []
    energy_ls = []
    mass_ls = []
    charge_ls = []
    _label1 = []
    _label2 = []
    _label3 = []
    _label4 = []
    _label5 = []
    
    n = 0
    for line in lines:
        if line.startswith('E'):
            if not n == 0:
                n_particles_ls.append(n)
                n = 0
            exp_inf = line.split()
#             _label1.append(int(exp_inf[1]))
#             _label2.append(1-int(exp_inf[1]))
#             _label1.append(1)
#             _label2.append(0)
            _label1.append(float(exp_inf[1]))
            _label2.append(float(exp_inf[2]))
            _label3.append(float(exp_inf[3]))
            _label4.append(float(exp_inf[4]))
            _label5.append(float(exp_inf[5]))
        else:
            par = line.split()
            ##particle +1
            n = n + 1
            px_ls.append(abs(float(par[2])))
            py_ls.append(abs(float(par[3])))
            pz_ls.append(abs(float(par[4])))
            energy_ls.append(abs(float(par[5])))
            mass_ls.append(abs(float(par[6])))  
            charge_ls.append(int(par[0]))
#             px_ls.append(6)
#             py_ls.append(2)
#             pz_ls.append(3)
#             energy_ls.append(4)
#             mass_ls.append(5)
    if not n == 0:
        n_particles_ls.append(n)
    px_arr = np.array(px_ls)
    py_arr = np.array(py_ls)
    pz_arr = np.array(pz_ls)
    energy_arr = np.array(energy_ls)
    mass_arr = np.array(mass_ls)
    charge_arr = np.array(charge_ls)
    n_particles = np.array(n_particles_ls)

#     print(n_particles)
    px = ak.JaggedArray.fromcounts(n_particles, px_arr)
    py = ak.JaggedArray.fromcounts(n_particles, py_arr)
    pz = ak.JaggedArray.fromcounts(n_particles, pz_arr)
    energy = ak.JaggedArray.fromcounts(n_particles, energy_arr)
    mass = ak.JaggedArray.fromcounts(n_particles, mass_arr)
    charge = ak.JaggedArray.fromcounts(n_particles, charge_arr)
    p4 = uproot_methods.TLorentzVectorArray.from_cartesian(px, py, pz, energy)
    ##Create an Order Dic
    from collections import OrderedDict
    v = OrderedDict()
    v['part_px'] = px
#     print(px)
    v['part_py'] = py
    v['part_pz'] = pz
    v['part_energy'] = energy
    v['part_mass'] = mass
    v['charge'] = charge
    v['part_e_log'] = np.log(energy)
    v['part_px_log'] = np.log(px)
    v['part_py_log'] = np.log(py)
    v['part_pz_log'] = np.log(pz)
    v['part_m_log'] = np.log(mass)
#     ls1 = [1,2,3,4]
#     ls2 = [5,6,7,8]
#     v['label'] = np.stack((_label1, _label2, _label3, _label4, _label5), axis=-1)
#     print(v['label'])
#     v['label'] = np.stack((_label1, _label2, _label3, _label4, _label5), axis = -1)
    v['label'] = np.stack(_label5, axis = -1)
#     print(v['label'])
    return v

In [6]:
class Dataset(object):
    def __init__(self, filepath, feature_dict = {}, label = 'label', pad_len=100, data_format='channel_first'):
        self.filepath = filepath
        self.feature_dict = feature_dict
        if len(feature_dict) == 0:
            feature_dict['points'] = ['part_px','part_py','part_pz']
            feature_dict['features'] = ['part_energy', 'part_mass', 'charge', 'part_px', 'part_py', 'part_pz']
            feature_dict['mask'] = ['part_energy']
        ##currently we use 'E' for experiments
        self.label = label
        self.pad_len = pad_len
        assert data_format in ('channel_first', 'channel_last')
        self.stack_axis = 1 if data_format=='channel_first' else -1
        self._values = {}
        self._label = None
        self._load()
        
    def _load(self):
        logging.info('Start loading file %s' % self.filepath)
#         counts = None
        a = SetAKArr(self.filepath)
        self._label = a[self.label]
        for k in self.feature_dict:
                cols = self.feature_dict[k]
                if not isinstance(cols, (list, tuple)):
                    cols = [cols]
                arrs = []
                for col in cols:
                    arrs.append(pad_array(a[col], self.pad_len))
                    ##check the dimesion of a[col], and it should be array.
                self._values[k] = np.stack(arrs, axis=self.stack_axis)
        logging.info('Finished loading file %s' % self.filepath)
        
        
        
    def __len__(self):
        return len(self._label)

    def __getitem__(self, key):
        if key==self.label:
            return self._label
        else:
            return self._values[key]
    
    @property
    def X(self):
        return self._values
    
    @property
    def y(self):
        return self._label

    def shuffle(self, seed=None):
        if seed is not None:
            np.random.seed(seed)
        shuffle_indices = np.arange(self.__len__())
        np.random.shuffle(shuffle_indices)
        for k in self._values:
            self._values[k] = self._values[k][shuffle_indices]
        self._label = self._label[shuffle_indices]

In [7]:
train_dataset = Dataset('train.txt', data_format='channel_last')
val_dataset = Dataset('val.txt', data_format='channel_last')
test_dataset = Dataset('test.txt', data_format = 'channel_last')

[2024-03-04 10:39:02,016] INFO: Start loading file train.txt
  result = getattr(ufunc, method)(*inputs, **kwargs)
[2024-03-04 10:39:02,221] INFO: Finished loading file train.txt
[2024-03-04 10:39:02,222] INFO: Start loading file val.txt
[2024-03-04 10:39:02,244] INFO: Finished loading file val.txt
[2024-03-04 10:39:02,245] INFO: Start loading file test.txt
[2024-03-04 10:39:02,267] INFO: Finished loading file test.txt


In [8]:
import tensorflow as tf
from tensorflow import keras
from tf_keras_model import get_particle_net, get_particle_net_lite

In [9]:
model_type = 'particle_net_lite' # choose between 'particle_net' and 'particle_net_lite'
##this shows the number of classes for classification
# num_classes = train_dataset.y.shape[1]
num_classes = 1
# print(num_classes)
input_shapes = {k:train_dataset[k].shape[1:] for k in train_dataset.X}
if 'lite' in model_type:
    model = get_particle_net_lite(num_classes, input_shapes)
else:
    model = get_particle_net(num_classes, input_shapes)

In [10]:
# Training parameters
batch_size = 1024 if 'lite' in model_type else 384
epochs = 30

In [11]:
def lr_schedule(epoch):
    lr = 1e-3
    if epoch > 10:
        lr *= 0.1
    elif epoch > 20:
        lr *= 0.01
    logging.info('Learning rate: %f'%lr)
    return lr

In [12]:
# model.compile(loss='categorical_crossentropy',
#               optimizer=keras.optimizers.Adam(learning_rate=lr_schedule(0)),
#               metrics=['accuracy'])
# model.compile(loss='log_cosh',
#               optimizer=keras.optimizers.Adam(learning_rate=lr_schedule(0)),
#               metrics=['accuracy'])
model.compile(loss='mean_squared_error',
              optimizer=keras.optimizers.Adam(learning_rate=lr_schedule(0)),
              metrics=['accuracy'])
model.summary()

[2024-03-04 10:39:07,617] INFO: Learning rate: 0.001000


Model: "ParticleNet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
mask (InputLayer)               [(None, 100, 1)]     0                                            
__________________________________________________________________________________________________
tf_op_layer_NotEqual (TensorFlo [(None, 100, 1)]     0           mask[0][0]                       
__________________________________________________________________________________________________
tf_op_layer_Cast (TensorFlowOpL [(None, 100, 1)]     0           tf_op_layer_NotEqual[0][0]       
__________________________________________________________________________________________________
tf_op_layer_Equal (TensorFlowOp [(None, 100, 1)]     0           tf_op_layer_Cast[0][0]           
________________________________________________________________________________________

In [13]:
from tensorflow.keras.callbacks import Callback
class LossLogger(Callback):
    def __init__(self, filename):
        super().__init__()
        self.filename = filename

    def on_epoch_end(self, epoch, logs=None):
        with open(self.filename, 'a') as f:
#             print("Epoch ", epoch + 1,": loss = ", logs["val_loss"], "\n", file = f)
#             if (epoch+1)%5==0 or epoch==0:
            print(logs["val_loss"], file = f)
#             f.write()
loss_logger = LossLogger('loss_log.txt')

In [14]:
# Prepare model model saving directory.
import os
save_dir = 'model_checkpoints'
model_name = '%s_model.{epoch:03d}.h5' % model_type
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
filepath = os.path.join(save_dir, model_name)

# Prepare callbacks for model saving and for learning rate adjustment.
# checkpoint = keras.callbacks.ModelCheckpoint(filepath='loss.txt',
#                              monitor='val_acc',
#                              verbose=1,
#                              save_best_only=True)
#I change the monitor from val_acc to val_loss
checkpoint = keras.callbacks.ModelCheckpoint(filepath=filepath,
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True)
lr_scheduler = keras.callbacks.LearningRateScheduler(lr_schedule)
progress_bar = keras.callbacks.ProgbarLogger()
callbacks = [checkpoint, lr_scheduler, loss_logger]
# callbacks = [lr_schedule]

In [15]:
train_dataset.shuffle()
model.fit(train_dataset.X, train_dataset.y,
          batch_size=batch_size,
#           epochs=epochs,
          epochs=30, # --- train only for 1 epoch here for demonstration ---
          validation_data=(val_dataset.X, val_dataset.y),
          shuffle=True,
          callbacks=callbacks)

[2024-03-04 10:39:11,085] INFO: Learning rate: 0.001000


Epoch 1/30
Epoch 00001: val_loss improved from inf to 26.89506, saving model to model_checkpoints/particle_net_lite_model.001.h5


[2024-03-04 10:39:16,208] INFO: Learning rate: 0.001000


Epoch 2/30
Epoch 00002: val_loss improved from 26.89506 to 25.91664, saving model to model_checkpoints/particle_net_lite_model.002.h5


[2024-03-04 10:39:20,120] INFO: Learning rate: 0.001000


Epoch 3/30
Epoch 00003: val_loss improved from 25.91664 to 24.66526, saving model to model_checkpoints/particle_net_lite_model.003.h5


[2024-03-04 10:39:24,027] INFO: Learning rate: 0.001000


Epoch 4/30
Epoch 00004: val_loss improved from 24.66526 to 23.08996, saving model to model_checkpoints/particle_net_lite_model.004.h5


[2024-03-04 10:39:27,938] INFO: Learning rate: 0.001000


Epoch 5/30
Epoch 00005: val_loss improved from 23.08996 to 21.48997, saving model to model_checkpoints/particle_net_lite_model.005.h5


[2024-03-04 10:39:31,867] INFO: Learning rate: 0.001000


Epoch 6/30
Epoch 00006: val_loss improved from 21.48997 to 20.68785, saving model to model_checkpoints/particle_net_lite_model.006.h5


[2024-03-04 10:39:35,830] INFO: Learning rate: 0.001000


Epoch 7/30
Epoch 00007: val_loss did not improve from 20.68785


[2024-03-04 10:39:39,815] INFO: Learning rate: 0.001000


Epoch 8/30
Epoch 00008: val_loss did not improve from 20.68785


[2024-03-04 10:39:43,781] INFO: Learning rate: 0.001000


Epoch 9/30
Epoch 00009: val_loss did not improve from 20.68785


[2024-03-04 10:39:47,918] INFO: Learning rate: 0.001000


Epoch 10/30
Epoch 00010: val_loss did not improve from 20.68785


[2024-03-04 10:39:52,149] INFO: Learning rate: 0.001000


Epoch 11/30
Epoch 00011: val_loss improved from 20.68785 to 20.39607, saving model to model_checkpoints/particle_net_lite_model.011.h5


[2024-03-04 10:39:56,448] INFO: Learning rate: 0.000100


Epoch 12/30
Epoch 00012: val_loss improved from 20.39607 to 20.25451, saving model to model_checkpoints/particle_net_lite_model.012.h5


[2024-03-04 10:40:00,528] INFO: Learning rate: 0.000100


Epoch 13/30
Epoch 00013: val_loss improved from 20.25451 to 20.10238, saving model to model_checkpoints/particle_net_lite_model.013.h5


[2024-03-04 10:40:04,557] INFO: Learning rate: 0.000100


Epoch 14/30
Epoch 00014: val_loss improved from 20.10238 to 19.93992, saving model to model_checkpoints/particle_net_lite_model.014.h5


[2024-03-04 10:40:08,602] INFO: Learning rate: 0.000100


Epoch 15/30
Epoch 00015: val_loss improved from 19.93992 to 19.76786, saving model to model_checkpoints/particle_net_lite_model.015.h5


[2024-03-04 10:40:12,613] INFO: Learning rate: 0.000100


Epoch 16/30
Epoch 00016: val_loss improved from 19.76786 to 19.58534, saving model to model_checkpoints/particle_net_lite_model.016.h5


[2024-03-04 10:40:16,588] INFO: Learning rate: 0.000100


Epoch 17/30
Epoch 00017: val_loss improved from 19.58534 to 19.39331, saving model to model_checkpoints/particle_net_lite_model.017.h5


[2024-03-04 10:40:20,660] INFO: Learning rate: 0.000100


Epoch 18/30
Epoch 00018: val_loss improved from 19.39331 to 19.18932, saving model to model_checkpoints/particle_net_lite_model.018.h5


[2024-03-04 10:40:24,663] INFO: Learning rate: 0.000100


Epoch 19/30
Epoch 00019: val_loss improved from 19.18932 to 18.97365, saving model to model_checkpoints/particle_net_lite_model.019.h5


[2024-03-04 10:40:28,692] INFO: Learning rate: 0.000100


Epoch 20/30
Epoch 00020: val_loss improved from 18.97365 to 18.74618, saving model to model_checkpoints/particle_net_lite_model.020.h5


[2024-03-04 10:40:32,699] INFO: Learning rate: 0.000100


Epoch 21/30
Epoch 00021: val_loss improved from 18.74618 to 18.50646, saving model to model_checkpoints/particle_net_lite_model.021.h5


[2024-03-04 10:40:36,886] INFO: Learning rate: 0.000100


Epoch 22/30
Epoch 00022: val_loss improved from 18.50646 to 18.25844, saving model to model_checkpoints/particle_net_lite_model.022.h5


[2024-03-04 10:40:41,079] INFO: Learning rate: 0.000100


Epoch 23/30
Epoch 00023: val_loss improved from 18.25844 to 18.00086, saving model to model_checkpoints/particle_net_lite_model.023.h5


[2024-03-04 10:40:45,600] INFO: Learning rate: 0.000100


Epoch 24/30
Epoch 00024: val_loss improved from 18.00086 to 17.73530, saving model to model_checkpoints/particle_net_lite_model.024.h5


[2024-03-04 10:40:50,181] INFO: Learning rate: 0.000100


Epoch 25/30
Epoch 00025: val_loss improved from 17.73530 to 17.45712, saving model to model_checkpoints/particle_net_lite_model.025.h5


[2024-03-04 10:40:54,427] INFO: Learning rate: 0.000100


Epoch 26/30
Epoch 00026: val_loss improved from 17.45712 to 17.17020, saving model to model_checkpoints/particle_net_lite_model.026.h5


[2024-03-04 10:40:58,650] INFO: Learning rate: 0.000100


Epoch 27/30
Epoch 00027: val_loss improved from 17.17020 to 16.87692, saving model to model_checkpoints/particle_net_lite_model.027.h5


[2024-03-04 10:41:02,852] INFO: Learning rate: 0.000100


Epoch 28/30
Epoch 00028: val_loss improved from 16.87692 to 16.57355, saving model to model_checkpoints/particle_net_lite_model.028.h5


[2024-03-04 10:41:07,069] INFO: Learning rate: 0.000100


Epoch 29/30
Epoch 00029: val_loss improved from 16.57355 to 16.26581, saving model to model_checkpoints/particle_net_lite_model.029.h5


[2024-03-04 10:41:11,272] INFO: Learning rate: 0.000100


Epoch 30/30
Epoch 00030: val_loss improved from 16.26581 to 15.95359, saving model to model_checkpoints/particle_net_lite_model.030.h5


<tensorflow.python.keras.callbacks.History at 0x7fe8af5c7748>

In [43]:
predictions = model.predict(test_dataset.X)
for prediction in predictions:
    print(prediction)

[2.2355587]
[2.263892]
[1.9904692]
[2.0716598]
[2.1739566]
[2.1478882]
[2.0731783]
[2.1058273]
[2.1574616]
[2.061681]
[2.2142441]
[2.153094]
[2.0303738]
[2.0304387]
[2.1040561]
[2.1510427]
[2.1811533]
[2.0896368]
[2.156884]
[2.0285914]
[2.0999713]
[2.1493244]
[2.1204877]
[2.005537]
[2.1498725]
[2.2014647]
[2.1142707]
[2.141198]
[2.2180314]
[2.1213305]
[2.1032257]
[2.0453799]
[2.101873]
[2.1368816]
[2.1217015]
[2.2747025]
[2.1699953]
[2.1023476]
[2.1776586]
[2.0868165]
[2.1228402]
[2.0104]
[2.169025]
[2.2086189]
[2.0947545]
[2.1251929]
[2.0240119]
[2.205694]
[2.1174688]
[2.2067692]
[2.0292854]
[2.1025047]
[2.106222]
[2.0701768]
[2.1069267]
[2.2311242]
[2.2329934]
[2.1579502]
[2.1621525]
[2.1256273]
[1.9916127]
[1.989718]
[2.0969675]
[2.0354187]
[2.061754]
[2.016888]
[2.2054718]
[2.1494567]
[2.0751963]
[2.2164342]
[2.0571394]
[2.092072]
[2.1033242]
[2.117317]
[2.2183459]
[2.1128912]
[2.1693864]
[2.2150085]
[2.2009718]
[2.1880624]
[2.216729]
[2.1379833]
[2.075752]
[2.1628091]
[2.144723]
[