In [1]:
import numpy as np
import awkward as ak
import uproot_methods

In [2]:
import logging
logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(levelname)s: %(message)s')

In [3]:
def stack_arrays(a, keys, axis=-1):
    flat_arr = np.stack([a[k].flatten() for k in keys], axis=axis)
    return awkward.JaggedArray.fromcounts(a[keys[0]].counts, flat_arr)

In [4]:
def pad_array(a, maxlen, value=0., dtype='float32'):
    x = (np.ones((len(a), maxlen)) * value).astype(dtype)
    for idx, s in enumerate(a):
        if not len(s):
            continue
        trunc = s[:maxlen].astype(dtype)
        x[idx, :len(trunc)] = trunc
    return x

In [5]:
##and Professor suggests that we could use mass, classifacation for later application
def SetAKArr(filepath):
    with open(filepath, 'r') as file:
        lines = file.readlines()
    n_particles_ls = []
    px_ls = []
    py_ls = []
    pz_ls = []
    energy_ls = []
    mass_ls = []
    charge_ls = []
    _label1 = []
    _label2 = []
    _label3 = []
    _label4 = []
    _label5 = []
    
    n = 0
    for line in lines:
        if line.startswith('E'):
            if not n == 0:
                n_particles_ls.append(n)
                n = 0
            exp_inf = line.split()
#             _label1.append(int(exp_inf[1]))
#             _label2.append(1-int(exp_inf[1]))
#             _label1.append(1)
#             _label2.append(0)
            _label1.append(float(exp_inf[1]))
            _label2.append(float(exp_inf[2]))
            _label3.append(float(exp_inf[3]))
            _label4.append(float(exp_inf[4]))
            _label5.append(float(exp_inf[5]))
        else:
            par = line.split()
            ##particle +1
            n = n + 1
            px_ls.append(abs(float(par[2])))
            py_ls.append(abs(float(par[3])))
            pz_ls.append(abs(float(par[4])))
            energy_ls.append(abs(float(par[5])))
            mass_ls.append(abs(float(par[6])))  
            charge_ls.append(int(par[0]))
#             px_ls.append(6)
#             py_ls.append(2)
#             pz_ls.append(3)
#             energy_ls.append(4)
#             mass_ls.append(5)
    if not n == 0:
        n_particles_ls.append(n)
    px_arr = np.array(px_ls)
    py_arr = np.array(py_ls)
    pz_arr = np.array(pz_ls)
    energy_arr = np.array(energy_ls)
    mass_arr = np.array(mass_ls)
    charge_arr = np.array(charge_ls)
    n_particles = np.array(n_particles_ls)

#     print(n_particles)
    px = ak.JaggedArray.fromcounts(n_particles, px_arr)
    py = ak.JaggedArray.fromcounts(n_particles, py_arr)
    pz = ak.JaggedArray.fromcounts(n_particles, pz_arr)
    energy = ak.JaggedArray.fromcounts(n_particles, energy_arr)
    mass = ak.JaggedArray.fromcounts(n_particles, mass_arr)
    charge = ak.JaggedArray.fromcounts(n_particles, charge_arr)
    p4 = uproot_methods.TLorentzVectorArray.from_cartesian(px, py, pz, energy)
    ##Create an Order Dic
    from collections import OrderedDict
    v = OrderedDict()
    v['part_px'] = px
#     print(px)
    v['part_py'] = py
    v['part_pz'] = pz
    v['part_energy'] = energy
    v['part_mass'] = mass
    v['charge'] = charge
    v['part_e_log'] = np.log(energy)
    v['part_px_log'] = np.log(px)
    v['part_py_log'] = np.log(py)
    v['part_pz_log'] = np.log(pz)
    v['part_m_log'] = np.log(mass)
#     ls1 = [1,2,3,4]
#     ls2 = [5,6,7,8]
#     v['label'] = np.stack((_label1, _label2, _label3, _label4, _label5), axis=-1)
#     print(v['label'])
#     v['label'] = np.stack((_label1, _label2, _label3, _label4, _label5), axis = -1)
    v['label'] = np.stack(_label5, axis = -1)
#     print(v['label'])
    return v

In [6]:
class Dataset(object):
    def __init__(self, filepath, feature_dict = {}, label = 'label', pad_len=100, data_format='channel_first'):
        self.filepath = filepath
        self.feature_dict = feature_dict
        if len(feature_dict) == 0:
            feature_dict['points'] = ['part_px','part_py','part_pz']
            feature_dict['features'] = ['part_energy', 'part_mass', 'charge', 'part_px', 'part_py', 'part_pz']
            feature_dict['mask'] = ['part_energy']
        ##currently we use 'E' for experiments
        self.label = label
        self.pad_len = pad_len
        assert data_format in ('channel_first', 'channel_last')
        self.stack_axis = 1 if data_format=='channel_first' else -1
        self._values = {}
        self._label = None
        self._load()
        
    def _load(self):
        logging.info('Start loading file %s' % self.filepath)
#         counts = None
        a = SetAKArr(self.filepath)
        self._label = a[self.label]
        for k in self.feature_dict:
                cols = self.feature_dict[k]
                if not isinstance(cols, (list, tuple)):
                    cols = [cols]
                arrs = []
                for col in cols:
                    arrs.append(pad_array(a[col], self.pad_len))
                    ##check the dimesion of a[col], and it should be array.
                self._values[k] = np.stack(arrs, axis=self.stack_axis)
        logging.info('Finished loading file %s' % self.filepath)
        
        
        
    def __len__(self):
        return len(self._label)

    def __getitem__(self, key):
        if key==self.label:
            return self._label
        else:
            return self._values[key]
    
    @property
    def X(self):
        return self._values
    
    @property
    def y(self):
        return self._label

    def shuffle(self, seed=None):
        if seed is not None:
            np.random.seed(seed)
        shuffle_indices = np.arange(self.__len__())
        np.random.shuffle(shuffle_indices)
        for k in self._values:
            self._values[k] = self._values[k][shuffle_indices]
        self._label = self._label[shuffle_indices]

In [7]:
train_dataset = Dataset('train.txt', data_format='channel_last')
val_dataset = Dataset('val.txt', data_format='channel_last')
test_dataset = Dataset('test.txt', data_format = 'channel_last')

[2024-03-07 08:57:45,079] INFO: Start loading file train.txt
  result = getattr(ufunc, method)(*inputs, **kwargs)
[2024-03-07 08:57:45,283] INFO: Finished loading file train.txt
[2024-03-07 08:57:45,284] INFO: Start loading file val.txt
[2024-03-07 08:57:45,306] INFO: Finished loading file val.txt
[2024-03-07 08:57:45,306] INFO: Start loading file test.txt
[2024-03-07 08:57:45,328] INFO: Finished loading file test.txt


In [8]:
import tensorflow as tf
from tensorflow import keras
from tf_keras_model import get_particle_net, get_particle_net_lite

In [9]:
model_type = 'particle_net_lite' # choose between 'particle_net' and 'particle_net_lite'
##this shows the number of classes for classification
# num_classes = train_dataset.y.shape[1]
num_classes = 1
# print(num_classes)
input_shapes = {k:train_dataset[k].shape[1:] for k in train_dataset.X}
if 'lite' in model_type:
    model = get_particle_net_lite(num_classes, input_shapes)
else:
    model = get_particle_net(num_classes, input_shapes)

In [10]:
# Training parameters
batch_size = 1024 if 'lite' in model_type else 384
epochs = 30

In [11]:
def lr_schedule(epoch):
    lr = 1e-3
    if epoch > 10:
        lr *= 0.1
    elif epoch > 20:
        lr *= 0.01
    logging.info('Learning rate: %f'%lr)
    return lr

In [20]:
# model.compile(loss='categorical_crossentropy',
#               optimizer=keras.optimizers.Adam(learning_rate=lr_schedule(0)),
#               metrics=['accuracy'])
# model.compile(loss='log_cosh',
#               optimizer=keras.optimizers.Adam(learning_rate=lr_schedule(0)),
#               metrics=['accuracy'])
model.compile(loss='mean_squared_error',
              optimizer=keras.optimizers.Adam(learning_rate=lr_schedule(0)),
              metrics=['accuracy'])
model.summary()

[2024-03-07 09:05:39,979] INFO: Learning rate: 0.001000


Model: "ParticleNet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
mask (InputLayer)               [(None, 100, 1)]     0                                            
__________________________________________________________________________________________________
tf_op_layer_NotEqual (TensorFlo [(None, 100, 1)]     0           mask[0][0]                       
__________________________________________________________________________________________________
tf_op_layer_Cast (TensorFlowOpL [(None, 100, 1)]     0           tf_op_layer_NotEqual[0][0]       
__________________________________________________________________________________________________
tf_op_layer_Equal (TensorFlowOp [(None, 100, 1)]     0           tf_op_layer_Cast[0][0]           
________________________________________________________________________________________

In [21]:
from tensorflow.keras.callbacks import Callback
class LossLogger(Callback):
    def __init__(self, filename):
        super().__init__()
        self.filename = filename

    def on_epoch_end(self, epoch, logs=None):
        with open(self.filename, 'a') as f:
#             print("Epoch ", epoch + 1,": loss = ", logs["val_loss"], "\n", file = f)
#             if (epoch+1)%5==0 or epoch==0:
            print(logs["val_loss"], file = f)
#             f.write()
loss_logger = LossLogger('MSE.txt')

In [22]:
# Prepare model model saving directory.
import os
save_dir = 'model_checkpoints'
model_name = '%s_model.{epoch:03d}.h5' % model_type
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
filepath = os.path.join(save_dir, model_name)

# Prepare callbacks for model saving and for learning rate adjustment.
checkpoint = keras.callbacks.ModelCheckpoint(filepath='loss.txt',
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=True)
# I change the monitor from val_acc to val_loss
# checkpoint = keras.callbacks.ModelCheckpoint(filepath=filepath,
#                              monitor='val_loss',
#                              verbose=1,
#                              save_best_only=True)
lr_scheduler = keras.callbacks.LearningRateScheduler(lr_schedule)
progress_bar = keras.callbacks.ProgbarLogger()
callbacks = [checkpoint, lr_scheduler, loss_logger]
# callbacks = [lr_schedule]

In [23]:
train_dataset.shuffle()
model.fit(train_dataset.X, train_dataset.y,
          batch_size=batch_size,
#           epochs=epochs,
          epochs=70, ##- train only for 1 epoch here for demonstration ---
          validation_data=(val_dataset.X, val_dataset.y),
          shuffle=True,
          callbacks=callbacks)

[2024-03-07 09:05:44,411] INFO: Learning rate: 0.001000


Epoch 1/70





[2024-03-07 09:05:49,340] INFO: Learning rate: 0.001000


Epoch 2/70





[2024-03-07 09:05:53,120] INFO: Learning rate: 0.001000


Epoch 3/70





[2024-03-07 09:05:56,934] INFO: Learning rate: 0.001000


Epoch 4/70





[2024-03-07 09:06:00,798] INFO: Learning rate: 0.001000


Epoch 5/70





[2024-03-07 09:06:04,660] INFO: Learning rate: 0.001000


Epoch 6/70





[2024-03-07 09:06:08,575] INFO: Learning rate: 0.001000


Epoch 7/70





[2024-03-07 09:06:12,481] INFO: Learning rate: 0.001000


Epoch 8/70





[2024-03-07 09:06:16,423] INFO: Learning rate: 0.001000


Epoch 9/70





[2024-03-07 09:06:20,356] INFO: Learning rate: 0.001000


Epoch 10/70





[2024-03-07 09:06:24,270] INFO: Learning rate: 0.001000


Epoch 11/70





[2024-03-07 09:06:28,199] INFO: Learning rate: 0.000100


Epoch 12/70





[2024-03-07 09:06:32,104] INFO: Learning rate: 0.000100


Epoch 13/70





[2024-03-07 09:06:36,083] INFO: Learning rate: 0.000100


Epoch 14/70





[2024-03-07 09:06:40,051] INFO: Learning rate: 0.000100


Epoch 15/70





[2024-03-07 09:06:44,037] INFO: Learning rate: 0.000100


Epoch 16/70





[2024-03-07 09:06:47,984] INFO: Learning rate: 0.000100


Epoch 17/70





[2024-03-07 09:06:51,958] INFO: Learning rate: 0.000100


Epoch 18/70





[2024-03-07 09:06:55,944] INFO: Learning rate: 0.000100


Epoch 19/70





[2024-03-07 09:07:00,002] INFO: Learning rate: 0.000100


Epoch 20/70





[2024-03-07 09:07:04,124] INFO: Learning rate: 0.000100


Epoch 21/70





[2024-03-07 09:07:08,294] INFO: Learning rate: 0.000100


Epoch 22/70





[2024-03-07 09:07:12,419] INFO: Learning rate: 0.000100


Epoch 23/70





[2024-03-07 09:07:16,570] INFO: Learning rate: 0.000100


Epoch 24/70





[2024-03-07 09:07:20,720] INFO: Learning rate: 0.000100


Epoch 25/70





[2024-03-07 09:07:24,893] INFO: Learning rate: 0.000100


Epoch 26/70





[2024-03-07 09:07:29,074] INFO: Learning rate: 0.000100


Epoch 27/70





[2024-03-07 09:07:33,209] INFO: Learning rate: 0.000100


Epoch 28/70





[2024-03-07 09:07:37,363] INFO: Learning rate: 0.000100


Epoch 29/70





[2024-03-07 09:07:41,481] INFO: Learning rate: 0.000100


Epoch 30/70





[2024-03-07 09:07:45,620] INFO: Learning rate: 0.000100


Epoch 31/70





[2024-03-07 09:07:49,769] INFO: Learning rate: 0.000100


Epoch 32/70





[2024-03-07 09:07:53,907] INFO: Learning rate: 0.000100


Epoch 33/70





[2024-03-07 09:07:58,048] INFO: Learning rate: 0.000100


Epoch 34/70





[2024-03-07 09:08:02,173] INFO: Learning rate: 0.000100


Epoch 35/70





[2024-03-07 09:08:06,309] INFO: Learning rate: 0.000100


Epoch 36/70





[2024-03-07 09:08:10,462] INFO: Learning rate: 0.000100


Epoch 37/70





[2024-03-07 09:08:14,649] INFO: Learning rate: 0.000100


Epoch 38/70





[2024-03-07 09:08:18,805] INFO: Learning rate: 0.000100


Epoch 39/70





[2024-03-07 09:08:22,948] INFO: Learning rate: 0.000100


Epoch 40/70





[2024-03-07 09:08:27,139] INFO: Learning rate: 0.000100


Epoch 41/70





[2024-03-07 09:08:31,277] INFO: Learning rate: 0.000100


Epoch 42/70





[2024-03-07 09:08:35,421] INFO: Learning rate: 0.000100


Epoch 43/70





[2024-03-07 09:08:39,666] INFO: Learning rate: 0.000100


Epoch 44/70





[2024-03-07 09:08:43,821] INFO: Learning rate: 0.000100


Epoch 45/70





[2024-03-07 09:08:47,961] INFO: Learning rate: 0.000100


Epoch 46/70





[2024-03-07 09:08:52,104] INFO: Learning rate: 0.000100


Epoch 47/70





[2024-03-07 09:08:56,254] INFO: Learning rate: 0.000100


Epoch 48/70





[2024-03-07 09:09:00,393] INFO: Learning rate: 0.000100


Epoch 49/70





[2024-03-07 09:09:04,516] INFO: Learning rate: 0.000100


Epoch 50/70





[2024-03-07 09:09:08,656] INFO: Learning rate: 0.000100


Epoch 51/70





[2024-03-07 09:09:12,779] INFO: Learning rate: 0.000100


Epoch 52/70





[2024-03-07 09:09:16,907] INFO: Learning rate: 0.000100


Epoch 53/70





[2024-03-07 09:09:21,045] INFO: Learning rate: 0.000100


Epoch 54/70





[2024-03-07 09:09:25,229] INFO: Learning rate: 0.000100


Epoch 55/70





[2024-03-07 09:09:29,397] INFO: Learning rate: 0.000100


Epoch 56/70





[2024-03-07 09:09:33,550] INFO: Learning rate: 0.000100


Epoch 57/70





[2024-03-07 09:09:37,696] INFO: Learning rate: 0.000100


Epoch 58/70





[2024-03-07 09:09:42,077] INFO: Learning rate: 0.000100


Epoch 59/70





[2024-03-07 09:09:46,571] INFO: Learning rate: 0.000100


Epoch 60/70





[2024-03-07 09:09:51,005] INFO: Learning rate: 0.000100


Epoch 61/70





[2024-03-07 09:09:55,469] INFO: Learning rate: 0.000100


Epoch 62/70





[2024-03-07 09:09:59,920] INFO: Learning rate: 0.000100


Epoch 63/70





[2024-03-07 09:10:04,344] INFO: Learning rate: 0.000100


Epoch 64/70





[2024-03-07 09:10:08,728] INFO: Learning rate: 0.000100


Epoch 65/70





[2024-03-07 09:10:13,113] INFO: Learning rate: 0.000100


Epoch 66/70





[2024-03-07 09:10:17,547] INFO: Learning rate: 0.000100


Epoch 67/70





[2024-03-07 09:10:21,969] INFO: Learning rate: 0.000100


Epoch 68/70





[2024-03-07 09:10:26,421] INFO: Learning rate: 0.000100


Epoch 69/70





[2024-03-07 09:10:30,845] INFO: Learning rate: 0.000100


Epoch 70/70





<tensorflow.python.keras.callbacks.History at 0x7f472cca2358>

In [24]:
with open('MSE_pre.txt', 'w') as file:
    
    predictions = model.predict(test_dataset.X)
    for prediction in predictions:
        print(prediction[0], file = file)