# Test Model

In [13]:
'''
##=============================##
## Train Particle Flow Network ##
##=============================##

author: Russell Bate
russellbate@phas.ubc.ca
'''

## META-DATA ##
# Filesystem locations: where the .npy datasets live, where the shared
# utility modules live, and where this notebook's own modules live.
datapath_prefix = '/fast_scratch/atlas/'
module_path = '/home/russbate/MLPionCollaboration/LCStudies/util/'
herepath = '/home/russbate/MLPionCollaboration/LCStudies/regression/'

# Training hyper-parameters passed to model.compile / model.fit.
BATCH_SIZE = 100
LEARNING_RATE = 0.001
EPOCHS = 10

# Name given to the Keras model that gets built.
MODEL = 'PFN_wTNet'

# Value written to CUDA_VISIBLE_DEVICES (must be a string).
GPU = '7'

# Number of leading events sliced from the dataset files.
NEVENTS = 20000

In [14]:
## General Python Imports
#======================================
import numpy as np
import pickle
import time as t
import sys
from time import perf_counter as cput
import argparse
from datetime import datetime
# Today's date as a 'YYYY-MM-DD' string.
DATE = datetime.today().strftime('%Y-%m-%d')
# Emits a single blank line in the cell output.
print()

## Local ML Packages
#======================================
# module_path and herepath come from the META-DATA cell; they must be on
# sys.path before the project-local modules below can be imported.
sys.path.append(module_path)
sys.path.append(herepath)
import deep_set_util as dsu
import pfn_models
# NOTE(review): PFN_wTNet imported here is redefined further down in this
# notebook by a local `def PFN_wTNet`, which shadows this import.
from pfn_models import PFN_base, PFN_wDropout, PFN_wTNet




## TF environment

In [21]:
import os

# Configure GPU visibility and allocator behaviour BEFORE TensorFlow is
# imported. These variables are consumed when the GPU runtime is initialised,
# so setting them first guarantees they are in place no matter what the
# imports below trigger. GPU comes from the META-DATA cell and must be a str.
os.environ['CUDA_VISIBLE_DEVICES'] = GPU
# os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras import backend as K

### Turn off eager execution

In [28]:
# Switch TensorFlow to graph (non-eager) mode through the public compat API
# instead of importing from the private `tensorflow.python` package, whose
# module layout is not a stable interface. This has to run before any model,
# op or tensor is created in the kernel.
tf.compat.v1.disable_eager_execution()

In [29]:
# Memory-map the dataset files and keep only the first NEVENTS events.
# The files are opened read-only ('r'): this notebook only reads them, and a
# writable mapping ('r+') would let any accidental in-place operation on
# X / Y (or a slice of them) silently modify the shared files on disk.
X = np.load(datapath_prefix+'X_STMC_full_norm.npy', mmap_mode='r')[:NEVENTS,:,:]
Y = np.load(datapath_prefix+'Y_STMC_full_norm.npy', mmap_mode='r')[:NEVENTS]

In [30]:
# Sizes of the three partitions; presumably a 70/15/15 percent split of the
# events in X -- confirm against deep_set_util.tvt_num.
train_num, val_num, test_num = dsu.tvt_num(X, tvt=(70,15,15))

# Index where the validation partition ends and the test partition begins.
val_stop = train_num + val_num

# Training partition: leading train_num events, targets as a column vector.
X_train, Y_train = X[:train_num, :, :], Y[:train_num].reshape((train_num, 1))

# Validation partition: the next val_num events, targets as a column vector.
X_val, Y_val = X[train_num:val_stop, :, :], Y[train_num:val_stop].reshape((val_num, 1))

# Test partition: everything that remains.
# NOTE(review): unlike Y_train / Y_val, Y_test is NOT reshaped to a column
# vector here -- confirm downstream cells expect that shape.
X_test, Y_test = X[val_stop:, :, :], Y[val_stop:]

## Models

In [31]:
def point_mask_fn(X, mask_val=0.):
    """Build a per-point 0/1 mask from the last axis of X.

    A point gets 1 when at least one entry along its last axis differs from
    `mask_val`, and 0 when every entry equals `mask_val`. The result has the
    last axis reduced away and is cast to the dtype of X.
    """
    differs = K.not_equal(X, mask_val)
    point_is_real = K.any(differs, axis=-1)
    return K.cast(point_is_real, K.dtype(X))

def tdist_block(x, mask, size: int, number: str):
    """Apply a masked TimeDistributed Dense layer followed by a ReLU.

    Args:
        x: input tensor fed to the TimeDistributed layer.
        mask: mask forwarded to the TimeDistributed call.
        size: number of units of the wrapped Dense layer.
        number: suffix used in the layer names 't_dist_<number>' and
            'activation_<number>'.

    Returns:
        The output tensor of the ReLU activation.
    """
    per_point_dense = layers.TimeDistributed(layers.Dense(size),
                                             name='t_dist_' + number)
    projected = per_point_dense(x, mask=mask)
    return layers.Activation('relu', name='activation_' + number)(projected)

def multiply(tensor1, tensor2):
    """Return the Keras-backend dot product (K.dot) of the two tensors."""
    product = K.dot(tensor1, tensor2)
    return product

def mat_mul(tensors):
    """Matrix-multiply a pair of tensors.

    `tensors` must unpack into exactly two elements (left, right); the result
    is tf.linalg.matmul(left, right). Written to be used inside a Lambda
    layer, which passes its inputs as a single list.
    """
    left, right = tensors
    product = tf.linalg.matmul(left, right)
    return product

def cast_to_zero(tensors):
    '''Zero out entries of a modified tensor wherever the reference input is padding.

    Args:
        tensors: a pair (mod_input, input_tens). `input_tens` is the reference
            tensor; a position along its leading axes counts as padding when
            every value on its last axis equals zero. `mod_input` is the
            tensor to be masked and must share those leading axes.

    Returns:
        `mod_input` multiplied by a 0/1 mask (broadcast over the last axis),
        so that padded positions become exactly zero.
    '''
    mod_input, input_tens = tensors
    # True wherever an individual feature value is non-zero.
    full_mask = tf.logical_not(tf.math.equal(input_tens, 0.))
    # A position is kept if any of its features is non-zero.
    reduced_mask = tf.experimental.numpy.any(full_mask, axis=-1)
    # Cast to the dtype of the tensor being masked (rather than a hard-coded
    # float32) so the multiply below is valid for any activation dtype.
    reduced_mask = tf.cast(reduced_mask, dtype=mod_input.dtype)
    # Add a trailing axis so the mask broadcasts across mod_input's features.
    reduced_mask = tf.expand_dims(reduced_mask, axis=-1)
    return_tens = tf.math.multiply(mod_input, reduced_mask)
    return return_tens

In [32]:
def PFN_wTNet(num_points, num_features, name="PFN_wTNet"):
    """Build a Particle Flow Network preceded by a T-Net input transform.

    The model takes zero-padded point clouds of shape
    (num_points, num_features) per event. A small T-Net predicts a
    (num_features x num_features) matrix that is applied to every point; the
    transformed points go through a per-point (TimeDistributed) Phi block,
    are summed over the non-padded points, and the sum is fed to a dense
    F block ending in a single linear output.

    Note: this definition shadows the `PFN_wTNet` imported from `pfn_models`
    earlier in the notebook.

    Args:
        num_points: length of the point axis of the input.
        num_features: number of features per point.
        name: name given to the returned keras.Model.

    Returns:
        An uncompiled keras.Model mapping the input to one scalar per event.
    """

    inputs = keras.Input(shape=(num_points, num_features), name='input')

    #============== Masking for TNet =========================================#
    # The Masking layer is used only to obtain the Keras mask of padded
    # (all-zero) points; its output tensor is not fed to later layers.
    mask_tens = layers.Masking(mask_value=0.0, input_shape=(num_points,
                                num_features))(inputs)
    keras_mask = mask_tens._keras_mask
    #=========================================================================#

    #============== TNet =====================================================#
    block_0 = tdist_block(inputs, mask=keras_mask, size=50, number='0')
    block_1 = tdist_block(block_0, mask=keras_mask, size=100, number='1')
    block_2 = tdist_block(block_1, mask=keras_mask, size=100, number='2')

    # Force the per-point features of padded points to exactly zero so they
    # cannot win the max-pool below through bias terms.
    block_2_masked = layers.Lambda(cast_to_zero, name='block_2_masked')(
        [block_2, inputs])

    # Global max over the point axis. MaxPool1D pools along the points, so
    # the window must span all num_points; a fixed window of 100 would only
    # look at the first 100 points (and fail when num_points < 100).
    # The output keeps a length-1 point axis: (batch, 1, 100).
    max_pool = layers.MaxPool1D(pool_size=num_points, padding='valid',
                                name='tnet_0_MaxPool', strides=num_points)(
        block_2_masked)

    tnet_0_block_0 = layers.Dense(100, activation='relu',
                                  name='tnet_0_dense_0')(max_pool)

    tnet_0_block_1 = layers.Dense(50, activation='relu',
                                  name='tnet_0_dense_1')(tnet_0_block_0)

    # Zero kernel + identity bias: the predicted transform starts out as the
    # identity matrix.
    vector_dense = layers.Dense(
        num_features**2,
        kernel_initializer='zeros',
        bias_initializer=keras.initializers.Constant(
            np.eye(num_features).flatten()),
        name='pre_matrix_0'
    )(tnet_0_block_1)

    mat_layer = layers.Reshape((num_features, num_features),
                               name='matrix_0')(vector_dense)

    # Apply the predicted matrix to every point of the original input.
    mod_inputs = layers.Lambda(mat_mul, name='matrix_multiply_0')(
        [inputs, mat_layer])
    #=========================================================================#

    #============== T_Dist Phi Block =========================================#
    dense_0 = layers.Dense(100)
    t_dist_0 = layers.TimeDistributed(dense_0, name='t_dist_3')(mod_inputs)
    activation_0 = layers.Activation('relu', name="activation_3")(t_dist_0)

    dense_1 = layers.Dense(100)
    t_dist_1 = layers.TimeDistributed(dense_1, name='t_dist_4')(activation_0)
    activation_1 = layers.Activation('relu', name='activation_4')(t_dist_1)

    dense_2 = layers.Dense(128)
    t_dist_2 = layers.TimeDistributed(dense_2, name='t_dist_5')(activation_1)
    activation_2 = layers.Activation('relu', name='activation_5')(t_dist_2)
    #=========================================================================#

    #============== Aggregation Function (Summation) =========================#

    # This is important as it produces a layer tensor of 1s and 0s
    # to be dotted with the output of the activation, i.e. a sum over the
    # point axis that skips padded points.
    lambda_layer = layers.Lambda(point_mask_fn,
                                name='mask')(inputs)

    sum_layer = layers.Dot(axes=(1,1), name='sum')(
        [lambda_layer, activation_2])
    #=========================================================================#

    #============== F Block ==================================================#
    dense_3 = layers.Dense(100, name='dense_6')(sum_layer)
    activation_3 = layers.Activation('relu', name="activation_6")(dense_3)

    dense_4 = layers.Dense(100, name='dense_7')(activation_3)
    activation_4 = layers.Activation('relu', name="activation_7")(dense_4)

    dense_5 = layers.Dense(100, name='dense_8')(activation_4)
    activation_5 = layers.Activation('relu', name="activation_8")(dense_5)

    dense_6 = layers.Dense(1, name='output')(activation_5)
    activation_6 = layers.Activation('linear', name="activation_9")(dense_6)
    #=========================================================================#

    return keras.Model(inputs=inputs, outputs=activation_6, name=name)

In [33]:
# Build the network with the point and feature dimensions taken from the
# loaded data, then compile it for MSE regression with Adam.
model = PFN_wTNet(
    num_points=X.shape[1],
    num_features=X.shape[2],
    name=MODEL,
)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    loss='mse',
)
model.summary()

Model: "PFN_wTNet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              [(None, 1086, 5)]    0                                            
__________________________________________________________________________________________________
t_dist_0 (TimeDistributed)      (None, 1086, 50)     300         input[0][0]                      
__________________________________________________________________________________________________
activation_0 (Activation)       (None, 1086, 50)     0           t_dist_0[0][0]                   
__________________________________________________________________________________________________
t_dist_1 (TimeDistributed)      (None, 1086, 100)    5100        activation_0[0][0]               
__________________________________________________________________________________________

## Train Model

In [34]:
# Fit on the training partition and evaluate on the validation partition
# after every epoch; the returned History object is kept in `history`.
history = model.fit(
    x=X_train,
    y=Y_train,
    validation_data=(X_val, Y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=1,
)

Train on 14000 samples, validate on 3000 samples
Epoch 1/10


2022-02-13 21:35:18.606120: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9672 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2080 Ti, pci bus id: 0000:b2:00.0, compute capability: 7.5
2022-02-13 21:35:24.999804: I tensorflow/stream_executor/cuda/cuda_dnn.cc:381] Loaded cuDNN version 8204






Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
