In [1]:
import tensorflow as tf
import numpy as np
import h5py
import matplotlib.pyplot as plt

import models.graph_nn as grap

In [2]:
%load_ext autoreload
%autoreload 2

# data preprocessing

In [3]:
def normalized_adjacency(A):

    D = np.array(np.sum(A, axis=2), dtype=np.float32) # compute outdegree (= rowsum)
    D = np.nan_to_num(np.power(D,-0.5), posinf=0, neginf=0) # normalize (**-(1/2))
    D = np.asarray([np.diagflat(dd) for dd in D]) # and diagonalize
    
    return np.matmul(D, np.matmul(A, D))

In [4]:
def make_adjacencies(particles):
    real_p = particles[:,:,0] > 0
    adjacencies = (real_p[:,:,np.newaxis] * real_p[:,np.newaxis,:]).astype('int')
    return adjacencies

# load data

In [5]:
filename = '/home/kinga/dev/datasamples/L1_anomaly_challenge/background_training_1M.h5'
ff = h5py.File(filename, 'r')
particles = np.asarray(ff.get('Particles'))

In [6]:
particles.shape

(1000000, 19, 4)

In [7]:
nodes_n = particles.shape[1]
feat_sz = particles.shape[2]

In [8]:
# features: array([b'Pt', b'Eta', b'Phi', b'Class'], dtype='|S5')
particles_train = particles[:3000]

In [9]:
A = make_adjacencies(particles_train)
A_tilde = normalized_adjacency(A)

  after removing the cwd from sys.path.


# build model: Autoencoder

In [10]:
gnn = grap.GraphAutoencoder(nodes_n=nodes_n, feat_sz=feat_sz, activation=tf.nn.tanh)
gnn.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), run_eagerly=True)

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input_features (InputLa [(None, 19, 4)]      0                                            
__________________________________________________________________________________________________
encoder_input_adjacency (InputL [(None, 19, 19)]     0                                            
__________________________________________________________________________________________________
graph_convolution (GraphConvolu (None, 19, 3)        12          encoder_input_features[0][0]     
                                                                 encoder_input_adjacency[0][0]    
__________________________________________________________________________________________________
graph_convolution_1 (GraphConvo (None, 19, 2)        6           graph_convolution[0][0

# train model

In [11]:
callbacks = [tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5, verbose=1)]
gnn.fit((particles_train, A_tilde), A, epochs=100, batch_size=128, validation_split=0.25, callbacks=callbacks)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.004999999888241291.
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 00013: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 00018: ReduceLROnPlateau reducing learning rate to 0.0012499999720603228.
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 00023: ReduceLROnPlateau reducing learning rate to 0.0006249999860301614.
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 00028: ReduceLROnPlateau reducing learning rate to 0.0003124999930150807.
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 00038: ReduceLROnPlateau reducing learning rate to 0.00015624999650754035.
Epoch 39/100
Epoch 40/100
Epoch 41/

Epoch 77/100
Epoch 78/100
Epoch 00078: ReduceLROnPlateau reducing learning rate to 6.103515488575795e-07.
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 00083: ReduceLROnPlateau reducing learning rate to 3.0517577442878974e-07.
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 00088: ReduceLROnPlateau reducing learning rate to 1.5258788721439487e-07.
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 00093: ReduceLROnPlateau reducing learning rate to 7.629394360719743e-08.
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 00098: ReduceLROnPlateau reducing learning rate to 3.814697180359872e-08.
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x7f0bfa620748>

# predict

In [12]:
particles_test = particles[2000:3000]
A_test = make_adjacencies(particles_test)
A_tilde_test = normalized_adjacency(A_test)

  after removing the cwd from sys.path.


In [15]:
z, A_pred_probs = gnn((particles_test, A_tilde_test))

In [16]:
A_pred = (A_pred_probs > 0.5).numpy().astype('int') 