# MPNN

### Input preparation
- Search bonds : [see this](search_bonds.ipynb)
- Search angles : [see this](search_angles.ipynb)
- Generate input array : [see this](gen_input_graph.ipynb)
    
### Prepared files:
- Train
    - Nodes features
    - Edges features input
    - Edges output
- Test
    - Nodes features
    - Edges features input

In [1]:
%autosave 60

Autosaving every 60 seconds


In [2]:
use_multiGPU = False

In [3]:
import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "0" # turn on/off GPU run
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # disable debugging logs

import numpy as np
import pandas as pd
import tensorflow as tf
import networkx
import ase
import ase.visualize

from tqdm import tqdm
from sklearn.utils import shuffle

print("Import done!")

Import done!


In [4]:
tf.test.is_gpu_available(
    cuda_only=False, min_cuda_compute_capability=None
)

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


2021-07-16 10:55:17.076979: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set


False

2021-07-16 10:55:17.092241: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2021-07-16 10:55:17.433709: E tensorflow/stream_executor/cuda/cuda_driver.cc:328] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2021-07-16 10:55:17.433797: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (jwlogin23.juwels): /proc/driver/nvidia/version does not exist


In [5]:
# see a list of GPU devices
devices = tf.config.experimental.list_physical_devices("GPU")
devices

2021-07-16 10:55:17.489464: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set


[]

In [6]:
strategy = tf.distribute.MirroredStrategy(devices=['GPU:0', 'GPU:1', 'GPU:2', 'GPU:3'])

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')


2021-07-16 10:55:17.502964: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set


In [4]:
# dir_dataset = "test/j-coupling-dataset/"
dir_dataset = "../from-kaggle/"
nodes_train = np.load(dir_dataset + "nodes_train.npz")['arr_0'][:1000]
in_edges_train = np.load(dir_dataset + "in_edges_train.npz")['arr_0'][:1000]
# out_edges_train = np.load(dir_dataset + "out_edges_train.npz")['arr_0'][:1000]

nodes_test = np.load(dir_dataset + "nodes_test.npz")['arr_0'][:500]
in_edges_test = np.load(dir_dataset + "in_edges_test.npz")['arr_0'][:500]

In [5]:
print(nodes_train.shape)
print(in_edges_train.shape)
# print(out_edges_train.shape)
print(nodes_test.shape)
print(in_edges_test.shape)

(1000, 29, 5)
(1000, 29, 29, 16)
(500, 29, 5)
(500, 29, 29, 16)


In [9]:
out_labels = np.random.randn(1000, 1)

In [7]:
out_labels.shape

(1000, 1)

In [11]:
# out_labels = out_edges_train.reshape(-1, out_edges_train.shape[1]*out_edges_train.shape[2], 1)
in_edges_train = in_edges_train.reshape(-1, in_edges_train.shape[1]*in_edges_train.shape[2], in_edges_train.shape[3])
in_edges_test = in_edges_test.reshape(-1, in_edges_test.shape[1]*in_edges_test.shape[2], in_edges_test.shape[3])

In [12]:
nodes_train, in_edges_train, out_labels = shuffle(nodes_train, in_edges_train, out_labels)

## Message Passing Neural Network

Implement according to Gilmer et al. https://arxiv.org/abs/1704.01212

### Build message parser

Define message functions $M_{t}$ in $m^{t+1}_{v} =\sum_{w \in N(v)} M_{t}(h^{t}_{v}, h^{t}_{w}, e_{v,w})$

$
\begin{align}
M_{t}(h^{t}_{v}, h^{t}_{w}, e_{v,w}) &= NN(e_{v w}) h^{t}_{w} \\
&= A_{e_{v w}} h^{t}_{w}
\end{align}
$

In [83]:
class Message_Passer(tf.keras.layers.Layer):
    def __init__(self, node_dim):
        super(Message_Passer, self).__init__()
        self.node_dim = node_dim
        self.nn = tf.keras.layers.Dense(units=self.node_dim*self.node_dim, activation=tf.nn.relu)
      
    def call(self, node_j, edge_ij):
        # Embed the edge as a matrix
        A = self.nn(edge_ij)
        
        # Reshape so matrix mult can be done
        A = tf.reshape(A, [-1, self.node_dim, self.node_dim])
        node_j = tf.reshape(node_j, [-1, self.node_dim, 1])
        
        # Multiply edge matrix by node and shape into message list
        messages = tf.linalg.matmul(A, node_j)
        messages = tf.reshape(messages, [-1, tf.shape(edge_ij)[1], self.node_dim])

        return messages

### Build aggregator

Aggregate all $M_{t}$ to update message being sent to node $v$ at state $t+1$

In [84]:
class Message_Agg(tf.keras.layers.Layer):
    def __init__(self):
        super(Message_Agg, self).__init__()
    
    def call(self, messages):
        return tf.math.reduce_sum(messages, 2)

### Build update function - GRU

$
h^{t+1}_{v} = U_{t}(h^{t}_{v}, m^{t+1}_{v}) \\
U_{t}(h^{t}_{v}, m^{t+1}_{v}) = GRU(h^{t}_{v}, m^{t+1}_{v})
$

In [85]:
class Update_Func_GRU(tf.keras.layers.Layer):
    def __init__(self, state_dim):
        super(Update_Func_GRU, self).__init__()
        self.concat_layer = tf.keras.layers.Concatenate(axis=1)
        self.GRU = tf.keras.layers.GRU(state_dim)
        
    def call(self, old_state, agg_messages):
        # Remember node dim
        n_nodes  = tf.shape(old_state)[1]
        node_dim = tf.shape(old_state)[2]
        
        # Reshape so GRU can be applied, concat so old_state and messages are in sequence
        old_state = tf.reshape(old_state, [-1, 1, tf.shape(old_state)[-1]])
        agg_messages = tf.reshape(agg_messages, [-1, 1, tf.shape(agg_messages)[-1]])
        concat = self.concat_layer([old_state, agg_messages])
        
        # Apply GRU and then reshape so it can be returned
        activation = self.GRU(concat)
        new_state = tf.reshape(activation, [-1, n_nodes, node_dim])
        
        return new_state

### Output layer
Define readout function ($R$) as a neural network:

$\hat{y} = R(\{h^{T}_{v} | v \in G \})$

In [108]:
class Node_Regressor(tf.keras.layers.Layer):
    def __init__(self, intermediate_dim):
        super(Node_Regressor, self).__init__()
        self.concat_layer = tf.keras.layers.Concatenate()
        self.hidden_layer_1 = tf.keras.layers.Dense(units=intermediate_dim, activation=tf.nn.relu)
        self.hidden_layer_2 = tf.keras.layers.Dense(units=intermediate_dim, activation=tf.nn.relu)
        self.output_layer = tf.keras.layers.Dense(units=1, activation=None)
        
    def call(self, nodes):
        n_nodes = tf.shape(nodes)[1]
        node_dim = tf.shape(nodes)[2]
        
        # Tile and reshape to match edges
        state_i = tf.tile(nodes, [1, 1, n_nodes])
        state_i = tf.reshape(state_i, [-1, n_nodes*n_nodes, node_dim])
        state_j = tf.tile(nodes, [1, n_nodes, 1])
        
        concat = self.concat_layer([state_i, state_j])
        act_1 = self.hidden_layer_1(concat)
        act_2 = self.hidden_layer_2(act_1)
        
        return self.output_layer(act_2)

In [113]:
class Edge_Regressor(tf.keras.layers.Layer):
    def __init__(self, intermediate_dim):
        super(Edge_Regressor, self).__init__()
        self.concat_layer = tf.keras.layers.Concatenate()
        self.hidden_layer_1 = tf.keras.layers.Dense(units=intermediate_dim, activation=tf.nn.relu)
        self.hidden_layer_2 = tf.keras.layers.Dense(units=intermediate_dim, activation=tf.nn.relu)
        self.output_layer = tf.keras.layers.Dense(units=1, activation=None)

    def call(self, nodes, edges):
        print(nodes.shape)
        n_nodes = tf.shape(nodes)[1]
        node_dim = tf.shape(nodes)[2]
        
        # Tile and reshape to match edges
        state_i = tf.tile(nodes, [1, 1, n_nodes])
        state_i = tf.reshape(state_i, [-1, n_nodes*n_nodes, node_dim])
        state_j = tf.tile(nodes, [1, n_nodes, 1])
        
        # concat nodes at state i and j, and edges, and then apply MLP
        print(state_i.shape)
        print(edges.shape)
        print(state_j.shape)
        concat = self.concat_layer([state_i, edges, state_j])
        print(concat.shape)
        act_1 = self.hidden_layer_1(concat)
        print(act_1.shape)
        act_2 = self.hidden_layer_2(act_1)
        print(act_2.shape)

        return self.output_layer(act_2)

In [114]:
nodes_train[:10].shape

(10, 29, 5)

In [115]:
ok = Edge_Regressor(512)
ok.call(nodes_train[:10], in_edges_train[:10])

(10, 29, 5)
(10, 29, 145)
(10, 841, 5)
(10, 841, 16)
(10, 841, 5)
(10, 841, 26)
(10, 841, 512)
(10, 841, 512)


<tf.Tensor: shape=(10, 841, 1), dtype=float32, numpy=
array([[[-0.04952043],
        [-0.02957275],
        [-0.02791196],
        ...,
        [ 0.        ],
        [ 0.        ],
        [ 0.        ]],

       [[ 0.02576611],
        [ 0.01177338],
        [ 0.05215303],
        ...,
        [ 0.        ],
        [ 0.        ],
        [ 0.        ]],

       [[ 0.02598085],
        [-0.04252946],
        [ 0.01289575],
        ...,
        [ 0.        ],
        [ 0.        ],
        [ 0.        ]],

       ...,

       [[ 0.02598085],
        [-0.0425198 ],
        [ 0.01286001],
        ...,
        [ 0.        ],
        [ 0.        ],
        [ 0.        ]],

       [[ 0.02598085],
        [-0.04249534],
        [-0.00568492],
        ...,
        [ 0.        ],
        [ 0.        ],
        [ 0.        ]],

       [[ 0.02598085],
        [-0.04252305],
        [ 0.01288661],
        ...,
        [ 0.        ],
        [ 0.        ],
        [ 0.        ]]], dtype=float32)>

### Build Single Message Passing Layer

In [90]:
class MP_Layer(tf.keras.layers.Layer):
    def __init__(self, state_dim):
        super(MP_Layer, self).__init__(self)
        self.message_passer = Message_Passer(node_dim=state_dim) 
        self.message_aggs = Message_Agg()
        self.update_functions = Update_Func_GRU(state_dim=state_dim)
        self.state_dim = state_dim

    def call(self, nodes, edges, mask):
        n_nodes = tf.shape(nodes)[1]
        node_dim = tf.shape(nodes)[2]

        state_j = tf.tile(nodes, [1, n_nodes, 1])

        messages = self.message_passer(state_j, edges)

        # Do this to ignore messages from non-existant nodes
        masked = tf.math.multiply(messages, mask)
        masked = tf.reshape(masked, [tf.shape(messages)[0], n_nodes, n_nodes, node_dim])
        agg_m = self.message_aggs(masked)
        
        updated_nodes = self.update_functions(nodes, agg_m)
        
        nodes_out = updated_nodes
        # Batch norm seems not to work. 
        # nodes_out = self.batch_norm(updated_nodes)
        
        return nodes_out

### Formulate MPNN

In [99]:
nod_input = tf.keras.Input(shape=(None,), name='nod_input')
adj_input = tf.keras.Input(shape=(None,), name='adj_input')

class MPNN(tf.keras.Model):
    def __init__(self, out_int_dim, state_dim, T):
        super(MPNN, self).__init__(self)   
        self.T = T
        self.embed = tf.keras.layers.Dense(units=state_dim, activation=tf.nn.relu)
        self.MP = MP_Layer(state_dim)
        self.edge_regressor  = Edge_Regressor(out_int_dim)
        #self.batch_norm = tf.keras.layers.BatchNormalization()

    def call(self, inputs=[nod_input, adj_input]):
        nodes = inputs[0]
        edges = inputs[1]
        # Get distances, and create mask wherever 0 (i.e. non-existant nodes)
        # This also masks node self-interactions...
        # This assumes distance is last
        len_edges = tf.shape(edges)[-1]

        _, x = tf.split(edges, [len_edges - 1, 1], 2)
        mask = tf.where(tf.equal(x, 0), x, tf.ones_like(x))

        # Embed node to be of the chosen node dimension
        nodes = self.embed(nodes)
        
        # nodes = self.batch_norm(nodes)
        # Run the T message passing steps
        for mp in tqdm(range(self.T)):
            nodes = self.MP(nodes, edges, mask)
            
        # Regress the output values
        con_edges = self.edge_regressor(nodes, edges)
        
        return con_edges

### Define metrics (loss)

Supported now:
- MSE
- Log MSE

In [100]:
def mse(orig , preds):
    # Mask values for which no scalar coupling exists
    mask  = tf.where(tf.equal(orig, 0), orig, tf.ones_like(orig))

    nums  = tf.boolean_mask(orig,  mask)
    preds = tf.boolean_mask(preds,  mask)

    reconstruction_error = tf.reduce_mean(tf.square(tf.subtract(nums, preds)))

    return reconstruction_error

In [101]:
def log_mse(orig , preds):
    # Mask values for which no scalar coupling exists
    mask  = tf.where(tf.equal(orig, 0), orig, tf.ones_like(orig))

    nums  = tf.boolean_mask(orig,  mask)
    preds = tf.boolean_mask(preds,  mask)

    reconstruction_error = tf.math.log(tf.reduce_mean(tf.square(tf.subtract(nums, preds))))

    return reconstruction_error

### Define callback

In [102]:
learning_rate = 0.001
def step_decay(epoch):
    initial_lrate = learning_rate
    drop = 0.1
    epochs_drop = 20.0
    lrate = initial_lrate * np.power(drop, np.floor((epoch)/epochs_drop))
    tf.print("Learning rate: ", lrate)
    return lrate

lrate = tf.keras.callbacks.LearningRateScheduler(step_decay)
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience = 15, restore_best_weights=True)

# lrate  =  tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=0.00001, verbose = 1)

### Define optimizer
For training model on multi-gpu, optimizer needs to be defined under the scope as well.

In [103]:
if not use_multiGPU:
    opt = tf.optimizers.Adam(learning_rate=learning_rate)
else:
    with strategy.scope():
        opt = tf.optimizers.Adam(learning_rate=learning_rate)

### Construct a model and compile

In [104]:
if not use_multiGPU:
    mpnn = MPNN(out_int_dim=512, state_dim=128, T=4)
    mpnn.compile(opt, mse, metrics = [mse, log_mse])
else:
    with strategy.scope():
        mpnn = MPNN(out_int_dim=512, state_dim=128, T=4)
        mpnn.compile(opt, mse, metrics=[mse, log_mse])

In [105]:
# test call
mpnn.call([nodes_train[:10], in_edges_train[:10]])

100%|██████████| 4/4 [00:00<00:00, 16.47it/s]

(10, 29, 128)
(10, 841, 128)
(10, 841, 16)
(10, 841, 128)
(10, 841, 272)
(10, 841, 512)
(10, 841, 512)
(10, 841, 1)
ok





<tf.Tensor: shape=(10, 841, 1), dtype=float32, numpy=
array([[[ 1.8868537e-04],
        [ 2.4864132e-02],
        [ 5.6891907e-03],
        ...,
        [ 0.0000000e+00],
        [ 0.0000000e+00],
        [ 0.0000000e+00]],

       [[ 4.6886897e-05],
        [ 2.4823228e-02],
        [ 6.0076960e-03],
        ...,
        [ 0.0000000e+00],
        [ 0.0000000e+00],
        [ 0.0000000e+00]],

       [[ 6.7952863e-04],
        [ 3.6223266e-02],
        [ 5.0405585e-03],
        ...,
        [ 0.0000000e+00],
        [ 0.0000000e+00],
        [ 0.0000000e+00]],

       ...,

       [[-9.5222786e-06],
        [ 3.5439461e-02],
        [ 4.3027271e-03],
        ...,
        [ 0.0000000e+00],
        [ 0.0000000e+00],
        [ 0.0000000e+00]],

       [[ 5.0220609e-04],
        [ 3.5291258e-02],
        [ 5.3370581e-03],
        ...,
        [ 0.0000000e+00],
        [ 0.0000000e+00],
        [ 0.0000000e+00]],

       [[ 7.2502252e-04],
        [ 3.5441000e-02],
        [ 6.3309888e-03],


### Start training

In [27]:
train_size = int(len(out_labels)*0.8)
batch_size = 16
epochs = 2

In [28]:
train_data_x = tf.data.Dataset.from_tensor_slices((nodes_train[:train_size], in_edges_train[:train_size]))
train_data_y = tf.data.Dataset.from_tensor_slices(out_labels[:train_size])

train_data = tf.data.Dataset.zip((train_data_x, train_data_y)).batch(batch_size)

valid_data_x = tf.data.Dataset.from_tensor_slices((nodes_train[train_size:], in_edges_train[train_size:]))
valid_data_y = tf.data.Dataset.from_tensor_slices(out_labels[train_size:])

valid_data = tf.data.Dataset.zip((valid_data_x, valid_data_y)).batch(batch_size)

# Disable AutoShard.
options = tf.data.Options()
options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
train_data = train_data.with_options(options)
valid_data= valid_data.with_options(options)

In [71]:
mpnn.fit(train_data, 
         batch_size = batch_size, 
         epochs = epochs, 
         callbacks = [lrate, stop_early], 
         use_multiprocessing = True, 
         initial_epoch = 0, 
         verbose = 1, 
         validation_data = valid_data)

Epoch 1/2
Learning rate:  0.001


100%|██████████| 4/4 [00:00<00:00,  6.13it/s]


ValueError: in user code:

    /p/home/jusers/ketkaew1/juwels/jupyter/kernels/py38-tf/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:805 train_function  *
        return step_function(self, iterator)
    /tmp/ipykernel_24066/1473428871.py:6 mse  *
        preds = tf.boolean_mask(preds,  mask)
    /p/home/jusers/ketkaew1/juwels/jupyter/kernels/py38-tf/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:201 wrapper  **
        return target(*args, **kwargs)
    /p/home/jusers/ketkaew1/juwels/jupyter/kernels/py38-tf/lib/python3.8/site-packages/tensorflow/python/ops/array_ops.py:1831 boolean_mask_v2
        return boolean_mask(tensor, mask, name, axis)
    /p/home/jusers/ketkaew1/juwels/jupyter/kernels/py38-tf/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /p/home/jusers/ketkaew1/juwels/jupyter/kernels/py38-tf/lib/python3.8/site-packages/tensorflow/python/ops/array_ops.py:1751 boolean_mask
        shape_tensor[axis:axis + ndims_mask].assert_is_compatible_with(shape_mask)
    /p/home/jusers/ketkaew1/juwels/jupyter/kernels/py38-tf/lib/python3.8/site-packages/tensorflow/python/framework/tensor_shape.py:1134 assert_is_compatible_with
        raise ValueError("Shapes %s and %s are incompatible" % (self, other))

    ValueError: Shapes (None, 841) and (None, 1) are incompatible


In [67]:
# mpnn.fit([nodes_train[:train_size], in_edges_train[:train_size]], 
#          y = out_labels[:train_size], 
#          batch_size = batch_size, 
#          epochs = epochs, 
#          callbacks = [lrate, stop_early], 
#          use_multiprocessing = True, 
#          initial_epoch = 0, 
#          verbose = 1, 
#          validation_data = ([nodes_train[train_size:], in_edges_train[train_size:]],
#                             out_labels[train_size:]))

Epoch 1/2
Learning rate:  0.001


2021-07-16 00:48:56.339532: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-07-16 00:48:56.353625: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2799890000 Hz
100%|██████████| 4/4 [00:00<00:00,  6.27it/s]


ValueError: in user code:

    /p/home/jusers/ketkaew1/juwels/jupyter/kernels/py38-tf/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:805 train_function  *
        return step_function(self, iterator)
    /tmp/ipykernel_24066/1473428871.py:6 mse  *
        preds = tf.boolean_mask(preds,  mask)
    /p/home/jusers/ketkaew1/juwels/jupyter/kernels/py38-tf/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:201 wrapper  **
        return target(*args, **kwargs)
    /p/home/jusers/ketkaew1/juwels/jupyter/kernels/py38-tf/lib/python3.8/site-packages/tensorflow/python/ops/array_ops.py:1831 boolean_mask_v2
        return boolean_mask(tensor, mask, name, axis)
    /p/home/jusers/ketkaew1/juwels/jupyter/kernels/py38-tf/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /p/home/jusers/ketkaew1/juwels/jupyter/kernels/py38-tf/lib/python3.8/site-packages/tensorflow/python/ops/array_ops.py:1751 boolean_mask
        shape_tensor[axis:axis + ndims_mask].assert_is_compatible_with(shape_mask)
    /p/home/jusers/ketkaew1/juwels/jupyter/kernels/py38-tf/lib/python3.8/site-packages/tensorflow/python/framework/tensor_shape.py:1134 assert_is_compatible_with
        raise ValueError("Shapes %s and %s are incompatible" % (self, other))

    ValueError: Shapes (16, 841) and (16, 1) are incompatible


### Prediction

In [68]:
preds = mpnn.predict([nodes_test, in_edges_test], use_multiprocessing=True, verbose=1)
np.save("preds_kernel.npy" , preds)

100%|██████████| 4/4 [00:00<00:00,  6.37it/s]




In [69]:
preds.shape

(500, 841, 1)