In [1]:
# implement normalization

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import mean_absolute_error, r2_score
from tensorflow.python.keras.callbacks import TensorBoard 
from keras import backend as K

Using TensorFlow backend.


In [2]:
class GCN(layers.Layer):
    """
    A graph convolution model
    """
    tf.reset_default_graph()
    def __init__(self, output_dim,  **kwargs): # comment out inputs=[x, y],
        self.output_dim = output_dim
        super(GCN, self).__init__(**kwargs)

    def build(self, input_shape):
        # print(input_shape[0][2])
        shape = tf.TensorShape((input_shape[0][2], self.output_dim))
        shape = [int(shape[0]),int(shape[1])] # [50 , 32]
        
        with K.name_scope('Parameters'):
            with K.name_scope('Weight_' + str(shape[1])):
                self.kernel = self.add_weight(name='conv_weight',
                                      shape=shape,
                                      initializer='glorot_uniform',
                                      trainable=True,dtype=tf.float32)
            with K.name_scope('Bias_'+str(shape[1])):
                self.bias = self.add_weight(name='conv_bias',
                                    shape=[shape[1]],
                                    initializer='glorot_uniform',
                                    trainable=True,dtype=tf.float32)

    def call(self, input):
        X, A = input[0], input[1]
        dim = self.kernel.get_shape()[1]
        num_atoms = A.get_shape()[1]
        with K.name_scope('Prop.Rule_'+ str(X.get_shape()[2])):
            _b = tf.reshape(tf.tile(self.bias, [num_atoms]), [num_atoms, dim])
            _X = tf.einsum('ijk,kl->ijl', X, self.kernel) + _b
        #_X = get_skip_connection(_X, X)
            return _X

class G2N(layers.Layer):
    """
    A layer to sum the node feature to preserve the permutation invariance
    """
    def __init__(self, output_dim,  **kwargs):
        self.output_dim = output_dim
        super(G2N, self).__init__(**kwargs)

    def build(self, input_shape):
        shape = tf.TensorShape((input_shape[2], self.output_dim))
        shape = [int(shape[0]),int(shape[1])] # [50 , 32]
        
        with K.name_scope('Parameters'):
            with K.name_scope('Weight'+str(shape[1])):
                self.kernel = self.add_weight(name='permu_weight',
                                          shape=shape,
                                          initializer='glorot_uniform',
                                          trainable=True,dtype=tf.float32)

    def call(self, X):
        #print(X.get_shape())
        #print(self.kernel.get_shape())
        with K.name_scope('ATOMWISE'):
            Z = tf.einsum('ijk,kl->ijl', X, self.kernel)
            Z = tf.nn.relu(Z)
            Z = tf.nn.sigmoid(tf.reduce_sum(Z, 1))
            return Z

class mymodel(tf.keras.Model):
    """
    A generallized model for the prediction of properties from graph
    """
    def __init__(self):#, inputs=[x, y]):
        
        super(mymodel, self).__init__(name='')
        self.gcn1 = GCN(32) #, input_shape=[58]
        #self.gcn2 = GCN(32) #, input_shape=[58])
        self.gcn3 = GCN(32) #, input_shape=[58])
        self.g2n = G2N(64)#, input_shape=[58])
        self.dense1 = layers.Dense(64, activation=tf.nn.relu, input_shape=[64])
        self.dense2 = layers.Dense(64, activation=tf.nn.relu, input_shape=[64])
        self.dense3 = layers.Dense(1)

    def call(self, input):
        X, A = input[0], input[1]
        print(X,A)
        with K.name_scope('graph_conv-1.{}'.format(X.get_shape()[2])):
            x = tf.nn.relu(self.gcn1([X, A]))
        with K.name_scope('graph_conv-2.{}'.format(x.get_shape()[2])):
            #x = tf.nn.relu(self.gcn2(x))
            x = tf.nn.relu(self.gcn3([x, A]))
        with K.name_scope('Perm-Invariance'):
            x = tf.nn.relu(self.g2n(x))
        with K.name_scope('Latent_Space-64'):
            x = self.dense1(x)
        # molecule_embedding1 = tf.get_variable('embedding 1',[32, 64])
        # embedded_molecules = [tf.nn.embedding_lookup(molecule_embedding1, x)]
        with K.name_scope('Latent_Space-64'):
            x = self.dense2(x)
        # molecule_embedding2 = tf.get_variable('embedding 2',[64, 64])
        # embedded_molecules.append(tf.nn.embedded_molecules(molecule_embedding2,x))
        with K.name_scope('Output'):
            x = self.dense3(x)
        # molecule_embedding3 = tf.get_variable('embedding1',[64, 1])
        # embedded_molecules = tf.nn.embedding_lookup(molecule_embedding3,[100])
        
        return x    

def load_data(path="/Users/b_eebs/tf-keras/data/", ids=10000): 
    """
    load the source data from somewhere
    """
    features = np.load(path+'fea.npy')[:ids] 
    adj = np.load(path+'adj.npy')[:ids]
    prop = np.load(path+'prop.npy')[:ids]
    #features = np.reshape(features, [ids, features.shape[1]*features.shape[2]])
    return adj, features, prop

class Progress(keras.callbacks.Callback):
    """
    A simple function to show the progress
    """
    def on_epoch_end(self, epoch, logs):
        if epoch % 10 == 0:
             print('epoch-----', epoch, logs)



## Instead of writing script to plot specified values, we can use tensorboard to plot several parameters with an addition of a couple of lines.

## We can also plot the bias and weights along with their gradients.

## Embeddings are still in progress

In [None]:
## to do an embedding, nice to include embedding data, still looking into this.
# - load in a metadata set from another .npy file

## normally you have to restart the whole notebook before running, or find a way to dump all the embeddings. 
## K.clear_session() doesn't always work. Furthermore, if you want to re-reun the EXACT SAME training process, 
## delete your previous log file first. The results won't overwrite automatically.

adj, features, props = load_data() # before running you may have to change load_data path
print('shape of feature: ', np.shape(features))
model = mymodel()
lr = 0.001

"""

"""

tensorboard = TensorBoard(log_dir="./summaries/ADAM-0.001",histogram_freq=10,batch_size=100, 
                          write_graph=True, write_grads=True, update_freq='batch')

optimizer = tf.keras.optimizers.Adam(lr)

model.compile(loss='mean_squared_error',
              optimizer=optimizer,
              metrics=['mean_absolute_error', 'mean_squared_error'])

input = {'input_1': features, 'input_2': adj}

history = model.fit(x=[features, adj], y=props, batch_size=100, 
                    epochs=1000, validation_split=0.1, verbose=0,
                    callbacks=[tensorboard])
model.summary()

K.clear_session()



In [None]:
# -----------------------------------to test other optimizers--------------------------------------
## restart the whole kernel before running this cell

adj, features, props = load_data() # before running you may have to change load_data path

model = mymodel()
lr = 0.001

tensorboard = TensorBoard(log_dir="./summaries/ADAMMAX-0.001",histogram_freq=10,batch_size=100, 
                          write_graph=True, write_grads=True, update_freq='batch')

optimizer = tf.keras.optimizers.Adamax(lr)
model.compile(loss='mean_squared_error',
              optimizer=optimizer,
              metrics=['mean_absolute_error', 'mean_squared_error'])
input = {'input_1': features, 'input_2': adj}
history = model.fit(x=[features, adj], y=props, batch_size=100, 
                    epochs=1000, validation_split=0.1, verbose=0,
                    callbacks=[tensorboard])



model.summary()
K.clear_session()

In [None]:
# restart kernel

adj, features, props = load_data() # before running you may have to change load_data path

model = mymodel()
lr = 0.001

tensorboard = TensorBoard(log_dir="./summaries/SGD-0.001",histogram_freq=10,batch_size=100, 
                          write_graph=True, write_grads=True, update_freq='batch')
optimizer = tf.keras.optimizers.Adam(lr)
model.compile(loss='mean_squared_error',
              optimizer=optimizer,
              metrics=['mean_absolute_error', 'mean_squared_error'])
input = {'input_1': features, 'input_2': adj}
history = model.fit(x=[features, adj], y=props, batch_size=100, 
                    epochs=1000, validation_split=0.1, verbose=0,
                    callbacks=[tensorboard])

In [4]:
## restart kernel

adj, features, props = load_data() # before running you may have to change load_data path

model = mymodel()
lr = 0.001

tensorboard = TensorBoard(log_dir="./summaries/RMS-0.001",histogram_freq=10,batch_size=100, 
                          write_graph=True, write_grads=True, update_freq='batch')
optimizer = tf.keras.optimizers.RMSprop(lr)
model.compile(loss='mean_squared_error',
              optimizer=optimizer,
              metrics=['mean_absolute_error', 'mean_squared_error'])
input = {'input_1': features, 'input_2': adj}
history = model.fit(x=[features, adj], y=props, batch_size=100, 
                    epochs=1000, validation_split=0.1, verbose=0,
                    callbacks=[tensorboard])
model.summary()
K.clear_session()

Instructions for updating:
Colocations handled automatically by placer.
Tensor("input_1:0", shape=(?, 50, 58), dtype=float32) Tensor("input_2:0", shape=(?, 50, 50), dtype=float32)
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gcn_2 (GCN)                  multiple                  1888      
_________________________________________________________________
gcn_3 (GCN)                  multiple                  1056      
_________________________________________________________________
g2n_1 (G2N)                  multiple                  2048      
_________________________________________________________________
dense_3 (Dense)              multiple                  4160      
_________________________________________________________________
dense_4 (Dense)              multiple                  4160     

# To do:
### - interpret plots
### - embedding
### - augmented gcn tensorboard
### - look at geometric torch
### - look at our pytorch implementation
### - undergrad research conference abstract 
### - look into runtime metadata!

https://jhui.github.io/2017/03/12/TensorBoard-visualize-your-learning/