Code implementation references:

1) The paper "Beltrami Flow and Neural Diffusion on Graphs" by Chamberlain et al.

2) https://keras.io/examples/graph/gat_node_classification/

In [None]:
import tensorflow as tf
from tensorflow import keras
import keras.layers as layers
import numpy as np
import pandas as pd
import os
import sklearn
import warnings

# Notebook-wide setup: hide warnings, keep DataFrame printouts compact
# (at most 6 columns and 6 rows), and seed NumPy's global random generator.
warnings.simplefilter("ignore")
pd.options.display.max_columns = 6
pd.options.display.max_rows = 6
np.random.seed(seed=2)

In [None]:
# Fetch the Cora archive through the Keras download helper. The code below
# expects the extracted files in a "cora" directory next to the returned path.
zip_file = keras.utils.get_file(
    origin="https://linqs-data.soe.ucsc.edu/public/lbc/cora.tgz",
    fname="cora.tgz",
    extract=True,
)

data_dir = os.path.join(os.path.dirname(zip_file), "cora")

# Citation table: tab-separated, no header, one (target, source) pair per row
citations = pd.read_csv(
    os.path.join(data_dir, "cora.cites"),
    sep="\t",
    header=None,
    names=["target", "source"],
)

# Paper table: paper id, 1433 term columns, then the subject label
papers = pd.read_csv(
    os.path.join(data_dir, "cora.content"),
    sep="\t",
    header=None,
    names=["paper_id", *(f"term_{idx}" for idx in range(1433)), "subject"],
)

# Map subjects and raw paper ids to consecutive integers (in sorted order)
class_values = sorted(papers["subject"].unique())
class_idx = dict(zip(class_values, range(len(class_values))))
paper_idx = {
    raw_id: position for position, raw_id in enumerate(sorted(papers["paper_id"]))
}

# Replace raw identifiers by their integer index in both tables
papers["paper_id"] = papers["paper_id"].apply(paper_idx.__getitem__)
citations["source"] = citations["source"].apply(paper_idx.__getitem__)
citations["target"] = citations["target"].apply(paper_idx.__getitem__)
papers["subject"] = papers["subject"].apply(class_idx.__getitem__)

print(citations)
print(papers)

In [None]:
# Shuffle the row positions of `papers` (uses NumPy's global random state)
random_indices = np.random.permutation(range(len(papers)))

# First half of the shuffled rows is the training set, the remainder is the
# test set (the test set gets the extra row when the count is odd)
train_data = papers.iloc[random_indices[: random_indices.size // 2]]
test_data = papers.iloc[random_indices[random_indices.size // 2 :]]

In [None]:
# Re-indexed paper ids of the train/test rows; these are meant to select
# node states from the graph when the model is trained
train_indices = np.asarray(train_data["paper_id"])
test_indices = np.asarray(test_data["paper_id"])

# Integer subject labels aligned with the index arrays above
train_labels = np.asarray(test_data["subject"]) if False else np.asarray(train_data["subject"])
test_labels = np.asarray(test_data["subject"])

# Graph tensors, with papers ordered by paper_id:
#   nodes    - the paper_id column alone
#   edges    - (target, source) index pairs from the citation table
#   features - the term columns (everything except paper_id and subject)
#   joint    - nodes and features side by side
nodes = tf.convert_to_tensor(papers.sort_values("paper_id")[["paper_id"]])
edges = tf.convert_to_tensor(citations[["target", "source"]])
features = tf.convert_to_tensor(
    papers.sort_values("paper_id").drop(columns=["paper_id", "subject"])
)
joint = tf.convert_to_tensor(np.concatenate([nodes, features], axis=1))

# Report the shapes, then dump the tensors themselves
print("Nodes shape:", nodes.shape)
print("Edges shape:", edges.shape)
print("Node features shape:", features.shape)
print("Joint shape:", joint.shape)
print(nodes)
print(edges)
print(joint)

In [None]:
# Prepare data for diffusivity calculation
def data_prep(edges, features):
  """Collect, for every edge, the feature rows of its two endpoint nodes.

  The lookup is done with two vectorised gathers, so row k of each returned
  tensor corresponds exactly to row k of `edges` (no padding rows).

  Args:
    edges: Integer tensor of shape (num_edges, 2). Column 0 holds the node
      index of the first endpoint, column 1 the node index of the second.
    features: Tensor of shape (num_nodes, num_features); row n holds the
      features of node n.

  Returns:
    A pair `(feats_i, feats_j)` of float64 tensors, each of shape
    (num_edges, num_features): the features of `edges[:, 0]` and of
    `edges[:, 1]` respectively.
  """
  # One gather per endpoint column replaces the per-edge stacking loop.
  # NOTE: node indices must be valid row indices of `features`.
  feats_i = tf.gather(features, edges[:, 0])
  feats_j = tf.gather(features, edges[:, 1])

  # Return float64, the dtype that stacking onto float zero arrays produced.
  return tf.cast(feats_i, tf.float64), tf.cast(feats_j, tf.float64)

# Display the two feature tensors that data_prep returns for the Cora graph
print(data_prep(edges, features))

In [None]:
# Convert the joint (node id + features) tensor to double precision
# (tf.double is TensorFlow's alias for float64)
joint = tf.cast(joint, dtype=tf.double)

# Calculate diffusivity, and propagate it over the graph
def forward_diffusion(edges, alpha=1):
  """Compute one normalised diffusivity value per edge.

  For an edge (i, j) the "gradient" is the feature difference divided by the
  node-index difference, g = (x_j - x_i) / (j - i), and the diffusivity is
  1 / sqrt(1 + alpha^2 * ||g||^2). The column of diffusivities is then
  L2-normalised over all edges.

  Node features are read from the module-level `features` tensor and are
  gathered directly by the endpoint indices, so row k of the result always
  belongs to row k of `edges`.

  Args:
    edges: Integer tensor of shape (num_edges, 2) with the node indices of
      the two endpoints of each edge (column 0 = i, column 1 = j).
    alpha: Scaling factor applied (squared) to the squared gradient norm.

  Returns:
    A float64 tensor of shape (num_edges, 1) holding the L2-normalised
    diffusivities.
  """
  # Node-index difference (j - i) per edge, kept as a column for broadcasting
  endpoints = tf.cast(edges, tf.float64)
  id_gap = endpoints[:, 1:2] - endpoints[:, 0:1]

  # Feature difference (x_j - x_i) per edge
  feats = tf.cast(features, tf.float64)
  feat_gap = tf.gather(feats, edges[:, 1]) - tf.gather(feats, edges[:, 0])

  # ||g||^2 = ||x_j - x_i||^2 / (j - i)^2. divide_no_nan yields 0 for a
  # self-loop (i == j), which would otherwise give 0/0 = NaN and turn the
  # whole normalised output into NaN.
  sq_dist = tf.reduce_sum(tf.square(feat_gap), axis=1, keepdims=True)
  grad_sq_norm = tf.math.divide_no_nan(sq_dist, tf.square(id_gap))

  diff = 1.0 / tf.sqrt(1.0 + (alpha**2) * grad_sq_norm)
  return tf.math.l2_normalize(diff, 0)

In [None]:
# Apply diffusion to positions and features one time
class BeltramiLayer(layers.Layer):
  """One diffusion step over the joint (position + feature) node states.

  The layer linearly transforms the node states, weights every edge with the
  diffusivity returned by `forward_diffusion` (the counterpart of an
  attention score) and sums the weighted neighbour states into each node.

  The layer is called with a single list, `layer([joint, edges])`, which is
  the convention `build` relies on (`input_shape[0]` is the shape of `joint`).

  Args:
    units: Width of the transformed node states.
    kernel_initializer: Initializer for the layer weights.
    kernel_regularizer: Optional regularizer for the layer weights.
    **kwargs: Forwarded to the Keras `Layer` constructor.
  """

  def __init__(self, units, kernel_initializer="glorot_uniform", kernel_regularizer=None, **kwargs):
    super().__init__(**kwargs)
    # Module-level edge tensor, retained as an attribute; `call` itself uses
    # the edges passed in its inputs.
    self.edges = edges
    self.units = units
    self.kernel_initializer = keras.initializers.get(kernel_initializer)
    self.kernel_regularizer = keras.regularizers.get(kernel_regularizer)

  def build(self, input_shape):
    """Create the weights; `input_shape[0]` is the shape of the node states."""
    self.kernel = self.add_weight(
        shape=(input_shape[0][-1], self.units),
        trainable=True,
        initializer=self.kernel_initializer,
        regularizer=self.kernel_regularizer,
        name="kernel",
    )
    # NOTE(review): this weight is created but not used by `call`; it is kept
    # so the set of layer weights stays the same.
    self.kernel_attention = self.add_weight(
        shape=(self.units * 2, 1),
        trainable=True,
        initializer=self.kernel_initializer,
        regularizer=self.kernel_regularizer,
        name="kernel_attention",
    )
    self.built = True

  def call(self, inputs):
    """Apply one diffusion step.

    Args:
      inputs: A list `[joint, edges]` where `joint` has shape
        (num_nodes, input_dim) and `edges` is an integer tensor of shape
        (num_edges, 2) whose column 0 is the receiving node and column 1 the
        neighbour it receives from.

    Returns:
      A tensor of shape (num_nodes, units) with the diffused node states.
    """
    joint, edges = inputs

    # Linearly transform nodes & features (joint tensor)
    joint = tf.cast(joint, self.compute_dtype)
    joint_transformed = tf.matmul(joint, self.kernel)

    # Normalised diffusivity per edge (equivalent of an attention score),
    # cast so it can be multiplied with the transformed states
    diff_t = tf.cast(forward_diffusion(edges), joint_transformed.dtype)

    # Weight each neighbour's state by its edge diffusivity and sum the
    # results into the receiving node. All ops are differentiable tensor ops,
    # so gradients reach `self.kernel`.
    neighbour_states = tf.gather(joint_transformed, edges[:, 1])
    joint_diffused = tf.math.unsorted_segment_sum(
        data=neighbour_states * diff_t,
        segment_ids=edges[:, 0],
        num_segments=tf.shape(joint_transformed)[0],
    )
    return joint_diffused

In [None]:
# Build BLEND network
class BLEND(keras.Model):
  """BLEND network: dense pre-processing, stacked Beltrami layers, dense head.

  Args:
    edges: Edge tensor handed to every `BeltramiLayer`.
    joint: Joint (node + feature) tensor; stored on the model.
    input_dim: Input width; stored on the model.
    output_dim: Width of the final dense layer.
    hidden_units: Width of the pre-processing layer and of each Beltrami layer.
    num_layers: Number of stacked `BeltramiLayer` instances.
    **kwargs: Forwarded to the `keras.Model` constructor.
  """

  def __init__(self, edges, joint, input_dim, output_dim, hidden_units, num_layers, **kwargs):
    # `super()` must be called; `super.__init__` addresses the builtin type
    # itself and raises a TypeError.
    super().__init__(**kwargs)
    self.edges = edges
    self.joint = joint
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.preprocess = layers.Dense(hidden_units, activation="relu")
    self.attention_layers = [BeltramiLayer(hidden_units) for _ in range(num_layers)]
    self.output_layer = layers.Dense(output_dim)

  def call(self, joint):
    """Run the network on `joint` and return the output of the final layer."""
    diff_joint = self.preprocess(joint)

    # Residual connection around every diffusion layer. Use the edges given
    # to this model rather than whatever module-level `edges` happens to be.
    for attention_layer in self.attention_layers:
      diff_joint = attention_layer([diff_joint, self.edges]) + diff_joint

    return self.output_layer(diff_joint)