In [1]:
import numpy as np
import tensorflow as tf
import spektral

In [2]:
dataset = spektral.datasets.Citation("cora")

  self._set_arrayXarray(i, j, x)


In [3]:
adj_matrix = dataset[0].a.todense() + np.eye(dataset[0].n_nodes) # add self loop
adj_matrix = adj_matrix.astype('float32') # 2708 * 2708 # citation relationships
node_features = dataset[0].x # 2708 * 1433, 1433 terms noted for each paper
node_labels = dataset[0].y # 2708 * 7, 7 classes for node classification
train_mask = dataset.mask_tr
val_mask = dataset.mask_va
test_mask = dataset.mask_te

In [5]:
def softmax_cross_entropy(logits, labels, mask):
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
    mask = tf.cast(mask, dtype=tf.float32)
    mask /= tf.reduce_mean(mask)
    loss *= mask
    return tf.reduce_mean(loss)

In [6]:
def accuracy(logits, labels, mask):
    # equivalent expression: np.sum(accuracy_all*mask)/np.sum(mask)
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    accuracy_all = tf.cast(correct_prediction, tf.float32)
    mask = tf.cast(mask, dtype=tf.float32)
    mask /= tf.reduce_mean(mask) # adjust mask proportion to sum up to 2708
    accuracy_all *= mask # apply mask to accuracy
    return tf.reduce_mean(accuracy_all) # eq. sum of accuracy / 2708

In [7]:
def gnn(node_features, adj_matrix, transform, activation):
    # weight first, then aggregate neighbor features
    #weighted_features = transform(node_features) # apply weights to node features
    #aggregated_features = tf.matmul(adj_matrix, weighted_features) # aggregated neighbor node features
    #return activation(aggregated_features) # apply activation function
    # aggregate neighbor features first, then weight
    aggregated_features = tf.matmul(adj_matrix, node_features) # aggregated neighbor node features
    weighted_features = transform(aggregated_features) # apply weights to node features
    return activation(weighted_features) # apply activation function

In [8]:
def train_cora(node_features, adj_matrix, gnn, hidden_units, epochs, learning_rate):
    layer_1 = tf.keras.layers.Dense(hidden_units) # hidden layer: apply weights
    layer_2 = tf.keras.layers.Dense(7) # 7 classes for nodes
    
    def cora_gnn(node_features, adj_matrix):
        hidden = gnn(node_features, adj_matrix, layer_1, tf.nn.relu)
        logits = gnn(hidden, adj_matrix, layer_2, tf.identity)
        return logits
    
    optimizer = tf.keras.optimizers.Adam(learning_rate) # use Adam optimizer
    
    best_accuracy = 0.0
    for ep in range(epochs + 1):
        with tf.GradientTape() as t:
            logits = cora_gnn(node_features, adj_matrix)
            loss = softmax_cross_entropy(logits, node_labels, train_mask)
            variables = t.watched_variables()
            grads = t.gradient(loss, variables)
            optimizer.apply_gradients(zip(grads, variables)) # apply gradients to variables
            val_accuracy = accuracy(logits, node_labels, val_mask)
            test_accuracy = accuracy(logits, node_labels, test_mask)
            if val_accuracy > best_accuracy:
                best_accuracy = val_accuracy
                print('Epoch', ep, '| Training loss:', loss.numpy(), '| Val accuracy:'
                      , val_accuracy.numpy(), '| Test accuracy', test_accuracy.numpy())

In [9]:
# GNN: A H W
train_cora(node_features, adj_matrix, gnn, 100, 200, 0.01)

Epoch 0 | Training loss: 4.304349 | Val accuracy: 0.11999998 | Test accuracy 0.118
Epoch 1 | Training loss: 12.151947 | Val accuracy: 0.38999996 | Test accuracy 0.399
Epoch 2 | Training loss: 18.13162 | Val accuracy: 0.40199998 | Test accuracy 0.417
Epoch 3 | Training loss: 9.70142 | Val accuracy: 0.52 | Test accuracy 0.546
Epoch 4 | Training loss: 5.537958 | Val accuracy: 0.648 | Test accuracy 0.63600004
Epoch 8 | Training loss: 1.39016 | Val accuracy: 0.65 | Test accuracy 0.662
Epoch 9 | Training loss: 1.1165088 | Val accuracy: 0.678 | Test accuracy 0.674
Epoch 16 | Training loss: 0.2278719 | Val accuracy: 0.686 | Test accuracy 0.70600003
Epoch 17 | Training loss: 0.16955851 | Val accuracy: 0.714 | Test accuracy 0.735
Epoch 18 | Training loss: 0.10948553 | Val accuracy: 0.726 | Test accuracy 0.754
Epoch 19 | Training loss: 0.06772305 | Val accuracy: 0.728 | Test accuracy 0.7569999
Epoch 20 | Training loss: 0.063029215 | Val accuracy: 0.732 | Test accuracy 0.76699996
Epoch 21 | Traini

In [10]:
# GCN: D^-0.5 A D^-0.5 H W
deg_matrix = tf.reduce_sum(adj_matrix, axis=-1)
norm_deg_matrix = tf.linalg.diag(1.0 / tf.sqrt(deg_matrix))
norm_adj_matrix = tf.matmul(norm_deg_matrix, tf.matmul(adj_matrix, norm_deg_matrix))
train_cora(node_features, norm_adj_matrix, gnn, 100, 200, 0.01)

Epoch 0 | Training loss: 1.9443457 | Val accuracy: 0.222 | Test accuracy 0.20700002
Epoch 1 | Training loss: 1.5881351 | Val accuracy: 0.69 | Test accuracy 0.7289999
Epoch 2 | Training loss: 1.1884322 | Val accuracy: 0.72599995 | Test accuracy 0.75899994
Epoch 3 | Training loss: 0.8102507 | Val accuracy: 0.76799995 | Test accuracy 0.774
Epoch 4 | Training loss: 0.52205074 | Val accuracy: 0.784 | Test accuracy 0.8039998
Epoch 5 | Training loss: 0.3270284 | Val accuracy: 0.786 | Test accuracy 0.8109998


# Below is a customized GCN Model capsulating the learning process above. However, this model was buggy so I just leave it here to show the concept. A working version of the model was implemented in the notebook named gcn_model

In [48]:
class GraphConvLayer(tf.keras.layers.Layer):
    def __init__(
        self,
        hidden_units,
        *args,
        **kwargs,
    ):
        super(GraphConvLayer, self).__init__(*args, **kwargs)

        self.update_fn = tf.keras.layers.Dense(hidden_units)
        self.hidden_units = hidden_units

    def aggregate(self, node_features, adj_matrix):
        # aggregated neighbor node features
        aggregated_features = tf.matmul(adj_matrix, node_features)
        
        return aggregated_features

    def call(self, inputs):
        """Process the inputs to produce the node_embeddings.

        inputs: a tuple of three elements: node_repesentations, edges, edge_weights.
        Returns: node_embeddings of shape [num_nodes, representation_dim].
        """
        print("num:", self.hidden_units)
        node_repesentations, adj_matrix = inputs
        # Aggregate the neighbour messages.
        aggregated_features = self.aggregate(node_repesentations, adj_matrix)
        # Update the node embedding with the weights.
        return self.update_fn(aggregated_features)

In [49]:
class GCNNodeClassifier(tf.keras.Model):
    def __init__(
        self,
        graph_info,
        num_classes,
        hidden_units,
        *args,
        **kwargs,
    ):
        super(GCNNodeClassifier, self).__init__(*args, **kwargs)

        # Unpack graph_info to three elements: node_features, edges, and edge_weight.
        node_features, adj_matrix = graph_info
        self.node_features = node_features
        self.adj_matrix = adj_matrix

        # Create the convoluted layer.
        self.conv = tf.keras.layers.Dense(hidden_units, name="graph_conv1")
        # Create a compute logits layer.
        self.compute_logits = tf.keras.layers.Dense(num_classes, name="logits")

    def call(self, input_node_indices):
        # GCN layer.
        x = self.conv((self.node_features))
        # Classification layer
        x1 = self.compute_logits((x))
        # Fetch node embeddings for the input node_indices.
        node_embeddings = tf.squeeze(tf.gather(x1, input_node_indices))
        return node_embeddings

In [50]:
gnn_model = GCNNodeClassifier(
    graph_info=(node_features, adj_matrix),
    num_classes=7,
    hidden_units=32,
    name="gnn_model",
)

print("GNN output shape:", gnn_model([1, 10, 100]))

gnn_model.summary()

GNN output shape: tf.Tensor(
[[-0.32148695  0.10195416 -0.5454991   0.0115348  -0.08195534 -0.21125859
   0.15940002]
 [ 0.16814412 -0.03495014 -0.06088027 -0.2042849  -0.11284803  0.21043402
  -0.02413301]
 [-0.13113461  0.02389363 -0.10501588 -0.12333894  0.08416583  0.24914908
  -0.05936877]], shape=(3, 7), dtype=float32)
Model: "gnn_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
graph_conv1 (Dense)          multiple                  45888     
_________________________________________________________________
logits (Dense)               multiple                  231       
Total params: 46,119
Trainable params: 46,119
Non-trainable params: 0
_________________________________________________________________
