# GCN 

gcn model code referenced by 
https://github.com/dmlc/dgl/tree/master/examples/tensorflow/gcn

Original tensorflow gcn code! 
실제 코드를 python notebook으로 만들었다. |


### Setting

In [1]:
# !pip install dgl # cpu 
!pip install dgl-cu110 # gpu 

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting dgl-cu110
  Downloading dgl_cu110-0.6.1-cp38-cp38-manylinux1_x86_64.whl (39.9 MB)
[K     |████████████████████████████████| 39.9 MB 1.2 MB/s 
Installing collected packages: dgl-cu110
Successfully installed dgl-cu110-0.6.1


In [2]:
import os
os.environ['DGLBACKEND'] = 'tensorflow'

import dgl
print(dgl.backend.backend_name)

Using backend: tensorflow


tensorflow


In [3]:
import time 
import numpy as np 
import pandas as pd 
import networkx as nx 
import requests 
from matplotlib import pyplot as plt 
import tensorflow as tf 
from tensorflow.keras import layers 
from dgl import DGLGraph 
from dgl.data import CiteseerGraphDataset, register_data_args, load_data

In [4]:
data = CiteseerGraphDataset() 

Downloading /root/.dgl/citeseer.zip from https://data.dgl.ai/dataset/citeseer.zip...
Extracting file to /root/.dgl/citeseer


  r_inv = np.power(rowsum, -1).flatten()


Finished data loading and preprocessing.
  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done saving data into cached files.


In [5]:
g = data[0]
print(g)

Graph(num_nodes=3327, num_edges=9228,
      ndata_schemes={'train_mask': Scheme(shape=(), dtype=tf.bool), 'val_mask': Scheme(shape=(), dtype=tf.bool), 'test_mask': Scheme(shape=(), dtype=tf.bool), 'label': Scheme(shape=(), dtype=tf.int64), 'feat': Scheme(shape=(3703,), dtype=tf.float32)}
      edata_schemes={})


In [6]:
device = "/gpu:0"

In [7]:
with tf.device(device): 
  g = g.to(device)
  features = g.ndata["feat"]
  labels = g.ndata["label"]
  train_mask = g.ndata["train_mask"]
  val_mask = g.ndata["val_mask"]
  test_mask = g.ndata["test_mask"]
  in_feats = features.shape[1] 
  n_classes = data.num_classes
  n_edges = g.number_of_edges() 
  print("---data info---")
  print("Edges %d Classes %d " % (
      n_edges, n_classes, 
  ))
  print("Train samples %d " % (
      train_mask.numpy().sum(), 
  ))
  print("valsamples %d test samples %d" % (
      val_mask.numpy().sum(), test_mask.numpy().sum()
  ))

---data info---
Edges 9228 Classes 6 
Train samples 120 
valsamples 500 test samples 1000


In [11]:
n_edges = g.number_of_edges() 
degs = tf.cast(tf.identity(g.in_degrees()), dtype=tf.float32)
norm = tf.math.pow(degs, -0.5) 
norm = tf.where(tf.math.is_inf(norm), tf.zeros_like(norm), norm)

In [15]:
print(n_edges)
print(g.in_degrees())
print(degs)
print(norm.shape)
norm

9228
tf.Tensor([1 5 1 ... 3 1 1], shape=(3327,), dtype=int64)
tf.Tensor([1. 5. 1. ... 3. 1. 1.], shape=(3327,), dtype=float32)
(3327,)


<tf.Tensor: shape=(3327,), dtype=float32, numpy=
array([1.        , 0.4472136 , 1.        , ..., 0.57735026, 1.        ,
       1.        ], dtype=float32)>

In [16]:
g.ndata["norm"] = tf.expand_dims(norm, -1) 
g.ndata["norm"]

<tf.Tensor: shape=(3327, 1), dtype=float32, numpy=
array([[1.        ],
       [0.4472136 ],
       [1.        ],
       ...,
       [0.57735026],
       [1.        ],
       [1.        ]], dtype=float32)>

### GCN Model

In [17]:
from dgl.nn.tensorflow import GraphConv 
from tensorflow.keras import layers 

class GCN(tf.keras.Model): 
  def __init__(
      self, g, in_feats, n_hidden, n_classes, n_layers, activation, dropout
  ): 
    super(GCN, self).__init__() 
    self.g = g 
    self.layer_list = [] 
    #input layer 
    self.layer_list.append(
        GraphConv(in_feats, n_hidden, activation=activation)
    ) 
    for i in range(n_layers - 1): 
      self.layer_list.append( 
          GraphConv(n_hidden, n_hidden, activation=activation)
      )
      self.layer_list.append(GraphConv(n_hidden, n_classes)) 
      self.dropout = layers.Dropout(dropout) 
  
  def call(self, features) : 
    h = features 
    for i, layer in enumerate(self.layer_list): 
      if i != 0: 
        h = self.dropout(h) 
      h = layer(self.g, h) 
    return h  

In [18]:
n_hidden = 16 
n_layers = 1 
weight_decay = 5e-4 
dropout = 0.5

model = GCN(g, in_feats, n_hidden, n_classes, n_layers, tf.nn.relu, dropout )

In [19]:
lr = 1e-2
loss_func = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True) 
optimizer = tf.keras.optimizers.Adam(learning_rate = lr, epsilon=1e-8)

In [20]:
def evaluate(model, features, labels, mask): 
  logits = model(features, training=False) 
  logits = logits[mask] 
  labels = labels[mask] 
  indices = tf.math.argmax(logits, axis=1) 
  acc = tf.reduce_mean(tf.cast(indices == labels, dtype=tf.float32)) 
  return acc.numpy().item() 

In [30]:
dur = [] 
for epoch in range(100): # epoch 
  if epoch >= 3: 
    t0 = time.time() 
  with tf.GradientTape() as tape: 
    logits = model(features) 
    loss_value = loss_func(labels[train_mask], logits[train_mask]) 
    for weight in model.trainable_weights: 
      loss_value = loss_value + weight_decay * tf.nn.l2_loss(weight) 
    grads = tape.gradient(loss_value, model.trainable_weights) 
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
  if epoch >= 3: 
    dur.append(time.time() - t0)
  
  acc = evaluate(model, features, labels, val_mask) 
  if epoch % 10 == 0: 
    print(
                  "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
                  "ETputs(KTEPS) {:.2f}".format(
                      epoch,
                      np.mean(dur),
                      loss_value.numpy().item(),
                      acc,
                      n_edges / np.mean(dur) / 1000,
                  )
              )


Epoch 00000 | Time(s) nan | Loss 1.8539 | Accuracy 0.6020 | ETputs(KTEPS) nan
Epoch 00010 | Time(s) 0.0305 | Loss 1.8451 | Accuracy 0.6040 | ETputs(KTEPS) 303.02
Epoch 00020 | Time(s) 0.0308 | Loss 1.8369 | Accuracy 0.6040 | ETputs(KTEPS) 299.18
Epoch 00030 | Time(s) 0.0271 | Loss 1.8292 | Accuracy 0.6000 | ETputs(KTEPS) 339.90
Epoch 00040 | Time(s) 0.0281 | Loss 1.8220 | Accuracy 0.6000 | ETputs(KTEPS) 328.64
Epoch 00050 | Time(s) 0.0286 | Loss 1.8152 | Accuracy 0.6000 | ETputs(KTEPS) 322.29
Epoch 00060 | Time(s) 0.0295 | Loss 1.8088 | Accuracy 0.6020 | ETputs(KTEPS) 312.59
Epoch 00070 | Time(s) 0.0284 | Loss 1.8029 | Accuracy 0.6060 | ETputs(KTEPS) 325.40
Epoch 00080 | Time(s) 0.0272 | Loss 1.7973 | Accuracy 0.6100 | ETputs(KTEPS) 338.73
Epoch 00090 | Time(s) 0.0262 | Loss 1.7920 | Accuracy 0.6080 | ETputs(KTEPS) 352.43


In [31]:
acc = evaluate(model, features, labels, test_mask) 
print("Test accuracy {:.4f}".format(acc))

Test accuracy 0.6370
