

Graph Convolutional Network
====================================

This is a gentle introduction to implementing Graph Convolutional
Networks (Kipf & Welling, [Semi-Supervised Classification with Graph
Convolutional Networks](https://arxiv.org/pdf/1609.02907.pdf)).

<!-- ![GCN](gcn.png) -->
<img src="gcn.png" align="center"/>

## CORA dataset
The [Cora dataset](https://relational.fit.cvut.cz/dataset/CORA) consists of __2708__ scientific publications classified into one of seven classes. The citation network consists of __5429__ links. Each publication in the dataset is described by a 0/1-valued word vector indicating the absence/presence of the corresponding word from the dictionary. The dictionary consists of __1433__ unique words.

<img src="https://www.researchgate.net/profile/Davide_Boscaini/publication/310953386/figure/fig1/AS:669401682563079@1536609206304/Predictions-obtained-applying-MoNet-over-the-Cora-dataset-Marker-fill-color-represents.png">

In [1]:
!pip install -q --upgrade git+https://github.com/mlss-skoltech/tutorials_week2.git#subdirectory=graph_neural_networks

In [10]:
import pkg_resources

# Path of the data archive shipped inside the installed `gnnutils` package.
ZIP_PATH = pkg_resources.resource_filename('gnnutils', 'data/data.zip')
# Directory handed to the dataset loader later on.
# NOTE(review): presumably the archive unpacks into ./data -- confirm.
DATA_PATH = './data'

# Unpack into the working directory; -u only extracts new or updated files.
!unzip -u {ZIP_PATH} -d ./



Archive:  /anaconda3/lib/python3.7/site-packages/gnnutils/data/data.zip


In [11]:
import os,sys,inspect
import os
import joblib
import tensorflow as tf
import numpy as np
import h5py
import scipy.sparse.linalg as la
import scipy.sparse as sp
import scipy
import time
import pickle

import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
%matplotlib inline

import scipy.io as sio
from gnnutils import process_data

We first define a helper that computes the Frobenius norm of a tensor:
$$\| A \|_F = \left( \sum_{i,j=1}^n | a_{ij} |^2 \right)^{1/2}.$$

In [12]:
def frobenius_norm(tensor):
    """Return the Frobenius norm of ``tensor`` as a TensorFlow op.

    The norm is the square root of the sum of all squared entries.
    """
    return tf.sqrt(tf.reduce_sum(tf.square(tensor)))

Helper function that converts a SciPy sparse matrix in COO (coordinate) format into a TensorFlow `SparseTensor`.

In [13]:
def convert_coo_to_sparse_tensor(L):
    """Build a reordered ``tf.SparseTensor`` from a COO-style sparse matrix.

    Args:
        L: object exposing ``row``, ``col``, ``data`` and ``shape``
            (the caller in this file passes the result of ``tocoo()``).

    Returns:
        A ``tf.SparseTensor`` with float32 values, passed through
        ``tf.sparse_reorder``.
    """
    # One (row, col) pair per stored entry.
    coords = np.column_stack((L.row, L.col))
    values = L.data.astype('float32')
    sparse = tf.SparseTensor(coords, values, L.shape)
    return tf.sparse_reorder(sparse)

In [14]:
class GCN:
    """
    Two-layer graph convolutional network built as a TensorFlow 1.x graph.

    The constructor builds the complete model (forward pass, loss,
    optimiser step, accuracy and gradient-norm ops) inside its own
    ``tf.Graph``, opens a session on that graph and initialises the
    variables.  Callers then evaluate the ops through ``self.session.run``,
    feeding ``self.idx_nodes`` (which nodes to score) and
    ``self.keep_prob`` (dropout keep probability).
    """

    def __init__(self, A, X, Y, num_hidden_feat, learning_rate=5e-2, gamma=1e-3, idx_gpu='/gpu:2'):
        """
        Build the graph, create the session and initialise the variables.

        Args:
            A: sparse matrix exposing ``tocoo()``; it left-multiplies the
                features in both layers (presumably the normalised
                adjacency -- confirm against the caller).
            X: dense node features; ``X.shape[1]`` is the input width.
            Y: dense targets; ``Y.shape[1]`` is the output width.  Used as
                labels of a softmax cross-entropy and through ``argmax``,
                so presumably one-hot rows -- confirm.
            num_hidden_feat: number of columns of the first weight matrix.
            learning_rate: learning rate given to the Adam optimiser.
            gamma: multiplier of the L2 penalty on ``W0`` in the loss.
            idx_gpu: device string passed to ``tf.device``; the session is
                created with ``allow_soft_placement=True``.
        """
        self.num_hidden_feat = num_hidden_feat
        self.learning_rate = learning_rate
        self.gamma = gamma

        with tf.Graph().as_default() as graph, tf.device(idx_gpu):
            self.graph = graph

            # Constant inputs baked into the graph.
            self.A = convert_coo_to_sparse_tensor(A.tocoo())
            self.X = tf.constant(X, dtype=tf.float32)
            self.Y = tf.constant(Y, dtype=tf.float32)

            # Trainable weights of the two layers.
            self.W0 = tf.get_variable(
                "W0",
                shape=[X.shape[1], self.num_hidden_feat],
                initializer=tf.contrib.layers.xavier_initializer(),
            )
            self.W1 = tf.get_variable(
                "W1",
                shape=[self.num_hidden_feat, Y.shape[1]],
                initializer=tf.contrib.layers.xavier_initializer(),
            )

            # Values supplied at run time.
            self.idx_nodes = tf.placeholder(tf.int32)
            self.keep_prob = tf.placeholder(tf.float32)

            # Layer 1: dropout -> A @ X -> @ W0 -> ReLU -> dropout.
            self.l_input = tf.nn.dropout(self.X, self.keep_prob)
            self.X0_tilde = tf.sparse_tensor_dense_matmul(self.A, self.l_input)
            hidden = tf.matmul(self.X0_tilde, self.W0)
            hidden = tf.nn.relu(hidden)
            self.X0 = tf.nn.dropout(hidden, self.keep_prob)

            # Layer 2: A @ X0 -> @ W1 gives one row of logits per node.
            self.X1_tilde = tf.sparse_tensor_dense_matmul(self.A, self.X0)
            self.logits = tf.matmul(self.X1_tilde, self.W1)

            # Restrict logits and targets to the requested nodes.
            self.l_out = tf.gather(self.logits, self.idx_nodes)
            self.c_Y = tf.gather(self.Y, self.idx_nodes)

            # Loss: mean cross-entropy plus an L2 penalty on W0 only.
            self.l2_reg = tf.nn.l2_loss(self.W0)
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.l_out, labels=self.c_Y
            )
            self.data_loss = tf.reduce_mean(cross_entropy)
            self.loss = self.data_loss + self.gamma * self.l2_reg

            # Optimiser and the op performing one update step.
            self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
            self.opt_step = self.optimizer.minimize(self.loss)

            # Predicted class per selected node and the resulting accuracy.
            self.c_predictions = tf.argmax(tf.nn.softmax(self.l_out), 1)
            true_classes = tf.argmax(self.c_Y, 1)
            self.accuracy = tf.contrib.metrics.accuracy(self.c_predictions, true_classes)

            # Norm of all loss gradients, flattened into a single vector.
            self.trainable_variables = tf.trainable_variables()
            self.var_grad = tf.gradients(self.loss, self.trainable_variables)
            flat_grads = [tf.reshape(grad, [-1]) for grad in self.var_grad]
            self.norm_grad = frobenius_norm(tf.concat(flat_grads, 0))

            # Session on this graph; GPU memory is allocated on demand.
            session_config = tf.ConfigProto(allow_soft_placement=True)
            session_config.gpu_options.allow_growth = True
            self.session = tf.Session(config=session_config)

            # Give every variable its initial value.
            self.session.run(tf.global_variables_initializer())

We define the hyperparameters of the training procedure and load the data using our utility module *process_data*.

In [15]:
# Training hyperparameters.
num_hidden_feat = 16             # width of the hidden layer of the network
learning_rate = 1e-2             # learning rate handed to the optimiser
gamma = 5e-4                     # weight of the L2 penalty in the loss
num_total_iter_training = 3000   # training steps per experiment
val_test_interval = 1            # evaluate on val/test every this many steps

In [16]:
# Load the "cora" dataset from DATA_PATH through the gnnutils helper.
# NOTE(review): judging by how they are used further down, A is the graph
# adjacency, X the node features, Y the labels and *_idx the node indices of
# the train/validation/test splits -- confirm against process_data.
A, X, Y, train_idx, val_idx, test_idx = process_data.load_data("cora", path_to_data=DATA_PATH)
# Feature preprocessing provided by the helper module.
# NOTE(review): presumably a per-row normalisation -- confirm.
X = process_data.preprocess_features(X)

(2708, 2708)
(2708, 1433)





Each GCN layer applies the propagation rule

$$\mathbf{X}^{\prime} = \mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}}
        \mathbf{\hat{D}}^{-1/2} \mathbf{X} \mathbf{\Theta},$$

where $\mathbf{\hat{A}} = \mathbf{A} + \mathbf{I}$ denotes the
adjacency matrix with inserted self-loops and
$\hat{D}_{ii} = \sum_{j} \hat{A}_{ij}$ is its diagonal degree matrix.

In [5]:
# Compute the symmetrically normalised GCN adjacency
#     A_tilde = D^-1/2 (A with unit self-loops) D^-1/2.

# Insert the self-loops on a LIL copy: changing the sparsity structure of a
# CSR matrix in place is slow and raises SciPy's SparseEfficiencyWarning.
A_tilde = sp.lil_matrix(A)
A_tilde.setdiag(1)
A_tilde = A_tilde.tocsr()

# Degree of every node of the self-looped graph, as a flat array.
D = np.asarray(A_tilde.sum(axis=1)).ravel()

# D^-1/2 for *every* node, so that entry i always belongs to node i.
# (Collecting only the non-zero degrees would shift the diagonal and shrink
# the matrix if any degree happened to be 0.)  Non-finite results, which
# can only come from a non-positive degree, are mapped to 0.
with np.errstate(divide='ignore', invalid='ignore'):
    D_vals = np.reciprocal(np.sqrt(D))
D_vals[~np.isfinite(D_vals)] = 0

D_inv_sqrt = sp.diags(D_vals, format='csr')

A_tilde = D_inv_sqrt.dot(A_tilde).dot(D_inv_sqrt)
A_tilde = A_tilde.tocsr()
A_tilde.eliminate_zeros()

  self[i, j] = values


In [None]:
num_exp = 10  # number of times the GCN is trained over the given dataset

# Results collected across experiments (one entry per experiment).
list_all_acc = []                # test accuracy (%) at the best validation loss
list_all_cost_val_avg = []
list_all_data_cost_val_avg = []
list_all_acc_val_avg = []
list_all_cost_test_avg = []
list_all_acc_test_avg = []

num_done = 0
# NOTE(review): `seed` is only a run counter; nothing in this cell uses it to
# seed TensorFlow or NumPy.
for seed in range(num_exp):
    GCNN = GCN(A_tilde, X, Y, num_hidden_feat, learning_rate=learning_rate, gamma=gamma)

    # Histories of the current experiment.
    cost_train_avg = []
    grad_norm_train_avg = []
    acc_train_avg = []
    cost_test_avg = []
    acc_test_avg = []
    cost_val_avg = []
    data_cost_val_avg = []
    acc_val_avg = []
    iter_test = []
    list_training_time = []

    # Training code
    for i in range(num_total_iter_training):
        # Exactly one training step is recorded per iteration, so `i` is the
        # number of steps performed so far.
        if i % val_test_interval == 0:
            # Print last training performance
            if i > 0:
                print("[TRN] epoch = %03i, cost = %3.2e, |grad| = %.2e, acc = %3.2e (%03.2fs)" %
                      (i, cost_train_avg[-1], grad_norm_train_avg[-1], acc_train_avg[-1], list_training_time[-1]))

            # Validate the model (dropout disabled)
            tic = time.time()
            feed_dict = {GCNN.idx_nodes: val_idx, GCNN.keep_prob: 1.0}
            acc_val, cost_val, data_cost_val = GCNN.session.run(
                [GCNN.accuracy, GCNN.loss, GCNN.data_loss], feed_dict)

            data_cost_val_avg.append(data_cost_val)
            cost_val_avg.append(cost_val)
            acc_val_avg.append(acc_val)
            print("[VAL] epoch = %03i, data_cost = %3.2e, cost = %3.2e, acc = %3.2e (%03.2fs)" %
                  (i, data_cost_val_avg[-1], cost_val_avg[-1], acc_val_avg[-1], time.time() - tic))

            # Test the model (dropout disabled)
            tic = time.time()
            feed_dict = {GCNN.idx_nodes: test_idx, GCNN.keep_prob: 1.0}
            acc_test, cost_test = GCNN.session.run([GCNN.accuracy, GCNN.loss], feed_dict)

            cost_test_avg.append(cost_test)
            acc_test_avg.append(acc_test)
            print("[TST] epoch = %03i, cost = %3.2e, acc = %3.2e (%03.2fs)" %
                  (i, cost_test_avg[-1], acc_test_avg[-1], time.time() - tic))
            iter_test.append(i)

        # One optimisation step on the training nodes, with dropout.
        tic = time.time()
        feed_dict = {GCNN.idx_nodes: train_idx, GCNN.keep_prob: 0.5}
        _, current_training_loss, norm_grad, current_acc_training = GCNN.session.run(
            [GCNN.opt_step, GCNN.loss, GCNN.norm_grad, GCNN.accuracy], feed_dict)
        list_training_time.append(time.time() - tic)

        cost_train_avg.append(current_training_loss)
        grad_norm_train_avg.append(norm_grad)
        acc_train_avg.append(current_acc_training)

    # Compute and print statistics of the last realized experiment.
    # Validation and test metrics are appended together, so they share
    # indices.  argmin picks a single evaluation (the first one on ties); a
    # boolean mask could return several values and break the scalar
    # formatting and the aggregation below.
    best_idx = int(np.argmin(data_cost_val_avg))
    best_test_acc = 100 * acc_test_avg[best_idx]

    list_all_acc.append(best_test_acc)
    list_all_cost_val_avg.append(cost_val_avg)
    list_all_data_cost_val_avg.append(data_cost_val_avg)
    list_all_acc_val_avg.append(acc_val_avg)
    list_all_cost_test_avg.append(cost_test_avg)
    list_all_acc_test_avg.append(acc_test_avg)

    # Count the experiment before reporting, so the first report reads 1.
    num_done += 1

    print('Num done: %d' % num_done)
    print('Max accuracy on test set achieved: %f%%' % np.max(np.asarray(acc_test_avg)*100))
    print('Max suggested accuracy: %f%%' % best_test_acc)
    print('Current mean: %f%%' % np.mean(list_all_acc))
    print('Current std: %f' % np.std(list_all_acc))

    # Every GCN owns its own graph and session; release them before the next
    # experiment instead of leaving one open session per run.
    GCNN.session.close()

[VAL] epoch = 000, data_cost = 1.95e+00, cost = 1.95e+00, acc = 1.14e-01 (0.04s)
[TST] epoch = 000, cost = 1.95e+00, acc = 9.80e-02 (0.04s)
[TRN] epoch = 001, cost = 1.95e+00, |grad| = 1.45e-02, acc = 1.14e-01 (0.08s)
[VAL] epoch = 001, data_cost = 1.94e+00, cost = 1.95e+00, acc = 2.46e-01 (0.00s)
[TST] epoch = 001, cost = 1.95e+00, acc = 2.61e-01 (0.00s)
[TRN] epoch = 002, cost = 1.95e+00, |grad| = 1.56e-02, acc = 3.21e-01 (0.01s)
[VAL] epoch = 002, data_cost = 1.94e+00, cost = 1.95e+00, acc = 3.42e-01 (0.00s)
[TST] epoch = 002, cost = 1.95e+00, acc = 4.02e-01 (0.00s)
[TRN] epoch = 003, cost = 1.94e+00, |grad| = 1.95e-02, acc = 4.50e-01 (0.01s)
[VAL] epoch = 003, data_cost = 1.94e+00, cost = 1.94e+00, acc = 3.92e-01 (0.00s)
[TST] epoch = 003, cost = 1.94e+00, acc = 4.41e-01 (0.00s)
[TRN] epoch = 004, cost = 1.93e+00, |grad| = 2.32e-02, acc = 4.93e-01 (0.01s)
[VAL] epoch = 004, data_cost = 1.93e+00, cost = 1.94e+00, acc = 4.18e-01 (0.00s)
[TST] epoch = 004, cost = 1.94e+00, acc = 4.72e

In [10]:
# Print average performance: mean and standard deviation, over all
# experiments, of the test accuracies (in percent) stored in list_all_acc.
print(np.mean(list_all_acc))
print(np.std(list_all_acc))

81.979996
0.48538783
