In [0]:
import os,sys,inspect
import os
import joblib
import tensorflow as tf
import numpy as np
import h5py
import scipy.sparse.linalg as la
import scipy.sparse as sp
import scipy
import time
import pickle

import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
%matplotlib inline

import scipy.io as sio
import process_data

In [3]:
class GAT:
    """Multi-head graph attention model built on the TensorFlow 1.x graph API.

    Constructing an instance builds the complete computation graph (attention
    layers, loss, Adam update, accuracy and gradient norm) inside a private
    ``tf.Graph``, opens a ``tf.Session`` on that graph and initializes all
    variables.  Callers drive the model by running the public tensors/ops
    (``opt_step``, ``loss``, ``data_loss``, ``accuracy``, ``norm_grad``,
    ``logits`` ...) through ``self.session`` while feeding ``idx_nodes`` and
    ``keep_prob``.  Call ``close()`` when the model is no longer needed.
    """

    def frobenius_norm(self, tensor):
        """Return the square root of the sum of squared entries of ``tensor``."""
        return tf.sqrt(tf.reduce_sum(tf.square(tensor)))

    def coo_to_sparse_tensor(self, X):
        """Pack a COO-style sparse matrix into a ``tf.SparseTensorValue``.

        ``X`` must expose ``row``, ``col``, ``data`` and ``shape`` (as a SciPy
        ``coo_matrix`` does).  The values are passed through unchanged.
        """
        # One (row, col) pair per stored entry; np.column_stack replaces the
        # np.mat(...).transpose() idiom (np.mat no longer exists in NumPy 2).
        indices = np.column_stack((X.row, X.col))
        return tf.SparseTensorValue(indices, X.data, X.shape)

    def concat(self, x, y):
        """Concatenate ``x`` and ``y`` along axis 1, treating ``None`` as empty."""
        if x is None:
            return y
        if y is None:
            return x
        return tf.concat([x, y], axis=1)

    def __variable(self, shape, reg=True, name='weight'):
        """Create a Xavier-initialized float32 variable.

        When ``reg`` is true the variable is recorded in ``self.reg_variables``
        so that it contributes to the L2 penalty of the loss.
        """
        v = tf.get_variable(name=name, shape=shape, dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
        if reg:
            self.reg_variables.append(v)
        return v

    def __bias(self, shape, reg=True, name='bias'):
        """Create a zero-initialized float32 variable.

        ``reg`` defaults to True, so biases are L2-regularized as well unless
        the caller opts out.
        """
        v = tf.get_variable(name, shape=shape, dtype=tf.float32,
                            initializer=tf.keras.initializers.Zeros())
        if reg:
            self.reg_variables.append(v)
        return v

    def __attention_head(self, H_prev, A_, Fin, Fout):
        """Build one attention head and return its (dropped-out) output.

        Must be called inside the variable scope of the head so that the
        variables ``W``, ``A_1``, ``A_2``, ``b_1``, ``b_2`` get unique names.

        H_prev -- dense input features with ``Fin`` columns.
        A_     -- sparse matrix whose stored entries define which pairs receive
                  an attention score (and scale that score, see below).
        """
        # Variable creation order is kept stable: it determines the order of
        # self.reg_variables and of tf.trainable_variables().
        W = self.__variable([Fin, Fout], name='W')
        A_1 = self.__variable([Fout, 1], name='A_1')
        A_2 = self.__variable([Fout, 1], name='A_2')
        b_1 = self.__bias([1], name='b_1')
        b_2 = self.__bias([1], name='b_2')

        U = tf.matmul(H_prev, W)  # M x Fout

        gamma_1 = tf.matmul(U, A_1) + b_1  # M x 1
        gamma_2 = tf.matmul(U, A_2) + b_2  # M x 1

        # Stored entry (i, j) becomes A_[i, j] * (gamma_1[i] + gamma_2[j]):
        # the column vector broadcasts over rows, its transpose over columns.
        phi = tf.sparse.add(A_ * gamma_1, A_ * tf.transpose(gamma_2))  # M x M

        # Apply the non-linearity to the stored values only.  The result is a
        # symbolic tensor, so it is wrapped in tf.SparseTensor (not the
        # value container tf.SparseTensorValue) and reuses phi's own int64
        # dense_shape.
        phi = tf.SparseTensor(
            indices=phi.indices,
            values=tf.nn.leaky_relu(phi.values),
            dense_shape=phi.dense_shape)  # M x M

        # Row-wise softmax over the stored entries gives the attention weights.
        Q = tf.sparse.softmax(phi)

        H_temp = tf.sparse.matmul(Q, U)  # M x Fout
        return tf.nn.dropout(H_temp, self.keep_prob)

    def __init__(self, A, X, Y,
                 num_hidden_feat,
                 K,
                 learning_rate,
                 gamma,  # l2 regularization parameter
                 idx_gpu='/GPU:0',
                 seed=None):
        """Build the graph, open a session and initialize the variables.

        A               -- SciPy sparse matrix (needs ``astype``/``tocoo``);
                           the identity is added to it before it is used.
                           Presumably square (nodes x nodes) -- not checked.
        X               -- 2-D array-like of input features, one row per node.
        Y               -- array-like of targets, one row per node; used as
                           ``labels`` of the softmax cross-entropy and
                           arg-maxed along axis 1 for the accuracy.
        num_hidden_feat -- output width of a single head, one entry per layer.
        K               -- number of heads, one entry per layer; the outputs
                           of the heads of a layer are concatenated.
        learning_rate   -- learning rate of the Adam optimizer.
        gamma           -- weight of the L2 penalty in the loss.
        idx_gpu         -- device string used for graph construction.
        seed            -- optional graph-level random seed; ``None`` (default)
                           leaves TensorFlow's seeding untouched.
        """
        self.num_hidden_feat = num_hidden_feat
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.num_layers = len(num_hidden_feat)

        self.K = K
        self.reg_variables = []

        with tf.Graph().as_default() as g:
            self.graph = g

            if seed is not None:
                # The graph is private to this instance, so the graph-level
                # seed has to be set here, while it is the default graph.
                tf.set_random_seed(seed)

            with tf.device(idx_gpu):
                # definition of constant matrices
                # The adjacency is cast to float32 like X and Y so that it can
                # be added to the float32 identity below.
                self.A = self.coo_to_sparse_tensor(A.astype(np.float32).tocoo())
                self.X = tf.constant(X, dtype=tf.float32)
                self.Y = tf.constant(Y, dtype=tf.float32)

                # placeholder definition
                self.idx_nodes = tf.placeholder(tf.int32)   # rows used for loss/accuracy
                self.keep_prob = tf.placeholder(tf.float32)  # dropout keep probability

                # model definition
                M = tf.shape(self.A)[0]
                A_ = tf.sparse.add(self.A, tf.sparse.eye(M, M, dtype=tf.float32))

                H_prev = self.X

                for layer in range(self.num_layers):
                    with tf.variable_scope('layer_{0}'.format(layer)):
                        Fout = self.num_hidden_feat[layer]
                        Fin = H_prev.get_shape().as_list()[1]

                        H_prev = tf.nn.dropout(H_prev, self.keep_prob)
                        H = None

                        for k in range(self.K[layer]):
                            with tf.variable_scope('head_{0}'.format(k)):
                                H_temp = self.__attention_head(H_prev, A_, Fin, Fout)
                                H = self.concat(H, H_temp)

                        H_prev = H

                        if layer != self.num_layers - 1:  # it is not the last layer
                            H_prev = tf.nn.elu(H_prev)

                self.logits = H_prev

                self.l_out = tf.gather(self.logits, self.idx_nodes)
                self.c_Y = tf.gather(self.Y, self.idx_nodes)

                # loss function definition
                with tf.name_scope('loss'):
                    self.l2_reg = 0
                    for W in self.reg_variables:
                        self.l2_reg += tf.nn.l2_loss(W)

                    self.data_loss = tf.reduce_mean(
                        tf.nn.softmax_cross_entropy_with_logits(logits=self.l_out, labels=self.c_Y))
                    self.loss = self.data_loss + self.gamma * self.l2_reg

                # solver definition
                self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
                self.opt_step = self.optimizer.minimize(self.loss)

                # predictions and accuracy extraction
                self.c_predictions = tf.argmax(tf.nn.softmax(self.l_out), 1)
                self.accuracy = tf.contrib.metrics.accuracy(self.c_predictions, tf.argmax(self.c_Y, 1))

                # gradients computation
                self.trainable_variables = tf.trainable_variables()
                self.var_grad = tf.gradients(self.loss, self.trainable_variables)

                # Norm of all gradients flattened into a single vector.
                self.norm_grad = self.frobenius_norm(
                    tf.concat([tf.reshape(grad, [-1]) for grad in self.var_grad], 0))

                # session creation
                config = tf.ConfigProto(allow_soft_placement=True)
                config.gpu_options.allow_growth = True
                self.session = tf.Session(config=config)

                # session initialization
                init = tf.global_variables_initializer()
                self.session.run(init)

    def close(self):
        """Close the TensorFlow session opened by ``__init__``."""
        self.session.close()
                        
#learning parameters and path dataset

num_total_iter_training = 3000
learning_rate = 0.005
val_test_interval = 1  # validate/test every this many training iterations
num_hidden_feat = [8,7]
K = [8,1]
gamma = 0.0005

#dataset loading

A, X, Y, train_idx, val_idx, test_idx = process_data.load_data("cora")
X = process_data.preprocess_features(X)

# compute GCN adj matrix: D^-1/2 * (A with unit diagonal) * D^-1/2
# The diagonal is written in LIL format; doing it on a CSR matrix changes the
# sparsity structure and triggers SciPy's SparseEfficiencyWarning.
A_tilde = sp.csr_matrix(A, dtype=np.float32).tolil()
A_tilde.setdiag(1)
A_tilde = A_tilde.tocsr()

# Row sums (degrees) as a flat vector, then the diagonal matrix D^-1/2.
degrees = np.asarray(A_tilde.sum(axis=1)).ravel()
D_inv_sqrt = sp.diags(np.reciprocal(np.sqrt(degrees)))

A_tilde = D_inv_sqrt.dot(A_tilde).dot(D_inv_sqrt)
A_tilde = A_tilde.tocsr()
A_tilde.eliminate_zeros()
# NOTE(review): GAT adds the identity to the matrix it receives and multiplies
# the attention logits by its stored values, so passing the normalized A_tilde
# weights the logits by these entries -- confirm this is intended rather than
# a binary adjacency.

# Training

num_exp = 10 #number of times training GCN over the given dataset

# Per-experiment results; list_all_acc holds one scalar per experiment.
list_all_acc = []
list_all_cost_val_avg  = []
list_all_data_cost_val_avg = []
list_all_acc_val_avg   = []
list_all_cost_test_avg = []
list_all_acc_test_avg  = []

num_done = 0
for seed in range(num_exp):
    model = GAT(A=A_tilde, X=X, Y=Y, num_hidden_feat=num_hidden_feat, K=K, learning_rate=learning_rate, gamma=gamma)

    cost_train_avg      = []
    grad_norm_train_avg = []
    acc_train_avg       = []
    cost_test_avg       = []
    grad_norm_test_avg  = []
    acc_test_avg        = []
    cost_val_avg        = []
    data_cost_val_avg   = []
    acc_val_avg         = []
    iter_test           = []
    list_training_time  = []

    #Training code
    for i in range(num_total_iter_training):
        if (i % val_test_interval) == 0:

            #Validate the model (dropout disabled)
            feed_dict = {model.idx_nodes: val_idx, model.keep_prob: 1.0}
            acc_val, cost_val, data_cost_val = \
                model.session.run([model.accuracy, model.loss, model.data_loss], feed_dict)

            data_cost_val_avg.append(data_cost_val)
            cost_val_avg.append(cost_val)
            acc_val_avg.append(acc_val)

            #Test the model (dropout disabled)
            feed_dict = {model.idx_nodes: test_idx, model.keep_prob: 1.0}
            acc_test, cost_test = model.session.run([model.accuracy, model.loss], feed_dict)

            cost_test_avg.append(cost_test)
            acc_test_avg.append(acc_test)
            iter_test.append(i)

        #One optimization step on the training nodes
        tic = time.time()
        feed_dict = {model.idx_nodes: train_idx, model.keep_prob: 0.5}

        _, current_training_loss, norm_grad, current_acc_training = \
            model.session.run([model.opt_step, model.loss, model.norm_grad, model.accuracy], feed_dict)

        training_time = time.time() - tic
        list_training_time.append(training_time)

        cost_train_avg.append(current_training_loss)
        grad_norm_train_avg.append(norm_grad)
        acc_train_avg.append(current_acc_training)

    #Compute and print statistics of the last realized experiment
    # Report the test accuracy at the evaluation with the lowest validation
    # data loss.  np.argmin returns the first minimum, so a tie yields exactly
    # one value (a boolean mask would select several and break the prints).
    best_eval = int(np.argmin(data_cost_val_avg))
    suggested_acc = 100 * acc_test_avg[best_eval]

    list_all_acc.append(suggested_acc)
    list_all_cost_val_avg.append(cost_val_avg)
    list_all_data_cost_val_avg.append(data_cost_val_avg)
    list_all_acc_val_avg.append(acc_val_avg)
    list_all_cost_test_avg.append(cost_test_avg)
    list_all_acc_test_avg.append(acc_test_avg)

    print('Num done: %d' % num_done)
    print('Max accuracy on test set achieved: %f%%' % np.max(np.asarray(acc_test_avg)*100))
    print('Max suggested accuracy: %f%%' % suggested_acc)
    print('Current mean: %f%%' % np.mean(list_all_acc))
    print('Current std: %f' % np.std(list_all_acc))

    # Every GAT instance opens its own session; release it before the next run.
    model.session.close()

    num_done += 1

(2708, 2708)
(2708, 1433)


  self._set_arrayXarray(i, j, x)


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Instructions for updating:
Use tf.cast instead.
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
Use tf.cast instead.
Num done: 0
Max accuracy on test set achieved: 82.800003%
Max suggested accuracy: 81.599998%
Current mean: 81.599998%
Current std: 0.000000
Num done: 1
Max accuracy on test set achieved: 82.599998%
Max suggested accur

In [4]:
# Summary over all experiments: first the mean, then the standard deviation,
# of the accuracies collected in list_all_acc.
for summarize in (np.mean, np.std):
    print(summarize(list_all_acc))

81.78999
0.3700002
