In [1]:
from __future__ import division
from __future__ import print_function

import time
import tensorflow as tf

from utils import *
from models import GCN, MLP,HGCN
from coarsen import *
import copy
import matplotlib.pyplot as plt
import pickle as pkl

  from ._conv import register_converters as _register_converters


In [2]:
# Settings
flags = tf.app.flags
FLAGS = flags.FLAGS

# Every experiment flag, listed in registration order as
# (define function, flag name, default value, help text).
_FLAG_TABLE = (
    # NOTE(review): presumably absorbs the `-f <connection file>` argument that
    # Jupyter hands to the kernel so flag parsing does not reject it -- confirm.
    (flags.DEFINE_string, 'f', '', 'kernel'),
    # Dataset choices mentioned by the original author: 'cora', 'citeseer', 'pubmed'
    (flags.DEFINE_string, 'dataset', 'hateful', 'Dataset string.'),
    # Model choices handled by the preprocessing cell: 'hgcn', 'gcn', 'gcn_cheby', 'dense'
    (flags.DEFINE_string, 'model', 'hgcn', 'Model string.'),
    (flags.DEFINE_float, 'learning_rate', 0.03, 'Initial learning rate.'),
    (flags.DEFINE_integer, 'epochs', 100, 'Number of epochs to train.'),
    (flags.DEFINE_integer, 'seed1', 123, 'random seed for numpy.'),
    (flags.DEFINE_integer, 'seed2', 123, 'random seed for tf.'),
    (flags.DEFINE_integer, 'hidden', 32, 'Number of units in hidden layer 1.'),
    (flags.DEFINE_integer, 'node_wgt_embed_dim', 5, 'Number of units for node weight embedding.'),
    (flags.DEFINE_float, 'dropout', 0.9, 'Dropout rate (1 - keep probability).'),
    (flags.DEFINE_float, 'weight_decay', 7e-4, 'Weight for L2 loss on embedding matrix.'),
    (flags.DEFINE_integer, 'early_stopping', 1000, 'Tolerance for early stopping (# of epochs).'),
    (flags.DEFINE_integer, 'max_degree', 3, 'Maximum Chebyshev polynomial degree.'),
    (flags.DEFINE_integer, 'coarsen_level', 4, 'Maximum coarsen level.'),
    (flags.DEFINE_integer, 'max_node_wgt', 50, 'Maximum node_wgt to avoid super-node being too large.'),
    (flags.DEFINE_integer, 'channel_num', 4, 'Number of channels'),
)

# Register the flags one by one, preserving the order of the table above.
for _define, _flag_name, _default, _help in _FLAG_TABLE:
    _define(_flag_name, _default, _help)


In [3]:
# Set random seed
# Seed NumPy and TensorFlow from their dedicated flags so runs are repeatable.
# NOTE(review): `np` is not imported explicitly in the visible import cell;
# presumably it arrives through one of the star imports -- confirm.
seed1, seed2 = FLAGS.seed1, FLAGS.seed2
np.random.seed(seed1)
tf.set_random_seed(seed2)

In [4]:
# Load data
# load_data(FLAGS.dataset) yields eight values, unpacked here in order:
# the adjacency, the features, the train/val/test label arrays and the
# train/val/test masks.
(adj, features,
 y_train, y_val, y_test,
 train_mask, val_mask, test_mask) = load_data(FLAGS.dataset)

In [5]:
# Some preprocessing
features = preprocess_features(features)

# One recipe per supported model string:
# (lazy support builder, number of supports, model class).
# The builders are lambdas so that only the selected model's support is computed.
# Note: `support` and `num_supports` are not referenced by the later visible
# cells; the model is constructed from adj_list / transfer_list instead.
_model_recipes = {
    'gcn': (lambda: [preprocess_adj(adj)], 1, GCN),
    'gcn_cheby': (lambda: chebyshev_polynomials(adj, FLAGS.max_degree), 1 + FLAGS.max_degree, GCN),
    'dense': (lambda: [preprocess_adj(adj)], 1, MLP),
    'hgcn': (lambda: [preprocess_adj(adj)], 1, HGCN),
}
if FLAGS.model not in _model_recipes:
    raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

_recipe = _model_recipes[FLAGS.model]
support = _recipe[0]()
num_supports = _recipe[1]
model_func = _recipe[2]

# Build the graph object used by the coarsening step from the raw adjacency.
graph, mapping = read_graph_from_adj(adj, FLAGS.dataset)
print('total nodes:', graph.node_num)

total nodes: 7315


In [6]:
# Step-1: Graph Coarsening.
# Coarsen the graph FLAGS.coarsen_level times and record, per level:
#   adj_list      - a copy of each graph's `A` (index 0 is the input graph)
#   node_wgt_list - a copy of each graph's `node_wgt` (index 0 is the input graph)
#   transfer_list - a copy of attribute `C` of the finer graph at each step
original_graph = graph
adj_list = [copy.copy(graph.A)]
node_wgt_list = [copy.copy(graph.node_wgt)]
transfer_list = []
for level in range(1, FLAGS.coarsen_level + 1):
    finer_graph = graph
    match, coarse_graph_size = generate_hybrid_matching(FLAGS.max_node_wgt, finer_graph)
    graph = create_coarse_graph(finer_graph, match, coarse_graph_size)
    # `C` is read from the finer graph only after create_coarse_graph has run.
    # NOTE(review): presumably create_coarse_graph fills in `C` on its input
    # graph -- confirm in the coarsen module.
    transfer_list.append(copy.copy(finer_graph.C))
    adj_list.append(copy.copy(graph.A))
    node_wgt_list.append(copy.copy(graph.node_wgt))
    print('There are %d nodes in the %d coarsened graph' %(graph.node_num, level))

# Header of the per-layer shape log; the remaining log lines apparently come
# from the model constructor in the next cell.
print("\n")
print('layer_index ', 1)
print('input shape:   ', features[-1])

There are 5838 nodes in the 1 coarsened graph
There are 4922 nodes in the 2 coarsened graph
There are 4313 nodes in the 3 coarsened graph
There are 3918 nodes in the 4 coarsened graph


layer_index  1
input shape:    (7315, 320)


In [7]:
# Wrap each level's preprocessed adjacency in a one-element list, which is the
# form handed to the model below.
# This is built as a NEW list instead of overwriting adj_list in place, so the
# cell can be re-run without feeding already-preprocessed entries back into
# preprocess_adj.
support_list = [[preprocess_adj(level_adj)] for level_adj in adj_list]

# Define placeholders
placeholders = {
    # features[2] is used as the dense shape of the sparse feature tensor
    'features': tf.sparse_placeholder(tf.float32, shape=tf.constant(features[2], dtype=tf.int64)),
    'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
    'labels_mask': tf.placeholder(tf.int32),
    # defaults to 0. unless a feed dict overrides it
    'dropout': tf.placeholder_with_default(0., shape=()),
    'num_features_nonzero': tf.placeholder(tf.int32)  # helper variable for sparse dropout
}

# Create model
# NOTE(review): every model class selectable via FLAGS.model receives the
# coarsening keyword arguments; confirm GCN and MLP accept them.
model = model_func(
    placeholders,
    input_dim=features[2][1],
    logging=True,
    transfer_list=transfer_list,
    adj_list=support_list,
    node_wgt_list=node_wgt_list
)

# Initialize session
sess = tf.Session()

output shape:     [5838, 32]
layer_index  2
input shape:    [5838, 32]
output shape:     [4922, 32]
layer_index  3
input shape:    [4922, 32]
output shape:     [4313, 32]
layer_index  4
input shape:    [4313, 32]
output shape:     [3918, 32]
layer_index  5
input shape:    [3918, 32]
output shape:     [4313, 32]
layer_index  6
input shape:    [4313, 32]
output shape:     [4922, 32]
layer_index  7
input shape:    [4922, 32]
output shape:     [5838, 32]
layer_index  8
input shape:    [5838, 32]
output shape:     [7315, 32]
layer_index  9
input shape:    [7315, 32]
output shape:     [7315, 3]
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.



In [8]:
# Define model evaluation function
def evaluate(features, labels, mask, placeholders):
    """Evaluate the model once and time the evaluation.

    A feed dict is built with construct_feed_dict from the four arguments,
    then model.loss and model.accuracy are fetched through the global
    session `sess`.

    Args:
        features: feature input forwarded to construct_feed_dict.
        labels: label input forwarded to construct_feed_dict.
        mask: mask input forwarded to construct_feed_dict.
        placeholders: placeholder dict forwarded to construct_feed_dict.

    Returns:
        A tuple (loss, accuracy, elapsed_seconds).
    """
    started = time.time()
    feed = construct_feed_dict(features, labels, mask, placeholders)
    loss_value, accuracy_value = sess.run([model.loss, model.accuracy], feed_dict=feed)
    elapsed = time.time() - started
    return loss_value, accuracy_value, elapsed

# Init variables
sess.run(tf.global_variables_initializer())

# Per-epoch histories; the training loop appends one entry to each per epoch.
cost_train, acc_train = [], []
cost_val, acc_val = [], []
cost_test, acc_test = [], []

# Running maximum of the test accuracy, updated by the training loop.
best_fcn = 0

In [9]:
# Train model
for epoch in range(FLAGS.epochs):
    epoch_start = time.time()

    # Training step: feed the training split with the dropout flag applied.
    train_feed = construct_feed_dict(features, y_train, train_mask, placeholders)
    train_feed[placeholders['dropout']] = FLAGS.dropout
    _opt_result, train_loss, train_acc = sess.run(
        [model.opt_op, model.loss, model.accuracy], feed_dict=train_feed)

    # Validation pass; its loss is recorded before the test pass starts.
    val_loss, val_acc, _val_seconds = evaluate(features, y_val, val_mask, placeholders)
    cost_val.append(val_loss)

    # Test pass, then record the remaining per-epoch metrics.
    test_loss, test_acc, _test_seconds = evaluate(features, y_test, test_mask, placeholders)
    cost_train.append(train_loss)
    acc_train.append(train_acc)
    cost_test.append(test_loss)
    acc_test.append(test_acc)
    acc_val.append(val_acc)

    # Keep the highest test accuracy seen so far (shown in the log line below).
    best_fcn = max(best_fcn, test_acc)

    # Print results
    print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(train_loss),
          "train_acc=", "{:.5f}".format(train_acc), "val_loss=", "{:.5f}".format(val_loss),
          "val_acc=", "{:.5f}".format(val_acc), "test_acc=", "{:.5f}".format(test_acc),
          "time=", "{:.5f}".format(time.time() - epoch_start),
          "      best test_acc=", "{:.5f}".format(best_fcn))

    # Early stopping: once more than FLAGS.early_stopping epochs have run, stop
    # when the newest validation loss exceeds the mean of the preceding window.
    # The mean is only computed inside the guard, so it never sees an empty slice.
    if epoch > FLAGS.early_stopping:
        recent_val_losses = cost_val[-(FLAGS.early_stopping + 1):-1]
        if cost_val[-1] > np.mean(recent_val_losses):
            print("Early stopping...")
            break

print("Optimization Finished!")

Epoch: 0001 train_loss= 229.97504 train_acc= 0.32516 val_loss= 1.51765 val_acc= 0.46400 test_acc= 0.56967 time= 2.83311       best test_acc= 0.56967
Epoch: 0002 train_loss= 52.73110 train_acc= 0.44756 val_loss= 1.55825 val_acc= 0.46200 test_acc= 0.57889 time= 1.80510       best test_acc= 0.57889
Epoch: 0003 train_loss= 10.09617 train_acc= 0.49862 val_loss= 1.60707 val_acc= 0.46600 test_acc= 0.58197 time= 1.75042       best test_acc= 0.58197
Epoch: 0004 train_loss= 11.63850 train_acc= 0.53116 val_loss= 1.66016 val_acc= 0.46400 test_acc= 0.58504 time= 1.74035       best test_acc= 0.58504
Epoch: 0005 train_loss= 5.92467 train_acc= 0.52941 val_loss= 1.71398 val_acc= 0.46400 test_acc= 0.58607 time= 1.73197       best test_acc= 0.58607
Epoch: 0006 train_loss= 5.69362 train_acc= 0.53442 val_loss= 1.76373 val_acc= 0.46600 test_acc= 0.58709 time= 1.79435       best test_acc= 0.58709
Epoch: 0007 train_loss= 4.66948 train_acc= 0.52966 val_loss= 1.81008 val_acc= 0.47200 test_acc= 0.58607 time= 1.8

KeyboardInterrupt: 

In [None]:
############################### test acc for every epoch
# Report the test accuracy at the epoch selected by a validation criterion:
#   - 'citeseer' / 'pubmed': epoch with the highest validation accuracy
#   - 'cora' and every other dataset (including the default 'hateful'):
#     epoch with the lowest validation loss
# Previously nothing was printed for datasets outside cora/citeseer/pubmed.
mat = np.array(acc_test)

# The training loop appends to cost_val before the test pass and to acc_val
# after it, so an interrupted epoch can leave the histories with different
# lengths. Only epochs present in all three histories are considered, which
# keeps the selected index valid for `mat`.
n_done = min(len(mat), len(cost_val), len(acc_val))

if n_done == 0:
    # argmin/argmax raise ValueError on empty input; report instead of crashing.
    print('No completed epochs; nothing to report.')
else:
    if FLAGS.dataset == 'citeseer' or FLAGS.dataset == 'pubmed':
        val_index_best = np.argmax(np.array(acc_val[:n_done]))
    else:
        val_index_best = np.argmin(np.array(cost_val[:n_done]))
    # val_index_best is a 0-based index into the per-epoch histories.
    print('best epoch:   ',val_index_best)
    print('test result:  ',mat[val_index_best])