In [5]:
from __future__ import division
from __future__ import print_function

import time
import tensorflow as tf
from utils import *
from models import GCN, MLP
import easydict

In [6]:
# Set random seed
# Seed both NumPy and TensorFlow with the same value so runs are repeatable.
seed = 123
np.random.seed(seed)
tf.set_random_seed(seed)

# Settings
flags = tf.app.flags
FLAGS = flags.FLAGS


def _define_flag_once(define, name, default, help_text):
    """Register a flag unless one with this name is already defined.

    Re-running this cell in a live kernel would otherwise call the DEFINE_*
    function a second time for every flag, which the flags library rejects
    as a duplicate definition.

    Args:
        define: one of the flags.DEFINE_* functions.
        name: flag name.
        default: default value for the flag.
        help_text: help string for the flag.

    Note:
        An already-defined flag keeps its existing default; restart the
        kernel after editing a default below for it to take effect.
    """
    if name not in FLAGS:
        define(name, default, help_text)


# NOTE(review): presumably 'f' absorbs the `-f <connection file>` argument
# that Jupyter passes to the kernel process -- confirm.
_define_flag_once(flags.DEFINE_string, 'f', '', 'kernel')
_define_flag_once(flags.DEFINE_string, 'dataset', 'hateful', 'Dataset string.')  # 'cora', 'citeseer', 'pubmed'
_define_flag_once(flags.DEFINE_string, 'model', 'gcn', 'Model string.')  # 'gcn', 'gcn_cheby', 'dense'
_define_flag_once(flags.DEFINE_float, 'learning_rate', 0.01, 'Initial learning rate.')
_define_flag_once(flags.DEFINE_integer, 'epochs', 200, 'Number of epochs to train.')
_define_flag_once(flags.DEFINE_integer, 'hidden1', 20, 'Number of units in hidden layer 1.')
_define_flag_once(flags.DEFINE_float, 'dropout', 0.5, 'Dropout rate (1 - keep probability).')
_define_flag_once(flags.DEFINE_float, 'weight_decay', 5e-4, 'Weight for L2 loss on embedding matrix.')
_define_flag_once(flags.DEFINE_integer, 'early_stopping', 10, 'Tolerance for early stopping (# of epochs).')
_define_flag_once(flags.DEFINE_integer, 'max_degree', 2, 'Maximum Chebyshev polynomial degree.')

# Attribute-style copy of the same defaults as the flags above.
# NOTE(review): none of the visible cells read `args` (they all use FLAGS);
# kept in case code outside this view depends on it.
args = easydict.EasyDict({
        "dataset": "hateful",
        "model": "gcn",
        "learning_rate": 0.01,
        "hidden1": 20,
        "epochs" : 200,
        "dropout": 0.5,
        "weight_decay": 5e-4,
        "early_stopping": 10,
        "max_degree": 2
})

In [7]:
# Load data
# load_data yields eight values: the adjacency structure, the feature
# matrix, three label arrays and their three matching masks.
(adj, features,
 y_train, y_val, y_test,
 train_mask, val_mask, test_mask) = load_data(FLAGS.dataset)

In [8]:
# Preprocess the features, then pick the graph support and model class
# according to FLAGS.model.
features = preprocess_features(features)

# Reject unknown model names up front.
if FLAGS.model not in ('gcn', 'gcn_cheby', 'dense'):
    raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

if FLAGS.model == 'gcn_cheby':
    # Chebyshev variant: one support per polynomial term (degree 0..max_degree).
    support = chebyshev_polynomials(adj, FLAGS.max_degree)
    num_supports = 1 + FLAGS.max_degree
else:
    # 'gcn' and 'dense' both build a single preprocessed adjacency
    # (for 'dense' it is not used).
    support = [preprocess_adj(adj)]
    num_supports = 1

# Only the 'dense' option uses the MLP; both GCN variants share one class.
model_func = MLP if FLAGS.model == 'dense' else GCN

In [9]:
# Define placeholders
# Built entry by entry, in the same order as the keys are listed, so the
# placeholder ops are created in a fixed sequence.
placeholders = {}
placeholders['support'] = [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)]
placeholders['features'] = tf.sparse_placeholder(
    tf.float32,
    shape=tf.constant(features[2], dtype=tf.int64),
)
# One label column per column of y_train; the row count is left open.
placeholders['labels'] = tf.placeholder(tf.float32, shape=(None, y_train.shape[1]))
placeholders['labels_mask'] = tf.placeholder(tf.int32)
# Dropout falls back to 0. whenever no value is fed.
placeholders['dropout'] = tf.placeholder_with_default(0., shape=())
# helper variable for sparse dropout
placeholders['num_features_nonzero'] = tf.placeholder(tf.int32)

In [10]:
# Create model
# features[2] is the shape fed to the sparse features placeholder; its
# second entry is used as the model's input dimension.
model = model_func(
    placeholders,
    input_dim=features[2][1],
    logging=True
)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.



In [11]:
# Initialize session
# A single session is created here and reused by the evaluation helper,
# the variable initialisation and the training loop in the cells below.
# NOTE(review): it is not closed anywhere in the visible cells.
sess = tf.Session()

In [12]:
# Define model evaluation function
def evaluate(features, support, labels, mask, placeholders):
    """Run one forward pass and report loss, accuracy and elapsed time.

    Builds a feed dict from the arguments via construct_feed_dict and
    evaluates model.loss and model.accuracy in the notebook-level session
    `sess`. No dropout value is fed here.

    Returns:
        A (loss, accuracy, seconds) tuple, where seconds is the wall-clock
        time spent building the feed dict and running the session.
    """
    started = time.time()
    feed = construct_feed_dict(features, support, labels, mask, placeholders)
    loss, accuracy = sess.run([model.loss, model.accuracy], feed_dict=feed)
    elapsed = time.time() - started
    return loss, accuracy, elapsed

In [13]:
# Init variables
init_op = tf.global_variables_initializer()
sess.run(init_op)

# Validation losses, one entry appended per training epoch.
cost_val = list()

In [14]:
# Train model
for epoch in range(FLAGS.epochs):
    t = time.time()

    # Feed the training labels and mask, with dropout switched on for this step
    feed_dict = construct_feed_dict(features, support, y_train, train_mask, placeholders)
    feed_dict[placeholders['dropout']] = FLAGS.dropout

    # Training step: outs[0] is the optimiser op result, outs[1] the loss,
    # outs[2] the accuracy
    outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)

    # Validation
    cost, acc, duration = evaluate(features, support, y_val, val_mask, placeholders)
    cost_val.append(cost)

    # Print results (label/value pairs, space-separated on one line)
    epoch_report = [
        "Epoch:", '%04d' % (epoch + 1),
        "train_loss=", "{:.5f}".format(outs[1]),
        "train_acc=", "{:.5f}".format(outs[2]),
        "val_loss=", "{:.5f}".format(cost),
        "val_acc=", "{:.5f}".format(acc),
        "time=", "{:.5f}".format(time.time() - t),
    ]
    print(*epoch_report)

    # Early stopping is only considered once enough epochs have run
    if epoch <= FLAGS.early_stopping:
        continue

    # Stop when the latest validation loss exceeds the mean of the
    # FLAGS.early_stopping losses that preceded it
    previous_costs = cost_val[-(FLAGS.early_stopping + 1):-1]
    if cost_val[-1] > np.mean(previous_costs):
        print("Early stopping...")
        break

print("Optimization Finished!")

Epoch: 0001 train_loss= 1.12286 train_acc= 0.51464 val_loss= 1.08760 val_acc= 0.45200 time= 3.30514
Epoch: 0002 train_loss= 1.23645 train_acc= 0.58898 val_loss= 1.07177 val_acc= 0.45800 time= 2.42036
Epoch: 0003 train_loss= 1.13121 train_acc= 0.59950 val_loss= 1.05773 val_acc= 0.46200 time= 2.63296
Epoch: 0004 train_loss= 1.21780 train_acc= 0.60100 val_loss= 1.04237 val_acc= 0.46000 time= 2.53329
Epoch: 0005 train_loss= 1.09237 train_acc= 0.61051 val_loss= 1.02620 val_acc= 0.45800 time= 2.58085
Epoch: 0006 train_loss= 1.11170 train_acc= 0.60125 val_loss= 1.01272 val_acc= 0.46000 time= 2.54851
Epoch: 0007 train_loss= 1.00389 train_acc= 0.60075 val_loss= 1.00185 val_acc= 0.45800 time= 2.52544
Epoch: 0008 train_loss= 0.99915 train_acc= 0.60500 val_loss= 0.99241 val_acc= 0.46200 time= 2.51229
Epoch: 0009 train_loss= 0.98976 train_acc= 0.59574 val_loss= 0.98636 val_acc= 0.46000 time= 2.51819
Epoch: 0010 train_loss= 0.99151 train_acc= 0.60250 val_loss= 0.98191 val_acc= 0.46000 time= 2.52904


In [15]:
# Testing
# Evaluate once on the test labels/mask and print a one-line summary.
test_cost, test_acc, test_duration = evaluate(features, support, y_test, test_mask, placeholders)
test_report = [
    "Test set results:",
    "cost=", "{:.5f}".format(test_cost),
    "accuracy=", "{:.5f}".format(test_acc),
    "time=", "{:.5f}".format(test_duration),
]
print(*test_report)

Test set results: cost= 0.93349 accuracy= 0.58299 time= 1.28519
