In [1]:
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from utils import *
from models import GCN

# Set random seed
seed = 123
np.random.seed(seed)
tf.set_random_seed(seed)



In [2]:
# get paths to available grahs metadata
adj_mat, features_mat, train_labels_mat, test_labels_mat, val_labels_mat, train_mask_mat, test_mask_mat, val_mask_mat = load_data("txt_graph2216_21012020", 80000)

In [3]:
print(adj_mat.shape)
print(features_mat.shape)
print(train_labels_mat.shape)
print(test_labels_mat.shape)
print(val_labels_mat.shape)
print(train_mask_mat.shape)
print(test_mask_mat.shape)
print(val_mask_mat.shape)

(463317, 463317)
(463317, 100)
(463317, 5)
(463317, 5)
(463317, 5)
(463317, 5)
(463317, 5)
(463317, 5)


In [10]:
#delete all flags before declaration new one
del_all_flags(tf.flags.FLAGS)

# Settings
flags = tf.app.flags
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('mode', '', 'kernel') # for line by line mode
tf.app.flags.DEFINE_string('port', '', 'kernel') # for line by line mode
tf.app.flags.DEFINE_string('f', '', 'kernel') # for jupyter notebook

flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('epochs', 200, 'Number of epochs to train.')
flags.DEFINE_integer('hidden1', 32, 'Number of units in hidden layer 1.') #16
flags.DEFINE_float('dropout', 0.5, 'Dropout rate (1 - keep probability).') #0.5
flags.DEFINE_float('weight_decay', 5e-4, 'Weight for L2 loss on embedding matrix.')
flags.DEFINE_integer('early_stopping', 10, 'Tolerance for early stopping (# of epochs).')

# Some preprocessing
features = preprocess_features(features_mat)
support = [preprocess_adj(adj_mat)]
num_supports = 1
model_func = GCN


In [11]:
# Define placeholders
placeholders = {
    'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
    'features': tf.sparse_placeholder(tf.float32, shape=tf.constant(features[2], dtype=tf.int64)),
    'labels': tf.placeholder(tf.float32, shape=(None, train_labels_mat.shape[1])),
    'labels_mask': tf.placeholder(tf.int32),
    'dropout': tf.placeholder_with_default(0., shape=()),
    'num_features_nonzero': tf.placeholder(tf.int32)  # helper variable for sparse dropout
}

# Create model
model = model_func(placeholders, input_dim=features[2][1], logging=True)

In [12]:
# Initialize session
sess = tf.Session()
# Init variables
sess.run(tf.global_variables_initializer())

In [None]:
# Train model
cost_val = []

for epoch in range(FLAGS.epochs):

    t = time.time()
    # Construct feed dictionary
    feed_dict = construct_feed_dict(features, support, train_labels_mat, train_mask_mat[:,1], placeholders)
    feed_dict.update({placeholders['dropout']: FLAGS.dropout})

    # Training step
    outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)

    # Validation
    cost, acc, duration = evaluate(features, support, val_mask_mat, val_mask_mat[:,1], placeholders, sess, model)
    cost_val.append(cost)

    # Print results
    print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]),
          "train_acc=", "{:.5f}".format(outs[2]), "val_loss=", "{:.5f}".format(cost),
          "val_acc=", "{:.5f}".format(acc), "time=", "{:.5f}".format(time.time() - t))

    if epoch > FLAGS.early_stopping and cost_val[-1] > np.mean(cost_val[-(FLAGS.early_stopping+1):-1]):
        print("Early stopping...")
        break

print("Optimization Finished!")

Epoch: 0001 train_loss= 4.32870 train_acc= 0.17825 val_loss= 13.06805 val_acc= 0.36766 time= 11.53591
Epoch: 0002 train_loss= 4.02245 train_acc= 0.17789 val_loss= 13.89180 val_acc= 0.39706 time= 10.83106
Epoch: 0003 train_loss= 3.63020 train_acc= 0.18312 val_loss= 13.53844 val_acc= 0.39259 time= 10.80459
Epoch: 0004 train_loss= 3.22804 train_acc= 0.19120 val_loss= 12.41184 val_acc= 0.37676 time= 10.84739
Epoch: 0005 train_loss= 2.92034 train_acc= 0.19462 val_loss= 11.27649 val_acc= 0.31688 time= 11.12530
Epoch: 0006 train_loss= 3.07298 train_acc= 0.20087 val_loss= 10.60520 val_acc= 0.29701 time= 10.92273
Epoch: 0007 train_loss= 2.79790 train_acc= 0.20654 val_loss= 10.31294 val_acc= 0.29566 time= 10.97037
Epoch: 0008 train_loss= 2.83820 train_acc= 0.20877 val_loss= 9.72185 val_acc= 0.28333 time= 10.87753
Epoch: 0009 train_loss= 2.41827 train_acc= 0.21014 val_loss= 9.45780 val_acc= 0.28092 time= 11.40712
Epoch: 0010 train_loss= 2.33026 train_acc= 0.21302 val_loss= 9.47966 val_acc= 0.2772

In [9]:
# Testing
test_cost, test_acc, test_duration = evaluate(features, support, test_labels_mat, test_mask_mat[:,1], placeholders, sess, model)
print("Test set results:", "cost=", "{:.5f}".format(test_cost),
      "accuracy=", "{:.5f}".format(test_acc), "time=", "{:.5f}".format(test_duration))

Test set results: cost= 1.60940 accuracy= 0.31509 time= 6.00551
