In [10]:
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from utils import *
from models import GCN, MLP

# Set random seed
seed = 123
np.random.seed(seed)
tf.set_random_seed(seed)



In [11]:
# get paths to available grahs metadata
adj_mat, features_mat, train_labels_mat, test_labels_mat, val_labels_mat, train_mask_mat, test_mask_mat, val_mask_mat = load_data("txt_graph2216_21012020", 80)

In [12]:
print(adj_mat.shape)
print(features_mat.shape)
print(train_labels_mat.shape)
print(test_labels_mat.shape)
print(val_labels_mat.shape)
print(train_mask_mat.shape)
print(test_mask_mat.shape)
print(val_mask_mat.shape)

(16542, 16542)
(16542, 100)
(16542, 5)
(16542, 5)
(16542, 5)
(16542, 5)
(16542, 5)
(16542, 5)


In [13]:
#delete all flags before declaration new one
del_all_flags(tf.flags.FLAGS)

# Settings
flags = tf.app.flags
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('mode', '', 'kernel') # for line by line mode
tf.app.flags.DEFINE_string('port', '', 'kernel') # for line by line mode
tf.app.flags.DEFINE_string('f', '', 'kernel') # for jupyter notebook

flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('epochs', 200, 'Number of epochs to train.')
flags.DEFINE_integer('hidden1', 16, 'Number of units in hidden layer 1.')
flags.DEFINE_float('dropout', 0.5, 'Dropout rate (1 - keep probability).')
flags.DEFINE_float('weight_decay', 5e-4, 'Weight for L2 loss on embedding matrix.')
flags.DEFINE_integer('early_stopping', 50, 'Tolerance for early stopping (# of epochs).')

# Some preprocessing
features = preprocess_features(features_mat)
support = [preprocess_adj(adj_mat)]
num_supports = 1
model_func = GCN


In [14]:
# Define placeholders
placeholders = {
    'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
    'features': tf.sparse_placeholder(tf.float32, shape=tf.constant(features[2], dtype=tf.int64)),
    'labels': tf.placeholder(tf.float32, shape=(None, train_labels_mat.shape[1])),
    'labels_mask': tf.placeholder(tf.int32),
    'dropout': tf.placeholder_with_default(0., shape=()),
    'num_features_nonzero': tf.placeholder(tf.int32)  # helper variable for sparse dropout
}

# Create model
model = model_func(placeholders, input_dim=features[2][1], logging=True)

In [15]:
# Initialize session
sess = tf.Session()
# Init variables
sess.run(tf.global_variables_initializer())

In [16]:
model

<models.GCN at 0x7f3c004a9e48>

In [19]:
# Train model
cost_val = []

for epoch in range(FLAGS.epochs):

    t = time.time()
    # Construct feed dictionary
    feed_dict = construct_feed_dict(features, support, train_labels_mat, train_mask_mat[:,1], placeholders)
    feed_dict.update({placeholders['dropout']: FLAGS.dropout})

    # Training step
    outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)

    # Validation
    cost, acc, duration = evaluate(features, support, val_mask_mat, val_mask_mat[:,1], placeholders, sess, model)
    cost_val.append(cost)

    # Print results
    print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]),
          "train_acc=", "{:.5f}".format(outs[2]), "val_loss=", "{:.5f}".format(cost),
          "val_acc=", "{:.5f}".format(acc), "time=", "{:.5f}".format(time.time() - t))

    if epoch > FLAGS.early_stopping and cost_val[-1] > np.mean(cost_val[-(FLAGS.early_stopping+1):-1]):
        print("Early stopping...")
        break

print("Optimization Finished!")

Epoch: 0001 train_loss= 1.63588 train_acc= 0.27825 val_loss= 45.02081 val_acc= 0.16773 time= 0.32626
Epoch: 0002 train_loss= 1.63750 train_acc= 0.27575 val_loss= 44.60149 val_acc= 0.16942 time= 0.30999
Epoch: 0003 train_loss= 1.67574 train_acc= 0.27047 val_loss= 44.08991 val_acc= 0.17281 time= 0.31172
Epoch: 0004 train_loss= 1.63165 train_acc= 0.28333 val_loss= 43.70113 val_acc= 0.17535 time= 0.29996
Epoch: 0005 train_loss= 1.63680 train_acc= 0.26918 val_loss= 43.30085 val_acc= 0.17831 time= 0.31037
Epoch: 0006 train_loss= 1.65396 train_acc= 0.28121 val_loss= 42.99154 val_acc= 0.17704 time= 0.30349
Epoch: 0007 train_loss= 1.66311 train_acc= 0.27232 val_loss= 42.77729 val_acc= 0.17577 time= 0.30056
Epoch: 0008 train_loss= 1.63266 train_acc= 0.27001 val_loss= 42.83186 val_acc= 0.17493 time= 0.30015
Epoch: 0009 train_loss= 1.62589 train_acc= 0.28639 val_loss= 42.90338 val_acc= 0.17027 time= 0.30440
Epoch: 0010 train_loss= 1.64319 train_acc= 0.26372 val_loss= 43.20482 val_acc= 0.16222 time

In [21]:
# Testing
test_cost, test_acc, test_duration = evaluate(features, support, test_labels_mat, test_mask_mat[:,1], placeholders, sess, model)
print("Test set results:", "cost=", "{:.5f}".format(test_cost),
      "accuracy=", "{:.5f}".format(test_acc), "time=", "{:.5f}".format(test_duration))

Test set results: cost= 1.63567 accuracy= 0.10729 time= 0.18900
