In [1]:
import torch
import numpy as np
import math
import time
import argparse
import scipy.sparse as sp
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
class GraphConvolution(Module):
    """
    Single graph-convolution layer, similar to https://arxiv.org/abs/1609.02907

    The forward pass computes ``adj @ (input @ weight)`` and adds ``bias``
    when the layer was built with one.
    """

    def __init__(self, in_features, out_features, bias=True):
        """Allocate an (in_features, out_features) weight and an optional bias."""
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        if not bias:
            # Registering None keeps `self.bias` defined for bias-free layers.
            self.register_parameter('bias', None)
        else:
            self.bias = Parameter(torch.FloatTensor(out_features))
        self.reset_parameters()

    def reset_parameters(self):
        """Redraw weight, then bias, uniformly from [-b, b] with b = 1/sqrt(out_features)."""
        bound = 1. / math.sqrt(self.weight.size(1))
        # Weight is drawn before bias so the random stream is consumed in a fixed order.
        for param in (self.weight, self.bias):
            if param is not None:
                param.data.uniform_(-bound, bound)

    def forward(self, input, adj):
        """Project `input` with the weight, aggregate through `adj`, add the bias if any."""
        projected = torch.mm(input, self.weight)
        aggregated = torch.spmm(adj, projected)
        if self.bias is None:
            return aggregated
        return aggregated + self.bias

    def __repr__(self):
        return '{} ({} -> {})'.format(
            self.__class__.__name__, self.in_features, self.out_features)

In [3]:
class GCN(nn.Module):
    """Two stacked GraphConvolution layers that output row-wise log-probabilities."""

    def __init__(self, nfeat, nhid, nclass, dropout):
        """Build an nfeat -> nhid -> nclass stack; `dropout` is the drop probability."""
        super().__init__()
        self.dropout = dropout
        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nclass)

    def forward(self, x, adj):
        """Apply gc1 + ReLU, dropout (only in training mode), gc2, then log-softmax over dim 1."""
        hidden = F.relu(self.gc1(x, adj))
        hidden = F.dropout(hidden, self.dropout, training=self.training)
        logits = self.gc2(hidden, adj)
        return F.log_softmax(logits, dim=1)

In [4]:
def encode_onehot(labels):
    """One-hot encode a 1-D sequence of class labels.

    Columns follow the sorted order of the distinct labels, so the encoding
    is identical on every run. (Iterating a ``set`` of strings, as an
    unordered collection, does not give a stable column assignment across
    interpreter runs.)

    Args:
        labels: 1-D sequence or array of hashable, mutually comparable labels.

    Returns:
        ``np.int32`` array of shape ``(len(labels), n_classes)`` with exactly
        one ``1`` per row.
    """
    # `inverse[i]` is the position of labels[i] within the sorted unique classes.
    classes, inverse = np.unique(np.asarray(labels).ravel(), return_inverse=True)
    # Build the identity once and pick one row per sample.
    return np.eye(len(classes), dtype=np.int32)[inverse]

def normalize(mx):
    """Row-normalize a sparse matrix so every non-empty row sums to 1.

    Rows whose sum is zero are left as all-zero rows. Integer-typed input is
    supported: the row sums are promoted to float before being inverted,
    because NumPy rejects negative integer powers of integer arrays.

    Args:
        mx: scipy sparse matrix.

    Returns:
        ``diag(1 / rowsum) @ mx`` as a scipy sparse matrix.
    """
    rowsum = np.asarray(mx.sum(1)).flatten()
    if not np.issubdtype(rowsum.dtype, np.inexact):
        rowsum = rowsum.astype(np.float64)
    # Divide only where the row sum is non-zero; other entries keep their 0
    # without triggering a divide-by-zero warning.
    r_inv = np.zeros_like(rowsum)
    np.divide(1, rowsum, out=r_inv, where=rowsum != 0)
    # Any remaining infinity (reciprocal overflow) is treated as an empty row.
    r_inv[np.isinf(r_inv)] = 0.
    return sp.diags(r_inv).dot(mx)


def accuracy(output, labels):
    """Return the fraction of rows in `output` whose dim-1 maximum index equals `labels`.

    The result is a 0-dim double tensor.
    """
    _, predicted = output.max(1)
    hits = predicted.type_as(labels).eq(labels).double().sum()
    return hits / len(labels)


def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a float32 torch sparse COO tensor.

    Args:
        sparse_mx: any scipy sparse matrix; it is converted to COO format and
            its values are cast to ``np.float32``.

    Returns:
        A torch sparse COO tensor with the same shape and non-zero entries.
    """
    coo = sparse_mx.tocoo().astype(np.float32)
    # torch expects a (2, nnz) int64 index tensor: row indices on top, columns below.
    indices = torch.from_numpy(np.vstack((coo.row, coo.col)).astype(np.int64))
    values = torch.from_numpy(coo.data)
    # torch.sparse_coo_tensor replaces the deprecated torch.sparse.FloatTensor
    # constructor; the dtype is taken from `values` (float32).
    return torch.sparse_coo_tensor(indices, values, torch.Size(coo.shape))

In [5]:
def load_data(path="./data/cora/", dataset="cora"):
    """Load citation network dataset (cora only for now).

    Reads ``<path>/<dataset>.content`` (one row per node: node id, feature
    columns, class label in the last column) and ``<path>/<dataset>.cites``
    (two columns of node ids, one edge per row).

    Args:
        path: directory containing the dataset files; a trailing separator
            is optional.
        dataset: base name of the ``.content`` / ``.cites`` files.

    Returns:
        Tuple ``(adj, features, labels, idx_train, idx_val, idx_test)``:
        ``adj`` is the row-normalized symmetric adjacency (with self-loops)
        as a torch sparse tensor, ``features`` the row-normalized dense
        feature matrix, ``labels`` a LongTensor of class indices, and the
        three ``idx_*`` values LongTensors of node positions.

    Raises:
        ValueError: if the ``.cites`` file references a node id that does not
            appear in the ``.content`` file.
    """
    import os  # local import: only this function builds file paths

    print('Loading {} dataset...'.format(dataset))

    # os.path.join works whether or not `path` ends with a separator.
    content_file = os.path.join(path, "{}.content".format(dataset))
    cites_file = os.path.join(path, "{}.cites".format(dataset))

    idx_features_labels = np.genfromtxt(content_file, dtype=np.dtype(str))
    features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype=np.float32)
    labels = encode_onehot(idx_features_labels[:, -1])

    # build graph
    idx = np.array(idx_features_labels[:, 0], dtype=np.int32)
    # Map raw node ids to their row position in the content file.
    idx_map = {j: i for i, j in enumerate(idx)}
    # reshape(-1, 2) keeps the array 2-D even when the file holds a single edge.
    edges_unordered = np.genfromtxt(cites_file, dtype=np.int32).reshape(-1, 2)
    try:
        edges = np.array([idx_map[node] for node in edges_unordered.flatten()],
                         dtype=np.int32).reshape(edges_unordered.shape)
    except KeyError as err:
        raise ValueError(
            "{} references node id {} that is missing from {}".format(
                cites_file, err.args[0], content_file)) from err
    adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                        shape=(labels.shape[0], labels.shape[0]),
                        dtype=np.float32)

    # build symmetric adjacency matrix: wherever adj.T has a larger entry than
    # adj, take the adj.T entry instead
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

    features = normalize(features)
    # Add self-loops before row-normalizing the adjacency.
    adj = normalize(adj + sp.eye(adj.shape[0]))

    # Fixed positional split: rows 0-139 train, 200-499 validation, 500-1499 test.
    idx_train = range(140)
    idx_val = range(200, 500)
    idx_test = range(500, 1500)

    features = torch.FloatTensor(np.array(features.todense()))
    # Collapse the one-hot rows back to integer class indices.
    labels = torch.LongTensor(np.where(labels)[1])
    adj = sparse_mx_to_torch_sparse_tensor(adj)

    idx_train = torch.LongTensor(idx_train)
    idx_val = torch.LongTensor(idx_val)
    idx_test = torch.LongTensor(idx_test)

    return adj, features, labels, idx_train, idx_val, idx_test

In [6]:
# Load data using load_data()'s default path and dataset name. The six returned
# values become notebook-level globals used by the model, train() and test().
adj, features, labels, idx_train, idx_val, idx_test = load_data()

Loading cora dataset...


In [7]:
# Run configuration: everything a reader might want to tune.
seed = 42
epochs = 200
weightDecay = 5e-4
learningRate = 0.01
hidden = 16
dropout = 0.5

# Apply the seed before the model is built, so weight initialisation and
# dropout masks are reproducible from run to run.
np.random.seed(seed)
torch.manual_seed(seed)

# The number of classes is inferred from the largest label index.
model = GCN(nfeat=features.shape[1],
            nhid=hidden,
            nclass=labels.max().item() + 1,
            dropout=dropout)
optimizer = optim.Adam(model.parameters(),
                       lr=learningRate, weight_decay=weightDecay)

In [8]:
def train(epoch):
    """Run one full-batch optimisation step, then report train/validation metrics.

    Uses the notebook-level ``model``, ``optimizer``, ``features``, ``adj``,
    ``labels``, ``idx_train`` and ``idx_val``. The model is left in eval mode
    when the function returns; the next call switches it back to train mode.

    Args:
        epoch: zero-based epoch index, printed as ``epoch + 1``.
    """
    t = time.time()
    model.train()
    optimizer.zero_grad()
    output = model(features, adj)
    loss_train = F.nll_loss(output[idx_train], labels[idx_train])
    acc_train = accuracy(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()

    # Evaluate validation set performance separately,
    # deactivates dropout during validation run.
    model.eval()
    # No gradients are needed for validation, so skip building the autograd graph.
    with torch.no_grad():
        output = model(features, adj)
        loss_val = F.nll_loss(output[idx_val], labels[idx_val])
        acc_val = accuracy(output[idx_val], labels[idx_val])

    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(loss_train.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'loss_val: {:.4f}'.format(loss_val.item()),
          'acc_val: {:.4f}'.format(acc_val.item()),
          'time: {:.4f}s'.format(time.time() - t))

In [9]:
def test():
    """Evaluate the notebook-level ``model`` on ``idx_test`` and print loss and accuracy.

    Runs in eval mode (dropout disabled) and without gradient tracking, since
    nothing is back-propagated here.
    """
    model.eval()
    with torch.no_grad():
        output = model(features, adj)
        loss_test = F.nll_loss(output[idx_test], labels[idx_test])
        acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))

In [10]:
# Train model: one call to train() per epoch, with wall-clock timing of the whole run.
t_total = time.time()
for epoch in range(epochs):
    # train() prints the per-epoch train/validation metrics itself.
    train(epoch)
print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

Epoch: 0001 loss_train: 1.9979 acc_train: 0.0857 loss_val: 1.9591 acc_val: 0.1033 time: 0.0179s
Epoch: 0002 loss_train: 1.9739 acc_train: 0.0857 loss_val: 1.9368 acc_val: 0.1033 time: 0.0140s
Epoch: 0003 loss_train: 1.9501 acc_train: 0.1643 loss_val: 1.9152 acc_val: 0.3800 time: 0.0100s
Epoch: 0004 loss_train: 1.9329 acc_train: 0.1929 loss_val: 1.8946 acc_val: 0.3500 time: 0.0100s
Epoch: 0005 loss_train: 1.9098 acc_train: 0.3000 loss_val: 1.8750 acc_val: 0.3500 time: 0.0110s
Epoch: 0006 loss_train: 1.8829 acc_train: 0.2429 loss_val: 1.8567 acc_val: 0.3500 time: 0.0100s
Epoch: 0007 loss_train: 1.8703 acc_train: 0.2643 loss_val: 1.8399 acc_val: 0.3500 time: 0.0110s
Epoch: 0008 loss_train: 1.8521 acc_train: 0.2500 loss_val: 1.8249 acc_val: 0.3500 time: 0.0110s
Epoch: 0009 loss_train: 1.8140 acc_train: 0.3071 loss_val: 1.8113 acc_val: 0.3500 time: 0.0100s
Epoch: 0010 loss_train: 1.8152 acc_train: 0.2857 loss_val: 1.7992 acc_val: 0.3500 time: 0.0110s
Epoch: 0011 loss_train: 1.7981 acc_train

Epoch: 0088 loss_train: 0.8743 acc_train: 0.8214 loss_val: 1.0472 acc_val: 0.7700 time: 0.0120s
Epoch: 0089 loss_train: 0.8639 acc_train: 0.8357 loss_val: 1.0395 acc_val: 0.7733 time: 0.0120s
Epoch: 0090 loss_train: 0.8431 acc_train: 0.8000 loss_val: 1.0328 acc_val: 0.7767 time: 0.0100s
Epoch: 0091 loss_train: 0.7865 acc_train: 0.8429 loss_val: 1.0262 acc_val: 0.7833 time: 0.0120s
Epoch: 0092 loss_train: 0.8160 acc_train: 0.8357 loss_val: 1.0192 acc_val: 0.7900 time: 0.0120s
Epoch: 0093 loss_train: 0.8104 acc_train: 0.8571 loss_val: 1.0114 acc_val: 0.7900 time: 0.0110s
Epoch: 0094 loss_train: 0.8142 acc_train: 0.8500 loss_val: 1.0042 acc_val: 0.7967 time: 0.0100s
Epoch: 0095 loss_train: 0.7982 acc_train: 0.8286 loss_val: 0.9957 acc_val: 0.7967 time: 0.0110s
Epoch: 0096 loss_train: 0.8197 acc_train: 0.8500 loss_val: 0.9881 acc_val: 0.8033 time: 0.0110s
Epoch: 0097 loss_train: 0.8027 acc_train: 0.8571 loss_val: 0.9809 acc_val: 0.8000 time: 0.0100s
Epoch: 0098 loss_train: 0.7933 acc_train

Epoch: 0176 loss_train: 0.5099 acc_train: 0.9143 loss_val: 0.7345 acc_val: 0.8033 time: 0.0110s
Epoch: 0177 loss_train: 0.5396 acc_train: 0.8929 loss_val: 0.7323 acc_val: 0.8067 time: 0.0110s
Epoch: 0178 loss_train: 0.4887 acc_train: 0.9571 loss_val: 0.7303 acc_val: 0.8067 time: 0.0120s
Epoch: 0179 loss_train: 0.4548 acc_train: 0.9429 loss_val: 0.7282 acc_val: 0.8033 time: 0.0120s
Epoch: 0180 loss_train: 0.4537 acc_train: 0.9500 loss_val: 0.7269 acc_val: 0.8067 time: 0.0120s
Epoch: 0181 loss_train: 0.4569 acc_train: 0.9571 loss_val: 0.7258 acc_val: 0.8067 time: 0.0120s
Epoch: 0182 loss_train: 0.4692 acc_train: 0.9429 loss_val: 0.7252 acc_val: 0.8100 time: 0.0130s
Epoch: 0183 loss_train: 0.4522 acc_train: 0.9286 loss_val: 0.7248 acc_val: 0.8033 time: 0.0120s
Epoch: 0184 loss_train: 0.4359 acc_train: 0.9500 loss_val: 0.7248 acc_val: 0.8067 time: 0.0110s
Epoch: 0185 loss_train: 0.4356 acc_train: 0.9429 loss_val: 0.7240 acc_val: 0.8067 time: 0.0100s
Epoch: 0186 loss_train: 0.4148 acc_train

In [11]:
# Testing: evaluate the trained model on the test indices and print the loss and accuracy.
test()

Test set results: loss= 0.7389 accuracy= 0.8290
