<h2> Graph Convolutional Networks for Semi Supervised Node Classification</h2>

This code extends https://github.com/tkipf/gcn to visualize, in real time how the karate club node embeddings change during semi-supervised training, when only one point from each community is labelled.

references: <br>
https://en.wikipedia.org/wiki/Zachary%27s_karate_club<br>
https://arxiv.org/abs/1609.02907<br>
http://tkipf.github.io/graph-convolutional-networks/<br>

In [1]:
from __future__ import division
from __future__ import print_function

import time
import argparse
import numpy as np

import matplotlib.pyplot as plt

from sklearn.manifold import TSNE


import torch
import torch.nn.functional as F
import torch.optim as optim
import torch.nn as nn
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module

from utils import normalize, sparse_mx_to_torch_sparse_tensor, accuracy
from scipy.sparse import csr_matrix, coo_matrix, diags,eye

In [2]:
class graph_convolution_layer(Module):

    def __init__(self, in_features, out_features):
        super(graph_convolution_layer, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        self.bias = Parameter(torch.FloatTensor(out_features))
        self.weight.data.uniform_(-1, 1)
        self.bias.data.uniform_(-1, 1)


    def forward(self, input, adj):
        support = torch.mm(input, self.weight)
        output = torch.spmm(adj, support)
        return output + self.bias

class GCN(nn.Module):
    def __init__(self, nfeat, nhid, nclass, dropout):
        super(GCN, self).__init__()

        self.gc1 = graph_convolution_layer(nfeat, nhid)
        self.gc2 = graph_convolution_layer(nhid, nclass)
        self.dropout = dropout

    def forward(self, x, adj):
        x = F.sigmoid(self.gc1(x, adj))
        embedding = x
        x = F.dropout(x, self.dropout, training=self.training)
       
        x = self.gc2(x, adj)
        return embedding, F.log_softmax(x, dim=1)


In [11]:
seed = 42
epochs = 500
lr = 0.01
weight_decay = 5e-4
hidden = 2 # A hidden size of 2! -> this is the x and y axis for the node embedding visualization
dropout = 0.5

np.random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x195608e2070>

In [5]:
"""Load karate club dataset"""
print('Loading karate club dataset...')

path="data/karate_club/"

edges = np.loadtxt("{}edges.txt".format(path), dtype=np.int32) - 1  # 0-based indexing
features = eye(np.max(edges+1), dtype=np.float32).tocsr()
idx_labels = np.loadtxt("{}mod-based-clusters.txt".format(path), dtype=np.int32)
idx_labels = idx_labels[idx_labels[:, 0].argsort()]

labels = idx_labels[:, 1]
classes = np.unique(labels)
one_hot_classes = dict(zip(classes, np.squeeze(np.eye(len(classes))[list(range(len(classes)))])))
one_hot_labels = np.array([one_hot_classes[i] for i in labels])

adj = coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                    shape=(labels.shape[0], labels.shape[0]), dtype=np.float32)

# build symmetric adjacency matrix
adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

adj = normalize(adj + eye(adj.shape[0]))




print('Dataset has {} nodes, {} edges, {} features, {} classes'.format(adj.shape[0], edges.shape[0], features.shape[1],5))


idx_train = [0, 2, 4, 8] # Labelling one point from each community/class!
idx_val = range(0, len(labels))
idx_test = range(0, len(labels))

features = torch.FloatTensor(np.array(features.todense()))
labels = torch.LongTensor(np.where(one_hot_labels)[1])
adj = sparse_mx_to_torch_sparse_tensor(adj)

idx_train = torch.LongTensor(idx_train)
idx_val = torch.LongTensor(idx_val)
idx_test = torch.LongTensor(idx_test)


Loading karate club dataset...
Dataset has 34 nodes, 78 edges, 34 features, 5 classes


In [6]:

# Model and optimizer
model = GCN(nfeat=features.shape[1], nhid=hidden, nclass=labels.max().item() + 1, dropout=dropout)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

print (model)

GCN(
  (gc1): graph_convolution_layer()
  (gc2): graph_convolution_layer()
)


In [10]:

def save_plot(embedding, edges, labels, epoch):
    X = np.array(embedding.detach().numpy())
    X_tsne = X
    
    labelled =  X_tsne[[0,2,4,8]] 



    fig = plt.gcf()
    fig.set_size_inches(20, 20)
    
    axes = plt.gca()


    color_map = {0:'red', 1:'green', 2:'blue', 3:'magenta', 4:'black', 5:'orange', 6:'pink'}
    colors = [color_map[i] for i in labels.numpy()]

    def connectpoints(x,y,p1,p2):
        x1, x2 = x[p1], x[p2]
        y1, y2 = y[p1], y[p2]
        plt.plot([x1,x2],[y1,y2],'k-',alpha=0.2)

    for edge in edges:
        connectpoints(X_tsne.T[0], X_tsne.T[1], edge[0], edge[1])



    plt.scatter(X_tsne.T[0], X_tsne.T[1], c = colors, alpha=0.5, s=200)
    plt.scatter(labelled.T[0], labelled.T[1], c = 'grey', alpha=0.5, s=500)
    plt.savefig('karate_club_figures/' + str(epoch) + '.jpg') # A karate_club_figures folder must be present for this to work ;)
    plt.clf()



def test():
    model.eval()
    embedding, output = model(features, adj)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))



t_total = time.time()
for epoch in range(epochs):
    t = time.time()
    model.train()
    optimizer.zero_grad()
    embedding, output = model(features, adj)
    
    save_plot(embedding, edges, labels, epoch)
    
    loss_train = F.nll_loss(output[idx_train], labels[idx_train])
    acc_train = accuracy(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()


    model.eval()
    embedding, output = model(features, adj)

    loss_val = F.nll_loss(output[idx_val], labels[idx_val])
    acc_val = accuracy(output[idx_val], labels[idx_val])
    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(loss_train.item()),
          'time: {:.4f}s'.format(time.time() - t))

print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# Testing
test()


Epoch: 0001 loss_train: 1.4594 time: 0.5096s
Epoch: 0002 loss_train: 1.2891 time: 0.5947s
Epoch: 0003 loss_train: 1.5205 time: 0.6117s
Epoch: 0004 loss_train: 1.3924 time: 0.6113s
Epoch: 0005 loss_train: 1.2763 time: 0.8026s
Epoch: 0006 loss_train: 1.4293 time: 0.9054s
Epoch: 0007 loss_train: 1.3136 time: 1.0889s
Epoch: 0008 loss_train: 1.4225 time: 0.9347s
Epoch: 0009 loss_train: 1.2076 time: 0.8147s
Epoch: 0010 loss_train: 1.4864 time: 0.6499s
Epoch: 0011 loss_train: 1.5138 time: 0.6641s
Epoch: 0012 loss_train: 1.3796 time: 0.6598s
Epoch: 0013 loss_train: 1.4106 time: 0.5790s
Epoch: 0014 loss_train: 1.3457 time: 0.5949s
Epoch: 0015 loss_train: 1.3062 time: 0.5685s
Epoch: 0016 loss_train: 1.3834 time: 0.6647s
Epoch: 0017 loss_train: 1.4501 time: 0.5920s
Epoch: 0018 loss_train: 1.4153 time: 0.5810s
Epoch: 0019 loss_train: 1.3356 time: 0.6430s
Epoch: 0020 loss_train: 1.5039 time: 0.6046s
Epoch: 0021 loss_train: 1.2610 time: 0.5968s
Epoch: 0022 loss_train: 1.3833 time: 0.6064s
Epoch: 002

Epoch: 0183 loss_train: 1.1412 time: 0.5581s
Epoch: 0184 loss_train: 0.8979 time: 0.6247s
Epoch: 0185 loss_train: 1.0906 time: 0.5965s
Epoch: 0186 loss_train: 1.4828 time: 0.6085s
Epoch: 0187 loss_train: 0.9550 time: 0.6707s
Epoch: 0188 loss_train: 1.1982 time: 0.7617s
Epoch: 0189 loss_train: 0.9586 time: 0.5812s
Epoch: 0190 loss_train: 1.1373 time: 0.6485s
Epoch: 0191 loss_train: 1.1099 time: 0.5997s
Epoch: 0192 loss_train: 0.9083 time: 0.5928s
Epoch: 0193 loss_train: 1.2370 time: 0.6263s
Epoch: 0194 loss_train: 0.8694 time: 0.5872s
Epoch: 0195 loss_train: 1.0120 time: 0.6448s
Epoch: 0196 loss_train: 1.1935 time: 0.5893s
Epoch: 0197 loss_train: 0.9584 time: 0.7965s
Epoch: 0198 loss_train: 1.0655 time: 0.6546s
Epoch: 0199 loss_train: 0.8778 time: 0.6708s
Epoch: 0200 loss_train: 0.9405 time: 0.6019s
Epoch: 0201 loss_train: 0.8740 time: 0.6602s
Epoch: 0202 loss_train: 1.1591 time: 0.6778s
Epoch: 0203 loss_train: 0.9281 time: 0.5739s
Epoch: 0204 loss_train: 1.0835 time: 0.5975s
Epoch: 020

Epoch: 0365 loss_train: 0.8311 time: 0.8546s
Epoch: 0366 loss_train: 0.7179 time: 0.6802s
Epoch: 0367 loss_train: 1.4436 time: 0.8814s
Epoch: 0368 loss_train: 1.0275 time: 0.6056s
Epoch: 0369 loss_train: 0.8757 time: 0.9889s
Epoch: 0370 loss_train: 0.9085 time: 0.7889s
Epoch: 0371 loss_train: 1.1653 time: 0.9158s
Epoch: 0372 loss_train: 0.9626 time: 0.6580s
Epoch: 0373 loss_train: 0.9525 time: 0.5892s
Epoch: 0374 loss_train: 0.7999 time: 0.6812s
Epoch: 0375 loss_train: 0.6928 time: 0.8780s
Epoch: 0376 loss_train: 0.9036 time: 0.9413s
Epoch: 0377 loss_train: 0.8122 time: 0.7388s
Epoch: 0378 loss_train: 1.1653 time: 0.7100s
Epoch: 0379 loss_train: 0.7949 time: 0.9110s
Epoch: 0380 loss_train: 1.3154 time: 0.7129s
Epoch: 0381 loss_train: 1.0556 time: 0.6323s
Epoch: 0382 loss_train: 0.7833 time: 0.5671s
Epoch: 0383 loss_train: 0.7647 time: 0.5942s
Epoch: 0384 loss_train: 0.6806 time: 0.5676s
Epoch: 0385 loss_train: 0.6458 time: 0.6428s
Epoch: 0386 loss_train: 0.6788 time: 0.5362s
Epoch: 038

<Figure size 1440x1440 with 0 Axes>