In [1]:
import torch
import torch.nn as nn
from dgl.nn.pytorch import GraphConv

import argparse
import time
import numpy as np
import torch.nn.functional as F
import dgl
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset

Using backend: pytorch


In [2]:
class GCN(nn.Module):
    """Stack of ``GraphConv`` layers evaluated on one fixed graph.

    The stack consists of, in order:

    * one input layer mapping ``in_feats -> n_hidden``,
    * ``n_layers - 1`` hidden layers mapping ``n_hidden -> n_hidden``,
    * one output layer mapping ``n_hidden -> n_classes``.

    The input and hidden layers receive ``activation``; the output layer is
    built without one.  Dropout is applied to the input of every layer except
    the first.

    Parameters
    ----------
    g
        Graph object handed to every layer as its first argument on each
        forward pass.
    in_feats
        Input size of the first layer.
    n_hidden
        Output size of the input layer and size of every hidden layer.
    n_classes
        Output size of the last layer.
    n_layers
        Controls the number of hidden layers (``n_layers - 1`` of them).
    activation
        Passed as ``activation=`` to the input and hidden layers.
    dropout
        Probability given to ``nn.Dropout``.
    """

    def __init__(self, g, in_feats, n_hidden, n_classes, n_layers,
                 activation, dropout):
        super().__init__()
        self.g = g

        # (input size, output size) of every activated layer, in the order
        # the layers are created: input layer first, then the hidden layers.
        activated_shapes = [(in_feats, n_hidden)]
        activated_shapes += [(n_hidden, n_hidden)] * (n_layers - 1)

        convs = [GraphConv(n_in, n_out, activation=activation)
                 for n_in, n_out in activated_shapes]
        # output layer: no activation argument is supplied
        convs.append(GraphConv(n_hidden, n_classes))

        self.layers = nn.ModuleList(convs)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, features):
        """Run ``features`` through all layers and return the last output."""
        h = features
        for depth, conv in enumerate(self.layers):
            # the very first layer sees the raw features; later layers see
            # the dropped-out output of the previous layer
            h = conv(self.g, h if depth == 0 else self.dropout(h))
        return h

In [3]:
def evaluate(model, features, labels, mask):
    """Return the fraction of masked nodes whose prediction matches its label.

    ``model`` is switched to evaluation mode (and left in it), then called on
    ``features`` with gradient tracking disabled.  The predicted class of a
    node is the column index of the largest value in its output row.  Only
    rows selected by ``mask`` contribute to the result.

    Returns a Python float: number of correct predictions divided by the
    number of selected labels.
    """
    model.eval()
    with torch.no_grad():
        selected_scores = model(features)[mask]
        targets = labels[mask]
        # second element of torch.max(..., dim=1) is the arg-max per row
        predicted = torch.max(selected_scores, dim=1)[1]
        n_hits = (predicted == targets).sum().item()
    return float(n_hits) / len(targets)


def _load_dataset(name):
    """Return the dataset object for ``name`` ('cora', 'citeseer', 'pubmed').

    Raises
    ------
    ValueError
        If ``name`` is not one of the three supported names.
    """
    if name == 'cora':
        return CoraGraphDataset()
    if name == 'citeseer':
        return CiteseerGraphDataset()
    if name == 'pubmed':
        return PubmedGraphDataset()
    raise ValueError('Unknown dataset: {}'.format(name))


def main(args):
    """Train a GCN on one dataset and report its test accuracy.

    Parameters
    ----------
    args : dict
        Run configuration.  Keys read:

        * ``'dataset'``      -- 'cora', 'citeseer' or 'pubmed'
        * ``'gpu'``          -- CUDA device index; a negative value runs on CPU
        * ``'self_loop'``    -- if truthy, existing self loops are removed and
          one self loop per node is added
        * ``'n_hidden'``, ``'n_layers'``, ``'dropout'`` -- forwarded to ``GCN``
        * ``'lr'``, ``'weight_decay'`` -- forwarded to the Adam optimizer
        * ``'n_epochs'``     -- number of full-graph training steps

    Returns
    -------
    float
        Accuracy on the nodes selected by the dataset's ``test_mask``.  The
        same value is printed as ``"Test accuracy xx.xx%"``.

    Raises
    ------
    ValueError
        If ``args['dataset']`` is not a supported dataset name.
    """
    # load and preprocess dataset
    data = _load_dataset(args['dataset'])
    g = data[0]

    # A single device object is used for the graph, the tensors derived from
    # it and the model, so that a GPU index other than the current default
    # CUDA device does not leave them on different devices.
    use_cuda = args['gpu'] >= 0
    device = torch.device('cuda', args['gpu']) if use_cuda else torch.device('cpu')
    if use_cuda:
        g = g.int().to(device)

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    n_classes = data.num_classes

    # add self loop (remove first so that no node ends up with two)
    if args['self_loop']:
        g = dgl.remove_self_loop(g)
        g = dgl.add_self_loop(g)

    # normalization: in-degree ** -0.5 per node, with 0 where the in-degree
    # is 0 (the power would otherwise be inf).  Stored as node data 'norm';
    # nothing in this function reads it back.
    # NOTE(review): GraphConv appears to compute its own normalization, so
    # this may be vestigial -- confirm before removing.
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    g.ndata['norm'] = norm.to(device).unsqueeze(1)

    # create GCN model on the same device as the graph
    model = GCN(g,
                in_feats,
                args['n_hidden'],
                n_classes,
                args['n_layers'],
                F.relu,
                args['dropout'])
    model.to(device)
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    # full-graph training: every step runs the whole graph through the model
    # and computes the loss on the training nodes only
    for _ in range(args['n_epochs']):
        model.train()
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
    return acc

In [4]:
if __name__ == '__main__':
    # Cora run.  Seed the torch CPU and CUDA generators before the model
    # is built.
    SEED = 49
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)

    # Configuration consumed by main().  gpu=0 selects CUDA device 0 (a
    # negative value would select the CPU); n_layers=1 means no hidden
    # GraphConv layer between the input and output layers.
    args = {
        'dataset': 'cora',
        'dropout': 0.5,
        'gpu': 0,
        'lr': 1e-1,
        'n_epochs': 200,
        'n_hidden': 32,
        'n_layers': 1,
        'weight_decay': 5e-4,
        'self_loop': False,
    }

    main(args)

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Test accuracy 81.00%


In [5]:
if __name__ == '__main__':
    # Citeseer run.  Seed the torch CPU and CUDA generators before the
    # model is built.
    SEED = 26
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)

    # Configuration consumed by main().  gpu=0 selects CUDA device 0 (a
    # negative value would select the CPU); n_layers=1 means no hidden
    # GraphConv layer between the input and output layers.
    args = {
        'dataset': 'citeseer',
        'dropout': 0.5,
        'gpu': 0,
        'lr': 1e-1,
        'n_epochs': 200,
        'n_hidden': 32,
        'n_layers': 1,
        'weight_decay': 5e-4,
        'self_loop': False,
    }

    main(args)

  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Test accuracy 70.00%


In [6]:
if __name__ == '__main__':
    # Pubmed run.  Seed the torch CPU and CUDA generators before the model
    # is built.
    SEED = 2
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)

    # Configuration consumed by main().  gpu=0 selects CUDA device 0 (a
    # negative value would select the CPU); n_layers=1 means no hidden
    # GraphConv layer between the input and output layers.
    args = {
        'dataset': 'pubmed',
        'dropout': 0.5,
        'gpu': 0,
        'lr': 1e-1,
        'n_epochs': 200,
        'n_hidden': 32,
        'n_layers': 1,
        'weight_decay': 5e-4,
        'self_loop': False,
    }

    main(args)

  NumNodes: 19717
  NumEdges: 88651
  NumFeats: 500
  NumClasses: 3
  NumTrainingSamples: 60
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Test accuracy 78.00%
