In [1]:
import numpy as np
import networkx as nx
import time
import torch
import torch.nn.functional as F
import dgl
from dgl.data import register_data_args
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset

import torch.nn as nn
import dgl.function as fn
from dgl.nn import GATConv

Using backend: pytorch


In [2]:
class EarlyStopping:
    """Patience-based early stopping driven by a higher-is-better score.

    Every call to ``step`` that does not score strictly below the best value
    seen so far (ties included) checkpoints the model and resets the patience
    counter. ``patience`` consecutive worse scores raise the ``early_stop``
    flag.
    """

    def __init__(self, patience=10):
        self.patience = patience
        # Number of consecutive steps that scored below ``best_score``.
        self.counter = 0
        # Best score observed so far; ``None`` until the first ``step`` call.
        self.best_score = None
        # Latched to True once the patience budget is used up.
        self.early_stop = False

    def step(self, acc, model):
        """Record one score and report whether training should stop.

        Args:
            acc: Score for the current epoch (higher is better).
            model: Object whose ``state_dict()`` is checkpointed whenever the
                score is not worse than the best one so far.

        Returns:
            The current value of ``self.early_stop``.
        """
        seen_before = self.best_score is not None

        # Strictly worse than the best: spend one unit of patience.
        if seen_before and acc < self.best_score:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
            return self.early_stop

        # First score, or a score at least as good as the best one.
        self.best_score = acc
        self.save_checkpoint(model)
        if seen_before:
            self.counter = 0
        return self.early_stop

    def save_checkpoint(self, model):
        '''Write ``model.state_dict()`` to ``es_checkpoint.pt``.'''
        torch.save(model.state_dict(), 'es_checkpoint.pt')


In [3]:
class GAT(nn.Module):
    """Multi-layer graph attention network built from DGL ``GATConv`` layers.

    The network holds ``num_layers`` hidden ``GATConv`` layers followed by one
    output ``GATConv`` layer. The graph ``g`` is stored on the module and
    reused for every forward pass.

    Args:
        g: Graph passed to every ``GATConv`` call.
        num_layers: Number of hidden attention layers (the output layer is
            added on top of these).
        in_dim: Size of the input node features.
        num_hidden: Per-head output size of each hidden layer.
        num_classes: Per-head output size of the output layer.
        heads: Indexable head counts; ``heads[l]`` is used for hidden layer
            ``l`` and ``heads[-1]`` for the output layer. Presumably of
            length ``num_layers + 1`` -- verify against the caller.
        activation: Activation handed to the hidden layers (the output layer
            gets ``None``).
        feat_drop: Forwarded to every ``GATConv`` as its 4th argument.
        attn_drop: Forwarded to every ``GATConv`` as its 5th argument.
        negative_slope: Forwarded to every ``GATConv`` as its 6th argument.
        residual: Residual flag for all layers except the first, which is
            always built with ``False``.
    """

    def __init__(self, g, num_layers, in_dim, num_hidden, num_classes, heads,
                 activation, feat_drop, attn_drop, negative_slope, residual):
        super().__init__()
        self.g = g
        self.num_layers = num_layers
        self.gat_layers = nn.ModuleList()
        self.activation = activation

        def build_layer(fan_in, fan_out, n_heads, use_residual, act):
            # All layers share the dropout rates and the negative slope.
            return GATConv(fan_in, fan_out, n_heads,
                           feat_drop, attn_drop, negative_slope,
                           use_residual, act)

        # First layer: projects the raw features and never uses a residual.
        self.gat_layers.append(
            build_layer(in_dim, num_hidden, heads[0], False, self.activation))

        # Remaining hidden layers: the previous layer's heads are flattened
        # in forward(), so the input width is num_hidden * previous heads.
        for idx in range(1, num_layers):
            flattened_width = num_hidden * heads[idx - 1]
            self.gat_layers.append(
                build_layer(flattened_width, num_hidden, heads[idx],
                            residual, self.activation))

        # Output layer: maps to class scores, with no activation.
        self.gat_layers.append(
            build_layer(num_hidden * heads[-2], num_classes, heads[-1],
                        residual, None))

    def forward(self, inputs):
        """Run every layer on the stored graph and return the class logits.

        Hidden layer outputs are flattened from dim 1 onwards; the output
        layer's result is averaged over dim 1.
        # assumes GATConv returns (nodes, heads, features) -- TODO confirm
        """
        hidden = inputs
        for depth in range(self.num_layers):
            layer = self.gat_layers[depth]
            hidden = layer(self.g, hidden).flatten(1)
        # The last entry of gat_layers is the output projection.
        return self.gat_layers[-1](self.g, hidden).mean(1)

In [4]:
def accuracy(logits, labels):
    """Return the fraction of rows whose highest-scoring column equals the label.

    Args:
        logits: Tensor of scores; the maximum is taken along dim 1.
        labels: Tensor of target indices, compared element-wise with the
            predicted indices.

    Returns:
        A Python float: number of matches divided by ``len(labels)``.
    """
    predicted = torch.max(logits, dim=1)[1]
    num_correct = (predicted == labels).sum().item()
    return num_correct * 1.0 / len(labels)


def evaluate(model, features, labels, mask):
    """Score ``model`` on the nodes selected by ``mask``.

    Switches the model to eval mode (it is not switched back here), runs a
    forward pass with gradient tracking disabled and returns the accuracy of
    the masked predictions against the masked labels.

    Args:
        model: Callable module; invoked as ``model(features)``.
        features: Input passed to the model.
        labels: Target tensor, indexed with ``mask``.
        mask: Index/mask applied to both the logits and the labels.

    Returns:
        The value returned by ``accuracy`` for the selected nodes.
    """
    model.eval()
    with torch.no_grad():
        selected_logits = model(features)[mask]
        selected_labels = labels[mask]
        return accuracy(selected_logits, selected_labels)


def main(args):
    """Train a GAT on a citation dataset and print its test accuracy.

    Args:
        args: Dict of settings. Keys read here:
            'dataset'        -- 'cora', 'citeseer' or 'pubmed'
            'gpu'            -- negative for CPU, otherwise the CUDA device
                                index the graph is moved to
            'num_layers', 'num_heads', 'num_out_heads', 'num_hidden',
            'in_drops', 'attn_drops', 'negative_slope', 'residual'
                             -- forwarded to the GAT constructor
            'lr', 'weight_decay' -- Adam optimizer settings
            'epochs'         -- maximum number of training epochs
            'early_stop'     -- enable EarlyStopping (patience 100) on the
                                validation accuracy
            'fastmode'       -- score validation from the training-mode
                                logits instead of a separate eval pass

    Raises:
        ValueError: If ``args['dataset']`` is not a supported name.
    """
    # load and preprocess dataset
    if args['dataset'] == 'cora':
        data = CoraGraphDataset()
    elif args['dataset'] == 'citeseer':
        data = CiteseerGraphDataset()
    elif args['dataset'] == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args['dataset']))

    g = data[0]
    if args['gpu'] >= 0:
        g = g.int().to(args['gpu'])

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    num_feats = features.shape[1]
    n_classes = data.num_classes

    # Normalise self loops: drop any existing ones, then add exactly one per node.
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)

    # create model
    heads = ([args['num_heads']] * args['num_layers']) + [args['num_out_heads']]
    model = GAT(g,
                args['num_layers'],
                num_feats,
                args['num_hidden'],
                n_classes,
                heads,
                F.elu,
                args['in_drops'],
                args['attn_drops'],
                args['negative_slope'],
                args['residual'])

    # Put the model on the same device as the node features. A bare
    # model.cuda() targets the current default CUDA device, which is not
    # necessarily args['gpu'].
    model.to(features.device)

    stopper = EarlyStopping(patience=100) if args['early_stop'] else None
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])

    for _ in range(args['epochs']):
        model.train()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Validation accuracy is only consumed by the early stopper, so the
        # extra work is skipped entirely when early stopping is off.
        if stopper is None:
            continue

        if args['fastmode']:
            # Reuse the training-mode logits instead of a second forward pass.
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
        # Step the stopper in both modes so a checkpoint from THIS run exists
        # before it is reloaded below.
        if stopper.step(val_acc, model):
            break

    # Only restore a checkpoint this run actually wrote (best_score stays None
    # when no epoch ran); otherwise a stale file from an earlier run would be
    # loaded, or the load would fail outright.
    if stopper is not None and stopper.best_score is not None:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))

In [5]:
if __name__ == '__main__':
    # Cora run. Fix the PyTorch RNG seeds (CPU and CUDA) before the model is
    # built so the run can be repeated.
    SEED = 22
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)

    args = {
        # Use GPU 0 when CUDA is present; a negative index keeps main() on
        # the CPU, so the cell also runs on machines without a GPU.
        'gpu': 0 if torch.cuda.is_available() else -1,
        'epochs': 200,
        'num_heads': 8,
        'num_out_heads': 1,
        'num_layers': 1,
        'num_hidden': 8,
        'residual': False,
        'in_drops': 0.6,
        'attn_drops': 0.6,
        'lr': 5e-3,
        'weight_decay': 5e-4,
        'negative_slope': 0.2,
        'dataset': 'cora',
        'early_stop': False,
        'fastmode': False,
    }

    main(args)

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Test Accuracy 0.8310


In [6]:
if __name__ == '__main__':
    # Citeseer run. Fix the PyTorch RNG seeds (CPU and CUDA) before the model
    # is built so the run can be repeated.
    SEED = 923
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)

    args = {
        # Use GPU 0 when CUDA is present; a negative index keeps main() on
        # the CPU, so the cell also runs on machines without a GPU.
        'gpu': 0 if torch.cuda.is_available() else -1,
        'epochs': 200,
        'num_heads': 8,
        'num_out_heads': 1,
        'num_layers': 1,
        'num_hidden': 8,
        'residual': False,
        'in_drops': 0.6,
        'attn_drops': 0.6,
        'lr': 5e-3,
        'weight_decay': 5e-4,
        'negative_slope': 0.2,
        'dataset': 'citeseer',
        'early_stop': False,
        'fastmode': False,
    }

    main(args)

  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Test Accuracy 0.7160


In [7]:
if __name__ == '__main__':
    # Pubmed run. Fix the PyTorch RNG seeds (CPU and CUDA) before the model
    # is built so the run can be repeated.
    SEED = 1
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)

    args = {
        # Use GPU 0 when CUDA is present; a negative index keeps main() on
        # the CPU, so the cell also runs on machines without a GPU.
        'gpu': 0 if torch.cuda.is_available() else -1,
        'epochs': 200,
        'num_heads': 8,
        'num_out_heads': 1,
        'num_layers': 1,
        'num_hidden': 8,
        'residual': False,
        'in_drops': 0.6,
        'attn_drops': 0.6,
        'lr': 5e-3,
        'weight_decay': 5e-4,
        'negative_slope': 0.2,
        'dataset': 'pubmed',
        'early_stop': False,
        'fastmode': False,
    }

    main(args)

  NumNodes: 19717
  NumEdges: 88651
  NumFeats: 500
  NumClasses: 3
  NumTrainingSamples: 60
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Test Accuracy 0.7780
