In [1]:
#%%
! pip install dgl-cu101 ogb
ROOT = "/kaggle/input/dgl-ogbnarxiv/"

Collecting dgl-cu101
  Downloading dgl_cu101-0.5.2-cp37-cp37m-manylinux1_x86_64.whl (25.5 MB)
[K     |████████████████████████████████| 25.5 MB 9.2 MB/s 
[?25hCollecting ogb
  Downloading ogb-1.2.3-py3-none-any.whl (55 kB)
[K     |████████████████████████████████| 55 kB 2.1 MB/s 
Collecting outdated>=0.2.0
  Downloading outdated-0.2.0.tar.gz (4.0 kB)
Collecting littleutils
  Downloading littleutils-0.2.2.tar.gz (6.6 kB)
Building wheels for collected packages: outdated, littleutils
  Building wheel for outdated (setup.py) ... [?25l- \ done
[?25h  Created wheel for outdated: filename=outdated-0.2.0-py3-none-any.whl size=4960 sha256=5057849d45265e39a0635afd13ca3225c61c07da1e28c07506923126bea1ea21
  Stored in directory: /root/.cache/pip/wheels/6f/cd/a2/e49170b2cf59e88b952f3414f25a54d9f16f033bded4aaab26
  Building wheel for littleutils (setup.py) ... [?25l- done
[?25h  Created wheel for littleutils: filename=littleutils-0.2.2-py3-none-any.whl size=7048 sha256=965

In [2]:
from dgl.data.utils import load_graphs
import numpy as np
import random
import os
import math
import time
import torch
import torch as th
import torch.nn as nn
import torch.nn.functional as F
import dgl
import dgl.function as fn
from dgl import DGLGraph
from dgl.nn import GraphConv, SAGEConv
from ogb.nodeproppred import Evaluator
from torch.optim import AdamW
from torch.utils.tensorboard import SummaryWriter
from torch.optim.lr_scheduler import ReduceLROnPlateau

x,_ = load_graphs(ROOT + "data.bin")
graph = x[0]

train_idx = torch.load(ROOT + 'train_idx.pt')
test_idx = torch.load(ROOT + 'test_idx.pt')
val_idx = torch.load(ROOT + 'val_idx.pt')
labels = torch.load(ROOT + 'labels.pt')

splitted_idx = {'train':train_idx, 'test':test_idx, 'valid':val_idx}

device = torch.device('cuda')

DGL backend not selected or invalid.  Assuming PyTorch for now.
Using backend: pytorch


Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable.  Valid options are: pytorch, mxnet, tensorflow (all lowercase)


In [3]:
in_feats = graph.ndata["feat"].shape[1]
n_classes = (labels.max() + 1).item()
print (in_feats, n_classes)

128 40


In [4]:
def set_random_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
    print ('random seed set to be ' + str(seed))
    
def cross_entropy(x, labels):
    x = x.to(device)
    labels = labels.to(device)
    y = F.cross_entropy(x, labels[:, 0], reduction="none")
    y = th.log(0.5 + y) - math.log(0.5)
    return th.mean(y)

def compute_acc(pred, labels, evaluator):
    return evaluator.eval({"y_pred": pred.argmax(dim=-1, keepdim=True), "y_true": labels})["acc"]

def add_labels(feat, labels, idx):
    onehot = th.zeros([feat.shape[0], n_classes]).to(device)
    onehot[idx, labels[idx, 0]] = 1
    return th.cat([feat, onehot], dim=-1)

def train(model, graph, labels, train_idx, optimizer):
    model.train()
    feat = graph.ndata["feat"]
    mask_rate = 0.5
    mask = th.rand(train_idx.shape) < mask_rate
    train_labels_idx = train_idx[mask]
    train_pred_idx = train_idx[~mask]
    feat = add_labels(feat, labels, train_labels_idx)
    optimizer.zero_grad()
    pred = model(graph, feat)
    loss = cross_entropy(pred[train_pred_idx], labels[train_pred_idx])
    loss.backward()
    optimizer.step()
    return loss, pred

@th.no_grad()
def evaluate(model, graph, labels, train_idx, val_idx, test_idx, evaluator):
    model.eval()
    feat = graph.ndata["feat"]
    feat = add_labels(feat, labels, train_idx)
    pred = model(graph, feat)
    train_loss = cross_entropy(pred[train_idx], labels[train_idx])
    val_loss = cross_entropy(pred[val_idx], labels[val_idx])
    test_loss = cross_entropy(pred[test_idx], labels[test_idx])

    return (
        compute_acc(pred[train_idx], labels[train_idx], evaluator),
        compute_acc(pred[val_idx], labels[val_idx], evaluator),
        compute_acc(pred[test_idx], labels[test_idx], evaluator),
        train_loss,
        val_loss,
        test_loss,
    )

In [5]:
set_random_seed(0)

class Block(nn.Module):
    def __init__(self,in_feats,out_feats,num_channels=8):
        super(Block, self).__init__()
        self.gc = GraphConv(in_feats, out_feats)
        
    def forward(self, g, node_state):
        node_states = self.gc(g, node_state)
        node_states = F.relu(node_states)
        node_states = F.dropout(node_states, p=0.5, training=self.training)
        return node_states

class Net(nn.Module):
    def __init__(self, in_feats, n_hidden, out_feats,num_paths=2):
        super(Net, self).__init__()
        self.num_paths = num_paths
        self.init = Block(in_feats, n_hidden)
        self.lyrs_0 = nn.ModuleList()
        self.lyrs_1 = nn.ModuleList()
        for _ in range(5):
            self.lyrs_0.append(Block(n_hidden, n_hidden))

        #self.lyrs_1.append(Block(n_hidden, n_hidden//2))
        #self.lyrs_1.append(Block(n_hidden//2, n_hidden))
        self.lin = nn.Linear(n_hidden, out_feats)
        
    def forward(self, g, feat):
        node_state = feat
        out = self.init(g, node_state)
        for i in range(5):
            _out = out
            out = self.lyrs_0[i](g, out)
            out += _out
        
        #out +=  _out
        #for i in range(2,5):
        #    out = self.lyrs_0[i](g, out)
        
        #for lyr in self.lyrs_1:
        #    _out = lyr(g, _out)
            
        #out = torch.stack([out, _out],dim=1).sum(1)
        
        out = self.lin(out)
        return out

random seed set to be 0


In [6]:
warmup_epochs = 10
num_epochs = 1000
patience = 250
log_every = 10
lr = 1e-2
weight_decay = 0

model = Net(in_feats=128 + n_classes, n_hidden=256, out_feats=40).to(device)
graph = graph.to(device)

optimizer = AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
min_lr = 1e-3
scheduler = ReduceLROnPlateau(optimizer, 'max', factor=0.7, patience=150,  verbose=True, cooldown=0, min_lr=min_lr)
print ('scheduler min_lr', min_lr)
criterion = nn.BCEWithLogitsLoss()
evaluator = Evaluator('ogbn-arxiv')


scheduler min_lr 0.001


In [7]:
dur = []
best_score = 0.
best_epoch = 0
num_patient_epochs = 0
model_folder = './saved_models/'
model_path = model_folder + "model.pt"
log_path = "log.txt"

def printw(line):
    with open(log_path,'a') as f:
        f.write(line+"\n")
    print(line)

if not os.path.exists(model_folder):
    os.makedirs(model_folder)
    
print("Warming up for {:d} epochs...".format(warmup_epochs))
t0 = time.time()
for _ in range(warmup_epochs):
    loss, pred = train(model, graph, labels, train_idx, optimizer)
    # optimizer.step()
    scheduler.step(loss)
dur = time.time() - t0 
print("Done in {:.2f} sec".format(dur))

for epoch in range(1, num_epochs + 1):
    t0 = time.time()
    loss, pred = train(model, graph, labels, train_idx, optimizer)
    acc = compute_acc(pred[train_idx], labels[train_idx], evaluator)
    train_acc, val_acc, test_acc, train_loss, val_loss, test_loss = evaluate(model, graph, labels, train_idx, val_idx, test_idx, evaluator)
    # optimizer.step()
    # scheduler.step(test_acc)
    dur = time.time() - t0   

    # Early stop
    if test_acc > best_score:
        torch.save(model.state_dict(), model_path)
        best_score = test_acc
        best_epoch = epoch
        num_patient_epochs = 0
        scheduler.step(test_acc)
    else:
        num_patient_epochs += 1
        scheduler.step(val_acc)

    if (epoch%log_every) == 0:
        printw('Epoch {:d}\t loss {:.4f}\t train_acc {:.4f}\t val_acc {:.4f}\t test_acc {:.4f}\t dur {:.2f}s\t patience {:d}'.format(epoch, loss, train_acc, val_acc,test_acc, dur, num_patient_epochs))

    if num_patient_epochs == patience:
        break

Warming up for 10 epochs...
Done in 3.50 sec
Epoch 10	 loss 1.6625	 train_acc 0.3318	 val_acc 0.3484	 test_acc 0.3373	 dur 0.41s	 patience 0
Epoch 20	 loss 1.4571	 train_acc 0.4264	 val_acc 0.4214	 test_acc 0.4261	 dur 0.41s	 patience 0
Epoch 30	 loss 1.3347	 train_acc 0.4543	 val_acc 0.4707	 test_acc 0.4785	 dur 0.41s	 patience 0
Epoch 40	 loss 1.2196	 train_acc 0.5359	 val_acc 0.5382	 test_acc 0.5345	 dur 0.41s	 patience 1
Epoch 50	 loss 1.1301	 train_acc 0.6093	 val_acc 0.5925	 test_acc 0.5959	 dur 0.41s	 patience 0
Epoch 60	 loss 1.0345	 train_acc 0.6634	 val_acc 0.6219	 test_acc 0.6116	 dur 0.41s	 patience 1
Epoch 70	 loss 0.9553	 train_acc 0.7072	 val_acc 0.6550	 test_acc 0.6441	 dur 0.41s	 patience 0
Epoch 80	 loss 0.8951	 train_acc 0.7260	 val_acc 0.6706	 test_acc 0.6592	 dur 0.41s	 patience 3
Epoch 90	 loss 0.8582	 train_acc 0.7427	 val_acc 0.6824	 test_acc 0.6726	 dur 0.41s	 patience 0
Epoch 100	 loss 0.8382	 train_acc 0.7544	 val_acc 0.6835	 test_acc 0.6716	 dur 0.41s	 patie

In [8]:
printw("Best test score: " +  str(best_score))
printw("at"+ str(best_epoch))

Best test score: 0.7141740221797008
at704
