In [1]:
import time
import argparse
import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from math import ceil

import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.optim.lr_scheduler import StepLR

from utils import load_file, preprocessing, get_vocab, load_embeddings, create_gows, accuracy, generate_batches, AverageMeter
from models import MPAD

In [2]:
class dotdict(dict):
    """Dictionary whose entries can also be read, written and deleted as attributes."""

    def __getattr__(self, name, default=None):
        # Same contract as dict.get: a missing key yields the default (None)
        # instead of raising.
        return self.get(name, default)

    def __setattr__(self, name, value):
        # Attribute assignment stores the value as a regular dictionary item.
        self[name] = value

    def __delattr__(self, name):
        # Attribute deletion removes the dictionary item (KeyError if absent).
        del self[name]

### Load Data

In [12]:
# Run configuration: dataset/embedding locations and hyper-parameters,
# wrapped in a dotdict so later cells can use attribute access (args.lr, ...).
args = dotdict({
    # Input files
    'path_to_dataset': '../datasets/subjectivity.txt',
    'path_to_embeddings': "../GoogleNews-vectors-negative300.bin",
    # Set to True to force CPU execution even when a GPU is available
    'no_cuda': False,
    # Optimisation
    'epochs': 200,
    'lr': 0.001,
    # Model sizes and depth (forwarded to the MPAD constructor)
    'hidden': 64,
    'penultimate': 64,
    'message_passing_layers': 2,
    # Graph construction options (forwarded to create_gows)
    'window_size': 2,
    'directed': True,
    'use_master_node': True,
    'normalize': True,
    # Regularisation, batching and early stopping
    'dropout': 0.5,
    'batch_size': 128,
    'patience': 20,
})

# `args.cuda` is what the GPU-transfer cell checks. It was never defined, and
# because dotdict returns None for missing keys the check silently evaluated to
# False, so the GPU was never used. Derive it from the opt-out flag and the
# actual availability of CUDA.
args.cuda = not args.no_cuda and torch.cuda.is_available()

In [9]:
# Load the raw documents together with their labels, then clean the text
docs, class_labels = load_file(args.path_to_dataset)
docs = preprocessing(docs)

# Encode the labels as consecutive integer ids and count the classes
enc = LabelEncoder()
class_labels = enc.fit_transform(class_labels)
nclass = np.unique(class_labels).size

# One length-1 float array per document, holding that document's class id
y = [np.full(1, label, dtype=np.float64) for label in class_labels]

In [None]:
# Build the vocabulary from the preprocessed documents
vocab = get_vocab(docs)
# Read the embedding file from the configured location instead of repeating the
# literal path here, so changing args.path_to_embeddings actually takes effect.
embeddings = load_embeddings(args.path_to_embeddings, vocab)

In [13]:
# Build the per-document adjacency matrices and node features;
# the third return value is not used in this notebook.
adj, features, _ = create_gows(
    docs,
    vocab,
    args.window_size,
    args.directed,
    args.normalize,
    args.use_master_node,
)

In [22]:
# Show the adjacency matrix and the features of the first document
for label, first_item in (("ADJ: ", adj[0]), ("FEAT: ", features[0])):
    print(label, first_item)

ADJ:    (0, 21)	1.0
  (1, 21)	0.5
  (1, 0)	0.5
  (2, 21)	0.5
  (2, 1)	0.5
  (3, 21)	0.5
  (3, 2)	0.5
  (4, 21)	0.5
  (4, 3)	0.5
  (5, 21)	0.3333333333333333
  (5, 10)	0.3333333333333333
  (5, 4)	0.3333333333333333
  (6, 21)	0.5
  (6, 5)	0.5
  (7, 21)	0.3333333333333333
  (7, 17)	0.3333333333333333
  (7, 6)	0.3333333333333333
  (8, 21)	0.5
  (8, 7)	0.5
  (9, 21)	0.5
  (9, 8)	0.5
  (10, 21)	0.5
  (10, 9)	0.5
  (11, 21)	0.5
  (11, 5)	0.5
  :	:
  (19, 21)	0.5
  (19, 18)	0.5
  (20, 21)	0.5
  (20, 19)	0.5
  (21, 20)	0.047619047619047616
  (21, 19)	0.047619047619047616
  (21, 18)	0.047619047619047616
  (21, 17)	0.047619047619047616
  (21, 16)	0.047619047619047616
  (21, 15)	0.047619047619047616
  (21, 14)	0.047619047619047616
  (21, 13)	0.047619047619047616
  (21, 12)	0.047619047619047616
  (21, 11)	0.047619047619047616
  (21, 10)	0.047619047619047616
  (21, 9)	0.047619047619047616
  (21, 8)	0.047619047619047616
  (21, 7)	0.047619047619047616
  (21, 6)	0.047619047619047616
  (21, 5)	0.0476190

`adj` is a list of sparse adjacency matrices, one per document graph. `features` is a list of NumPy arrays, where `features[0]` corresponds to a single graph and its size equals that graph's number of nodes.

In [31]:
# 2-fold shuffled split; only the first fold is used in this notebook.
kf = KFold(n_splits=2, shuffle=True)
it = 0          # cross-validation iteration number shown in the training log
accs = list()   # placeholder for per-fold accuracies

# kf.split(y) yields one (train_indices, test_indices) pair per fold. Take the
# first pair directly. Unpacking the generator itself into two names only works
# when n_splits == 2, and the name `test_index` then actually held the *train*
# indices of the second fold (which merely coincide with fold 0's test set).
train_index, test_index = next(kf.split(y))

In [32]:
# Shuffle the training indices, keep the first 90% for training and
# hold out the remaining 10% for validation
idx = np.random.permutation(train_index)
n_fit = int(idx.size * 0.9)
train_index, val_index = idx[:n_fit].tolist(), idx[n_fit:].tolist()

In [33]:
# Number of documents in each split
n_train, n_val, n_test = len(train_index), len(val_index), len(test_index)

for tag, count in (("TRAIN: ", n_train), ("VAL: ", n_val), ("TEST: ", n_test)):
    print(tag, count)

TRAIN:  4500
VAL:  500
TEST:  5000


In [34]:
# Gather the graphs, features and labels of the training documents
adj_train, features_train, y_train = (
    [source[i] for i in train_index] for source in (adj, features, y)
)

In [35]:
# Gather the graphs, features and labels of the validation documents
adj_val, features_val, y_val = (
    [source[i] for i in val_index] for source in (adj, features, y)
)

In [36]:
# Gather the graphs, features and labels of the test documents
adj_test, features_test, y_test = (
    [source[i] for i in test_index] for source in (adj, features, y)
)

In [37]:
# Group each split into mini-batches of at most args.batch_size documents.
# Every call returns, per batch: adjacency, features, graph bookkeeping and labels.
adj_train, features_train, batch_n_graphs_train, y_train = generate_batches(
    adj_train, features_train, y_train, args.batch_size, args.use_master_node
)
adj_val, features_val, batch_n_graphs_val, y_val = generate_batches(
    adj_val, features_val, y_val, args.batch_size, args.use_master_node
)
adj_test, features_test, batch_n_graphs_test, y_test = generate_batches(
    adj_test, features_test, y_test, args.batch_size, args.use_master_node
)

In [41]:
# Inspect the batched adjacency of the third training batch
adj_train[2]

tensor(indices=tensor([[   1,   44,    2,  ..., 5721, 5722, 5723],
                       [   0,    0,    1,  ..., 5759, 5759, 5759]]),
       values=tensor([0.3333, 0.0714, 0.3333,  ..., 0.5000, 0.5000, 0.5000]),
       size=(5760, 5760), nnz=7826, layout=torch.sparse_coo)

In [42]:
# Number of mini-batches per split, rounding up so a final partial batch is counted
n_train_batches, n_val_batches, n_test_batches = (
    ceil(size / args.batch_size) for size in (n_train, n_val, n_test)
)

In [43]:
# Build the network; the input width is the embedding dimensionality
model = MPAD(
    embeddings.shape[1],
    args.message_passing_layers,
    args.hidden,
    args.penultimate,
    nclass,
    args.dropout,
    embeddings,
    args.use_master_node,
)

# Optimise only the parameters that require gradients
parameters = (p for p in model.parameters() if p.requires_grad)
optimizer = optim.Adam(parameters, lr=args.lr)
# Halve the learning rate every 50 scheduler steps
scheduler = StepLR(optimizer, step_size=50, gamma=0.5)

In [44]:
# Display the module hierarchy of the network
model

MPAD(
  (embedding): Embedding(21323, 300)
  (mps): ModuleList(
    (0): MessagePassing(
      (mlp1): MLP(
        (linears): ModuleList(
          (0): Linear(in_features=300, out_features=64, bias=True)
          (1): Linear(in_features=64, out_features=64, bias=True)
        )
        (batch_norms): ModuleList(
          (0): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
      (mlp2): MLP(
        (linears): ModuleList(
          (0): Linear(in_features=64, out_features=64, bias=True)
          (1): Linear(in_features=64, out_features=64, bias=True)
        )
        (batch_norms): ModuleList(
          (0): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
      (fc1_update): Linear(in_features=64, out_features=64, bias=True)
      (fc2_update): Linear(in_features=64, out_features=64, bias=True)
      (fc1_reset): Linear(in_features=64, out_features=64, bias=True)
      (fc2_reset):

In [45]:
if args.cuda:
    # Move the model and every batched tensor of the three splits to the GPU
    model.cuda()

    def _on_gpu(batches):
        """Return a new list holding the GPU copy of each tensor in `batches`."""
        return [batch.cuda() for batch in batches]

    adj_train = _on_gpu(adj_train)
    features_train = _on_gpu(features_train)
    batch_n_graphs_train = _on_gpu(batch_n_graphs_train)
    y_train = _on_gpu(y_train)

    adj_val = _on_gpu(adj_val)
    features_val = _on_gpu(features_val)
    batch_n_graphs_val = _on_gpu(batch_n_graphs_val)
    y_val = _on_gpu(y_val)

    adj_test = _on_gpu(adj_test)
    features_test = _on_gpu(features_test)
    batch_n_graphs_test = _on_gpu(batch_n_graphs_test)
    y_test = _on_gpu(y_test)

In [46]:
def train(epoch, adj, features, batch_n_graphs, y):
    """Perform one optimisation step on a single mini-batch.

    Uses the notebook-level `model` and `optimizer`. `epoch` is accepted
    but not used inside the function.

    Returns:
        (output, loss): the model output for the batch and its
        cross-entropy loss against `y`.
    """
    # Clear gradients left over from the previous step
    optimizer.zero_grad()

    logits = model(features, adj, batch_n_graphs)
    batch_loss = F.cross_entropy(logits, y)

    # Back-propagate and update the weights
    batch_loss.backward()
    optimizer.step()

    return logits, batch_loss

In [47]:
def test(adj, features, batch_n_graphs, y):
    """Evaluate the notebook-level `model` on a single mini-batch.

    The forward pass and the loss are computed under `torch.no_grad()`:
    evaluation never calls `backward()`, so recording the autograd graph
    only wastes memory and time. The function does not switch the model to
    evaluation mode; callers are expected to call `model.eval()` first.

    Returns:
        (output, loss): the model output for the batch and its
        cross-entropy loss against `y`, both detached from autograd.
    """
    with torch.no_grad():
        output = model(features, adj, batch_n_graphs)
        loss_test = F.cross_entropy(output, y)
    return output, loss_test

In [48]:
# Best validation accuracy seen so far
best_acc = 0
# Number of consecutive epochs without a new best validation accuracy.
# Initialised explicitly so the early-stopping branch never depends on the
# first epoch having been flagged as "best".
early_stopping_counter = 0

for epoch in range(args.epochs):
    start = time.time()

    # Train for one epoch
    model.train()
    train_loss = AverageMeter()
    train_acc = AverageMeter()

    for i in range(n_train_batches):
        output, loss = train(epoch, adj_train[i], features_train[i], batch_n_graphs_train[i], y_train[i])
        train_loss.update(loss.item(), output.size(0))
        train_acc.update(accuracy(output.data, y_train[i].data), output.size(0))

    # Advance the learning-rate schedule only after this epoch's optimizer
    # steps. Stepping the scheduler before the optimizer skips the first value
    # of the schedule and triggers a warning in PyTorch >= 1.1.
    scheduler.step()

    # Evaluate on validation set
    model.eval()
    val_loss = AverageMeter()
    val_acc = AverageMeter()

    for i in range(n_val_batches):
        output, loss = test(adj_val[i], features_val[i], batch_n_graphs_val[i], y_val[i])
        val_loss.update(loss.item(), output.size(0))
        val_acc.update(accuracy(output.data, y_val[i].data), output.size(0))

    # Print results
    print("Cross-val iter:", '%02d' % it, "epoch:", '%03d' % (epoch + 1), "train_loss=", "{:.5f}".format(train_loss.avg),
        "train_acc=", "{:.5f}".format(train_acc.avg), "val_loss=", "{:.5f}".format(val_loss.avg),
        "val_acc=", "{:.5f}".format(val_acc.avg), "time=", "{:.5f}".format(time.time() - start))

    # Remember best accuracy and save checkpoint (ties count as a new best)
    is_best = val_acc.avg >= best_acc
    best_acc = max(val_acc.avg, best_acc)
    if is_best:
        early_stopping_counter = 0
        torch.save({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer' : optimizer.state_dict(),
        }, 'model_best.pth.tar')
    else:
        early_stopping_counter += 1
        print("EarlyStopping: %i / %i" % (early_stopping_counter, args.patience))
        # Stop once validation accuracy has not improved for `patience` epochs
        if early_stopping_counter >= args.patience:
            print("EarlyStopping: Stop training")
            break



Cross-val iter: 00 epoch: 001 train_loss= 0.38416 train_acc= 0.82156 val_loss= 0.93262 val_acc= 0.48200 time= 4.26239
Cross-val iter: 00 epoch: 002 train_loss= 0.26914 train_acc= 0.88911 val_loss= 0.23591 val_acc= 0.91000 time= 3.88161
Cross-val iter: 00 epoch: 003 train_loss= 0.25339 train_acc= 0.89756 val_loss= 0.22240 val_acc= 0.91600 time= 3.80845
Cross-val iter: 00 epoch: 004 train_loss= 0.24279 train_acc= 0.89800 val_loss= 0.20379 val_acc= 0.91800 time= 3.88065
Cross-val iter: 00 epoch: 005 train_loss= 0.22588 train_acc= 0.90956 val_loss= 0.21516 val_acc= 0.93000 time= 3.88223
Cross-val iter: 00 epoch: 006 train_loss= 0.20462 train_acc= 0.92067 val_loss= 0.23471 val_acc= 0.92800 time= 3.85874
EarlyStopping: 1 / 20
Cross-val iter: 00 epoch: 007 train_loss= 0.19792 train_acc= 0.92222 val_loss= 0.20442 val_acc= 0.92800 time= 3.90551
EarlyStopping: 2 / 20
Cross-val iter: 00 epoch: 008 train_loss= 0.20416 train_acc= 0.91889 val_loss= 0.20465 val_acc= 0.93000 time= 3.65842
Cross-val it

KeyboardInterrupt: 