In [1]:
%load_ext autoreload
%autoreload 2

In [28]:
import time
import argparse
import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from math import ceil

import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.optim.lr_scheduler import StepLR

from utils import load_file, preprocessing, get_vocab, load_embeddings, create_gows, accuracy, generate_batches, AverageMeter, train,train_simsiam, load_R8, dotdict
from models_v2 import MPAD, SimSiam
from scipy import sparse

### Load Data

In [3]:
args = {}
args['path_to_dataset'] = '../datasets/subjectivity.txt'
args['path_to_embeddings'] = "../GoogleNews-vectors-negative300.bin"
args['no_cuda'] = False
args['epochs'] = 200
args['lr'] = 0.001
args['hidden'] = 64
args['penultimate'] = 64
args['message_passing_layers']=2
args['window_size'] = 2
args['directed'] = True
args['use_master_node'] = True
args['normalize'] = True
args['dropout'] = 0.5
args['batch_size'] = 128
args['patience'] = 20
args['rand_node_drop'] = 0.3
args = dotdict(args)

In [4]:
! head -n5 ../datasets/R8_labels.txt

0	train	earn
1	train	acq
2	train	earn
3	train	earn
4	train	earn


In [7]:
#args.path_to_labelset = '../datasets/R8_labels.txt'
#args.path_to_dataset = '../datasets/R8_data.txt'

#docs, class_labels = load_R8(args.path_to_dataset,args.path_to_labelset)

In [5]:
# Read data
docs, class_labels = load_file(args.path_to_dataset)
docs = preprocessing(docs)

enc = LabelEncoder()
class_labels = enc.fit_transform(class_labels)

nclass = np.unique(class_labels).size
y = list()
for i in range(len(class_labels)):
    t = np.zeros(1)
    t[0] = class_labels[i]
    y.append(t)
print('Number of Classes: ',nclass)

Number of Classes:  2


In [7]:
vocab = get_vocab(docs)
embeddings = load_embeddings("../GoogleNews-vectors-negative300.bin", vocab)

Vocabulary size:  21322
Existing vectors: 17913


In [8]:
adj, features, _ = create_gows(docs, vocab, args.window_size, args.directed, args.normalize, args.use_master_node)

"Adj" is a list of sparse tensors. "Features" is a list of np arrays where "features[0]" corresponds to a single graph, size = num nodes. 

In [9]:
kf = KFold(n_splits=2, shuffle=True)
it = 0
accs = list()
train_index, test_index =  kf.split(y)
train_index = train_index[0]
test_index = test_index[0]

In [10]:
idx = np.random.permutation(train_index)
train_index = idx[:int(idx.size*0.9)].tolist()
val_index = idx[int(idx.size*0.9):].tolist()

In [11]:
n_train = len(train_index)
n_val = len(val_index)
n_test = len(test_index)

print("TRAIN: ",n_train)
print("VAL: ",n_val)
print("TEST: ",n_test)

TRAIN:  4500
VAL:  500
TEST:  5000


In [12]:
adj_train = [adj[i] for i in train_index]
features_train = [features[i] for i in train_index]
y_train = [y[i] for i in train_index]


In [13]:
adj_val = [adj[i] for i in val_index]
features_val = [features[i] for i in val_index]
y_val = [y[i] for i in val_index]
adj_val, features_val, batch_n_graphs_val, y_val = generate_batches(adj_val, features_val, y_val, args.batch_size, args.use_master_node)


In [14]:
adj_test = [adj[i] for i in test_index]
features_test = [features[i] for i in test_index]
y_test = [y[i] for i in test_index]
adj_test, features_test, batch_n_graphs_test, y_test = generate_batches(adj_test, features_test, y_test, args.batch_size, args.use_master_node)

In [15]:
n_train_batches = ceil(n_train/args.batch_size)
n_val_batcihes = ceil(n_val/args.batch_size)
n_test_batches = ceil(n_test/args.batch_size)

In [16]:
# Model and optimizer
model = MPAD(embeddings.shape[1], args.message_passing_layers, args.hidden, args.penultimate, nclass, args.dropout, embeddings, args.use_master_node)

Using => GCN
Using => GCN


In [17]:
if args.cuda:
    model.cuda()
    adj_train = [x.cuda() for x in adj_train]
    features_train = [x.cuda() for x in features_train]
    batch_n_graphs_train = [x.cuda() for x in batch_n_graphs_train]
    y_train = [x.cuda() for x in y_train]
    adj_val = [x.cuda() for x in adj_val]
    features_val = [x.cuda() for x in features_val]
    batch_n_graphs_val = [x.cuda() for x in batch_n_graphs_val]
    y_val = [x.cuda() for x in y_val]
    adj_test = [x.cuda() for x in adj_test]
    features_test = [x.cuda() for x in features_test]
    batch_n_graphs_test = [x.cuda() for x in batch_n_graphs_test]
    y_test = [x.cuda() for x in y_test]

In [29]:
model = MPAD(embeddings.shape[1], 
             args.message_passing_layers, 
             args.hidden,
             args.penultimate, 
             nclass, 
             args.dropout,
             embeddings, 
             args.use_master_node)

model.embedding_dim = 64

sim = SimSiam(backbone=model,project_dim=64,bottle_neck_dim=10).to('cuda:0')

parameters = filter(lambda p: p.requires_grad, sim.parameters())

Using => GCN
Using => GCN


In [19]:
args['lr'] = 0.001
optimizer = optim.Adam(parameters, lr=args.lr)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=8, eta_min=1e-5,verbose=True)
#scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

Adjusting learning rate of group 0 to 1.0000e-03.


In [20]:
adj_val[0].to_dense().shape

torch.Size([5504, 5504])

In [27]:
features_val[0].size()[1]

IndexError: tuple index out of range

In [32]:
#sim.eval()
sim.train()
val_embeds = []
for i in range(3):
    with torch.no_grad():
        val_embeds += sim.encoder[0](features_val[i].to("cuda:0"), adj_val[i].to_dense().unsqueeze(0).to("cuda:0"), batch_n_graphs_val[i])[1]
        break
val_embeds = torch.stack(val_embeds).numpy()

MLP:  0  --  torch.Size([5504, 64])
MLP:  0  --  torch.Size([1, 5504, 64])
MLP:  0  --  torch.Size([5504, 64])
MLP:  0  --  torch.Size([1, 5504, 64])


TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [20]:
# args['lr'] = 0.03
# optimizer = optim.SGD(parameters, lr=args.lr)
# scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=8, eta_min=1e-5,verbose=True)
# #scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

In [20]:
from sklearn.neighbors import KNeighborsClassifier

In [24]:
model

MPAD(
  (embedding): Embedding(21323, 300)
  (mps): ModuleList(
    (0): MessagePassing(
      (mlp1): MLP(
        (linears): ModuleList(
          (0): Linear(in_features=300, out_features=64, bias=True)
          (1): Linear(in_features=64, out_features=64, bias=True)
        )
        (batch_norms): ModuleList(
          (0): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
      (mlp2): MLP(
        (linears): ModuleList(
          (0): Linear(in_features=64, out_features=64, bias=True)
          (1): Linear(in_features=64, out_features=64, bias=True)
        )
        (batch_norms): ModuleList(
          (0): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
      (fc1_update): Linear(in_features=64, out_features=64, bias=True)
      (fc2_update): Linear(in_features=64, out_features=64, bias=True)
      (fc1_reset): Linear(in_features=64, out_features=64, bias=True)
      (fc2_reset):

In [21]:
from aug import rand_node_augment
from utils import train_simsiam

In [22]:
def knn(x_val,y_val,x_test,y_test):
    neigh = KNeighborsClassifier(n_neighbors=10)
    neigh.fit(x_val, y_val)
    score= neigh.score(x_test,y_test)
    print("KNN Acc: ",score)
    return score

In [24]:
best_acc = 0

adj_train_1, features_train_1, batch_n_graphs_train_1, y_train_1 = generate_batches(adj_train, 
                                                                            features_train,
                                                                            y_train, 
                                                                            args.batch_size,
                                                                            args.use_master_node)

for epoch in range(args.epochs):
    if scheduler is not None:
        scheduler.step()

    adj_train_2 = rand_node_augment(adj_train, args)
    adj_train_2, features_train_2, batch_n_graphs_train, _ = generate_batches(adj_train_2,features_train, y_train, args.batch_size, args.use_master_node)

    start = time.time()
    model.train()
    train_loss = AverageMeter()
    train_acc = AverageMeter()
    running_loss = 0.0
    # Train for one epoch
    for i in range(n_train_batches):
        
        loss = train_simsiam(sim,
                             optimizer,
                             epoch, 
                             adj_train_1[i],
                             features_train_1[i],
                             batch_n_graphs_train[i],
                             adj_train_2[i],
                             features_train_2[i],
                            )
        train_loss.update(loss.item())
        running_loss += loss.item()
        #train_acc.update(accuracy(output.data, y_train[i].data), output.size(0))
        #print(loss.item())
    print("="*50)
    print('Epoch Loss: ', running_loss/i)
    sim.eval()

    val_embeds = []
    for i in range(n_val_batches):
        with torch.no_grad():
            val_embeds += sim.encoder[0](features_val[i], adj_val[i], batch_n_graphs_val[i])[1]
    val_embeds = torch.stack(val_embeds).numpy()

    test_embeds = []
    for i in range(n_test_batches):
        with torch.no_grad():
            test_embeds += sim.encoder[0](features_test[i], adj_test[i], batch_n_graphs_test[i])[1]
    test_embeds = torch.stack(test_embeds).numpy()
    
    k = knn(val_embeds,torch.cat(y_val),test_embeds, torch.cat(y_test))



Adjusting learning rate of group 0 to 9.6232e-04.
Epoch Loss:  -0.12607533665640014
KNN Acc:  0.7884
Adjusting learning rate of group 0 to 8.5502e-04.
Epoch Loss:  -0.36965705411774774
KNN Acc:  0.8084
Adjusting learning rate of group 0 to 6.9443e-04.
Epoch Loss:  -0.6018872397286551
KNN Acc:  0.8196
Adjusting learning rate of group 0 to 5.0500e-04.
Epoch Loss:  -0.6533943431718009
KNN Acc:  0.8418
Adjusting learning rate of group 0 to 3.1557e-04.
Epoch Loss:  -0.6794662662914821
KNN Acc:  0.8508
Adjusting learning rate of group 0 to 1.5498e-04.
Epoch Loss:  -0.6976972477776664
KNN Acc:  0.846
Adjusting learning rate of group 0 to 4.7680e-05.
Epoch Loss:  -0.7095664518220084
KNN Acc:  0.8432
Adjusting learning rate of group 0 to 1.0000e-05.
Epoch Loss:  -0.7114867755344936
KNN Acc:  0.8438
Adjusting learning rate of group 0 to 4.7680e-05.
Epoch Loss:  -0.7122538294110979
KNN Acc:  0.8428
Adjusting learning rate of group 0 to 1.5498e-04.
Epoch Loss:  -0.7166303958211626
KNN Acc:  0.8442

KNN Acc:  0.8176
Adjusting learning rate of group 0 to 5.0500e-04.
Epoch Loss:  -1.003283817427499
KNN Acc:  0.827
Adjusting learning rate of group 0 to 6.9443e-04.
Epoch Loss:  -1.0034732443945749
KNN Acc:  0.8298
Adjusting learning rate of group 0 to 8.5502e-04.
Epoch Loss:  -1.00400596005576
KNN Acc:  0.828
Adjusting learning rate of group 0 to 9.6232e-04.
Epoch Loss:  -1.003084225314004
KNN Acc:  0.8402
Adjusting learning rate of group 0 to 1.0000e-03.
Epoch Loss:  -1.003078726359776
KNN Acc:  0.8404
Adjusting learning rate of group 0 to 9.6232e-04.
Epoch Loss:  -1.0050073300089155
KNN Acc:  0.8386
Adjusting learning rate of group 0 to 8.5502e-04.
Epoch Loss:  -1.00602194581713
KNN Acc:  0.8252
Adjusting learning rate of group 0 to 6.9443e-04.
Epoch Loss:  -1.008393018586295
KNN Acc:  0.825
Adjusting learning rate of group 0 to 5.0500e-04.


KeyboardInterrupt: 