In [1]:
import torch
import argparse
from transformers import BertModel, BertTokenizer

# Custom imports
from model.utils import *
from model.data_utils import *
from model.transformer import Transformer
from model.bracketing import IdentityChunker, NNSimilarityChunker, cos
from model.generators import IdentityGenerator, EmbeddingGenerator
from model.classifiers import AttentionClassifier, SeqPairAttentionClassifier, NaivePoolingClassifier
from model.model import MultiTaskNet, End2EndModel


parser = argparse.ArgumentParser(description='Model Options')
#parser.add_argument('integers', metavar='N', type=int, nargs='+',
#                    help='an integer for the accumulator')

In [2]:
import pandas as pd

# LOAD SST-2
DATA_SST_TRAIN = pd.read_csv('./assets/datasets/SST2/SST2_dataset_train.csv', sep='\t')
DATA_SST_TEST = pd.read_csv('./assets/datasets/SST2/SST2_dataset_test.csv', sep='\t')
DATA_SST_DEV = pd.read_csv('./assets/datasets/SST2/SST2_dataset_dev.csv', sep='\t')

# LOAD QUORA QUESTION PAIRS
DATA_QQP_TRAIN = pd.read_csv('./assets/datasets/QQP/QQP_train.tsv', sep='\t', error_bad_lines=False)
DATA_QQP_TEST = pd.read_csv('./assets/datasets/QQP/QQP_test.tsv', sep='\t', error_bad_lines=False)
DATA_QQP_DEV = pd.read_csv('./assets/datasets/QQP/QQP_dev.tsv', sep='\t', error_bad_lines=False)

b'Skipping line 83032: expected 6 fields, saw 7\n'
b'Skipping line 154657: expected 6 fields, saw 7\n'
b'Skipping line 323916: expected 6 fields, saw 7\n'
  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Device being used: {device}")


########### LOAD MODELS AND OPTIMIZER ###########
transformer_net = Transformer(model_class=BertModel,
                              tokenizer_class=BertTokenizer,
                              pre_trained_weights='bert-base-uncased',
                              device=device)

bracketing_net = NNSimilarityChunker(sim_function=cos,
                                     threshold=0.7,
                                     exclude_special_tokens=False,
                                     combinatorics='sequential',
                                     device=device)

#bracketing_net = IdentityChunker()
#generator_net = IdentityGenerator()
generator_net = EmbeddingGenerator(pool_function=abs_max_pooling, 
                                   device=device)

seq_classifier = AttentionClassifier(embedding_dim=768,
                                     sentset_size=2,
                                     dropout=0.3,
                                     n_sentiments=2,
                                     pool_mode='concat',
                                     device=device)

seq_pair_classifier = SeqPairAttentionClassifier(embedding_dim=768,
                                                 num_classes=4,
                                                 dropout=0.3,
                                                 n_attention_vecs=2,
                                                 pool_mode='concat',
                                                 device=device)

naive_classifier = NaivePoolingClassifier(embedding_dim=768, 
                                          num_classes=2, 
                                          dropout=0., 
                                          pool_mdoe='max_pooling', 
                                          device=device)

multitask_net = MultiTaskNet(seq_classifier,
                             seq_pair_classifier,
                             device=device)

model = End2EndModel(transformer=transformer_net,
                     bracketer=bracketing_net,
                     generator=generator_net,
                     multitasknet=multitask_net,
                     device=device)

batch_sequence = ['this is one sentence', 'this is a second sentence', 'this is a third sentnen.']


Device being used: cpu


In [4]:
from torch.utils.tensorboard import SummaryWriter
from model.data_utils import get_batch_SST2_from_indices, get_batch_QQP_from_indices

# Writer will output to ./runs/ directory by default
writer = SummaryWriter(log_dir='./tensorboard/')
torch.manual_seed(0)

batch_size = {'SST2': 32, 'QQP': 4}
n_batches = {'SST2': 10, 'QQP': 10}
counter = {'SST2': 0, 'QQP': 0}
get_batch_function = {'SST2': get_batch_SST2_from_indices, 'QQP': get_batch_QQP_from_indices}
dataframe = {'SST2': DATA_SST_TRAIN, 'QQP': DATA_QQP_TRAIN}

datasets = ['SST2', 'QQP']
batch_indices = {}

optimizer = torch.optim.Adam(multitask_net.parameters(), 
                             lr=0.001,
                             betas=(0.9, 0.999), 
                             eps=1e-08, 
                             weight_decay=0.01, 
                             amsgrad=False)

#print(list(multitask_net.parameters()))
global_counter = 0
finished_training = False

while not finished_training:
    for dataset in datasets:
        if counter[dataset] > n_batches[dataset] or global_counter == 0:
            counter[dataset] = 0
            # Re-shuffle the training batches data
            batch_indices[dataset] = torch.randperm(n_batches[dataset]*batch_size[dataset],
                                                    device=device).reshape(-1, batch_size[dataset])
    
    batch_sequences, batch_targets, batch_splits = [], [], [0]
    for dataset in datasets:
        idx = counter[dataset]
        dataset_batch = get_batch_function[dataset](dataframe[dataset], 
                                                    batch_indices[dataset][idx])

        # List of tensors, one for each task
        batch_targets.append(torch.tensor([data[1] for data in dataset_batch], 
                                          dtype=torch.int64, 
                                          device=device))
        
        # Big list combining the input sequences/ tuple of sequences because the batch needs
        # to be at the same "depth" level
        batch_sequences.extend([data[0] for data in dataset_batch])
        batch_splits.append(batch_splits[-1] + len(dataset_batch))
        counter[dataset] += 1

    model.train()
    batch_predictions = model.forward(batch_sequences, batch_splits=batch_splits)
    L = model.loss(batch_predictions, batch_targets, weights=None)
    optimizer.zero_grad()
    L.backward()
    optimizer.step()
    writer.add_scalar('Global Loss', L.item())
    #print(list(multitask_net.parameters()))
    print('Loss: ', L)

Loss:  tensor(1.0560, grad_fn=<MeanBackward0>)
Loss:  tensor(1.4333, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7245, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6271, grad_fn=<MeanBackward0>)
Loss:  tensor(2.4836, grad_fn=<MeanBackward0>)
Loss:  tensor(0.5770, grad_fn=<MeanBackward0>)
Loss:  tensor(0.3348, grad_fn=<MeanBackward0>)
Loss:  tensor(1.2072, grad_fn=<MeanBackward0>)
Loss:  tensor(1.3773, grad_fn=<MeanBackward0>)
Loss:  tensor(1.0517, grad_fn=<MeanBackward0>)
Loss:  tensor(1.5649, grad_fn=<MeanBackward0>)
Loss:  tensor(0.5743, grad_fn=<MeanBackward0>)
Loss:  tensor(1.1112, grad_fn=<MeanBackward0>)
Loss:  tensor(2.4364, grad_fn=<MeanBackward0>)
Loss:  tensor(1.0935, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7353, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7015, grad_fn=<MeanBackward0>)
Loss:  tensor(0.5804, grad_fn=<MeanBackward0>)
Loss:  tensor(0.9227, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6300, grad_fn=<MeanBackward0>)
Loss:  tensor(0.8781, grad_fn=<MeanBackward0>)
Loss:  tensor

KeyboardInterrupt: 

In [None]:

batch_sequence_pairs = [('Why Tamil Nadu Universities are no good ?  lol this is longer.', 'What was the deadliest battle in history?'),
                       ('What are the difference between polyester','What is the difference between cotton and poly'),
                       ('What are the difference between polyester', 'What was the deadliest battle in history?')]
batch_sequence = ['Why Tamil Nadu Universities are no good?', 
                  'What was the deadliest battle in history?',
                  'What are the difference between polyester']
combined = batch_sequence_pairs + batch_sequence
output, masks_dict = transformer_net.forward(combined, return_masks=True)
print(output.size())
print(masks_dict['seq_pair_mask'])

In [None]:
L = [(1, 3),(1, 3),(6, 0)]
[l[:-1][:] for l in L]

In [None]:
L = ['hola', 'que', 'taas', 'asdf', 'qwe?']
L[0:3]
L[3:5]


In [None]:
def function(a, **kwargs):
    print('First argument is: ', a)
    if second_argument:
        print('second argument:', second_argument)
        
function('hoola', second_argument=True)

In [None]:
LL = [[1, 0, 0, 1], [1, 0], [1, 0, 0, 0, 0, 0, 1]]
dict_L = {}
dict_L['list'] = LL
max_len = max([len(l) for l in dict_L['list']])
print(dict_L['list'])
dict_L['list'] = [L + [1]*(max_len-len(L)) for L in dict_L['list']]
dict_L['list']

In [None]:
mask = torch.tensor([[True, True, False],[True, False, False]], dtype=torch.bool)
T = mask.sum(dim=0) == 0
print(T)

In [None]:
T = torch.tensor([[4, 2, 3],[5, 2, 3]])
print(-T)

In [None]:
L = torch.tensor([1, 4, 5])
T = torch.tensor([[6, 10, 5, 2], 
                  [1, 1, 1, 1]], dtype=torch.float32)
T.prod(dim=1) == 1

In [None]:
import torch

T1 = torch.tensor([True, False, False], dtype=torch.bool)
T2 = torch.tensor([True, True, False], dtype=torch.bool)
T1 * T2

In [None]:
m = torch.tensor([True, False, True, True], dtype=torch.bool)
T = torch.tensor([[1, 2, 3, 4], [5, 6, 7, 8]])
T[:, m]

In [4]:
import torch
device = torch.device('cpu')
T = torch.tensor([1, 2, 3], device=device)

AssertionError: Torch not compiled with CUDA enabled