In [1]:
import torch
import argparse
from transformers import BertModel, BertTokenizer

# Custom imports
from model.utils import *
from model.data_utils import *
from model.transformer import Transformer
from model.bracketing import IdentityChunker, NNSimilarityChunker, cos
from model.generators import IdentityGenerator, EmbeddingGenerator
from model.classifiers import AttentionClassifier, SeqPairAttentionClassifier
from model.model import MultiTaskNet, End2EndModel


parser = argparse.ArgumentParser(description='Model Options')
#parser.add_argument('integers', metavar='N', type=int, nargs='+',
#                    help='an integer for the accumulator')

In [2]:
import pandas as pd

# LOAD SST-2
DATA_SST_TRAIN = pd.read_csv('./assets/datasets/SST2/SST2_dataset_train.csv', sep='\t')
DATA_SST_TEST = pd.read_csv('./assets/datasets/SST2/SST2_dataset_test.csv', sep='\t')
DATA_SST_DEV = pd.read_csv('./assets/datasets/SST2/SST2_dataset_dev.csv', sep='\t')

# LOAD QUORA QUESTION PAIRS
DATA_QQP_TRAIN = pd.read_csv('./assets/datasets/QQP/QQP_train.tsv', sep='\t', error_bad_lines=False)
DATA_QQP_TEST = pd.read_csv('./assets/datasets/QQP/QQP_test.tsv', sep='\t', error_bad_lines=False)
DATA_QQP_DEV = pd.read_csv('./assets/datasets/QQP/QQP_dev.tsv', sep='\t', error_bad_lines=False)

b'Skipping line 83032: expected 6 fields, saw 7\n'
b'Skipping line 154657: expected 6 fields, saw 7\n'
b'Skipping line 323916: expected 6 fields, saw 7\n'
  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Device being used: {device}")


########### LOAD MODELS AND OPTIMIZER ###########
transformer_net = Transformer(model_class=BertModel,
                              tokenizer_class=BertTokenizer,
                              pre_trained_weights='bert-base-uncased',
                              device=device)

bracketing_net = NNSimilarityChunker(sim_function=cos,
                                     threshold=3,
                                     exclude_special_tokens=False,
                                     combinatorics='sequential',
                                     device=device)
bracketing_net = IdentityChunker()
generator_net = IdentityGenerator()
#generator_net = EmbeddingGenerator(pool_function=abs_max_pooling, 
#                                   device=device)

seq_classifier = AttentionClassifier(embedding_dim=768,
                                       sentset_size=2,
                                       batch_size=32,
                                       dropout=0.3,
                                       n_sentiments=4,
                                       pool_mode='concat',
                                       device=device)


seq_pair_classifier = SeqPairAttentionClassifier(embedding_dim=768,
                                              num_classes=4,
                                              batch_size=32,
                                              dropout=0.3,
                                              n_attention_vecs=4,
                                              pool_mode='concat',
                                              device=device)

multitask_net = MultiTaskNet(seq_classifier, 
                             device=device)

model = End2EndModel(transformer=transformer_net,
                     bracketer=bracketing_net,
                     generator=generator_net,
                     multitasknet=multitask_net,
                     device=device)

batch_sequence = ['this is one sentence', 'this is a second sentence', 'this is a third sentnen.']


#optimizer = torch.optim.Adam(multitask_net.parameters())



Device being used: cpu


In [None]:
from model.data_utils import get_batch_SST2_from_indices, get_batch_QQP_from_indices
torch.manual_seed(0)

"""
batch_size = {'SST2': 4, 'QQP': 4}
n_batches = {'SST2': 10, 'QQP': 10}
counter = {'SST2': 0, 'QQP': 0}
get_batch_function = {'SST2': get_batch_SST2_from_indices, 'QQP': get_batch_QQP_from_indices}
dataframe = {'SST2': DATA_SST_TRAIN, 'QQP': DATA_QQP_TRAIN}

datasets = ['SST2', 'QQP']
batch_indices = {}
"""

batch_size = {'SST2': 4, 'QQP': 4}
n_batches = {'SST2': 10, 'QQP': 10}
counter = {'SST2': 0, 'QQP': 0}
get_batch_function = {'SST2': get_batch_SST2_from_indices, 'QQP': get_batch_QQP_from_indices}
dataframe = {'SST2': DATA_SST_TRAIN, 'QQP': DATA_QQP_TRAIN}

datasets = ['SST2']
batch_indices = {}

optimizer = torch.optim.Adam(multitask_net.parameters(), 
                             lr=0.01, 
                             betas=(0.9, 0.999), 
                             eps=1e-08, 
                             weight_decay=0.01, 
                             amsgrad=False)

global_counter = 0
finished_training = False

while not finished_training:
    for dataset in datasets:
        if counter[dataset] > n_batches[dataset] or global_counter == 0:
            counter[dataset] = 0
            # Re-shuffle the training batches data
            batch_indices[dataset] = torch.randperm(n_batches[dataset]*batch_size[dataset],
                                                    device=device).reshape(-1, batch_size[dataset])
    
    batch_sequences, batch_targets, batch_splits = [], [], [0]
    for dataset in datasets:
        idx = counter[dataset]
        dataset_batch = get_batch_function[dataset](dataframe[dataset], 
                                                    batch_indices[dataset][idx])

        # List of tensors, one for each task
        batch_targets.append(torch.tensor([data[1] for data in dataset_batch], 
                                          dtype=torch.int64, 
                                          device=device))
        
        # Big list combining the input sequences/ tuple of sequences because the batch needs
        # to be at the same "depth" level
        batch_sequences.extend([data[0] for data in dataset_batch])
        batch_splits.append(batch_splits[-1] + len(dataset_batch))
        counter[dataset] += 1

    model.train()
    batch_predictions = model.forward(batch_sequences, batch_splits=batch_splits)
    L = model.loss(batch_predictions, batch_targets, weights = None)
    L.backward()
    optimizer.step()
    print('Loss: ', L)

Loss:  tensor(0.7339, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6660, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7073, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7109, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7285, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7117, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6981, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7257, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6321, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7096, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6522, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7007, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6712, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6977, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6777, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7365, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6637, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7190, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6960, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6877, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7009, grad_fn=<MeanBackward0>)
Loss:  tensor

Loss:  tensor(0.7585, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6716, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6586, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7267, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6685, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7584, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7179, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6699, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7160, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7656, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6680, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6997, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6587, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7166, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6386, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6742, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7171, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7171, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7212, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6909, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6891, grad_fn=<MeanBackward0>)
Loss:  tensor

Loss:  tensor(0.7184, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6829, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6627, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7525, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6610, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6983, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7382, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7184, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6890, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7497, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6848, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6833, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6383, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7190, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6511, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6981, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6556, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7072, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7269, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7348, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6497, grad_fn=<MeanBackward0>)
Loss:  tensor

Loss:  tensor(0.6928, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7611, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6923, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7001, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6830, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7245, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7208, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7384, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6707, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7295, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7253, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6528, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7300, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6355, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7004, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6905, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7131, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6379, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7198, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6480, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6423, grad_fn=<MeanBackward0>)
Loss:  tensor

Loss:  tensor(0.6532, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6913, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6723, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7132, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6875, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7188, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6564, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7407, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7366, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6851, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6909, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6696, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7321, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6825, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7227, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6663, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6931, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6825, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6923, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6699, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7370, grad_fn=<MeanBackward0>)
Loss:  tensor

Loss:  tensor(0.6378, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6595, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7195, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7185, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7232, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6956, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6673, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7149, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6566, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6855, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6921, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7261, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6709, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7189, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6585, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6722, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7288, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7240, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7432, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6508, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7013, grad_fn=<MeanBackward0>)
Loss:  tensor

Loss:  tensor(0.6692, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6359, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6880, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7245, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6285, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6934, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7120, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6686, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7100, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6882, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7053, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6894, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7165, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7288, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6910, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7232, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6669, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7220, grad_fn=<MeanBackward0>)
Loss:  tensor(0.6995, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7323, grad_fn=<MeanBackward0>)
Loss:  tensor(0.7006, grad_fn=<MeanBackward0>)
Loss:  tensor

In [None]:

batch_sequence_pairs = [('Why Tamil Nadu Universities are no good ?  lol this is longer.', 'What was the deadliest battle in history?'),
                       ('What are the difference between polyester','What is the difference between cotton and poly'),
                       ('What are the difference between polyester', 'What was the deadliest battle in history?')]
batch_sequence = ['Why Tamil Nadu Universities are no good?', 
                  'What was the deadliest battle in history?',
                  'What are the difference between polyester']
combined = batch_sequence_pairs + batch_sequence
output, masks_dict = transformer_net.forward(combined, return_masks=True)
print(output.size())
print(masks_dict['seq_pair_mask'])

In [None]:
L = [(1, 3),(1, 3),(6, 0)]
[l[:-1][:] for l in L]

In [None]:
L = ['hola', 'que', 'taas', 'asdf', 'qwe?']
L[0:3]
L[3:5]


In [None]:
def function(a, **kwargs):
    print('First argument is: ', a)
    if second_argument:
        print('second argument:', second_argument)
        
function('hoola', second_argument=True)

In [None]:
LL = [[1, 0, 0, 1], [1, 0], [1, 0, 0, 0, 0, 0, 1]]
dict_L = {}
dict_L['list'] = LL
max_len = max([len(l) for l in dict_L['list']])
print(dict_L['list'])
dict_L['list'] = [L + [1]*(max_len-len(L)) for L in dict_L['list']]
dict_L['list']

In [None]:
mask = torch.tensor([[True, True, False],[True, False, False]], dtype=torch.bool)
T = mask.sum(dim=0) == 0
print(T)

In [None]:
T = torch.tensor([[4, 2, 3],[5, 2, 3]])
print(-T)

In [None]:
L = torch.tensor([1, 4, 5])
T = torch.tensor([[6, 10, 5, 2], 
                  [1, 1, 1, 1]], dtype=torch.float32)
T.prod(dim=1) == 1

In [None]:
import torch

T1 = torch.tensor([True, False, False], dtype=torch.bool)
T2 = torch.tensor([True, True, False], dtype=torch.bool)
T1 * T2

In [None]:
m = torch.tensor([True, False, True, True], dtype=torch.bool)
T = torch.tensor([[1, 2, 3, 4], [5, 6, 7, 8]])
T[:, m]

In [None]:
import itertools
indices = [0, 1, 2, 3, 4, 5]
idx_combinations = [indices[s:e] for s, e in itertools.combinations(range(len(indices)+1), 2)]
print(idx_combinations)