In [15]:
import json
import os
from collections import Counter

import torch
import torch.nn as nn
from torch.nn import TransformerEncoderLayer, TransformerEncoder
from torch.utils.data import TensorDataset, DataLoader, SequentialSampler, RandomSampler

In [16]:
class MUTANT(nn.Module):
    """Transformer encoder that scores a passage together with its entities.

    Each input sequence is a bag of vectors of size ``d_model``. A per-position
    type id selects how the position is treated:

    * ``0`` -- padding (ignored by attention, score forced to 0)
    * ``1`` -- passage (scored by ``head_passage``)
    * ``2`` -- entity  (scored by ``head_entity``)

    Args:
        d_model: Size of every input vector and of the encoder's hidden state.
        seq_len: Accepted for caller compatibility; not used by the model.
        dropout: Probability for ``self.dropout``. NOTE(review): this module is
            created but never applied in ``forward``; the encoder layers use
            their own default dropout.
        nhead: Number of attention heads in each encoder layer.
        num_layers: Number of stacked encoder layers.
    """

    def __init__(self, d_model=768, seq_len=16, dropout=0.1, nhead=2, num_layers=2):
        super(MUTANT, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        # Three type ids: 0 = padding, 1 = passage, 2 = entity.
        self.token_type_embeddings = nn.Embedding(3, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        # Scoring heads
        self.head_passage = nn.Linear(d_model, 1)
        self.head_entity = nn.Linear(d_model, 1)

    def forward(self, input_CLSs, type_mask=None):
        """Score every position of every sequence.

        Args:
            input_CLSs: Float tensor of shape ``[seq_len, batch_size, d_model]``.
            type_mask: Optional integer tensor of shape ``[seq_len, batch_size]``
                holding the type id (0, 1 or 2) of each position. When it is
                not a tensor, every position is scored by the entity head and
                no padding is applied.

        Returns:
            Float tensor of shape ``[seq_len, batch_size, 1]``. With a
            ``type_mask``, padding positions are exactly 0.
        """
        if not isinstance(type_mask, torch.Tensor):
            output_CLSs = self.transformer_encoder(input_CLSs)
            return self.head_entity(output_CLSs)

        input_CLSs = input_CLSs + self.token_type_embeddings(type_mask)

        # PyTorch ignores key positions where the padding mask is True, so the
        # mask must flag padding (type id 0), not real tokens. It is expected
        # batch-first: [batch_size, seq_len].
        src_key_padding_mask = (type_mask == 0).T

        output_CLSs = self.transformer_encoder(
            input_CLSs, src_key_padding_mask=src_key_padding_mask
        )

        # Route each position to the head matching its type; padding matches
        # neither mask and therefore scores 0.
        score_dtype = output_CLSs.dtype
        passage_mask = (type_mask == 1).unsqueeze(-1).to(score_dtype)
        entity_mask = (type_mask == 2).unsqueeze(-1).to(score_dtype)
        passage_output = self.head_passage(output_CLSs) * passage_mask
        entity_output = self.head_entity(output_CLSs) * entity_mask

        return passage_output + entity_output

    def get_device(self):
        """Return the device holding the model's first parameter."""
        return next(self.parameters()).device
    

In [17]:
# model = MUTANT(d_model=10, seq_len=6, dropout=0.1)
# lr = 0.001

# optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# loss_func = torch.nn.MSELoss()  # this is for regression mean squared loss
# train_loss_total = 0.0

# model.train()
# bag_of_CLS = torch.rand(6, 3, 10) # [seq_len, batch_size, d_model]
# type_mask = torch.tensor([[1,1,1],
#                           [2,2,2],
#                           [2,2,2],
#                           [2,2,0],
#                           [2,2,0],
#                           [0,0,0]]) # [seq_len, batch_size]

# labels = torch.tensor([[[1.0],[0.0],[1.0]],
#                         [[0.0],[0.0],[0.0]],
#                         [[1.0],[0.0],[1.0]],
#                         [[0.0],[1.0],[0.0]],
#                         [[0.0],[0.0],[0.0]],
#                         [[0.0],[0.0],[0.0]]]) # [seq_len, batch_size, 1]
# for i in range(100):
#     # ========================================
#     #               Training
#     # ========================================
#     model.zero_grad()
#     outputs = model.forward(bag_of_CLS, type_mask=type_mask)

#     # Calculate loss: mean squared error between outputs and labels
#     loss = loss_func(outputs, labels)
#     # Getting gradients w.r.t. parameters
#     loss.sum().backward()
#     optimizer.step()

#     torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)


#     train_loss_total += loss.sum().item()
    
#     if i % 10 == 0:
#         print('--------')
#         print(train_loss_total/(1+i))
#         print(labels)
#         print(outputs)


FileNotFoundError: [Errno 2] No such file or directory: '/nfs/trec_news_track/data/5_fold/scaled_5fold_0_data/mutant_data/valid/0_mutant_max.json'

In [19]:

# Build the training tensors: one fixed-length sequence per passage, made of the
# passage vector followed by its entity vectors and zero padding.
bag_of_CLS = []
labels = []
type_mask = []
max_seq_len = 16
batch_size = 32
d_model = 768
dir_path = '/nfs/trec_news_track/data/5_fold/scaled_5fold_0_data/mutant_data/train/'
file_name = '_mutant_max.json'
doc_to_entity_map_path = '/nfs/trec_news_track/data/5_fold/scaled_5fold_0_data/doc_to_entity_map.json'
# Maximum number of sequences to load. 1 keeps the previous debugging
# behaviour (stop after the first passage); set to None to load everything.
max_examples = 1


def _ranked_entity_ids(entity_id_list):
    """Return the unique ids of `entity_id_list`, most frequent first.

    Ties are broken by the id itself in descending order.
    """
    counts = Counter(entity_id_list)
    ranked = sorted(counts.items(), key=lambda item: (item[1], item[0]), reverse=True)
    return [entity_id for entity_id, _ in ranked]


def _build_sequence(passage, entity_ids, seq_len, dim):
    """Return (vectors, labels, type ids) for one passage, padded to `seq_len`.

    Position 0 is the passage (type 1), followed by entities (type 2) in the
    order given until the sequence is full, then zero padding (type 0).
    Raises KeyError if an id in `entity_ids` is missing from passage['entity'].
    """
    seq_cls = [passage['cls_token']]
    seq_labels = [[passage['relevant']]]
    seq_mask = [1]

    for entity_id in entity_ids:
        if len(seq_mask) >= seq_len:
            break
        entity = passage['entity'][entity_id]
        seq_cls.append(entity['cls_token'])
        seq_labels.append([entity['relevant']])
        seq_mask.append(2)

    padding_len = seq_len - len(seq_mask)
    seq_cls.extend([0.0] * dim for _ in range(padding_len))
    seq_labels.extend([0.0] for _ in range(padding_len))
    seq_mask.extend([0] * padding_len)
    return seq_cls, seq_labels, seq_mask


def _limit_reached():
    """True once `max_examples` sequences have been collected."""
    return max_examples is not None and len(type_mask) >= max_examples


with open(doc_to_entity_map_path, 'r') as f:
    doc_to_entity_map = json.load(f)

# Sorted so that the files are visited in a reproducible order.
data_paths = [os.path.join(dir_path, f) for f in sorted(os.listdir(dir_path)) if file_name in f]

for path in data_paths:
    if _limit_reached():
        break
    with open(path, 'r') as f:
        d = json.load(f)
    for passage_id, passage in d['query']['passage'].items():
        if _limit_reached():
            break
        if passage_id in doc_to_entity_map:
            entity_ids = _ranked_entity_ids(doc_to_entity_map[passage_id])
        else:
            # No mapping for this passage: fall back to the entities stored with it.
            entity_ids = list(passage['entity'])

        seq_cls, seq_labels, seq_mask = _build_sequence(passage, entity_ids, max_seq_len, d_model)
        bag_of_CLS.append(seq_cls)
        labels.append(seq_labels)
        type_mask.append(seq_mask)

if not type_mask:
    raise ValueError('No training sequences found in {}'.format(dir_path))

# Explicit dtypes: float inputs/targets for the regression loss, integer type
# ids for the embedding lookup.
bag_of_CLS_tensor = torch.tensor(bag_of_CLS, dtype=torch.float)
type_mask_tensor = torch.tensor(type_mask, dtype=torch.long)
labels_tensor = torch.tensor(labels, dtype=torch.float)
print(bag_of_CLS_tensor.shape, type_mask_tensor.shape, labels_tensor.shape)

train_dataset = TensorDataset(bag_of_CLS_tensor, type_mask_tensor, labels_tensor)

train_data_loader = DataLoader(train_dataset, sampler=SequentialSampler(train_dataset), batch_size=batch_size)


NameError: name 'd' is not defined

In [20]:


model = MUTANT(d_model=d_model, seq_len=max_seq_len, dropout=0.1)

# Use a GPU if available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU: {}'.format(torch.cuda.get_device_name(0)))
    device = torch.device("cuda")
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
model.to(device)

lr = 0.001

# Created once, outside the epoch loop: re-creating Adam every epoch would
# discard its running moment estimates.
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
loss_func = torch.nn.MSELoss()  # regression: mean squared error

for i in range(100):
    model.train()
    train_loss_total = 0.0
    for i_train, train_batch in enumerate(train_data_loader):
        bag_of_CLS, type_mask, labels = train_batch

        # The loader yields batch-first tensors; the model expects
        # [seq_len, batch_size, ...]. The axes must be swapped with transpose:
        # reshape would keep the memory order and mix up different examples.
        bag_of_CLS = bag_of_CLS.transpose(0, 1).contiguous().to(device)
        type_mask = type_mask.transpose(0, 1).contiguous().to(device)
        labels = labels.transpose(0, 1).contiguous().to(device)

        optimizer.zero_grad()

        outputs = model(bag_of_CLS, type_mask=type_mask)

        # Mean squared error between predicted scores and relevance labels.
        loss = loss_func(outputs, labels.to(outputs.dtype))
        loss.backward()

        # Clip before the optimizer step so the clipped gradients are the ones applied.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        train_loss_total += loss.item()

    if i % 10 == 0:
        print(train_loss_total / len(train_data_loader))

NameError: name 'train_data_loader' is not defined

In [21]:
# Toy list with repeated items, used by the cells below to try out frequency-based ordering.
x = ['c','a', 'b', 'c', 'd', 'a', 'a']

In [22]:
import collections

# Tally how many times each item occurs in x.
y = collections.Counter(x)

In [27]:

# Print the distinct items of x from most to least frequent; items with equal
# counts come out in descending order of the item itself.
frequency_ranked = sorted(((x.count(item), item) for item in set(x)), reverse=True)
for _count, i in frequency_ranked:
    print(i)
 

a
c
d
b


In [2]:
import torch

# Small random tensor of shape (3, 2, 4) for experimenting with reshape.
bag_of_CLS = torch.rand(3, 2, 4)

In [3]:
# Display the tensor so it can be compared with the reshaped version below.
bag_of_CLS

tensor([[[0.5678, 0.5374, 0.3939, 0.5543],
         [0.0576, 0.5003, 0.1142, 0.4358]],

        [[0.7278, 0.0719, 0.7973, 0.0197],
         [0.9373, 0.3528, 0.3551, 0.4850]],

        [[0.5901, 0.5281, 0.8237, 0.7881],
         [0.8297, 0.1818, 0.4055, 0.3765]]])

In [5]:
# reshape keeps the row order and only regroups the rows (three per block
# instead of two, as the output shows); it does not swap the first two axes.
torch.reshape(bag_of_CLS, (2,3,4))

tensor([[[0.5678, 0.5374, 0.3939, 0.5543],
         [0.0576, 0.5003, 0.1142, 0.4358],
         [0.7278, 0.0719, 0.7973, 0.0197]],

        [[0.9373, 0.3528, 0.3551, 0.4850],
         [0.5901, 0.5281, 0.8237, 0.7881],
         [0.8297, 0.1818, 0.4055, 0.3765]]])