In [1]:
# Project-local star imports stay first so that the explicit imports below
# always win if one of these modules happens to export the same name.
from utils import *
from atae_lstm import *

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torchsummary import summary

In [2]:
# Locations of the SemEval14 laptop train / gold-test files, relative to the
# notebook's working directory. Both are fed to build_tokenizer and ABSADataset
# further down.
# NOTE(review): the `.xml.seg` layout is not visible here — confirm the
# expected format against ABSADataset.
TRAIN_FILE_PATH = '../data/absa/SemEval14/abas-pytorch/Laptops_Train.xml.seg'
TEST_FILE_PATH = '../data/absa/SemEval14/abas-pytorch/Laptops_Test_Gold.xml.seg'

In [3]:
# Training hyper-parameters.
# NOTE(review): neither constant is used in the cells visible here —
# presumably consumed by the DataLoader / training loop later in the notebook.
BATCH_SIZE = 16
NUM_EPOCHS = 20

In [4]:
# Build the tokenizer from both the train and the test file, passing
# ./output/laptop_tokenizer.dat as its data file. The recorded output below
# shows the tokenizer being loaded from that file rather than rebuilt.
# NOTE(review): max_seq_len=85 is a magic number — presumably the padded
# sequence length used by the dataset; confirm against build_tokenizer.
tokenizer = build_tokenizer(
    fnames=[TRAIN_FILE_PATH, TEST_FILE_PATH],
    max_seq_len=85,
    dat_fname='./output/laptop_tokenizer.dat',
)

loading tokenizer:  ./output/laptop_tokenizer.dat
finish!


In [5]:
# Load the train and gold-test files with the same tokenizer.
trainset = ABSADataset(TRAIN_FILE_PATH, tokenizer)
testset = ABSADataset(TEST_FILE_PATH, tokenizer)

# Hold out 10% of the training examples as a validation set.
print(len(trainset))
val_len = int(len(trainset) * 0.1)

# Use a dedicated, seeded generator so the train/validation membership is the
# same on every "Restart & Run All". Without it random_split draws from the
# global RNG and the split (and every metric computed on it) changes per run.
SPLIT_SEED = 42
trainset, valset = random_split(
    trainset,
    [len(trainset) - val_len, val_len],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
print(len(trainset))

2328
2096


In [6]:
# Component check on a toy batch of two token-id sequences (lengths 2 and 4):
# embedding -> SqueezeEmbedding -> NoQueryAttention, then DynamicRNN.
embedding_dim = 3
x1, x2 = [1, 2], [11, 22, 33, 44]
x_len = torch.tensor([2, 4])  # unpadded lengths of x1 and x2

# Bring both sequences to a common length of 4.
x1_pad, x2_pad = (pad_and_truncate(ids, 4) for ids in (x1, x2))

print('x1: ', x1_pad)
print('x2: ', x2_pad)

# Stack the padded rows into one batch tensor.
x_batch = torch.tensor(np.vstack([x1_pad, x2_pad]))
print('x_batch: ', x_batch)

# padding_idx=0 maps the pad id to an all-zero embedding vector.
embedding = nn.Embedding(1000, embedding_dim, padding_idx=0)
x_batch_embd = embedding(x_batch)
print('embedding: ', x_batch_embd)

squeeze_embedding = SqueezeEmbedding()
x_batch_squeeze_embd = squeeze_embedding(x_batch_embd, x_len)
print('squeeze embedding: ', x_batch_squeeze_embd)

# NOTE(review): the recorded scores give non-zero weight to the padded
# positions of the first sequence — presumably NoQueryAttention applies no
# length mask; confirm this is intended.
attn = NoQueryAttention(embedding_dim)
output, score = attn(x_batch_squeeze_embd)
print(f'attention output: {output}, \n score: {score}')

# The RNN check is fed the un-squeezed embedding plus the true lengths.
rnn_test = DynamicRNN(embedding_dim, 6)
out, (ht, ct) = rnn_test(x_batch_embd, x_len)

x1:  [1 2 0 0]
x2:  [11 22 33 44]
x_batch:  tensor([[ 1,  2,  0,  0],
        [11, 22, 33, 44]])
embedding:  tensor([[[ 0.9648,  0.5789,  0.4078],
         [ 1.0477, -1.1492,  0.4719],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000]],

        [[-0.6998,  0.5070,  0.1527],
         [ 0.9293, -2.2551, -0.2015],
         [ 0.6141, -0.1342, -0.8167],
         [ 0.8165, -0.9285,  0.7614]]], grad_fn=<EmbeddingBackward0>)
squeeze embedding:  tensor([[[ 0.9648,  0.5789,  0.4078],
         [ 1.0477, -1.1492,  0.4719],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000]],

        [[-0.6998,  0.5070,  0.1527],
         [ 0.9293, -2.2551, -0.2015],
         [ 0.6141, -0.1342, -0.8167],
         [ 0.8165, -0.9285,  0.7614]]], grad_fn=<IndexBackward0>)
attention output: tensor([[[ 0.3925, -0.7789, -0.2674]],

        [[ 0.4479, -0.5102, -0.3536]]], grad_fn=<AddBackward0>), 
 score: tensor([[[0.3619, 0.2396, 0.1992, 0.1992]],

        [[0.1997, 0.20

In [7]:
# End-to-end smoke test of ATAE_LSTM on a toy (text, aspect) batch.
vocab_size, embed_dim, hidden_dim = 1000, 3, 6

# Two sentences and their aspect terms, as token ids.
x1, x2 = [1, 2], [11, 22, 33, 44]
aspect1, aspect2 = [9], [8, 9]

# Sentences are padded to length 4, aspects to length 3.
x1_pad, x2_pad = (pad_and_truncate(ids, 4) for ids in (x1, x2))
aspect1_pad, aspect2_pad = (pad_and_truncate(ids, 3) for ids in (aspect1, aspect2))

x_batch = torch.tensor(np.vstack([x1_pad, x2_pad]))
aspect_batch = torch.tensor(np.vstack([aspect1_pad, aspect2_pad]))
print(f'x_batch: {x_batch}, \naspect_batch: {aspect_batch}')

# The model is called with a single (text_indices, aspect_indices) tuple.
# NOTE(review): the extra lines in the recorded output (text_indices, x_len,
# x embed, ...) are not printed by this cell — presumably debug prints inside
# ATAE_LSTM.forward; confirm and remove them there once the check is done.
atae = ATAE_LSTM(embed_dim, vocab_size, hidden_dim)
final_out = atae((x_batch, aspect_batch))
print(f'final_out: {final_out}')

x_batch: tensor([[ 1,  2,  0,  0],
        [11, 22, 33, 44]]), 
aspect_batch: tensor([[9, 0, 0],
        [8, 9, 0]])
text_indices: tensor([[ 1,  2,  0,  0],
        [11, 22, 33, 44]]), 
aspect_indices: tensor([[9, 0, 0],
        [8, 9, 0]])
x_len: tensor([2, 4])
x embed: tensor([[[-1.1631,  1.0519, -1.6253],
         [-0.1466,  0.6123, -0.6912],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000]],

        [[-1.1159,  0.4964,  0.5894],
         [-1.0793,  0.2259,  0.1929],
         [-0.8608, -0.1362,  0.5534],
         [-0.4917,  1.4615,  0.6565]]], grad_fn=<IndexBackward0>)
aspect embed: tensor([[[ 0.1481, -0.5886,  0.9991],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000]],

        [[ 0.5531, -0.3588,  0.0470],
         [ 0.1481, -0.5886,  0.9991],
         [ 0.0000,  0.0000,  0.0000]]], grad_fn=<EmbeddingBackward0>)
torch.sum(aspect, dim=1): tensor([[ 0.1481, -0.5886,  0.9991],
        [ 0.7012, -0.9474,  1.0461]], grad_fn=<SumBackw