In [1]:
# Project-local star imports stay first so that the explicit imports below
# take precedence over any same-named symbol they re-export.
from utils import *
from atae_lstm import *

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torchsummary import summary

In [2]:
# Paths (relative to this notebook) of the SemEval-14 Laptops train / gold-test
# files. Both are fed to build_tokenizer and ABSADataset in the cells below.
# NOTE(review): the directory is spelled "abas-pytorch" (not "absa") — confirm
# this matches the on-disk name.
TRAIN_FILE_PATH = '../data/absa/SemEval14/abas-pytorch/Laptops_Train.xml.seg'
TEST_FILE_PATH = '../data/absa/SemEval14/abas-pytorch/Laptops_Test_Gold.xml.seg'

In [3]:
# Training hyper-parameters.
# NOTE(review): neither constant is used in the visible cells; presumably they
# feed the DataLoader / training loop further down — confirm.
BATCH_SIZE = 16
NUM_EPOCHS = 20

In [4]:
# One tokenizer is built over both the train and the test file, with sequences
# capped at 85 tokens. The recorded output shows it being loaded from the
# `dat_fname` file rather than rebuilt.
tokenizer = build_tokenizer(
    fnames=[TRAIN_FILE_PATH, TEST_FILE_PATH],
    max_seq_len=85,
    dat_fname='./output/laptop_tokenizer.dat',
)

loading tokenizer:  ./output/laptop_tokenizer.dat
finish!


In [None]:
# Load the train and test files through the tokenizer built above.
full_trainset = ABSADataset(TRAIN_FILE_PATH, tokenizer)
testset = ABSADataset(TEST_FILE_PATH, tokenizer)

# Hold out a fixed fraction of the training examples for validation.
# The split is drawn from a dedicated, seeded generator (instead of the global
# RNG) so the same examples end up in train / validation on every
# Restart & Run All.
VAL_FRACTION = 0.1
SPLIT_SEED = 42
val_len = int(len(full_trainset) * VAL_FRACTION)
train_len = len(full_trainset) - val_len

print(len(full_trainset))
trainset, valset = random_split(
    full_trainset,
    [train_len, val_len],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
print(len(trainset))

# Sanity check: the two subsets must partition the full training set.
assert len(trainset) + len(valset) == len(full_trainset)

In [6]:
# Smoke test of the individual building blocks on a hand-made two-sentence batch.

# --- toy inputs: two token-id sequences with true lengths 2 and 4 ---
embedding_dim = 3
x1, x2 = [1, 2], [11, 22, 33, 44]
x_len = torch.tensor([2, 4])

# Bring both sequences to length 4 (the recorded output shows x1 padded with
# trailing zeros).
x1_pad, x2_pad = pad_and_truncate(x1, 4), pad_and_truncate(x2, 4)
print('x1: ', x1_pad)
print('x2: ', x2_pad)

x_batch = torch.tensor(np.vstack([x1_pad, x2_pad]))
print('x_batch: ', x_batch)

# --- embedding lookup: id 0 is the padding index, so padded positions map to
# all-zero vectors ---
embedding = nn.Embedding(1000, embedding_dim, padding_idx=0)
x_batch_embd = embedding(x_batch)
print('embedding: ', x_batch_embd)

# --- SqueezeEmbedding is given the embedded batch together with the true
# sequence lengths ---
squeeze_embedding = SqueezeEmbedding()
x_batch_squeeze_embd = squeeze_embedding(x_batch_embd, x_len)
print('squeeze embedding: ', x_batch_squeeze_embd)

# --- attention over the squeezed embeddings; returns (output, score) ---
attn = NoQueryAttention(embedding_dim)
output, score = attn(x_batch_squeeze_embd)
print(f'attention output: {output}, \n score: {score}')

# --- DynamicRNN with hidden size 6; the result is unpacked but not displayed ---
rnn_test = DynamicRNN(embedding_dim, 6)
out, (ht, ct) = rnn_test(x_batch_embd, x_len)

x1:  [1 2 0 0]
x2:  [11 22 33 44]
x_batch:  tensor([[ 1,  2,  0,  0],
        [11, 22, 33, 44]])
embedding:  tensor([[[-1.8584, -0.3601, -0.0726],
         [ 1.7019, -0.2690, -0.4833],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000]],

        [[-0.4000,  0.2280, -0.0531],
         [ 0.9400, -0.1664, -0.0234],
         [ 0.3305, -0.7436, -0.6359],
         [-0.1622, -0.1226, -1.1903]]], grad_fn=<EmbeddingBackward0>)
squeeze embedding:  tensor([[[-1.8584, -0.3601, -0.0726],
         [ 1.7019, -0.2690, -0.4833],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000]],

        [[-0.4000,  0.2280, -0.0531],
         [ 0.9400, -0.1664, -0.0234],
         [ 0.3305, -0.7436, -0.6359],
         [-0.1622, -0.1226, -1.1903]]], grad_fn=<IndexBackward0>)
attention output: tensor([[[0.4671, 0.2107, 0.1577]],

        [[0.4889, 0.0986, 0.1362]]], grad_fn=<AddBackward0>), 
 score: tensor([[[0.2413, 0.2558, 0.2515, 0.2515]],

        [[0.2515, 0.2533, 0.

In [7]:
# End-to-end smoke test of ATAE_LSTM on a hand-made two-sentence batch.
vocab_size, embed_dim, hidden_dim = 1000, 3, 6

# Toy sentences and their aspect terms, as token ids.
x1, x2 = [1, 2], [11, 22, 33, 44]
aspect1, aspect2 = [9], [8, 9]

# Sentences are brought to length 4, aspects to length 3.
x1_pad, x2_pad = (pad_and_truncate(ids, 4) for ids in (x1, x2))
aspect1_pad, aspect2_pad = (pad_and_truncate(ids, 3) for ids in (aspect1, aspect2))

x_batch = torch.tensor(np.vstack([x1_pad, x2_pad]))
aspect_batch = torch.tensor(np.vstack([aspect1_pad, aspect2_pad]))

print(f'x_batch: {x_batch}, \naspect_batch: {aspect_batch}')


# The model is called with the text and aspect index tensors packed in one
# tuple. The recorded output shows it printing its own intermediate tensors
# before final_out is displayed.
atae = ATAE_LSTM(embed_dim, vocab_size, hidden_dim)
final_out = atae((x_batch, aspect_batch))
print(f'final_out: {final_out}')

x_batch: tensor([[ 1,  2,  0,  0],
        [11, 22, 33, 44]]), 
aspect_batch: tensor([[9, 0, 0],
        [8, 9, 0]])
text_indices: tensor([[ 1,  2,  0,  0],
        [11, 22, 33, 44]]), 
aspect_indices: tensor([[9, 0, 0],
        [8, 9, 0]])
x_len: tensor([2, 4])
x embed: tensor([[[ 1.7366,  0.8752, -1.0643],
         [ 0.6609,  1.9008, -1.6762],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000]],

        [[ 0.1033,  1.3022,  0.4987],
         [-0.7301,  0.4664,  2.2021],
         [ 0.0893, -0.5834, -2.1886],
         [-3.0773,  0.1663, -3.0898]]], grad_fn=<IndexBackward0>)
aspect embed: tensor([[[-0.7560, -0.6563,  1.7949],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000]],

        [[-1.5416,  0.0565,  0.8412],
         [-0.7560, -0.6563,  1.7949],
         [ 0.0000,  0.0000,  0.0000]]], grad_fn=<EmbeddingBackward0>)
torch.sum(aspect, dim=1): tensor([[-0.7560, -0.6563,  1.7949],
        [-2.2976, -0.5998,  2.6361]], grad_fn=<SumBackw