## Data

In [1]:
import torch
import torch.nn as nn
import torchtext; torchtext.disable_torchtext_deprecation_warning()
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

corpus = [
    "i like a dog",
    "books are expensive"    
]
data_size = len(corpus)

# 0: noun/pronoun - 1: verb - others - 2
labels = [[0, 1, 2, 0],
          [0, 1, 2]]

# Define the max vocabulary size and sequence length
vocab_size = 9
sequence_length = 4

In [2]:
# Define tokenizer function
tokenizer = get_tokenizer('basic_english')

# Create a function to yield list of tokens
def yield_tokens(examples):
    for text in examples:
        yield tokenizer(text)

# Create vocabulary
vocab = build_vocab_from_iterator(yield_tokens(corpus),
                                  max_tokens=vocab_size,
                                  specials=["<unk>", "<pad>"])
vocab.set_default_index(vocab["<unk>"])
vocab.get_stoi()

{'dog': 5,
 'books': 4,
 'like': 8,
 'i': 7,
 'are': 3,
 'a': 2,
 'expensive': 6,
 '<pad>': 1,
 '<unk>': 0}

In [3]:
# Tokenize and numericalize your samples
def vectorize(text, vocab, sequence_length, sequence_label):
    tokens = tokenizer(text)
    
    token_ids = [vocab[token] for token in tokens]    
    token_ids = token_ids + [vocab["<pad>"]] * (sequence_length - len(tokens))
    sequence_label = sequence_label + [3] * (sequence_length - len(tokens))
    
    return torch.tensor(token_ids, dtype=torch.long), torch.tensor(sequence_label, dtype=torch.long)

# Vectorize the samples
sentence_vecs = []
label_vecs = []
for sentence, labels in zip(corpus, labels):
    sentence_vec, labels_vec = vectorize(sentence, vocab, sequence_length, labels)
    sentence_vecs.append(sentence_vec)
    label_vecs.append(labels_vec)

In [4]:
for v in sentence_vecs:
    print(v)

tensor([7, 8, 2, 5])
tensor([4, 3, 6, 1])


In [5]:
for v in label_vecs:
    print(v)

tensor([0, 1, 2, 0])
tensor([0, 1, 2, 3])


## Model

In [6]:
class TransformerBlock(nn.Module):
    def __init__(self, embed_dim, num_heads, ff_dim):
        super().__init__()
        self.attn = nn.MultiheadAttention(embed_dim=embed_dim, 
                                          num_heads=num_heads)
        self.ffn = nn.Linear(in_features=embed_dim, 
                             out_features=ff_dim)
        self.layernorm_1 = nn.LayerNorm(normalized_shape=embed_dim)
        self.layernorm_2 = nn.LayerNorm(normalized_shape=embed_dim)

    def forward(self, query, key, value):
        attn_output, _ = self.attn(query, key, value)
        out_1 = self.layernorm_1(query + attn_output)
        ffn_output = self.ffn(out_1)
        x = self.layernorm_2(out_1 + ffn_output)
        
        return x

In [7]:
class POS_Model(nn.Module):
    def __init__(self, vocab_size, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, 4)
        self.transformer = TransformerBlock(4, 1, 4)

    def forward(self, x):
        x = self.embedding(x)
        x = self.transformer(x, x, x)
        return x.permute(0, 2, 1)

num_classes = 4
model = POS_Model(vocab_size, num_classes)

# test
input = torch.tensor([[7, 8, 2, 5]], dtype=torch.long)
output = model(input)
print(output.shape)

torch.Size([1, 4, 4])


## Train with full data

In [8]:
criterion = nn.CrossEntropyLoss(ignore_index=3)
optimizer = torch.optim.Adam(model.parameters(), lr=0.2)

In [9]:
input_data = torch.tensor( [[7, 8, 2, 5],
                            [4, 3, 6, 1]], dtype=torch.long)
label_data = torch.tensor([[0, 1, 2, 0],
                           [0, 1, 2, 3]], dtype=torch.long)

for _ in range(15):
    optimizer.zero_grad()
    outputs = model(input_data)
    loss = criterion(outputs, label_data)
    print(loss.item())
    loss.backward()
    optimizer.step()

1.988482117652893
1.2319965362548828
0.9852806925773621
0.8991092443466187
0.7803830504417419
0.732030987739563
0.6829239726066589
0.6356580853462219
0.5949907302856445
0.5630892515182495
0.5413790345191956
0.5278937220573425
0.5173899531364441
0.5053364634513855
0.4895307421684265


In [10]:
outputs = model(input_data)
o_softmax = torch.softmax(outputs, axis=1)

print(o_softmax[0, :, 0].detach().numpy())
print(o_softmax[0, :, 1].detach().numpy())
print(o_softmax[0, :, 2].detach().numpy())
print(o_softmax[0, :, 3].detach().numpy())

print()
print(o_softmax[1, :, 0].detach().numpy())
print(o_softmax[1, :, 1].detach().numpy())
print(o_softmax[1, :, 2].detach().numpy())
print(o_softmax[1, :, 3].detach().numpy())

[0.59873897 0.00551746 0.38744986 0.00829372]
[3.3286240e-02 9.5723909e-01 9.1784503e-03 2.9623765e-04]
[0.58609414 0.00520172 0.39907968 0.00962446]
[0.7214994  0.04390387 0.2338018  0.00079492]

[0.5995267  0.00552838 0.38668054 0.00826439]
[3.3451356e-02 9.5717150e-01 9.0853656e-03 2.9170804e-04]
[0.58767104 0.00523364 0.39762446 0.00947084]
[0.56975514 0.0050397  0.41484427 0.01036088]


In [11]:
# [[0, 1, 2, 0],
#  [0, 1, 2, *]]

o_softmax.argmax(axis=1)

tensor([[0, 1, 0, 0],
        [0, 1, 0, 0]])