## Data

In [1]:
import torch
import torch.nn as nn
import torchtext; torchtext.disable_torchtext_deprecation_warning()
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

corpus = [
    "i like cats and dogs",
    "books are expensive"    
]
data_size = len(corpus)

# 0: noun/pronoun - 1: verb - others - 2
labels = [[0, 1, 0, 2, 0],
          [0, 1, 2]]

# Define the max vocabulary size and sequence length
vocab_size = 10
sequence_length = 5

In [2]:
# Define tokenizer function
tokenizer = get_tokenizer('basic_english')

# Create a function to yield list of tokens
def yield_tokens(examples):
    for text in examples:
        yield tokenizer(text)

# Create vocabulary
vocab = build_vocab_from_iterator(yield_tokens(corpus),
                                  max_tokens=vocab_size,
                                  specials=["<unk>", "<pad>"])
vocab.set_default_index(vocab["<unk>"])
vocab.get_stoi()

{'dogs': 6,
 'books': 4,
 'like': 9,
 'i': 8,
 'cats': 5,
 'are': 3,
 'expensive': 7,
 'and': 2,
 '<pad>': 1,
 '<unk>': 0}

In [3]:
# Tokenize and numericalize your samples
def vectorize(text, vocab, sequence_length, sequence_label):
    tokens = tokenizer(text)
    
    token_ids = [vocab[token] for token in tokens]    
    token_ids = token_ids + [vocab["<pad>"]] * (sequence_length - len(tokens))
    sequence_label = sequence_label + [3] * (sequence_length - len(tokens))
    
    return torch.tensor(token_ids, dtype=torch.long), torch.tensor(sequence_label, dtype=torch.long)

# Vectorize the samples
sentence_vecs = []
label_vecs = []
for sentence, labels in zip(corpus, labels):
    sentence_vec, labels_vec = vectorize(sentence, vocab, sequence_length, labels)
    sentence_vecs.append(sentence_vec)
    label_vecs.append(labels_vec)

In [4]:
for v in sentence_vecs:
    print(v)

tensor([8, 9, 5, 2, 6])
tensor([4, 3, 7, 1, 1])


In [5]:
for v in label_vecs:
    print(v)

tensor([0, 1, 0, 2, 0])
tensor([0, 1, 2, 3, 3])


## Model

In [6]:
class TransformerBlock(nn.Module):
    def __init__(self, embed_dim, num_heads, ff_dim):
        super().__init__()
        self.attn = nn.MultiheadAttention(embed_dim=embed_dim, 
                                          num_heads=num_heads)
        self.ffn = nn.Linear(in_features=embed_dim, 
                             out_features=ff_dim)
        self.layernorm_1 = nn.LayerNorm(normalized_shape=embed_dim)
        self.layernorm_2 = nn.LayerNorm(normalized_shape=embed_dim)

    def forward(self, query, key, value):
        attn_output, _ = self.attn(query, key, value)
        out_1 = self.layernorm_1(query + attn_output)
        ffn_output = self.ffn(out_1)
        x = self.layernorm_2(out_1 + ffn_output)
        
        return x

In [7]:
class POS_Model(nn.Module):
    def __init__(self, vocab_size, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, 4)
        self.transformer = TransformerBlock(4, 1, 4)

    def forward(self, x):
        x = self.embedding(x)
        x = self.transformer(x, x, x)
        return x.permute(0, 2, 1)

num_classes = 4
model = POS_Model(vocab_size, num_classes)

# test
data = torch.tensor([[3, 2, 3, 2, 1]]).long()
output = model(data)
print(output.shape)

torch.Size([1, 4, 5])


## Train with full data

In [8]:
criterion = nn.CrossEntropyLoss(ignore_index=3)
optimizer = torch.optim.Adam(model.parameters(), lr=0.2)

In [9]:
input_data = torch.tensor( [[8, 9, 5, 2, 6],
                            [4, 3, 7, 1, 1]], dtype=torch.long)
label_data = torch.tensor([[0, 1, 0, 2, 0],
                           [0, 1, 2, 3, 3]], dtype=torch.long)

for _ in range(30):
    optimizer.zero_grad()
    outputs = model(input_data)
    loss = criterion(outputs, label_data)
    print(loss.item())
    loss.backward()
    optimizer.step()

2.3551383018493652
1.2236855030059814
0.8930372595787048
0.7608769536018372
0.7075272798538208
0.6068465709686279
0.4954513907432556
0.3880462348461151
0.2865082323551178
0.22473940253257751
0.17101509869098663
0.1228785440325737
0.08610690385103226
0.062026482075452805
0.04746752232313156
0.03917742520570755
0.030918743461370468
0.02529299631714821
0.02039281837642193
0.0164200346916914
0.01336493156850338
0.01106932107359171
0.009313876740634441
0.007914122194051743
0.006764868274331093
0.005825435742735863
0.00507664680480957
0.004495302215218544
0.0040522608906030655
0.0037174136377871037


In [10]:
outputs = model(input_data)
o_softmax = torch.softmax(outputs, axis=1)

print(o_softmax[0, :, 0].detach().numpy())
print(o_softmax[0, :, 1].detach().numpy())
print(o_softmax[0, :, 2].detach().numpy())
print(o_softmax[0, :, 3].detach().numpy())
print(o_softmax[0, :, 4].detach().numpy())

print()
print(o_softmax[1, :, 0].detach().numpy())
print(o_softmax[1, :, 1].detach().numpy())
print(o_softmax[1, :, 2].detach().numpy())
print(o_softmax[1, :, 3].detach().numpy())
print(o_softmax[1, :, 4].detach().numpy())

[9.9727160e-01 1.4023897e-03 1.1621624e-03 1.6378172e-04]
[3.0090823e-03 9.9515307e-01 1.3992398e-03 4.3855415e-04]
[9.9642295e-01 7.3190132e-04 2.6797473e-03 1.6530434e-04]
[1.5782492e-03 7.6615403e-04 9.9746144e-01 1.9422935e-04]
[9.9685717e-01 2.1324824e-03 8.3606428e-04 1.7419037e-04]

[9.9726784e-01 1.4327981e-03 1.1348557e-03 1.6452580e-04]
[3.8401443e-03 9.9455607e-01 1.1618974e-03 4.4184385e-04]
[1.6727662e-03 7.3092937e-04 9.9739850e-01 1.9783550e-04]
[9.8695999e-01 1.2293526e-02 4.6303714e-04 2.8339788e-04]
[9.9375522e-01 5.4823956e-03 5.4428866e-04 2.1796297e-04]


In [11]:
# [[0, 1, 0, 2, 0],
#  [0, 1, 2, *, *]]

o_softmax.argmax(axis=1)

tensor([[0, 1, 0, 2, 0],
        [0, 1, 2, 0, 0]])