## Data

In [1]:
import torch
import torch.nn as nn
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

# Toy NER corpus: each entry is one sentence.
corpus = [
    "karpathy is working in openai",
    "geoffrey hinton is from canada",
]
data_size = len(corpus)

# Per-token tag ids, one inner list per sentence of `corpus`:
#   0: B-Person           1: I-Person
#   2: B-Org./Location    3: I-Org./Location
#   4: O
labels = [
    [0, 4, 4, 4, 2],
    [0, 1, 4, 4, 2],
]

# Maximum vocabulary size, fixed sequence length, and number of classes
# (the five tags listed above plus one extra id, 5, used for padded positions).
vocab_size = 11
sequence_length = 5
num_classes = 5 + 1

In [2]:
# Define tokenizer function: torchtext's 'basic_english' tokenizer, used by the
# cells below to split each sentence into a list of tokens.
tokenizer = get_tokenizer('basic_english')

# Generator feeding tokenized sentences to the vocabulary builder
def yield_tokens(examples):
    """Lazily yield the token list of every text in ``examples``.

    Each text is split with the notebook-level ``tokenizer``.
    """
    yield from map(tokenizer, examples)

# Build the vocabulary from the tokenized corpus, capped at `vocab_size`
# entries including the two special tokens.
vocab = build_vocab_from_iterator(
    yield_tokens(corpus),
    specials=["<unk>", "<pad>"],
    max_tokens=vocab_size,
)

# Tokens that are not in the vocabulary are looked up as "<unk>".
vocab.set_default_index(vocab["<unk>"])

# Display the token -> index mapping.
vocab.get_stoi()

{'karpathy': 8,
 'hinton': 6,
 '<unk>': 0,
 'geoffrey': 5,
 'in': 7,
 '<pad>': 1,
 'from': 4,
 'is': 2,
 'canada': 3,
 'openai': 9,
 'working': 10}

In [3]:
# Tokenize and numericalize your samples
def vectorize(text, vocab, sequence_length, sequence_label, pad_label=5):
    """Convert one sentence and its per-token labels into fixed-length tensors.

    The sentence is split with the notebook-level ``tokenizer``. Both outputs
    are truncated or right-padded so that they always have exactly
    ``sequence_length`` entries.

    Args:
        text: Raw sentence.
        vocab: Mapping from token to integer id; must contain ``"<pad>"``.
        sequence_length: Length of both returned tensors.
        sequence_label: One integer label per token of ``text``. The list is
            not modified.
        pad_label: Label id written at padded positions (default 5).

    Returns:
        A ``(token_ids, labels)`` pair of 1-D ``torch.long`` tensors, each of
        length ``sequence_length``.

    Raises:
        ValueError: If the number of labels differs from the number of tokens,
            which would otherwise silently misalign labels and tokens.
    """
    tokens = tokenizer(text)
    if len(sequence_label) != len(tokens):
        raise ValueError(
            f"expected one label per token: got {len(tokens)} tokens "
            f"and {len(sequence_label)} labels"
        )

    # Truncate first so an over-long sentence cannot exceed sequence_length.
    tokens = tokens[:sequence_length]
    kept_labels = list(sequence_label[:sequence_length])
    pad_count = sequence_length - len(tokens)

    token_ids = [vocab[token] for token in tokens] + [vocab["<pad>"]] * pad_count
    label_ids = kept_labels + [pad_label] * pad_count

    return torch.tensor(token_ids, dtype=torch.long), torch.tensor(label_ids, dtype=torch.long)

# Vectorize the samples
sentence_vecs = []
label_vecs = []
# The loop variable must not be called `labels`: that would rebind the global
# `labels` list to its last row, so re-running this cell would iterate over
# plain ints instead of per-sentence label lists.
for sentence, sentence_labels in zip(corpus, labels):
    sentence_vec, labels_vec = vectorize(sentence, vocab, sequence_length, sentence_labels)
    sentence_vecs.append(sentence_vec)
    label_vecs.append(labels_vec)

In [4]:
# Show the token-id tensor of every sentence, one per line.
for sentence_vec in sentence_vecs:
    print(sentence_vec)

tensor([ 8,  2, 10,  7,  9])
tensor([5, 6, 2, 4, 3])


In [5]:
# Show the label tensor of every sentence, one per line.
for labels_vec in label_vecs:
    print(labels_vec)

tensor([0, 4, 4, 4, 2])
tensor([0, 1, 4, 4, 2])


## Model

In [6]:
class POS_Model(nn.Module):
    """Per-token classifier: an embedding lookup followed by a 1-D convolution.

    Args:
        vocab_size: Number of rows in the embedding table.
        num_classes: Number of output channels, i.e. scores per token.
        embedding_dim: Size of each token embedding (default 4).
        kernel_size: Width of the convolution window (default 2).
        padding_idx: Optional token id passed to ``nn.Embedding`` as
            ``padding_idx``; ``None`` (default) treats every id as a regular
            token.
    """

    def __init__(self, vocab_size, num_classes, embedding_dim=4, kernel_size=2,
                 padding_idx=None):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx)
        # padding='same' keeps the output length equal to the input length, so
        # the model emits one score vector per input token.
        self.conv1d = nn.Conv1d(embedding_dim, num_classes,
                                kernel_size=kernel_size, padding='same')

    def forward(self, x):
        """Map token ids of shape (N, L) to class scores of shape (N, C, L)."""
        x = self.embedding(x)      # (N, L, E)
        x = x.permute(0, 2, 1)     # (N, E, L): Conv1d expects channels before length
        x = self.conv1d(x)         # (N, C, L)
        return x

# create model
# Seed the RNG first so the random weight initialisation (and therefore the
# training run that follows) is reproducible after Restart & Run All.
torch.manual_seed(0)
model = POS_Model(vocab_size, num_classes)

# test: smoke-check the output shape on one dummy batch of token ids
data = torch.tensor([[0, 4, 4, 4, 2]]).long()
with torch.no_grad():  # a shape check does not need an autograd graph
    output = model(data)
assert output.shape == (data.shape[0], num_classes, data.shape[1])
print(output.shape)

torch.Size([1, 6, 5])


  return F.conv1d(input, weight, bias, self.stride,


## Train with full data

In [7]:
# Cross-entropy loss; target entries equal to 5 (the label id that `vectorize`
# writes at padded positions) are excluded from the loss.
criterion = nn.CrossEntropyLoss(ignore_index=5)
# Adam over all model parameters with learning rate 0.2.
optimizer = torch.optim.Adam(model.parameters(), lr=0.2)

In [8]:
# Batch the vectorized samples instead of re-typing their ids by hand, so the
# training tensors always agree with the vocabulary and labels built above.
input_data = torch.stack(sentence_vecs)   # (N, L) token ids
label_data = torch.stack(label_vecs)      # (N, L) tag ids

num_epochs = 25
for _ in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(input_data)           # (N, C, L) class scores
    loss = criterion(outputs, label_data)
    print(loss.item())
    loss.backward()
    optimizer.step()

2.0805439949035645
1.139344334602356
0.6254342794418335
0.33526140451431274
0.16110697388648987
0.06471849977970123
0.022986019030213356
0.0088480981066823
0.003919375594705343
0.0018540618475526571
0.0008953640353865921
0.0004395248834043741
0.0002211297396570444
0.00011506187729537487
6.234012835193425e-05
3.534366987878457e-05
2.099214725603815e-05
1.3124727047397755e-05
8.606826668255962e-06
5.924665856582578e-06
4.2438332457095385e-06
3.218641268176725e-06
2.491468421794707e-06
2.0265540570107987e-06
1.68084807228297e-06


In [9]:
# Rebuild the batch from the vectorized samples rather than from hand-copied
# literals, so it stays in sync with the vocabulary and labels.
input_data = torch.stack(sentence_vecs)
label_data = torch.stack(label_vecs)

# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    Z = model(input_data) # N, C, L

In [10]:
# Predicted class id per token: arg-max over the class dimension of (N, C, L).
Z.argmax(dim=1)

tensor([[0, 4, 4, 4, 2],
        [0, 1, 4, 4, 2]])

In [11]:
# Normalise the scores over the class dimension, then take the most probable
# class for every token.
o_softmax = Z.softmax(dim=1)
o_softmax.argmax(dim=1)

tensor([[0, 4, 4, 4, 2],
        [0, 1, 4, 4, 2]])

In [12]:
outputs = model(input_data)
o_softmax = torch.softmax(outputs, dim=1)   # (N, C, L) class probabilities

# Print the class-probability vector of every token, sentence by sentence,
# with a blank line between sentences. Looping over the tensor's own shape
# replaces one hand-written print per (sentence, position) pair.
probs = o_softmax.detach().numpy()
for sample_idx, sample_probs in enumerate(probs):
    if sample_idx > 0:
        print()
    for position in range(sample_probs.shape[1]):
        print(sample_probs[:, position])

[9.9999881e-01 1.0772566e-08 1.1512000e-06 2.7031995e-13 1.0121404e-10
 1.2862355e-14]
[1.2865807e-12 2.8637978e-06 2.5172731e-16 1.5602674e-12 9.9999714e-01
 8.9435343e-14]
[5.4560978e-18 3.5073988e-16 8.6888309e-12 1.6754290e-19 1.0000000e+00
 1.8383869e-19]
[4.098677e-20 1.299504e-20 6.504203e-07 8.292677e-19 9.999994e-01
 8.617894e-18]
[9.6515504e-11 1.4982250e-16 1.0000000e+00 2.5962208e-13 4.0856523e-09
 3.8069118e-12]

[9.9999785e-01 4.2800050e-08 2.1014521e-06 1.3001085e-10 8.6452434e-18
 1.6039803e-11]
[2.1398057e-06 9.9999785e-01 5.9901866e-19 3.1403891e-10 1.2665124e-11
 3.7594979e-11]
[1.4481269e-12 3.7619438e-07 3.1771391e-18 5.5591470e-14 9.9999964e-01
 1.4945150e-15]
[3.6134446e-18 1.2834756e-15 4.9359019e-06 1.0818972e-13 9.9999511e-01
 1.8772615e-13]
[8.6265850e-10 6.7586252e-16 1.0000000e+00 1.7979054e-11 3.3075562e-10
 2.2178791e-10]


In [13]:
# Expected tags, for comparison with the prediction displayed below:
#   [[0, 4, 4, 4, 2],
#    [0, 1, 4, 4, 2]]
o_softmax.argmax(dim=1)

tensor([[0, 4, 4, 4, 2],
        [0, 1, 4, 4, 2]])