In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

<torch._C.Generator at 0x124c4b310>

In [2]:
CONTEXT_SIZE = 2  # 2 words to the left, 2 to the right
EMBEDDING_DIM = 10
raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()

# By deriving a set from `raw_text`, we deduplicate the array
vocab = set(raw_text)
vocab_size = len(vocab)

In [3]:
print ("vocab size: {}".format(vocab_size))

vocab size: 49


In [4]:
word_to_ix = {word: i for i, word in enumerate(vocab)}
data = []
for i in range(2, len(raw_text) - 2):
    context = [raw_text[i - 2], raw_text[i - 1],
               raw_text[i + 1], raw_text[i + 2]]
    target = raw_text[i]
    data.append((context, target))
print(data[:5])

[(['We', 'are', 'to', 'study'], 'about'), (['are', 'about', 'study', 'the'], 'to'), (['about', 'to', 'the', 'idea'], 'study'), (['to', 'study', 'idea', 'of'], 'the'), (['study', 'the', 'of', 'a'], 'idea')]


In [32]:
class CBOW(nn.Module):

    def __init__(self, vocab_size, embedding_dim, context_size):
        super(CBOW, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.linear1 = nn.Linear(embedding_dim, vocab_size)

    def forward(self, inputs):
        embeds = self.embeddings(inputs).sum(dim=0).view((1, -1))
        out = self.linear1(embeds)
        out = F.log_softmax(out, dim=-1)
        return out

In [33]:
losses = []
loss_function = nn.NLLLoss()
model = CBOW(len(vocab), EMBEDDING_DIM, CONTEXT_SIZE)
optimizer = optim.SGD(model.parameters(), lr=0.001)

In [34]:
for epoch in range(10):
    total_loss = 0
    for context, target in data:

        # Step 1. Prepare the inputs to be passed to the model (i.e, turn the words
        # into integer indices and wrap them in tensors)
        context_idxs = torch.tensor([word_to_ix[w] for w in context], dtype=torch.long)

        # Step 2. Recall that torch *accumulates* gradients. Before passing in a
        # new instance, you need to zero out the gradients from the old
        # instance
        model.zero_grad()

        # Step 3. Run the forward pass, getting log probabilities over next
        # words
        log_probs = model(context_idxs)

        # Step 4. Compute your loss function. (Again, Torch wants the target
        # word wrapped in a tensor)
        loss = loss_function(log_probs, torch.tensor([word_to_ix[target]], dtype=torch.long))

        # Step 5. Do the backward pass and update the gradient
        loss.backward()
        optimizer.step()

        # Get the Python number from a 1-element Tensor by calling tensor.item()
        total_loss += loss.item()
    losses.append(total_loss)
print(losses)  # The loss decreased every iteration over the training data!

[252.88486909866333, 250.99831771850586, 249.13452696800232, 247.2929413318634, 245.47303891181946, 243.67429780960083, 241.896258354187, 240.1384379863739, 238.40040063858032, 236.68173170089722]


In [35]:
print (word_to_ix.keys())

dict_keys(['called', 'conjure', 'with', 'create', 'spells.', 'idea', 'data.', 'beings', 'other', 'of', 'People', 'The', 'to', 'about', 'computer', 'spirits', 'study', 'is', 'our', 'the', 'effect,', 'process.', 'Computational', 'directed', 'pattern', 'computers.', 'rules', 'we', 'program.', 'We', 'In', 'processes.', 'they', 'abstract', 'process', 'processes', 'evolution', 'inhabit', 'by', 'computational', 'that', 'a', 'evolve,', 'manipulate', 'direct', 'As', 'are', 'things', 'programs'])


In [44]:
# print the embedding weights for words in the vocabulary 
input = torch.LongTensor([word_to_ix[w] for w in list(vocab)[:5]])
model.embeddings(input)

tensor([[ 0.8652, -1.2586,  0.3911,  1.0608,  2.0939,  0.0113,  0.1799, -1.2016,
         -0.7667,  2.2097],
        [ 1.2588,  0.1746,  0.5316, -0.3355,  0.7673, -0.4981, -1.3101, -1.0293,
          0.1556, -0.0675],
        [ 0.5467,  0.3988,  0.1760,  0.1769,  0.6784, -1.1280,  0.0095,  1.6059,
          0.7278,  0.7825],
        [ 1.0040, -0.1612, -1.3938, -1.1648, -0.2300, -0.6855, -0.4335, -1.6061,
         -0.1018, -0.1895],
        [ 0.2510, -1.1044, -0.3683,  0.3774, -0.1726, -0.8112, -0.0918,  0.2349,
         -1.8467, -1.2028]], grad_fn=<EmbeddingBackward>)