In [1]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [9]:
CONTEXT_SIZE = 2  # 2 words to the left, 2 to the right
raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()

# By deriving a set from `raw_text`, we deduplicate the array
vocab = set(raw_text)
vocab_size = len(vocab)

word_to_idx = {word: i for i, word in enumerate(vocab)}
data = []
for i in range(2, len(raw_text) - 2):
    context = [raw_text[i - 2], raw_text[i - 1],
               raw_text[i + 1], raw_text[i + 2]]
    target = raw_text[i]
    data.append((context, target))
print(data[:5])

[(['We', 'are', 'to', 'study'], 'about'), (['are', 'about', 'study', 'the'], 'to'), (['about', 'to', 'the', 'idea'], 'study'), (['to', 'study', 'idea', 'of'], 'the'), (['study', 'the', 'of', 'a'], 'idea')]


In [17]:
class CBoW(nn.Module):
    
    def __init__(self, embed_dims, vocab_size, context_size):
        super(CBoW, self).__init__()
        self.embeds = nn.Embedding(vocab_size, embed_dims)
        
        self.l1 = nn.Linear(embed_dims * context_size * 2, 128)
        self.l2 = nn.Linear(128, vocab_size)
        
    
    def forward(self, inputs):
        output = self.embeds(inputs).view((1, -1))
        output = F.relu(self.l1(output))
        output = self.l2(output)
        log_probs = F.log_softmax(output, dim = 1)
        return log_probs


In [11]:
def make_context_vector(context, word_to_idx):
    idxs = [word_to_idx[w] for w in context]
    tensor = torch.LongTensor(idxs)
    return autograd.Variable(tensor)


make_context_vector(data[0][0], word_to_idx)  # example

Variable containing:
 24
 18
 34
 40
[torch.LongTensor of size 4]

In [18]:
# Define loss and optimizer
losses = []
loss_fn = nn.NLLLoss()
cbow_model = CBoW(10, vocab_size, CONTEXT_SIZE)
optimizer = optim.SGD(cbow_model.parameters(), lr = 1e-3)

In [20]:
# Train the model
for epoch in range(100):
    total_loss = torch.Tensor([0])
    for context, target in data:
        context_var = make_context_vector(context, word_to_idx)
        cbow_model.zero_grad()
        
        log_probs = cbow_model(context_var)
        loss = loss_fn(log_probs, autograd.Variable(torch.LongTensor([word_to_idx[target]])))
        
        loss.backward()
        optimizer.step()
        
        total_loss += loss.data
    losses.append(total_loss)
    
print(losses)

[
 224.5558
[torch.FloatTensor of size 1]
, 
 222.9171
[torch.FloatTensor of size 1]
, 
 221.2901
[torch.FloatTensor of size 1]
, 
 219.6741
[torch.FloatTensor of size 1]
, 
 218.0686
[torch.FloatTensor of size 1]
, 
 216.4718
[torch.FloatTensor of size 1]
, 
 214.8834
[torch.FloatTensor of size 1]
, 
 213.3023
[torch.FloatTensor of size 1]
, 
 211.7293
[torch.FloatTensor of size 1]
, 
 210.1639
[torch.FloatTensor of size 1]
, 
 208.6043
[torch.FloatTensor of size 1]
, 
 207.0513
[torch.FloatTensor of size 1]
, 
 205.5040
[torch.FloatTensor of size 1]
, 
 203.9620
[torch.FloatTensor of size 1]
, 
 202.4240
[torch.FloatTensor of size 1]
, 
 200.8908
[torch.FloatTensor of size 1]
, 
 199.3607
[torch.FloatTensor of size 1]
, 
 197.8350
[torch.FloatTensor of size 1]
, 
 196.3131
[torch.FloatTensor of size 1]
, 
 194.7925
[torch.FloatTensor of size 1]
, 
 193.2758
[torch.FloatTensor of size 1]
, 
 191.7612
[torch.FloatTensor of size 1]
, 
 190.2483
[torch.FloatTensor of size 1]
, 
 188.7385

In [24]:
# Testing model
context = ['idea', 'of', 'process', 'is']
context_var = make_context_vector(context, word_to_idx)
log_probs = cbow_model(context_var)

values, indices = log_probs.max(1)

print(values)
print(indices)
print(list(vocab)[indices.data[0]])

Variable containing:
-0.8660
[torch.FloatTensor of size 1]

Variable containing:
 29
[torch.LongTensor of size 1]

a
