In [1]:
import torch
import torch.nn as nn
import torch.autograd as autograd
import torch.nn.functional as F
import torch.optim as optim

# Seed PyTorch's global random number generator so that random parameter
# initialization is repeatable between runs.
torch.manual_seed(1)

<torch._C.Generator at 0x1002c1b88>

In [2]:
# Intended context window: this many words on each side of the target word.
CONTEXT_SIZE = 2 # 2 words to the left, 2 to the right
# Length of each learned word vector (passed to the model as embedding_dim).
EMBEDDING_DIM = 5

# Toy training corpus. .split() tokenizes on whitespace only, so punctuation
# stays attached to the neighbouring word (e.g. "process.", "evolve,") and
# capitalization is preserved ("Computational" != "computational").
raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()

In [3]:
# Vocabulary: the distinct whitespace-delimited tokens of the corpus.
vocab = set(raw_text)
# Map every token to an integer row index for the embedding table.
# Iterating a set of strings yields an order that changes between interpreter
# runs (string hashes are randomized per process), which would give each word
# a different index on every run. Sorting first makes the mapping stable.
word_to_ix = {word: i for i, word in enumerate(sorted(vocab))}

In [4]:
# Build (context, target) training pairs: for every position that has a full
# window on both sides, the context is the CONTEXT_SIZE words before it
# followed by the CONTEXT_SIZE words after it, and the target is the word at
# that position. Driven by CONTEXT_SIZE rather than a hard-coded 2 so that
# changing the constant actually changes the window.
data = [
    (
        raw_text[i - CONTEXT_SIZE:i] + raw_text[i + 1:i + CONTEXT_SIZE + 1],
        raw_text[i],
    )
    for i in range(CONTEXT_SIZE, len(raw_text) - CONTEXT_SIZE)
]

In [5]:
class CBOW(nn.Module):
    """Continuous bag-of-words model with a single, shared embedding table.

    The context words are embedded and averaged; the averaged vector is then
    scored against the embedding of every vocabulary word by dot product, and
    the scores are normalized with a log-softmax.
    """

    def __init__(self, vocab_size, embedding_dim):
        """Create the embedding table.

        Args:
            vocab_size: number of distinct words (rows of the table).
            embedding_dim: length of each word vector.
        """
        super(CBOW, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)

    def forward(self, inputs):
        """Return log-probabilities over the vocabulary for the target word.

        Args:
            inputs: LongTensor of context word indices, either 1-D
                ``(context,)`` for a single example or 2-D
                ``(batch, context)`` for several examples.

        Returns:
            Tensor of shape ``(batch, vocab_size)`` holding log-probabilities;
            ``batch`` is 1 when ``inputs`` is 1-D.
        """
        embedded = self.embeddings(inputs)
        # Average over the context axis (second to last), then make sure a
        # single example still has a leading batch axis: torch.mm and the
        # loss both need a 2-D (batch, ...) tensor.
        context_vec = embedded.mean(dim=-2)
        if context_vec.dim() == 1:
            context_vec = context_vec.unsqueeze(0)
        # Score against every word vector. The weight matrix already is the
        # embedding of indices 0..vocab_size-1, so use it directly instead of
        # re-embedding a range built from a global vocabulary.
        scores = torch.mm(context_vec, self.embeddings.weight.t())
        # Normalize across the vocabulary axis explicitly.
        return F.log_softmax(scores, dim=1)
        
def make_context_vector(context, word_to_ix):
    """Encode a sequence of context words as a tensor of vocabulary indices.

    Args:
        context: iterable of words.
        word_to_ix: mapping from word to integer index; a word that is not
            a key raises ``KeyError``.

    Returns:
        The indices, in the same order as ``context``, as a
        ``torch.LongTensor`` wrapped in ``autograd.Variable``.
    """
    indices = []
    for token in context:
        indices.append(word_to_ix[token])
    return autograd.Variable(torch.LongTensor(indices))

In [6]:
# The training loop appends one summed loss per epoch to this list.
losses = []

# Build the model, then give its parameters to a plain SGD optimizer.
model = CBOW(vocab_size=len(vocab), embedding_dim=EMBEDDING_DIM)
optimizer = optim.SGD(model.parameters(), lr=0.001)

# Negative log-likelihood loss; the model's forward pass returns log-probabilities.
loss_function = nn.NLLLoss()

In [7]:
# Make ten passes over the (context, target) pairs, updating the model after
# every single example and recording each pass's summed loss in `losses`.
for epoch in range(10):
    total_loss = torch.zeros(1)  # running sum of this epoch's per-example losses
    for context, target in data:
        # Encode the context words and the target word as index tensors.
        context_var = make_context_vector(context, word_to_ix)
        target_var = autograd.Variable(torch.LongTensor([word_to_ix[target]]))

        # Gradients accumulate across backward() calls, so clear them before
        # computing this example's loss.
        model.zero_grad()
        log_probs = model(context_var)
        loss = loss_function(log_probs, target_var)

        # Backpropagate and apply one optimizer step.
        loss.backward()
        optimizer.step()

        total_loss += loss.data
    losses.append(total_loss)

In [8]:
# Display the summed training loss recorded for each epoch.
losses

[
  269.9901
 [torch.FloatTensor of size 1], 
  269.6921
 [torch.FloatTensor of size 1], 
  269.3954
 [torch.FloatTensor of size 1], 
  269.1000
 [torch.FloatTensor of size 1], 
  268.8058
 [torch.FloatTensor of size 1], 
  268.5129
 [torch.FloatTensor of size 1], 
  268.2212
 [torch.FloatTensor of size 1], 
  267.9307
 [torch.FloatTensor of size 1], 
  267.6413
 [torch.FloatTensor of size 1], 
  267.3531
 [torch.FloatTensor of size 1]]