In [1]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

context_size = 2
embed_dim = 10
raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()

vocab = set(raw_text)
vocab_size = len(vocab)

word_to_ix = {word: i for i, word in enumerate(vocab)}
data = []
for i in range(2, len(raw_text) - 2):
    context = [raw_text[i - 2], raw_text[i - 1],
               raw_text[i + 1], raw_text[i + 2]]
    target = raw_text[i]
    data.append((context, target))
print(data[:5])

[(['We', 'are', 'to', 'study'], 'about'), (['are', 'about', 'study', 'the'], 'to'), (['about', 'to', 'the', 'idea'], 'study'), (['to', 'study', 'idea', 'of'], 'the'), (['study', 'the', 'of', 'a'], 'idea')]


In [2]:
class CBOW(nn.Module):
    def __init__(self,vocab_size, embed_dim, context_size):
        super(CBOW, self).__init__()
        self.embedd = nn.Embedding(vocab_size, embed_dim)
        self.linear1 = nn.Linear(2*context_size*embed_dim, 128)
        self.linear2 = nn.Linear(128, vocab_size)
    def forward(self, inputs):
        embedd = self.embedd(inputs).view(1,-1)
        out = F.relu(self.linear1(embedd))
        out = self.linear2(out)
        return F.log_softmax(out, dim=1)
        
model = CBOW(vocab_size, embed_dim, context_size)
print(model)

CBOW(
  (embedd): Embedding(49, 10)
  (linear1): Linear(in_features=40, out_features=128)
  (linear2): Linear(in_features=128, out_features=49)
)


In [3]:
def make_context_vector(context, word_to_ix):
    idxs = [word_to_ix[w] for w in context]
    tensor = torch.LongTensor(idxs)
    return autograd.Variable(tensor)

loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.001)
total_loss= 0
losses = []

for epochs in range(10):
    for context, target in data:
        context_var = make_context_vector(context, word_to_ix)

        model.zero_grad()

        log_probs = model(context_var)
        loss = loss_fn(log_probs, autograd.Variable(
            torch.LongTensor([word_to_ix[target]])))
        loss.backward()
        optimizer.step()

        total_loss += loss.data[0]
    losses.append(total_loss)
print(losses)

[230.79164385795593, 459.7260811328888, 686.8189218044281, 912.0851876735687, 1135.5400738716125, 1357.195882320404, 1577.0642130374908, 1795.157002210617, 2011.484936952591, 2226.056624650955]
