In [1]:
import torch
from torch import nn, optim
from torch.autograd import Variable
import torch.nn.functional as F

In [2]:
# The Continuous Bag-of-Words model (CBOW) is frequently used in NLP deep learning.
#It is a model that tries to predict words given the context 
#of a few words before and a few words after the target word. 
#This is distinct from language modeling, since CBOW is not sequential and does not have to be probabilistic. 
#Typcially, CBOW is used to quickly train word embeddings, 
#and these embeddings are used to initialize the embeddings of some more complicated model. 
#Usually, this is referred to as pretraining embeddings. It almost always helps performance a couple of percent.

CONTEXT_SIZE = 2  # 2 words to the left, 2 to the right
raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()

vocab = set(raw_text)
word_to_idx = {word:i for i, word in enumerate(vocab)}
idx_to_word = {word_to_idx[word]: word for word in word_to_idx}

In [3]:
data=[]
for i in range(CONTEXT_SIZE,len(raw_text)-CONTEXT_SIZE):
    context = [
        raw_text[i - 2], raw_text[i - 1], raw_text[i + 1], raw_text[i + 2]
    ]
    target = raw_text[i]
    data.append((context, target))

In [4]:
class CBOW(nn.Module):
    def __init__(self, n_word, n_dim, context_size):
        super(CBOW, self).__init__()
        self.embedding = nn.Embedding(n_word, n_dim)
        self.project = nn.Linear(n_dim, n_dim, bias=False)
        self.linear1 = nn.Linear(n_dim, 128)
        self.linear2 = nn.Linear(128, n_word)

    def forward(self, x):
        x = self.embedding(x)
        x = self.project(x)
        x = torch.sum(x, 0)
        x = self.linear1(x)
        x = F.relu(x, inplace=True)
        x = self.linear2(x)
        x = F.log_softmax(x)
        return x


model = CBOW(len(word_to_idx), 100, CONTEXT_SIZE)

model = model.cuda()

criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-3)

In [5]:
for epoch in range(100):
    print('epoch{}'.format(epoch))
    print('*'*10)
    running_loss = 0
    for word in data:
        context, target = word
        context = Variable(torch.LongTensor([word_to_idx[i] for i in context]))
        target = Variable(torch.LongTensor([word_to_idx[target]]))
        context = context.cuda()
        target = target.cuda()
        # forward
        out = model(context)
        loss = criterion(out,target)
        running_loss += loss.data[0]
        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print('loss: {:.6f}'.format(running_loss / len(data)))

epoch0
**********
loss: 3.920053
epoch1
**********
loss: 3.859071
epoch2
**********
loss: 3.800043
epoch3
**********
loss: 3.742670
epoch4
**********
loss: 3.686319
epoch5
**********
loss: 3.630465
epoch6
**********
loss: 3.575054
epoch7
**********
loss: 3.520260
epoch8
**********
loss: 3.465271
epoch9
**********
loss: 3.410168
epoch10
**********
loss: 3.354805
epoch11
**********
loss: 3.299028
epoch12
**********
loss: 3.242306
epoch13
**********
loss: 3.184932
epoch14
**********
loss: 3.126474
epoch15
**********
loss: 3.066969
epoch16
**********
loss: 3.006021
epoch17
**********
loss: 2.944084
epoch18
**********
loss: 2.881287
epoch19
**********
loss: 2.818032
epoch20
**********
loss: 2.754190
epoch21
**********
loss: 2.690403
epoch22
**********
loss: 2.626366
epoch23
**********
loss: 2.562176
epoch24
**********
loss: 2.498178
epoch25
**********
loss: 2.434214
epoch26
**********
loss: 2.370172
epoch27
**********
loss: 2.306383
epoch28
**********
loss: 2.242408
epoch29
**********
loss:

In [6]:
context, target = data[5]

In [7]:
context = Variable(torch.LongTensor([word_to_idx[i] for i in context])).cuda()


In [8]:
context

Variable containing:
 41
 37
 34
 27
[torch.cuda.LongTensor of size 4 (GPU 0)]

In [9]:
out = model(context)

In [10]:
_, predict_label = torch.max(out, 1)
predict_word = idx_to_word[predict_label.data[0][0]]
print('real word is {}, predict word is {}'.format(target, predict_word))

real word is of, predict word is of
