In [None]:
"""
https://github.com/smafjal/continuous-bag-of-words-pytorch/blob/master/cbow_model_pytorch.py
"""

In [40]:
import torch
from torch import nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F

In [67]:
# Is CUDA available?

torch.cuda.is_available()

False

In [51]:
EMBEDDING_DIM = 100
EPOCH = 50
VERBOSE = 5
CONTEXT_SIZE = 2  # 2 words to the left, 2 to the right

raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".lower().split()

# By deriving a set from `raw_text`, we deduplicate the array
vocab = set(raw_text)
vocab_size = len(vocab)
print("Vocab size:",vocab_size)
word_to_ix = {word: i for i, word in enumerate(vocab)}
ix_to_word = {word_to_ix[k]:k for k in word_to_ix}

print(word_to_ix)
print(ix_to_word)

Vocab size: 46
{'beings': 0, 'program.': 1, 'evolve,': 2, 'processes': 3, 'process': 4, 'study': 5, 'that': 6, 'by': 7, 'people': 8, 'data.': 9, 'computer': 10, 'directed': 11, 'effect,': 12, 'conjure': 13, 'programs': 14, 'rules': 15, 'things': 16, 'called': 17, 'direct': 18, 'create': 19, 'to': 20, 'processes.': 21, 'computational': 22, 'spells.': 23, 'spirits': 24, 'other': 25, 'idea': 26, 'inhabit': 27, 'is': 28, 'evolution': 29, 'a': 30, 'are': 31, 'with': 32, 'they': 33, 'as': 34, 'computers.': 35, 'we': 36, 'the': 37, 'process.': 38, 'our': 39, 'pattern': 40, 'abstract': 41, 'manipulate': 42, 'about': 43, 'in': 44, 'of': 45}
{0: 'beings', 1: 'program.', 2: 'evolve,', 3: 'processes', 4: 'process', 5: 'study', 6: 'that', 7: 'by', 8: 'people', 9: 'data.', 10: 'computer', 11: 'directed', 12: 'effect,', 13: 'conjure', 14: 'programs', 15: 'rules', 16: 'things', 17: 'called', 18: 'direct', 19: 'create', 20: 'to', 21: 'processes.', 22: 'computational', 23: 'spells.', 24: 'spirits', 25: 

In [52]:
data = []
for i in range(2, len(raw_text) - 2):
    context = [raw_text[i - 2], raw_text[i - 1],
               raw_text[i + 1], raw_text[i + 2]]
    target = raw_text[i]
    data.append((context, target))
print(data[:5])

[(['we', 'are', 'to', 'study'], 'about'), (['are', 'about', 'study', 'the'], 'to'), (['about', 'to', 'the', 'idea'], 'study'), (['to', 'study', 'idea', 'of'], 'the'), (['study', 'the', 'of', 'a'], 'idea')]


In [7]:
def make_context_vector(context, word_to_ix):
    #print("context")
    #print(context)
    #print("W2i")
    #print(word_to_ix)
    idxs = [word_to_ix[w] for w in context]
    return torch.tensor(idxs, dtype=torch.long)

In [24]:
context, target = data[0]
print(context)
context_idxs = make_context_vector(context, word_to_ix)
print(context_idxs)

['we', 'are', 'to', 'study']
tensor([36, 31, 20,  5])


In [25]:
embeds = nn.Embedding(vocab_size, EMBEDDING_DIM)
out = embeds(context_idxs)
print(out.shape)
out = out.view(1,-1)
print(out.shape)

torch.Size([4, 20])
torch.Size([1, 80])


In [26]:
lin1 = nn.Linear(CONTEXT_SIZE * 2 * EMBEDDING_DIM, 128)
out = lin1(out)
print(out.shape)

torch.Size([1, 128])


In [27]:
ac_fun = nn.ReLU()
out = ac_fun(out)
print(out.shape)

torch.Size([1, 128])


In [28]:
lin2 = nn.Linear(128, vocab_size)
out = lin2(out)
print(out.shape)

torch.Size([1, 46])


In [30]:
ac_fun2 = nn.LogSoftmax(dim = -1)
out = ac_fun2(out)
print(out.shape)

torch.Size([1, 46])


In [35]:
print(out)
print(out.argmax())
print(ix_to_word[out.argmax().item()])

tensor([[-4.0031, -3.9752, -3.8581, -3.5210, -4.0729, -4.1629, -4.1508, -3.4978,
         -3.8328, -3.8272, -4.0240, -4.1708, -3.8586, -3.4624, -4.2465, -4.1310,
         -3.8688, -3.6987, -3.8868, -3.7290, -3.7423, -3.6906, -3.9477, -3.5691,
         -3.9223, -3.8741, -3.7212, -3.6597, -3.6552, -3.7132, -3.6235, -3.7652,
         -4.1080, -3.8612, -3.7019, -3.9297, -3.8748, -3.8947, -3.8991, -3.5967,
         -3.8162, -3.8327, -4.0893, -4.1568, -3.3259, -4.2275]],
       grad_fn=<LogSoftmaxBackward>)
tensor(44)
in


In [53]:
class CBOW(nn.Module):

    def __init__(self, vocab_size, embed_dim, context_size):
        super(CBOW, self).__init__()
        self.embeds = nn.Embedding(vocab_size, embed_dim)
        self.lin1 = nn.Linear(context_size * 2 * embed_dim, 128)
        self.ac_fun1 = nn.ReLU()
        self.lin2 = nn.Linear(128, vocab_size)
        self.ac_fun2 = nn.LogSoftmax(dim = -1)

    def forward(self, inputs):
        out = self.embeds(inputs).view(1,-1)    ### This is the most important step. Resizing the tensor
        out = self.lin1(out)
        out = self.ac_fun1(out)
        out = self.lin2(out)
        out = self.ac_fun2(out)
        return out

In [54]:
model = CBOW(vocab_size, EMBEDDING_DIM, CONTEXT_SIZE)
losses = []
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

============================= Training

In [55]:
model.train()
for epoch in range(EPOCH):
    total_loss = 0
    for context, target in data:
        context_idxs = make_context_vector(context, word_to_ix)
        model.zero_grad()
        log_probs = model(context_idxs)
        target_word = torch.tensor([word_to_ix[target]], dtype=torch.long)
        loss = loss_function(log_probs, target_word)
        loss.backward()
        optimizer.step()

        # Get the python number from a 1-element Tensor
        total_loss += loss.item()

    mean_loss = total_loss/len(data)
    print("Epoch {}; Loss {:.4f}".format(epoch+1, mean_loss))

    losses.append(mean_loss)
print(losses)

Epoch 1; Loss 3.8707
Epoch 2; Loss 3.7828
Epoch 3; Loss 3.6971
Epoch 4; Loss 3.6131
Epoch 5; Loss 3.5308
Epoch 6; Loss 3.4498
Epoch 7; Loss 3.3695
Epoch 8; Loss 3.2896
Epoch 9; Loss 3.2098
Epoch 10; Loss 3.1307
Epoch 11; Loss 3.0517
Epoch 12; Loss 2.9725
Epoch 13; Loss 2.8933
Epoch 14; Loss 2.8141
Epoch 15; Loss 2.7346
Epoch 16; Loss 2.6548
Epoch 17; Loss 2.5749
Epoch 18; Loss 2.4948
Epoch 19; Loss 2.4152
Epoch 20; Loss 2.3360
Epoch 21; Loss 2.2580
Epoch 22; Loss 2.1808
Epoch 23; Loss 2.1046
Epoch 24; Loss 2.0296
Epoch 25; Loss 1.9555
Epoch 26; Loss 1.8825
Epoch 27; Loss 1.8107
Epoch 28; Loss 1.7402
Epoch 29; Loss 1.6708
Epoch 30; Loss 1.6027
Epoch 31; Loss 1.5358
Epoch 32; Loss 1.4700
Epoch 33; Loss 1.4056
Epoch 34; Loss 1.3426
Epoch 35; Loss 1.2815
Epoch 36; Loss 1.2216
Epoch 37; Loss 1.1634
Epoch 38; Loss 1.1073
Epoch 39; Loss 1.0529
Epoch 40; Loss 1.0004
Epoch 41; Loss 0.9500
Epoch 42; Loss 0.9015
Epoch 43; Loss 0.8552
Epoch 44; Loss 0.8111
Epoch 45; Loss 0.7688
Epoch 46; Loss 0.72

====================== TEST

In [56]:
def get_index_of_max(input):
    index = 0
    for i in range(1, len(input)):
        if input[i] > input[index]:
            index = i
    return index

def get_max_prob_result(input, ix_to_word):
    return ix_to_word[get_index_of_max(input)]

In [64]:
context = ["processes", "are", "beings", "that" ]
context_vector = make_context_vector(context, word_to_ix)
a = model(context_vector)
print(a)

tensor([[-4.9367, -5.5674, -5.0082, -4.1530, -4.9800, -4.2173, -4.6565, -4.3320,
         -5.2938, -4.9266, -5.2301, -4.9740, -4.4534, -4.7850, -4.3016, -4.8002,
         -4.9228, -5.0719, -4.7758, -5.9392, -4.9429, -4.7623, -5.1495, -5.1621,
         -4.7104, -5.2874, -4.4092, -4.6519, -4.4553, -4.1950, -3.9179, -6.0036,
         -4.3459, -4.9360, -4.7681, -5.0887, -5.8944, -4.8123, -4.8315, -5.8520,
         -4.7472, -0.5079, -5.4367, -4.1312, -5.5181, -3.5747]],
       grad_fn=<LogSoftmaxBackward>)


In [65]:
a = a.data.numpy()
print(a)

[[-4.936703   -5.5673943  -5.0082293  -4.1529584  -4.9800034  -4.217256
  -4.656454   -4.3319526  -5.293833   -4.926599   -5.230086   -4.973999
  -4.4534297  -4.784968   -4.3015747  -4.8002005  -4.9228454  -5.0718565
  -4.775781   -5.939238   -4.942948   -4.762255   -5.1494746  -5.162073
  -4.710438   -5.2873507  -4.409164   -4.651874   -4.4553475  -4.1950374
  -3.9179158  -6.003648   -4.345876   -4.935962   -4.7681184  -5.0886607
  -5.8944197  -4.81225    -4.83149    -5.852008   -4.7472315  -0.50793606
  -5.436667   -4.1312275  -5.5180883  -3.57473   ]]


In [66]:
print('Raw text: {}\n'.format(' '.join(raw_text)))
print('Context: {}\n'.format(context))
print('Prediction: {}'.format(get_max_prob_result(a[0], ix_to_word)))


Raw text: we are about to study the idea of a computational process. computational processes are abstract beings that inhabit computers. as they evolve, processes manipulate other abstract things called data. the evolution of a process is directed by a pattern of rules called a program. people create programs to direct processes. in effect, we conjure the spirits of the computer with our spells.

Context: ['processes', 'are', 'beings', 'that']

Prediction: abstract
