In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import numpy as np

torch.manual_seed(1)

<torch._C.Generator at 0x26eeb905a10>

In [3]:
context_size = 2 # target 앞 뒤로 2개의 단어를 참조
raw_sentence =  """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()

print(len(raw_sentence))

62


In [73]:
vocab = set(raw_sentence)
word2idx = {w: idx for idx, w in enumerate(vocab)}
idx2word = {idx: w for w, idx in word2idx.items()}

In [11]:
vocab_size = len(vocab)
embedding_dim = 10
data = []
for i in range(2, len(raw_sentence)-2):
    context = [raw_sentence[i-2], raw_sentence[i-1],
              raw_sentence[i+1], raw_sentence[i+2]]
    target = [raw_sentence[i]]
    data.append((context, target))

In [19]:
print(data[0][0]) # context
print(data[0][1]) # target

['We', 'are', 'to', 'study']
['about']


In [43]:
class CBOW(nn.Module):
    def __init__(self, vocab_size, embedding_dim, context_size):
        super(CBOW, self).__init__()
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.linear1 = nn.Linear(context_size*embedding_dim, 128)
        self.linear2 = nn.Linear(128, vocab_size)
        
    def forward(self, x):
        out = F.relu(self.linear1(self.embedding(x).view(1, -1)))
        out = self.linear2(out)
        out = F.log_softmax(out, dim=1)
        return out

In [48]:
criterion = nn.NLLLoss()
model = CBOW(vocab_size, embedding_dim, context_size*2) # 앞 뒤로 2개씩 들어가서
optimizer = optim.SGD(model.parameters(), lr=0.001)
print(model)

CBOW(
  (embedding): Embedding(49, 10)
  (linear1): Linear(in_features=40, out_features=128, bias=True)
  (linear2): Linear(in_features=128, out_features=49, bias=True)
)


In [49]:
# word vector to index vector
def wordvec_to_idxvec(vec, word2idx):
    idxs = [word2idx[w] for w in vec]
    return idxs

In [50]:
# training
for epoch in range(10):
    losses = []
    for step, (context, target) in enumerate(data):
        context = wordvec_to_idxvec(context, word2idx)
        target = wordvec_to_idxvec(target, word2idx)
        
        context = torch.LongTensor(context)
        target = torch.LongTensor(target)
        
        outputs = model(context)
        loss = criterion(outputs, target)
        
#         print(outputs)
#         print(target)
        
#         print(loss.item())
        
        loss.backward()
        optimizer.step()
        
        losses.append(loss.item())
    print("[%d/%d] loss:%.3f" % (epoch+1, 10, np.mean(losses)))

[1/10] loss:3.972
[2/10] loss:2.813
[3/10] loss:1.250
[4/10] loss:0.122
[5/10] loss:0.026
[6/10] loss:0.001
[7/10] loss:0.008
[8/10] loss:0.000
[9/10] loss:0.000
[10/10] loss:0.000


In [66]:
# test 
test_sentence = "processes are abstract beings that"
test_sentence = test_sentence.split()
test_context = [test_sentence[0], test_sentence[1], test_sentence[3], test_sentence[4]]
test_target = [test_sentence[2]]

print(test_context)
print(test_target)

['processes', 'are', 'beings', 'that']
['abstract']


In [68]:
context_vec = wordvec_to_idxvec(test_context, word2idx)
target_vec = wordvec_to_idxvec(test_target, word2idx)
context_vec, target_vec = torch.LongTensor(context_vec), torch.LongTensor(target_vec)
print(context_vec)
print(target_vec)

tensor([  8,  25,   4,  19])
tensor([ 27])


In [72]:
outputs = model(context_vec)
pred, idx = torch.max(outputs, dim=1)

result = idx2word[idx.item()]
print("Predicted result:", result)
print("Actual target:", test_target[0])

Predicted result: abstract
Actual target: abstract
