In [11]:
# Toy corpus, tokenised on whitespace (punctuation stays attached to the words).
training_story = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()

# Deduplicate the tokens via a set, then sort them: iteration order of a set of
# strings is not stable across interpreter runs, so without sorting the
# word -> index mapping derived from `vocabulary` would change on every restart.
vocabulary = sorted(set(training_story))

In [12]:
import torch
import torch.nn as nn

CONTEXT_SIZE = 2  # 2 words to the left, 2 to the right
EMBEDDING_DIM = 100  # size of each word's embedding vector
# Backward-compatible alias: the misspelled name is the one other cells reference.
EMDEDDING_DIM = EMBEDDING_DIM

def make_context_vector(context, word_to_ix):
    """Encode a sequence of words as a 1-D ``torch.long`` tensor of indices.

    Args:
        context: iterable of words to encode.
        word_to_ix: mapping from word to integer index.

    Returns:
        A ``torch.long`` tensor holding one index per word, in input order.

    Raises:
        KeyError: if a word is missing from ``word_to_ix``.
    """
    indices = list(map(word_to_ix.__getitem__, context))
    return torch.tensor(indices, dtype=torch.long)

vocabulary_size = len(vocabulary)
# Two-way lookup between words and their integer ids (position in `vocabulary`).
word_to_ix = {word: ix for ix, word in enumerate(vocabulary)}
ix_to_word = dict(enumerate(vocabulary))

# Build (context, target) pairs: for every position that has CONTEXT_SIZE words
# on both sides, the context is those neighbours (left ones first, then right
# ones) and the target is the word in the middle.
training_data = []
for i in range(CONTEXT_SIZE, len(training_story) - CONTEXT_SIZE):
    context = (training_story[i - CONTEXT_SIZE:i]
               + training_story[i + 1:i + 1 + CONTEXT_SIZE])
    target = training_story[i]
    training_data.append((context, target))

In [13]:
class CBOW(torch.nn.Module):
    """Continuous bag-of-words model.

    The embeddings of the context words are summed into a single vector, which
    is passed through Linear(embedding_dim, 128) -> ReLU ->
    Linear(128, vocabulary_size) -> LogSoftmax to produce log-probabilities
    over the vocabulary.

    Args:
        vocabulary_size: number of rows in the embedding table and number of
            output classes.
        embedding_dim: size of each embedding vector.
    """

    def __init__(self, vocabulary_size, embedding_dim):
        super().__init__()

        # out: 1 x embedding_dim
        self.embeddings = nn.Embedding(vocabulary_size, embedding_dim)
        self.linear1 = nn.Linear(embedding_dim, 128)
        self.activation_function1 = nn.ReLU()

        # out: 1 x vocabulary_size
        self.linear2 = nn.Linear(128, vocabulary_size)
        self.activation_function2 = nn.LogSoftmax(dim=-1)

    def forward(self, inputs):
        """Return log-probabilities of shape (1, vocabulary_size).

        Args:
            inputs: tensor of word indices; expected to be 1-D, as produced by
                ``make_context_vector`` (the embeddings are summed over dim 0).
        """
        # Tensor-native reduction over the context words instead of Python's
        # builtin sum(), which iterates the rows one by one and fails on an
        # empty context.
        embeds = self.embeddings(inputs).sum(dim=0).view(1, -1)
        out = self.linear1(embeds)
        out = self.activation_function1(out)
        out = self.linear2(out)
        out = self.activation_function2(out)
        return out

    def get_word_embedding(self, word):
        """Return the embedding of ``word`` as a (1, embedding_dim) tensor.

        NOTE: the index is looked up in the module-level ``word_to_ix`` mapping,
        which must be defined before this method is called.

        Raises:
            KeyError: if ``word`` is not in ``word_to_ix``.
        """
        word = torch.tensor([word_to_ix[word]])
        return self.embeddings(word).view(1, -1)

    # Original (misspelled) name kept so existing callers continue to work.
    get_word_emdedding = get_word_embedding

In [14]:
# Seed PyTorch's RNG before the model is created so that the randomly
# initialised weights (and therefore the training outcome) are the same on
# every "Restart & Run All".
SEED = 0
torch.manual_seed(SEED)

model = CBOW(vocabulary_size, EMDEDDING_DIM)

# NLLLoss consumes the log-probabilities emitted by the model's LogSoftmax layer.
loss_function = nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

In [23]:
# Inspect the context words of a single training sample (index 20).
training_data[20][0]

['they', 'evolve,', 'manipulate', 'other']

In [26]:
# The encoded samples never change between epochs, so build the index tensors
# once up front instead of re-creating them on every pass.
encoded_training_data = [
    (make_context_vector(context, word_to_ix), torch.tensor([word_to_ix[target]]))
    for context, target in training_data
]

for epoch in range(50):
    total_loss = 0

    # Accumulate the loss over the whole dataset (full-batch gradient descent).
    for context_vector, target_ix in encoded_training_data:
        log_probs = model(context_vector)
        total_loss += loss_function(log_probs, target_ix)

    # optimize at the end of each epoch
    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

# TESTING: predict the centre word for one sample taken from the training set.
data = training_data[20]
context = data[0]
context_vector = make_context_vector(context, word_to_ix)
# Inference only: skip autograd bookkeeping, no gradients are needed here.
with torch.no_grad():
    a = model(context_vector)  # log-probabilities, shape (1, vocabulary_size)

print(f'Context: {context}\n')
print(f'Fact: {data[1]}')
print(f'Prediction: {ix_to_word[torch.argmax(a[0]).item()]}')

Context: ['they', 'evolve,', 'manipulate', 'other']

Fact: processes
Prediction: processes
