## Objective: To train a model using SGD and populate word embeddings for the corpus

In [42]:
import torch
import torch.nn as nn
import itertools


CONTEXT_SIZE = 2  # 2 words to the left, 2 to the right
EMDEDDING_DIM = 100

In [33]:
## To create tensor which entails indexes of context
def make_context_vector(context, word_to_ix):
    idxs = [word_to_ix[w] for w in context]
    return torch.tensor(idxs, dtype=torch.long)

In [34]:
raw_text = """We are about to study the idea of a computational process. Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data. The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect, we conjure the spirits of the computer with our spells.""".split()

In [35]:
vocab = set(raw_text) # Total unique words in the raw text
# vocab

In [36]:
vocab_size = len(vocab)  #Size of vocabulary
vocab_size

49

In [37]:
word_to_ix = {word:ix for ix, word in enumerate(vocab)} #Word to index mapping
ix_to_word = {ix:word for ix, word in enumerate(vocab)} #Index to word mapping, to decode the text

In [40]:
dict(itertools.islice(word_to_ix.items(), 5))

{'things': 0, 'conjure': 1, 'We': 2, 'study': 3, 'idea': 4}

In [41]:
dict(itertools.islice(ix_to_word.items(), 5))

{0: 'things', 1: 'conjure', 2: 'We', 3: 'study', 4: 'idea'}

In [13]:
data = []
for i in range(2, len(raw_text) - 2):
    context = [raw_text[i - 2], raw_text[i - 1],
               raw_text[i + 1], raw_text[i + 2]]
    target = raw_text[i]
    data.append((context, target))

In [15]:
data # Features and labels

[(['We', 'are', 'to', 'study'], 'about'),
 (['are', 'about', 'study', 'the'], 'to'),
 (['about', 'to', 'the', 'idea'], 'study'),
 (['to', 'study', 'idea', 'of'], 'the'),
 (['study', 'the', 'of', 'a'], 'idea'),
 (['the', 'idea', 'a', 'computational'], 'of'),
 (['idea', 'of', 'computational', 'process.'], 'a'),
 (['of', 'a', 'process.', 'Computational'], 'computational'),
 (['a', 'computational', 'Computational', 'processes'], 'process.'),
 (['computational', 'process.', 'processes', 'are'], 'Computational'),
 (['process.', 'Computational', 'are', 'abstract'], 'processes'),
 (['Computational', 'processes', 'abstract', 'beings'], 'are'),
 (['processes', 'are', 'beings', 'that'], 'abstract'),
 (['are', 'abstract', 'that', 'inhabit'], 'beings'),
 (['abstract', 'beings', 'inhabit', 'computers.'], 'that'),
 (['beings', 'that', 'computers.', 'As'], 'inhabit'),
 (['that', 'inhabit', 'As', 'they'], 'computers.'),
 (['inhabit', 'computers.', 'they', 'evolve,'], 'As'),
 (['computers.', 'As', 'evol

In [43]:
## Inheriting nn.Module
class CBOW(torch.nn.Module):
    def __init__(self, vocab_size, embedding_dim):
        super(CBOW, self).__init__()

        #out: 1 x emdedding_dim
        self.embeddings = nn.Embedding(vocab_size, embedding_dim) 
        self.linear1 = nn.Linear(embedding_dim, 128)
        self.activation_function1 = nn.ReLU()
        
        #out: 1 x vocab_size
        self.linear2 = nn.Linear(128, vocab_size)
        self.activation_function2 = nn.LogSoftmax(dim = -1)
        

    def forward(self, inputs):
        embeds = sum(self.embeddings(inputs)).view(1,-1)
        out = self.linear1(embeds)
        out = self.activation_function1(out)
        out = self.linear2(out)
        out = self.activation_function2(out)
        return out

    def get_word_emdedding(self, word):
        word = torch.tensor([word_to_ix[word]])
        return self.embeddings(word).view(1,-1)


In [44]:
model = CBOW(vocab_size, EMDEDDING_DIM)

In [45]:
## Loss function
loss_function = nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

### Model Training

In [47]:
for epoch in range(500):
    total_loss = 0

    for context, target in data:
        context_vector = make_context_vector(context, word_to_ix)  
        print(context_vector)

        log_probs = model(context_vector)

        total_loss += loss_function(log_probs, torch.tensor([word_to_ix[target]]))

    #optimize at the end of each epoch
    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

tensor([ 2, 41, 21,  3])
tensor([41, 39,  3, 40])
tensor([39, 21, 40,  4])
tensor([21,  3,  4,  9])
tensor([ 3, 40,  9, 24])
tensor([40,  4, 24, 33])
tensor([ 4,  9, 33,  6])
tensor([ 9, 24,  6, 29])
tensor([24, 33, 29, 11])
tensor([33,  6, 11, 41])
tensor([ 6, 29, 41, 38])
tensor([29, 11, 38, 22])
tensor([11, 41, 22, 16])
tensor([41, 38, 16, 25])
tensor([38, 22, 25, 23])
tensor([22, 16, 23, 42])
tensor([16, 25, 42, 27])
tensor([25, 23, 27,  5])
tensor([23, 42,  5, 11])
tensor([42, 27, 11, 18])
tensor([27,  5, 18, 10])
tensor([ 5, 11, 10, 38])
tensor([11, 18, 38,  0])
tensor([18, 10,  0, 46])
tensor([10, 38, 46, 26])
tensor([38,  0, 26, 13])
tensor([ 0, 46, 13, 37])
tensor([46, 26, 37,  9])
tensor([26, 13,  9, 24])
tensor([13, 37, 24,  7])
tensor([37,  9,  7, 43])
tensor([ 9, 24, 43, 36])
tensor([24,  7, 36, 14])
tensor([ 7, 43, 14, 24])
tensor([43, 36, 24, 32])
tensor([36, 14, 32,  9])
tensor([14, 24,  9, 20])
tensor([24, 32, 20, 46])
tensor([32,  9, 46, 24])
tensor([ 9, 20, 24, 17])


### Testing

In [51]:
context = ['People','create','to', 'direct']
context_vector = make_context_vector(context, word_to_ix)
a = model(context_vector)
torch.argmax(a[0]).item()
ix_to_word[47]

'programs'

In [52]:
print(f'Raw text: {" ".join(raw_text)}\n')
print(f'Context: {context}\n')
print(f'Prediction: {ix_to_word[torch.argmax(a[0]).item()]}')

Raw text: We are about to study the idea of a computational process. Computational processes are abstract beings that inhabit computers. As they evolve, processes manipulate other abstract things called data. The evolution of a process is directed by a pattern of rules called a program. People create programs to direct processes. In effect, we conjure the spirits of the computer with our spells.

Context: ['People', 'create', 'to', 'direct']

Prediction: programs
