In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Seed PyTorch's global random number generator so that randomly initialised
# parameters are repeatable from run to run.
torch.manual_seed(1)

<torch._C.Generator at 0x7fa2c763f9b0>

## Task 1. Prepare data
**3 points**

In this task, you should prepare your data, which is a list of tuples. Each tuple has two elements: a list of context words, and the target word.
Here both context words and target word are represented by word indices.

In [None]:
# Number of context words taken on EACH side of the target word.
# The default of 3 means 3 words to the left and 3 words to the right.
CONTEXT_SIZE = 3

# Toy corpus, tokenised by whitespace (punctuation stays attached to words).
raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()

vocab = set(raw_text)
vocab_size = len(vocab)

# Iterating a set of strings yields a different order on every interpreter
# start (string hashing is randomised), which would make the word indices
# irreproducible. Sorting gives one stable ordering shared by both mappings.
idx_to_word = sorted(vocab)
word_to_idx = {word: i for i, word in enumerate(idx_to_word)}

# The corpus re-expressed as a sequence of vocabulary indices.
word_indices = [word_to_idx[w] for w in raw_text]

def prepare_data(word_indices, context_size=None):
    """Build CBOW training pairs from a sequence of word indices.

    Args:
        word_indices: Sequence of integer word indices for the corpus.
        context_size: Number of context words taken on each side of the
            target. Defaults to the module-level ``CONTEXT_SIZE``.

    Returns:
        A list of ``(context, target)`` tuples, where ``context`` is a list of
        the ``context_size`` indices before and the ``context_size`` indices
        after position ``i``, and ``target`` is the index at position ``i``.
        Positions without a full window on both sides are skipped, so the
        result is empty when the sequence is shorter than
        ``2 * context_size + 1``.

    Raises:
        ValueError: If ``context_size`` is negative.
    """
    if context_size is None:
        context_size = CONTEXT_SIZE
    if context_size < 0:
        raise ValueError("context_size must be non-negative")

    data = []
    for i in range(context_size, len(word_indices) - context_size):

        #### START YOUR CODE ####
        # Derive the window from context_size instead of hard-coding +/-3,
        # so the context always matches the loop bounds above.
        left = list(word_indices[i - context_size:i])
        right = list(word_indices[i + 1:i + context_size + 1])
        context = left + right
        target = word_indices[i]
        #### END YOUR CODE ####

        data.append((context, target))

    return data

In [None]:
# Test Task 1. Do not change the code below.
# Build the (context, target) pairs from the indexed corpus.
data = prepare_data(word_indices)
# Show the first pair as raw indices, then decoded back to words.
print('data[0]:', data[0])
ctx, tgt = data[0]
print('context words:', [idx_to_word[c] for c in ctx])
print('target word:', idx_to_word[tgt])

## Expected output

|&nbsp;|&nbsp;|
|--|--|
|data\[0\]: |(\[1, 20, 39, 37, 45, 35\], 3)|
|context words: |\['We', 'are', 'about', 'study', 'the', 'idea'\]|
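|target word: | to|

*Note: the index values in `data[0]` depend on the order in which the vocabulary is enumerated and may differ from those shown above; the decoded context and target words should match.*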

---



## Task 2: Implement a CBOW model

**4 points**

In this task, you will implement a CBOW model. In the `__init__()` method, define the size of `self.embeddings` and `self.linear` properly.

The `self.linear` takes the average embeddings of all context words as input, and the output size is `vocab_size`.
It is followed by a log-softmax activation (`nn.LogSoftmax`), so the model outputs log-probabilities.

The `forward()` method has an input argument `inputs`, which holds the context word indices (in a `torch.long` tensor).
You should get the embeddings of all context words, and compute the average embedding (into the `embeds` variable).

In [None]:
class CBOW(nn.Module):
    """Continuous bag-of-words model.

    Averages the embeddings of the context words and maps the result to
    log-probabilities over the vocabulary.

    Args:
        vocab_size: Number of distinct words; sets both the number of
            embedding rows and the width of the output layer.
        embedding_dim: Size of each word embedding vector.
    """

    def __init__(self, vocab_size, embedding_dim):
        super().__init__()

        #### START YOUR CODE ####
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        # One output score per vocabulary word. A fixed width (e.g. 10) would
        # make every target index >= that width invalid for the loss.
        self.linear = nn.Linear(embedding_dim, vocab_size)
        #### END YOUR CODE ####

        self.act = nn.LogSoftmax(dim=-1)

    def forward(self, inputs):
        """Compute log-probabilities of the target word for one context.

        Args:
            inputs: 1-D ``torch.long`` tensor of context word indices.

        Returns:
            Tensor of shape ``(1, vocab_size)`` holding log-probabilities.
        """
        #### START YOUR CODE ####
        # Average (not sum) the context embeddings, then add a batch
        # dimension so the result has shape (1, embedding_dim).
        embeds = self.embeddings(inputs).mean(dim=0).view(1, -1)
        #### END YOUR CODE ####

        out = self.linear(embeds)
        out = self.act(out)

        return out

In [None]:
# Test Task 2. Do not change the code below
# Re-seed so the freshly constructed model gets repeatable initial weights.
torch.manual_seed(0)

m = CBOW(10, 20)
test_input = torch.tensor([1,2,3], dtype=torch.long)
test_output = m(test_input)

print('test_output.shape', test_output.shape)
print('test_output', test_output.data)

### Expected output
|&nbsp;|&nbsp;|
|--|--|
|test_output.shape| torch.Size(\[1, 10\])|
|test_output|tensor(\[\[-1.6878, -4.2108, -5.0252, -2.9802, -3.1362, -1.5436, -1.4120, -3.2485, -1.6490, -4.5009\]\])|

---

## Task 3. Training loop
**2 points**

In this task, you will complete the training loop. 

You should create `ctx_tensor` and `tgt_tensor` out of `ctx` and `tgt`, respectively. *Hint*: you need to wrap `tgt` in a list before creating the `tgt_tensor`, so that the resulting tensor has the dimension that `nn.NLLLoss()` accepts.

`ctx_tensor` is used to compute `output`. `loss_function()` is called upon `output` and `tgt_tensor` to compute the loss.

In [None]:
# Re-seed so the model below starts from repeatable initial weights.
torch.manual_seed(0)

EMDEDDING_DIM = 100
model = CBOW(vocab_size, EMDEDDING_DIM)

# NLLLoss expects log-probabilities as input, which the model's
# LogSoftmax output provides.
loss_function = nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

# Training
for epoch in range(100):
    # Sum of the per-example losses over the whole dataset for this epoch.
    total_loss = 0

    for ctx, tgt in data:
        #### START YOUR CODE ####
        ctx_tensor = torch.tensor(ctx, dtype=torch.long)
        # Wrap the target in a list so the tensor has shape (1,), matching the
        # (1, vocab_size) model output; a 0-dim target is rejected by NLLLoss.
        tgt_tensor = torch.tensor([tgt], dtype=torch.long)
        output = model(ctx_tensor)

        # The try...except code is to help you debug. You can leave them unchanged.
        try:
            total_loss += loss_function(output, tgt_tensor)
        except Exception:
            print(ctx_tensor)
            print(tgt_tensor)
            raise
        #### END YOUR CODE ####

    # Optimize once at the end of each epoch, using the gradient of the
    # loss accumulated over all examples.
    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

    # print training information
    if epoch % 5 == 0 and epoch > 0:
        print(f'Loss within epoch {epoch}: ', total_loss.item())

### Expected output:

You should observe the loss decreasing from 100+ (at epoch 5) to around 3.x (at epoch 95).
The absolute values do not matter.

<!-- |&nbsp;|&nbsp;|
|--|--|
|Loss within epoch 5: | 138.33709716796875|
|Loss within epoch 10: | 70.50218963623047|
|Loss within epoch 15: | 38.877227783203125|
|Loss within epoch 20: | 25.064970016479492|
|Loss within epoch 25: | 18.110904693603516|
|Loss within epoch 30: | 14.05634880065918|
|Loss within epoch 35: | 11.44089126586914|
|Loss within epoch 40: | 9.62782096862793|
|Loss within epoch 45: | 8.302525520324707|
|Loss within epoch 50: | 7.293947219848633|
|Loss within epoch 55: | 6.501856327056885|
|Loss within epoch 60: | 5.863919734954834|
|Loss within epoch 65: | 5.339460372924805|
|Loss within epoch 70: | 4.900869846343994|
|Loss within epoch 75: | 4.528764247894287|
|Loss within epoch 80: | 4.209164619445801|
|Loss within epoch 85: | 3.9317333698272705|
|Loss within epoch 90: | 3.688669443130493|
|Loss within epoch 95: | 3.473975896835327| -->

---

## Task 4
**1 point**

In this final task, you will need to find the index of the maximum value in the model output. *Hint*: use `torch.argmax()`.

In [None]:
def get_predicted_word(model_output, idx_to_word):
    """Return the word whose score is highest in the first row of the output.

    Args:
        model_output: Tensor of scores; only ``model_output[0]`` is inspected.
        idx_to_word: Sequence mapping a vocabulary index to its word.

    Returns:
        The entry of ``idx_to_word`` at the position of the largest score.
    """
    #### START YOUR CODE ####
    first_row = model_output[0]
    best = int(first_row.argmax())
    #### END YOUR CODE ####

    return idx_to_word[best]

In [None]:
# Test Task 4. Do not change the code below
# Context window taken from the corpus sentence "The evolution of a process
# is directed by ...", with the middle word "process" left out.
ctx_words = 'evolution of a is directed by'.split()
ctx_indices = [word_to_idx[w] for w in ctx_words]
ctx_tensor = torch.tensor(ctx_indices, dtype=torch.long)

out = model(ctx_tensor)
pred = get_predicted_word(out, idx_to_word)
print(f'The predicted word is: \"{pred}\"')

### Expected output

|&nbsp;|&nbsp;|
|--|--|
|The predicted word is: |"process"|

## Congratulations!
Congratulations! You have now understood how to use word embeddings for some basic NLP tasks!