In [3]:
'''
First, we will import everything we need. We will also define a couple of useful functions.
'''
import torch
from torch import nn
from torch import optim

import random

# This is a function that prints the number of trainable parameters 
# of a model.
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

# This functions prints all parameters (and their gradients) of a model.
def print_parameters(model):
    for name, param in model.named_parameters():
        print(name)
        print(param.data)
        print(param.grad)

In [4]:
'''
Then, we need to define our model.
'''
class SeqLabRNN(nn.Module):
    
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, 1, bias=False)
        self.output_layer = nn.Linear(hidden_dim, output_dim)
        
        self.relu = nn.ReLU()
        
    def forward(self, input):

        embedding = self.embedding(input)
        lstm_output, (_, _) = self.lstm(embedding)
        output = self.output_layer(lstm_output)  
        return output

In [5]:
train_data = [(torch.tensor([1, 3, 4, 1, 2]), torch.tensor([0, 0, 1, 0, 1])),
            (torch.tensor([1, 4, 4, 1, 2]), torch.tensor([0, 1, 1, 0, 1]))]

print(train_data[0])

(tensor([1, 3, 4, 1, 2]), tensor([0, 0, 1, 0, 1]))


In [6]:
# 2) Initialize our model.

model = SeqLabRNN(5, 3, 7, 2)
count_parameters(model)


311

In [7]:
# 3) Now we train our model. 
# TODO

epochs = 10
ce = nn.CrossEntropyLoss()
softmax = nn.Softmax(dim=0)
optimizer = optim.SGD(model.parameters(), lr=0.1)

for i in range(epochs):
    print('### Epoch: ' + str(i+1) + ' ###')
    av_loss = 0
    model.train()
    for (x, y) in train_data:
        optimizer.zero_grad()
        
        # a) calculate probs / get an output
        y_raw = model(x)
        y_hat = softmax(y_raw)
        
        # b) compute loss
        loss = ce(y_raw, y)
        av_loss += loss
        
        # c) get the gradient
        loss.backward()

        # d) update the weights
        optimizer.step()
    print(av_loss/len(train_data))

### Epoch: 1 ###
tensor(0.7139, grad_fn=<DivBackward0>)
### Epoch: 2 ###
tensor(0.7120, grad_fn=<DivBackward0>)
### Epoch: 3 ###
tensor(0.7103, grad_fn=<DivBackward0>)
### Epoch: 4 ###
tensor(0.7086, grad_fn=<DivBackward0>)
### Epoch: 5 ###
tensor(0.7069, grad_fn=<DivBackward0>)
### Epoch: 6 ###
tensor(0.7054, grad_fn=<DivBackward0>)
### Epoch: 7 ###
tensor(0.7039, grad_fn=<DivBackward0>)
### Epoch: 8 ###
tensor(0.7024, grad_fn=<DivBackward0>)
### Epoch: 9 ###
tensor(0.7010, grad_fn=<DivBackward0>)
### Epoch: 10 ###
tensor(0.6996, grad_fn=<DivBackward0>)


In [8]:
print_parameters(model)

embedding.weight
tensor([[-1.8228,  0.0655,  0.5636],
        [-0.5254,  0.5660,  1.0119],
        [-1.1711, -1.8649,  1.2791],
        [-2.9051,  0.4586,  0.3852],
        [ 0.5584, -0.1623,  0.5428]])
tensor([[ 0.0000,  0.0000,  0.0000],
        [ 0.0013,  0.0052, -0.0008],
        [ 0.0035, -0.0012, -0.0032],
        [ 0.0000,  0.0000,  0.0000],
        [ 0.0018, -0.0101, -0.0090]])
lstm.weight_ih_l0
tensor([[ 0.3129,  0.1494, -0.1215],
        [-0.3041, -0.3052,  0.0639],
        [ 0.2017, -0.1411,  0.2030],
        [-0.1349,  0.3369,  0.2525],
        [-0.1097,  0.1708,  0.0236],
        [ 0.3049,  0.0166,  0.0868],
        [ 0.1456,  0.2971,  0.0556],
        [ 0.1171,  0.0359, -0.3670],
        [-0.0072, -0.2622,  0.1684],
        [ 0.2658,  0.3570, -0.3437],
        [ 0.1812, -0.3308, -0.0145],
        [ 0.0143, -0.2777,  0.3170],
        [-0.3716, -0.1532,  0.1955],
        [ 0.1617,  0.2389, -0.2414],
        [ 0.2947, -0.1310,  0.2161],
        [ 0.3452, -0.0889, -0.1934],
 