#### Test LSTM implementation

In [8]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


In [9]:
# Seed PyTorch's global random number generator so repeated runs start from the same state.
torch.manual_seed(1)

<torch._C.Generator at 0x7f4b0194d408>

In [10]:
# Model dimensions: the input width is the vocabulary size, the output width is the
# number of language labels.
# NOTE(review): word_to_ix is only built in a later cell of this notebook, so on a
# fresh kernel (Restart & Run All) this cell raises NameError. Move this cell below
# the vocabulary-building cell to make the notebook runnable top to bottom.
VOCAB_SIZE = len(word_to_ix)
NUM_LABELS = 2

In [11]:
# Training set: (tokenised sentence, language label) pairs.
# Sentences are split on whitespace, so each word keeps its original casing.
data = [
    (text.split(), language)
    for text, language in (
        ("me gusta comer en la cafeteria", "SPANISH"),
        ("Give it to me", "ENGLISH"),
        ("No creo que sea una buena idea", "SPANISH"),
        ("No it is not a good idea to get lost at sea", "ENGLISH"),
    )
]


In [12]:
# Held-out sentences, in the same (tokenised sentence, language label) format
# as the training set.
test_data = [
    (text.split(), language)
    for text, language in (
        ("Yo creo que si", "SPANISH"),
        ("it is lost on me", "ENGLISH"),
    )
]

In [13]:
# Build the vocabulary: every distinct word in the training AND test sentences gets
# the next free integer index, in order of first appearance.
word_to_ix = {}
for sent, _ in data + test_data:
    for word in sent:
        # setdefault inserts only when the word is new; len(word_to_ix) is evaluated
        # before the insert, so a new word receives the next unused index.
        word_to_ix.setdefault(word, len(word_to_ix))
print(word_to_ix)

{'me': 0, 'gusta': 1, 'comer': 2, 'en': 3, 'la': 4, 'cafeteria': 5, 'Give': 6, 'it': 7, 'to': 8, 'No': 9, 'creo': 10, 'que': 11, 'sea': 12, 'una': 13, 'buena': 14, 'idea': 15, 'is': 16, 'not': 17, 'a': 18, 'good': 19, 'get': 20, 'lost': 21, 'at': 22, 'Yo': 23, 'si': 24, 'on': 25}


In [14]:
class BoWClassifier(nn.Module):  # inheriting from nn.Module!
    """Bag-of-words classifier: one affine layer followed by log-softmax.

    The input is a vector of word counts of length ``vocab_size``; the output
    is a vector of ``num_labels`` log-probabilities.
    """

    def __init__(self, num_labels, vocab_size):
        """Create the affine map.

        Args:
            num_labels: number of output classes.
            vocab_size: length of the bag-of-words input vector.
        """
        super(BoWClassifier, self).__init__()

        # The only learnable parameters are A and b of the affine map, which
        # nn.Linear provides. The input dimension is vocab_size because each
        # input is a count vector over the vocabulary; the output dimension is
        # num_labels because there is one score per class.
        self.linear = nn.Linear(vocab_size, num_labels)

        # NOTE! The non-linearity log softmax does not have parameters, so
        # nothing needs to be registered for it here.

    def forward(self, bow_vec):
        """Return log-probabilities over the labels for ``bow_vec``.

        Args:
            bow_vec: tensor whose last dimension has size ``vocab_size``
                (the notebook passes shape ``(1, vocab_size)``).

        Returns:
            Tensor with the same leading dimensions as ``bow_vec`` and a last
            dimension of size ``num_labels``; exponentiating it yields values
            that sum to 1 along that last dimension.
        """
        scores = self.linear(bow_vec)
        # Normalise over the label dimension explicitly. Leaving `dim` out relies
        # on a deprecated implicit choice, which emits a warning and picks
        # dimension 0 for 3-D input. For 1-D and 2-D input dim=-1 is the same
        # dimension the implicit rule chose.
        return F.log_softmax(scores, dim=-1)

In [15]:
def make_bow_vector(sentence, word_to_ix):
    """Build the bag-of-words count vector for one sentence.

    Args:
        sentence: iterable of word tokens; each token must be a key of
            ``word_to_ix`` (an unknown word raises KeyError).
        word_to_ix: mapping from word to its position in the vector.

    Returns:
        Tensor of shape ``(1, len(word_to_ix))`` in which the entry at a
        word's index is the number of times that word occurs in ``sentence``.
    """
    counts = torch.zeros(len(word_to_ix))
    for index in (word_to_ix[token] for token in sentence):
        counts[index] += 1
    # Add a leading dimension of size 1 so the result is a single-row matrix.
    return counts.view(1, -1)


In [17]:
def make_target(label, label_to_ix):
    """Encode a label as a one-element integer tensor.

    Args:
        label: a key of ``label_to_ix``.
        label_to_ix: mapping from label to its integer class index.

    Returns:
        A ``torch.LongTensor`` of shape ``(1,)`` holding the class index.
    """
    class_index = label_to_ix[label]
    return torch.LongTensor([class_index])

In [18]:
# Instantiate the classifier. Keyword arguments make the (labels, vocabulary)
# order explicit, so the two sizes cannot be swapped by accident.
model = BoWClassifier(num_labels=NUM_LABELS, vocab_size=VOCAB_SIZE)


In [20]:
# Inspect the freshly initialised parameters. The model only contains the linear
# layer, so this prints its weight matrix followed by its bias vector.
for param in model.parameters():
    print(param)

Parameter containing:

Columns 0 to 9 
-0.0325  0.1950  0.0864  0.1697 -0.1961 -0.1459 -0.0775  0.1957 -0.1386 -0.1035
 0.1483 -0.1061 -0.1854  0.0135  0.0669  0.1624 -0.0324 -0.0168  0.0230 -0.0272

Columns 10 to 19 
-0.1599 -0.0406 -0.1231 -0.0440 -0.0606  0.0666 -0.0405  0.1708  0.0152  0.1358
-0.1411  0.1722 -0.1184  0.1092  0.1180  0.0847  0.1837  0.1188 -0.0732 -0.1597

Columns 20 to 25 
-0.0317 -0.0732  0.0726  0.0096 -0.1159 -0.0222
 0.0754  0.0071  0.1476  0.1432  0.1548  0.1291
[torch.FloatTensor of size 2x26]

Parameter containing:
-0.1628
 0.1293
[torch.FloatTensor of size 2]



In [22]:
# Sanity check: push the first training sentence through the untrained model.
sample = data[0]
bow_vector = make_bow_vector(sample[0], word_to_ix)
# The model returns a single row of log-probabilities, one column per label.
log_probs = model(autograd.Variable(bow_vector))
print(log_probs)

Variable containing:
-0.8630 -0.5480
[torch.FloatTensor of size 1x2]



In [23]:
# Map each language label to the integer class index used as the training target,
# and therefore to the matching column of the model's log-probability output.
label_to_ix = {"SPANISH": 0, "ENGLISH": 1}

In [24]:
# Run on test data before we train, just to see a before-and-after.
# Each printed row holds the log-probabilities in label_to_ix order:
# index 0 is SPANISH, index 1 is ENGLISH.
for instance, label in test_data:
    bow_vec = autograd.Variable(make_bow_vector(instance, word_to_ix))
    log_probs = model(bow_vec)
    print(log_probs)

Variable containing:
-1.2611 -0.3332
[torch.FloatTensor of size 1x2]

Variable containing:
-1.1140 -0.3978
[torch.FloatTensor of size 1x2]



In [25]:
# Print the weight-matrix column corresponding to "creo" before training.
# next(model.parameters()) is the first parameter, i.e. the linear layer's weight
# matrix; row 0 is the SPANISH weight and row 1 the ENGLISH weight for the word.
print(next(model.parameters())[:, word_to_ix["creo"]])

Variable containing:
-0.1599
-0.1411
[torch.FloatTensor of size 2]



In [26]:
# Negative log-likelihood loss: it consumes the log-probabilities that the model's
# forward pass produces with log_softmax.
loss_function = nn.NLLLoss()
# Plain stochastic gradient descent over all model parameters, learning rate 0.1.
optimizer = optim.SGD(model.parameters(), lr=0.1)

In [31]:
# Train for 100 passes over the training sentences, updating the parameters after
# every single sentence.
# NOTE(review): re-running this cell continues training the existing `model` rather
# than starting over, so later outputs depend on how many times it was executed.
for epoch in range(100):
    for instance, label in data:
        # Step 1. Remember that PyTorch accumulates gradients, so they must be
        # cleared before processing each instance.
        model.zero_grad()

        # Step 2. Build the BOW vector and wrap the target class index in a
        # Variable. For example, if the target is SPANISH, the wrapped integer
        # is 0, and the loss function then reads the 0th element of the
        # log-probabilities as the log-probability of SPANISH.
        bow_vec = autograd.Variable(make_bow_vector(instance, word_to_ix))
        target = autograd.Variable(make_target(label, label_to_ix))

        # Step 3. Run the forward pass.
        log_probs = model(bow_vec)

        # Step 4. Compute the loss and its gradients, then update the
        # parameters by calling optimizer.step().
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()


In [32]:
# Run the test sentences again after training, to compare with the pre-training output.
# Each printed row holds the log-probabilities in label_to_ix order:
# index 0 is SPANISH, index 1 is ENGLISH.
for instance, label in test_data:
    bow_vec = autograd.Variable(make_bow_vector(instance, word_to_ix))
    log_probs = model(bow_vec)
    print(log_probs)

Variable containing:
-0.1247 -2.1434
[torch.FloatTensor of size 1x2]

Variable containing:
-3.9635 -0.0192
[torch.FloatTensor of size 1x2]



In [34]:
# "it" is an English word: after training, its weight for ENGLISH (index 1) has gone
# up and its weight for SPANISH (index 0) has gone down.
print(next(model.parameters())[:, word_to_ix["it"]])

Variable containing:
-0.8618
 1.0408
[torch.FloatTensor of size 2]



http://pytorch.org/tutorials/beginner/nlp/word_embeddings_tutorial.html

In [None]:
# NOTE(review): stray, never-executed cell. `s` is not defined anywhere in the visible
# notebook, so running it would presumably raise NameError; likely safe to delete.
s