<a href="https://colab.research.google.com/github/sriprad/PyTorch/blob/master/PyTorch_NLP_Simple_Example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [34]:
# importing the libraries
import numpy as np
import torch
import torchvision
import matplotlib.pyplot as plt
from time import time
from torchvision import datasets, transforms
from torch import nn, optim
from torch.autograd import Variable
import torch.nn.functional as F

In [35]:
# Toy corpus: every example is a (list of tokens, language label) pair.
data = [
    (text.split(), language)
    for text, language in (
        ("me gusta comer en la cafeteria", "SPANISH"),
        ("Give it to me", "ENGLISH"),
        ("No creo que sea una buena idea", "SPANISH"),
        ("No it is not a good idea to get lost at sea", "ENGLISH"),
    )
]

# Held-out sentences used only for the before/after comparison.
test_data = [
    (text.split(), language)
    for text, language in (
        ("Yo creo que si", "SPANISH"),
        ("it is lost on me", "ENGLISH"),
    )
]

In [36]:
# Give every distinct token (training and test sentences alike) the next free
# integer id; that id is the token's position in the bag-of-words vector.
word_to_ix = {}
for sent, _ in data + test_data:
    for word in sent:
        # setdefault only inserts when the word has not been seen before.
        word_to_ix.setdefault(word, len(word_to_ix))

VOCAB_SIZE = len(word_to_ix)
NUM_LABELS = 2
print(VOCAB_SIZE)

26


In [37]:
class BoWClassifier(nn.Module):
    """Bag-of-words classifier: a single affine layer followed by log-softmax."""

    def __init__(self, num_labels, vocab_size):
        """Create the affine map from `vocab_size` word counts to `num_labels` scores."""
        super().__init__()
        self.linear = nn.Linear(vocab_size, num_labels)

    def forward(self, bow_vec):
        """Return per-label log-probabilities for each row of `bow_vec`.

        The log-softmax is taken over dim 1, so `bow_vec` must be 2-D with one
        bag-of-words vector per row.
        """
        scores = self.linear(bow_vec)
        return F.log_softmax(scores, dim=1)


In [38]:
def make_bow_vector(sentence, word_to_ix):
    """Count the tokens of `sentence` into a (1, len(word_to_ix)) float tensor.

    Position `word_to_ix[token]` holds how many times `token` occurs.
    A token missing from `word_to_ix` raises KeyError.
    """
    counts = torch.zeros(len(word_to_ix))
    for idx in (word_to_ix[token] for token in sentence):
        counts[idx] += 1
    # Add a leading batch dimension so the result can be fed straight to the model.
    return counts.reshape(1, -1)


def make_target(label, label_to_ix):
    """Return the class index of `label` as a one-element int64 tensor."""
    class_index = label_to_ix[label]
    return torch.tensor([class_index], dtype=torch.long)

In [39]:
# Build the (still untrained) classifier: one output score per language label.
model = BoWClassifier(num_labels=NUM_LABELS, vocab_size=VOCAB_SIZE)

In [40]:
# Show the module tree; the linear layer's in/out feature counts come from the
# VOCAB_SIZE and NUM_LABELS passed to the constructor.
print(model)

BoWClassifier(
  (linear): Linear(in_features=26, out_features=2, bias=True)
)


In [41]:
# List the learnable tensors registered on the model. For this model that is
# the nn.Linear weight matrix followed by its bias vector.
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[-0.1404, -0.0277,  0.1930,  0.0239,  0.0774,  0.1043,  0.1034,  0.1845,
          0.1141, -0.1145,  0.0058, -0.0026,  0.0534, -0.0945,  0.0879, -0.0444,
         -0.0228,  0.1806,  0.1394,  0.0025, -0.1180,  0.1380,  0.0591,  0.0620,
          0.1470,  0.1896],
        [ 0.1142, -0.0696,  0.1824, -0.1568, -0.1755,  0.0968,  0.0269,  0.0478,
         -0.1813, -0.0890,  0.0782, -0.1411, -0.1222, -0.0904,  0.0798, -0.1221,
         -0.0599,  0.1755, -0.1095, -0.0241, -0.0999, -0.0508, -0.0643,  0.1308,
          0.1377,  0.0857]], requires_grad=True)
Parameter containing:
tensor([ 0.1289, -0.0470], requires_grad=True)


In [42]:
# Forward pass on the first training sentence with the still-untrained model.
# Wrapped in no_grad() because we only inspect the output and never call backward().
with torch.no_grad():
    sample = data[0]  # (tokens, label) pair
    bow_vector = make_bow_vector(sample[0], word_to_ix)
    log_probs = model(bow_vector)
    print(log_probs)


tensor([[-0.5071, -0.9219]])


In [43]:
# Training and held-out sentences, each stored as (token list, language label).
data = [
    (text.split(), language)
    for text, language in (
        ("me gusta comer en la cafeteria", "SPANISH"),
        ("Give it to me", "ENGLISH"),
        ("No creo que sea una buena idea", "SPANISH"),
        ("No it is not a good idea to get lost at sea", "ENGLISH"),
    )
]

test_data = [
    (text.split(), language)
    for text, language in (
        ("Yo creo que si", "SPANISH"),
        ("it is lost on me", "ENGLISH"),
    )
]

# word_to_ix maps each word in the vocab to a unique integer, which will be its
# index into the bag-of-words vector. Ids are handed out in first-seen order.
word_to_ix = {}
for sent, _ in data + test_data:
    for word in sent:
        # setdefault only inserts when the word has not been seen before.
        word_to_ix.setdefault(word, len(word_to_ix))
print(word_to_ix)

VOCAB_SIZE = len(word_to_ix)
NUM_LABELS = 2


class BoWClassifier(nn.Module):
    """Bag-of-words classifier built as an nn.Module subclass.

    The only learnable pieces are A and b of the affine map, both owned by
    the nn.Linear layer. The log-softmax non-linearity has no parameters, so
    nothing has to be registered for it.
    """

    def __init__(self, num_labels, vocab_size):
        """Set up the affine layer.

        The input dimension is vocab_size because each input is a vector of
        word counts; the output dimension is num_labels because the layer
        produces one score per label.
        """
        # nn.Module's own initialiser must always run first.
        super().__init__()
        self.linear = nn.Linear(vocab_size, num_labels)

    def forward(self, bow_vec):
        """Apply the linear layer, then log-softmax across the label scores (dim 1)."""
        scores = self.linear(bow_vec)
        # Many non-linearities and other functions live in torch.nn.functional.
        return F.log_softmax(scores, dim=1)


def make_bow_vector(sentence, word_to_ix):
    """Count the tokens of `sentence` into a (1, len(word_to_ix)) float tensor.

    Position `word_to_ix[token]` holds how many times `token` occurs.
    A token missing from `word_to_ix` raises KeyError.
    """
    counts = torch.zeros(len(word_to_ix))
    for idx in (word_to_ix[token] for token in sentence):
        counts[idx] += 1
    # Add a leading batch dimension so the result can be fed straight to the model.
    return counts.reshape(1, -1)


def make_target(label, label_to_ix):
    """Return the class index of `label` as a one-element int64 tensor."""
    class_index = label_to_ix[label]
    return torch.tensor([class_index], dtype=torch.long)


# Seed PyTorch's global RNG before building the model. nn.Linear initialises its
# weights randomly, so without a fixed seed the parameters printed below differ
# on every kernel restart and the notebook cannot be reproduced.
torch.manual_seed(1)
model = BoWClassifier(NUM_LABELS, VOCAB_SIZE)

# The model knows its parameters.  The first output below is A, the second is b.
# Whenever you assign a component to a class variable in the __init__ function
# of a module, which was done with the line
# self.linear = nn.Linear(...)
# then the module (in this case, BoWClassifier) registers the nn.Linear's
# parameters and exposes them through .parameters().
for param in model.parameters():
    print(param)

# To run the model, pass in a BoW vector.
# Here we don't need to train, so the code is wrapped in torch.no_grad().
with torch.no_grad():
    sample = data[0]  # (tokens, label) pair of the first training sentence
    bow_vector = make_bow_vector(sample[0], word_to_ix)
    log_probs = model(bow_vector)
    print(log_probs)

{'me': 0, 'gusta': 1, 'comer': 2, 'en': 3, 'la': 4, 'cafeteria': 5, 'Give': 6, 'it': 7, 'to': 8, 'No': 9, 'creo': 10, 'que': 11, 'sea': 12, 'una': 13, 'buena': 14, 'idea': 15, 'is': 16, 'not': 17, 'a': 18, 'good': 19, 'get': 20, 'lost': 21, 'at': 22, 'Yo': 23, 'si': 24, 'on': 25}
Parameter containing:
tensor([[ 0.1346, -0.0129,  0.0996, -0.1787, -0.1247, -0.0449, -0.1256, -0.1245,
         -0.0910,  0.1796,  0.1810,  0.0308,  0.0019, -0.0172,  0.1162, -0.0911,
         -0.0369, -0.1494,  0.1205,  0.1169, -0.1767,  0.0200, -0.0804,  0.1069,
         -0.0463,  0.0151],
        [ 0.1512, -0.1256,  0.0190, -0.0499, -0.0927,  0.0883, -0.1939,  0.1076,
          0.0483, -0.1933, -0.0082,  0.1953,  0.1832, -0.1867, -0.1938, -0.0308,
          0.0099,  0.0465,  0.0196,  0.1619, -0.0100, -0.1510,  0.1365,  0.0346,
          0.0086, -0.0577]], requires_grad=True)
Parameter containing:
tensor([ 0.1016, -0.1725], requires_grad=True)
tensor([[-0.6177, -0.7747]])


In [44]:
# Class index for each language label. The index is also the position of that
# label's log-probability in the model output.
label_to_ix = {"SPANISH": 0, "ENGLISH": 1}

In [45]:
# Run on test data before we train, just to see a before-and-after
with torch.no_grad():
    for instance, label in test_data:
        bow_vec = make_bow_vector(instance, word_to_ix)
        log_probs = model(bow_vec)
        print(log_probs)

tensor([[-0.5475, -0.8637]])
tensor([[-0.5881, -0.8105]])


In [46]:
# bow_vec is whatever the previous cell's loop left behind, i.e. the count
# vector of the last test sentence ("it is lost on me").
print(bow_vec)

tensor([[1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
         0., 0., 0., 1., 0., 0., 0., 1.]])


In [47]:
# Print the matrix column corresponding to "creo"
print(next(model.parameters())[:, word_to_ix["creo"]])

loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# Usually you want to pass over the training data several times.
# 100 is much bigger than on a real data set, but real datasets have more than
# two instances.  Usually, somewhere between 5 and 30 epochs is reasonable.
for epoch in range(100):
    for instance, label in data:
        # Step 1. Remember that PyTorch accumulates gradients.
        # We need to clear them out before each instance
        model.zero_grad()

        # Step 2. Make our BOW vector and also we must wrap the target in a
        # Tensor as an integer. For example, if the target is SPANISH, then
        # we wrap the integer 0. The loss function then knows that the 0th
        # element of the log probabilities is the log probability
        # corresponding to SPANISH
        bow_vec = make_bow_vector(instance, word_to_ix)
        target = make_target(label, label_to_ix)

        # Step 3. Run our forward pass.
        log_probs = model(bow_vec)

        # Step 4. Compute the loss, gradients, and update the parameters by
        # calling optimizer.step()
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()

with torch.no_grad():
    for instance, label in test_data:
        bow_vec = make_bow_vector(instance, word_to_ix)
        log_probs = model(bow_vec)
        print(log_probs)

# Index corresponding to Spanish goes up, English goes down!
print(next(model.parameters())[:, word_to_ix["creo"]])

tensor([ 0.1810, -0.0082], grad_fn=<SelectBackward>)
tensor([[-0.1469, -1.9909]])
tensor([[-2.3700, -0.0981]])
tensor([ 0.5401, -0.3673], grad_fn=<SelectBackward>)
