In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.autograd as autograd
import torch.optim as optim

In [2]:
# Training examples: each entry is (list of tokens, language label).
data = [
    (text.split(), language)
    for text, language in [
        ("me gusta comer en la cafeteria", "SPANISH"),
        ("Give it to me", "ENGLISH"),
        ("No creo que sea una buena idea", "SPANISH"),
        ("No it is not a good idea to get lost at sea", "ENGLISH"),
    ]
]

# Held-out examples, same (tokens, label) layout as `data`.
test_data = [
    (text.split(), language)
    for text, language in [
        ("Yo creo que si", "SPANISH"),
        ("it is lost on me", "ENGLISH"),
    ]
]

# word_to_ix assigns every distinct word (from the training AND test
# sentences) a unique integer: its position in the bag-of-words vector.
# Indices are handed out in order of first appearance.
word_to_ix = {}
for tokens, _label in data + test_data:
    for token in tokens:
        # len(word_to_ix) is evaluated before the insert, so a new word gets
        # the next free index and a known word keeps the one it already has.
        word_to_ix.setdefault(token, len(word_to_ix))
print(word_to_ix)

VOCAB_SIZE = len(word_to_ix)
NUM_LABELS = 2

{'Give': 6, 'me': 0, 'si': 24, 'No': 9, 'una': 13, 'at': 22, 'Yo': 23, 'lost': 21, 'is': 16, 'buena': 14, 'que': 11, 'sea': 12, 'comer': 2, 'good': 19, 'en': 3, 'a': 18, 'on': 25, 'cafeteria': 5, 'it': 7, 'to': 8, 'la': 4, 'get': 20, 'not': 17, 'gusta': 1, 'creo': 10, 'idea': 15}


In [3]:
# Display the training set: a list of (token list, label) pairs.
data

[(['me', 'gusta', 'comer', 'en', 'la', 'cafeteria'], 'SPANISH'),
 (['Give', 'it', 'to', 'me'], 'ENGLISH'),
 (['No', 'creo', 'que', 'sea', 'una', 'buena', 'idea'], 'SPANISH'),
 (['No',
   'it',
   'is',
   'not',
   'a',
   'good',
   'idea',
   'to',
   'get',
   'lost',
   'at',
   'sea'],
  'ENGLISH')]

In [4]:
class BoWClassifier(nn.Module):
    """Bag-of-words classifier: a single linear layer followed by log-softmax.

    Args:
        num_labels: number of output classes (size of the output vector).
        vocab_size: length of the bag-of-words input vector.
    """

    def __init__(self, num_labels, vocab_size):
        super(BoWClassifier, self).__init__()

        # One affine map (weights + bias) from word counts to a score per label.
        self.linear = nn.Linear(vocab_size, num_labels)

    def forward(self, bow_vec):
        """Return log-probabilities over the labels for ``bow_vec``.

        Args:
            bow_vec: bag-of-words input whose last dimension has size
                ``vocab_size`` (e.g. shape ``(1, vocab_size)``).

        Returns:
            Tensor with the same leading dimensions as ``bow_vec`` and a last
            dimension of size ``num_labels``; ``exp`` of it sums to 1 along
            that last dimension.
        """
        # Normalise over the label axis explicitly. Omitting `dim` relies on a
        # deprecated implicit choice and triggers a warning; dim=-1 equals the
        # old implicit choice for the 1-D and 2-D inputs this model receives.
        return F.log_softmax(self.linear(bow_vec), dim=-1)

In [14]:
def make_bow_vector(sentence, word_to_ix):
    """Turn a tokenised sentence into a bag-of-words count vector.

    Args:
        sentence: iterable of words.
        word_to_ix: mapping from word to its column index.

    Returns:
        Tensor of shape ``(1, len(word_to_ix))`` where column ``word_to_ix[w]``
        holds the number of times ``w`` occurs in ``sentence``.

    A word missing from ``word_to_ix`` propagates the lookup error
    (``KeyError`` for a plain dict).
    """
    counts = torch.zeros(len(word_to_ix))
    for column in (word_to_ix[token] for token in sentence):
        counts[column] += 1
    # Add a leading batch dimension of size 1.
    return counts.unsqueeze(0)

In [15]:
def make_target(label, label_to_idx):
    """Return the class index of ``label`` as a one-element ``LongTensor``.

    Args:
        label: key to look up in ``label_to_idx``.
        label_to_idx: mapping from label to integer class index.
    """
    class_index = label_to_idx[label]
    return torch.LongTensor([class_index])

In [16]:
# Build the classifier: VOCAB_SIZE input features, NUM_LABELS outputs.
model = BoWClassifier(NUM_LABELS,VOCAB_SIZE)

In [17]:
# Inspect the model's learnable parameters (the linear layer's weight matrix
# and bias). len(param) is the size of each parameter's first dimension.
for param in model.parameters():
    print(param)
    print(len(param))

Parameter containing:

Columns 0 to 9 
 0.1555 -0.0755  0.0055  0.1538  0.1202 -0.1706  0.0021 -0.0291  0.1090  0.0671
-0.1707 -0.0504  0.0391 -0.1038  0.0772  0.0893  0.0738  0.1266  0.0141 -0.0243

Columns 10 to 19 
 0.1231 -0.1477  0.1418 -0.0302  0.1940 -0.1004  0.1693 -0.0148 -0.0473  0.0305
 0.0115 -0.0428 -0.0997 -0.0725  0.0072  0.1182 -0.0298 -0.0306 -0.1837 -0.1469

Columns 20 to 25 
 0.1938  0.0669 -0.1581  0.1167 -0.1125 -0.1091
 0.0291  0.0135 -0.0307 -0.1540  0.1684  0.0441
[torch.FloatTensor of size 2x26]

2
Parameter containing:
 0.0895
 0.1143
[torch.FloatTensor of size 2]

2


In [18]:
# Pick the first training example, a (tokens, label) pair, for a walkthrough.
sample = data[0]

In [19]:
# Display the selected example.
sample

(['me', 'gusta', 'comer', 'en', 'la', 'cafeteria'], 'SPANISH')

In [20]:
# Convert the sample's tokens (sample[0]) into a bag-of-words count vector.
bow_vector = make_bow_vector(sample[0], word_to_ix)

In [21]:
# Show the count vector: one column per vocabulary word.
print(bow_vector)



Columns 0 to 12 
    1     1     1     1     1     1     0     0     0     0     0     0     0

Columns 13 to 25 
    0     0     0     0     0     0     0     0     0     0     0     0     0
[torch.FloatTensor of size 1x26]



In [22]:
# Forward pass of the (still untrained) model on the sample vector; the model
# returns log-softmax outputs, i.e. one log-probability per label.
log_probs = model(autograd.Variable(bow_vector))
print(log_probs)

Variable containing:
-0.5614 -0.8449
[torch.FloatTensor of size 1x2]



In [23]:
# Map each language label to the integer class index used as the training target.
label_to_ix = { "SPANISH": 0, "ENGLISH": 1 }

In [24]:
# Run on test data before we train, just to see a before-and-after
for instance, label in test_data:
    bow_vec = autograd.Variable(make_bow_vector(instance, word_to_ix))
    log_probs = model(bow_vec)
    print("Log probabilities - {}".format(log_probs))
# Print the column of the first parameter (the linear layer's weight matrix)
# that corresponds to the word "gusta": one weight per label.
# NOTE(review): the post-training cell prints the column for "comer", so the
# two printouts are not a like-for-like before/after comparison.
print(next(model.parameters())[:,word_to_ix["gusta"]])

Log probabilities - Variable containing:
-0.7073 -0.6792
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-0.5781 -0.8231
[torch.FloatTensor of size 1x2]

Variable containing:
1.00000e-02 *
 -7.5494
 -5.0408
[torch.FloatTensor of size 2]



In [25]:
## Training the BoW classifier

In [26]:
# Negative log-likelihood loss; it is fed the log-probabilities the model outputs.
loss_function = nn.NLLLoss()

# Optimisation hyper-parameters. `iterations` is the number of full passes
# the training loop makes over `data`.
learning_rate = 0.01
iterations = 100

# Plain stochastic gradient descent over every parameter of the model.
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

In [27]:
# Train with one parameter update per example: `iterations` full passes over
# `data`, visiting the (sentence, label) pairs in list order each time.
for epoch in range(iterations):
    for instance, label in data:
        
        # Step 1 - clear the gradients
        # (PyTorch accumulates gradients across backward() calls, so they are
        # reset before each example)
        model.zero_grad()
        
        # Step 2 - make BoW vector and wrap into torch Variable
        bow_vec = autograd.Variable(make_bow_vector(instance, word_to_ix))
        target = autograd.Variable(make_target(label, label_to_ix))
        
        # Step 3 - Run forward pass
        log_probs = model(bow_vec)
        # NOTE(review): this prints for every example in every epoch
        # (len(data) * iterations = 400 printouts), which floods the output.
        print("Log probabilities - {}".format(log_probs))
        
        # Step 4 - Compute loss, gradients, update parameters
        # (NLL loss on the log-probabilities, backpropagate, then one SGD step)
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()

Log probabilities - Variable containing:
-0.5614 -0.8449
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-0.6044 -0.7906
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-0.5446 -0.8677
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-0.4175 -1.0750
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-0.5471 -0.8642
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-0.6362 -0.7535
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-0.5394 -0.8749
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-0.4755 -0.9718
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-0.5331 -0.8838
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-0.6675 -0.7194
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-0.5332 -0.8836
[torch.FloatTensor of size 1x2]

Log probab

Log probabilities - Variable containing:
-0.1947 -1.7319
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-1.5551 -0.2372
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-0.2014 -1.7017
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-2.2256 -0.1143
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-0.1916 -1.7465
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-1.5685 -0.2336
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-0.1978 -1.7180
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-2.2459 -0.1119
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-0.1886 -1.7611
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-1.5817 -0.2302
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-0.1943 -1.7340
[torch.FloatTensor of size 1x2]

Log probab

Log probabilities - Variable containing:
-2.9007 -0.0566
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-0.1093 -2.2676
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-2.0517 -0.1376
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-0.1062 -2.2951
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-2.9119 -0.0559
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-0.1083 -2.2768
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-2.0604 -0.1363
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-0.1051 -2.3053
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-2.9230 -0.0553
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-0.1072 -2.2860
[torch.FloatTensor of size 1x2]

Log probabilities - Variable containing:
-2.0691 -0.1350
[torch.FloatTensor of size 1x2]

Log probab

In [38]:
# Re-run the model on the test data after training.
for instance, label in test_data:
    bow_vec = autograd.Variable(make_bow_vector(instance, word_to_ix))
    log_probs = model(bow_vec)
    print(log_probs)
# Print the column of the first parameter (the linear layer's weight matrix)
# that corresponds to the word "comer": one weight per label.
# NOTE(review): the pre-training cell prints the column for "gusta", so the
# two printouts are not a like-for-like before/after comparison.
print(next(model.parameters())[:,word_to_ix["comer"]])

Variable containing:
-0.3674 -1.1794
[torch.FloatTensor of size 1x2]

Variable containing:
-1.5189 -0.2471
[torch.FloatTensor of size 1x2]

Variable containing:
 0.2066
-0.1620
[torch.FloatTensor of size 2]

