## Logistic Regression Bag-of-Words Classifier

In [1]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Seed PyTorch's random number generator so that randomly initialised
# parameters are the same every time the notebook is run from a fresh kernel
torch.manual_seed(1)

<torch._C.Generator at 0x7ff0542b5990>

In [11]:
# Training set: (tokenised sentence, language label) pairs
data = [
    ("me gusta comer en la cafeteria".split(), "SPANISH"),
    ("Give it to me".split(), "ENGLISH"),
    ("No creo que sea una buena idea".split(), "SPANISH"),
    ("No it is not a good idea to get lost at sea".split(), "ENGLISH"),
]

# Sentences held back from training, in the same (tokens, label) format
test_data = [
    ("Yo creo que si".split(), "SPANISH"),
    ("it is lost on me".split(), "ENGLISH"),
]

# Map each word to a unique integer which represents its index in the vocabulary.
# Words from both the training and the test sentences are indexed, in order of
# first appearance.
word_to_idx = {}
for sent, _ in data + test_data:
    for word in sent:
        # setdefault only inserts when the word has no index yet
        word_to_idx.setdefault(word, len(word_to_idx))
print(word_to_idx)

VOCAB_SIZE = len(word_to_idx)
NUM_LABELS = 2

{'gusta': 1, 'en': 3, 'it': 7, 'not': 17, 'is': 16, 'good': 19, 'buena': 14, 'a': 18, 'Give': 6, 'Yo': 23, 'idea': 15, 'get': 20, 'creo': 10, 'si': 24, 'at': 22, 'cafeteria': 5, 'sea': 12, 'lost': 21, 'una': 13, 'to': 8, 'la': 4, 'me': 0, 'No': 9, 'on': 25, 'que': 11, 'comer': 2}


In [12]:
class BoWClassifier(nn.Module):
    '''
        The bag of words classifier model

        A single linear layer maps a vocabulary-sized vector to one score per
        label; the scores are turned into log-probabilities with log-softmax.
    '''

    def __init__(self, num_labels, vocab_size):
        '''
            num_labels: number of output classes
            vocab_size: length of the bag-of-words input vectors
        '''
        super(BoWClassifier, self).__init__()

        # Define the parameters
        self.linear = nn.Linear(vocab_size, num_labels)

    def forward(self, bow_vec):
        '''
            Returns the log-probability of each label for the given
            bag-of-words vector(s); the last dimension of the result has
            num_labels entries.
        '''
        # Normalise over the label dimension explicitly. Omitting `dim` makes
        # PyTorch guess the dimension, which is deprecated and raises a warning.
        return F.log_softmax(self.linear(bow_vec), dim=-1)

In [13]:
def make_bow_vector(sentence, word_to_idx):
    '''
        Builds the bag-of-words count vector of a tokenised sentence

        sentence:    iterable of words; each must be a key of word_to_idx
        word_to_idx: dict mapping a word to its position in the vector

        Returns a tensor of shape (1, len(word_to_idx)) whose entry at a
        word's index is the number of times that word occurs in the sentence.
    '''
    counts = torch.zeros(len(word_to_idx))
    for idx in (word_to_idx[token] for token in sentence):
        counts[idx] += 1
    # Add a leading batch dimension of size 1
    return counts.view(1, -1)

In [14]:
def make_target(label, label_to_idx):
    '''
        Converts a label into a one-element LongTensor holding its class index

        label:        key to look up in label_to_idx
        label_to_idx: dict mapping a label to its integer class index
    '''
    class_index = label_to_idx[label]
    return torch.LongTensor([class_index])

In [15]:
# Build the classifier with one output per label and one input per vocabulary word
model = BoWClassifier(num_labels=NUM_LABELS, vocab_size=VOCAB_SIZE)

# Show the freshly initialised parameters of the model
for param in model.parameters():
    print(param)

# Run the untrained model on the first training sentence and show the
# log-probabilities it assigns to each label
sample = data[0]
sample_words = sample[0]
bow_vector = make_bow_vector(sample_words, word_to_idx)
log_probs = model(autograd.Variable(bow_vector))
print(log_probs)

Parameter containing:

Columns 0 to 9 
-0.0033  0.0331 -0.1752  0.0259  0.0291  0.0447 -0.1386  0.1791  0.0350 -0.0938
 0.0058  0.1147  0.1744 -0.1844  0.0339  0.1503  0.1582  0.0160 -0.1422 -0.0204

Columns 10 to 19 
 0.0784 -0.1055 -0.1560  0.0131 -0.0337  0.1765  0.0763 -0.0027 -0.0337  0.0159
-0.1415  0.1538  0.1206 -0.0480 -0.0401  0.0151 -0.1313  0.0597  0.1677 -0.0544

Columns 20 to 25 
-0.1765  0.1041  0.0141 -0.1783  0.0642 -0.1412
-0.0597  0.0279  0.0984  0.0541  0.0886 -0.1466
[torch.FloatTensor of size 2x26]

Parameter containing:
 0.1503
 0.0746
[torch.FloatTensor of size 2]

Variable containing:
-0.8343 -0.5695
[torch.FloatTensor of size 1x2]



In [16]:
# Integer class index of each language label; make_target looks labels up here
# when building the targets for the loss
label_to_idx = {"SPANISH": 0, "ENGLISH": 1}

In [17]:
# Forward pass on the test data once
# The labels are ignored here: only the log-probabilities the model assigns to
# each test sentence are printed, one (1 x NUM_LABELS) row per sentence
for instance, _ in test_data:
    bow_vec = autograd.Variable(make_bow_vector(instance, word_to_idx))
    log_probs = model(bow_vec)
    print(log_probs)

Variable containing:
-0.8095 -0.5890
[torch.FloatTensor of size 1x2]

Variable containing:
-0.4670 -0.9858
[torch.FloatTensor of size 1x2]



In [20]:
# Print the parameters corresponding to the word "creo":
# the column for "creo" of the model's first parameter, one entry per label
print(next(model.parameters())[:, word_to_idx["creo"]])

Variable containing:
 0.0784
-0.1415
[torch.FloatTensor of size 2]



In [22]:
# Negative log-likelihood loss; it is applied to the log-probabilities the model returns
loss_function = nn.NLLLoss()
# Stochastic gradient descent over all of the model's parameters, learning rate 0.1
optimizer = optim.SGD(model.parameters(), lr=0.1)

In [27]:
# Train: 100 passes over the training set, with one parameter update per sentence
for epoch in range(100):
    # enumerate supplies the position of each sentence within the epoch, so no
    # hand-maintained counter is needed
    for counter, (instance, label) in enumerate(data):
        # Gradients accumulate across backward() calls, so reset them first
        model.zero_grad()

        # Encode the sentence as a bag-of-words vector and the label as a class index
        bow_vec = autograd.Variable(make_bow_vector(instance, word_to_idx))
        target = autograd.Variable(make_target(label, label_to_idx))

        # Forward pass
        log_probs = model(bow_vec)

        # Compute the loss, backpropagate, and update the parameters
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()

        print("Epoch: {}, Element: {}, Loss: {}".format(epoch, counter, loss.data.numpy()))

Epoch: 0, Element: 0, Loss: [ 0.00343033]
Epoch: 0, Element: 1, Loss: [ 0.00446006]
Epoch: 0, Element: 2, Loss: [ 0.00292907]
Epoch: 0, Element: 3, Loss: [ 0.00142599]
Epoch: 1, Element: 0, Loss: [ 0.003419]
Epoch: 1, Element: 1, Loss: [ 0.00444515]
Epoch: 1, Element: 2, Loss: [ 0.00291933]
Epoch: 1, Element: 3, Loss: [ 0.00142122]
Epoch: 2, Element: 0, Loss: [ 0.00340774]
Epoch: 2, Element: 1, Loss: [ 0.00443035]
Epoch: 2, Element: 2, Loss: [ 0.00290966]
Epoch: 2, Element: 3, Loss: [ 0.00141649]
Epoch: 3, Element: 0, Loss: [ 0.00339656]
Epoch: 3, Element: 1, Loss: [ 0.00441564]
Epoch: 3, Element: 2, Loss: [ 0.00290004]
Epoch: 3, Element: 3, Loss: [ 0.00141178]
Epoch: 4, Element: 0, Loss: [ 0.00338545]
Epoch: 4, Element: 1, Loss: [ 0.00440103]
Epoch: 4, Element: 2, Loss: [ 0.0028905]
Epoch: 4, Element: 3, Loss: [ 0.00140711]
Epoch: 5, Element: 0, Loss: [ 0.00337441]
Epoch: 5, Element: 1, Loss: [ 0.00438652]
Epoch: 5, Element: 2, Loss: [ 0.00288101]
Epoch: 5, Element: 3, Loss: [ 0.00140

Epoch: 76, Element: 3, Loss: [ 0.00113643]
Epoch: 77, Element: 0, Loss: [ 0.00273252]
Epoch: 77, Element: 1, Loss: [ 0.00354514]
Epoch: 77, Element: 2, Loss: [ 0.00233018]
Epoch: 77, Element: 3, Loss: [ 0.0011334]
Epoch: 78, Element: 0, Loss: [ 0.00272531]
Epoch: 78, Element: 1, Loss: [ 0.00353572]
Epoch: 78, Element: 2, Loss: [ 0.00232401]
Epoch: 78, Element: 3, Loss: [ 0.00113039]
Epoch: 79, Element: 0, Loss: [ 0.00271815]
Epoch: 79, Element: 1, Loss: [ 0.00352636]
Epoch: 79, Element: 2, Loss: [ 0.00231787]
Epoch: 79, Element: 3, Loss: [ 0.00112739]
Epoch: 80, Element: 0, Loss: [ 0.00271102]
Epoch: 80, Element: 1, Loss: [ 0.00351704]
Epoch: 80, Element: 2, Loss: [ 0.00231176]
Epoch: 80, Element: 3, Loss: [ 0.00112441]
Epoch: 81, Element: 0, Loss: [ 0.00270393]
Epoch: 81, Element: 1, Loss: [ 0.00350777]
Epoch: 81, Element: 2, Loss: [ 0.00230568]
Epoch: 81, Element: 3, Loss: [ 0.00112145]
Epoch: 82, Element: 0, Loss: [ 0.00269687]
Epoch: 82, Element: 1, Loss: [ 0.00349855]
Epoch: 82, E

In [29]:
# Evaluate on the test data
# Each printed row holds the log-probabilities in label_to_idx order
# (SPANISH first, ENGLISH second). `label` is unpacked but not used, so the
# predictions are only displayed, not scored.
for instance, label in test_data:
    bow_vec = autograd.Variable(make_bow_vector(instance, word_to_idx))
    log_probs = model(bow_vec)
    print(log_probs)

Variable containing:
-0.0994 -2.3576
[torch.FloatTensor of size 1x2]

Variable containing:
-2.9547 -0.0535
[torch.FloatTensor of size 1x2]



In [31]:
# Print the parameters corresponding to the word "creo"
print(next(model.parameters())[:, word_to_idx["creo"]])

# We can see that element 0 (corresponding to Spanish) has gone up while
# element 1 (corresponding to English) has gone down

Variable containing:
 0.6395
-0.7026
[torch.FloatTensor of size 2]



## Word Embeddings

In [36]:
# Small example: look up the embedding vector of a single word
word_to_idx = {"hello": 0, "world": 1}
# 2 is the vocabulary size, 5 is the dimensionality of each embedding
embeds = nn.Embedding(num_embeddings=2, embedding_dim=5)
# The embedding layer is indexed with a LongTensor of word indices
lookup_tensor = torch.LongTensor([word_to_idx["hello"]])
hello_embed = embeds(autograd.Variable(lookup_tensor))
print(hello_embed)

Variable containing:
-0.1326 -0.0228  1.1848 -1.0322 -0.7039
[torch.FloatTensor of size 1x5]



#### N-Gram Language Modeling

In [41]:
# Number of preceding words used to predict the next word
CONTEXT_SIZE = 2
# Dimensionality of each word embedding
EMBEDDING_DIM = 10

test_sentence = """When forty winters shall besiege thy brow,
And dig deep trenches in thy beauty's field,
Thy youth's proud livery so gazed on now,
Will be a totter'd weed of small worth held:
Then being asked, where all thy beauty lies,
Where all the treasure of thy lusty days;
To say, within thine own deep sunken eyes,
Were an all-eating shame, and thriftless praise.
How much more praise deserv'd thy beauty's use,
If thou couldst answer 'This fair child of mine
Shall sum my count, and make my old excuse,'
Proving his beauty by succession thine!
This were to be new made when thou art old,
And see thy blood warm when thou feel'st it cold.""".split()

# Each example is ([CONTEXT_SIZE consecutive words], the word that follows them).
# The window is taken from CONTEXT_SIZE so the examples stay in step with the
# model's context_size; with CONTEXT_SIZE = 2 these are trigrams.
trigrams = [(test_sentence[i:i + CONTEXT_SIZE], test_sentence[i + CONTEXT_SIZE])
            for i in range(len(test_sentence) - CONTEXT_SIZE)]
print(trigrams[:3])

vocab = set(test_sentence)
# Enumerate the vocabulary in sorted order: the iteration order of a set of
# strings can differ between interpreter runs, which would give every word a
# different index on each run and make results impossible to reproduce.
word_to_idx = {word: i for i, word in enumerate(sorted(vocab))}

[(['When', 'forty'], 'winters'), (['forty', 'winters'], 'shall'), (['winters', 'shall'], 'besiege')]


In [42]:
class NGramLanguageModeler(nn.Module):
    '''
        The N-Gram Language Model

        Embeds each context word, concatenates the embeddings, and passes them
        through a hidden layer with ReLU followed by an output layer that
        yields log-probabilities over the vocabulary for the next word.
    '''

    def __init__(self, vocab_size, embedding_dim, context_size, hidden_dim=128):
        '''
            vocab_size:    number of distinct words (embedding rows and output size)
            embedding_dim: size of each word embedding
            context_size:  number of context words fed to the model at once
            hidden_dim:    width of the hidden layer (defaults to 128)
        '''
        super(NGramLanguageModeler, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.linear1 = nn.Linear(context_size * embedding_dim, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, vocab_size)

    def forward(self, inputs):
        '''
            inputs: LongTensor holding the indices of the context words

            Returns a (1, vocab_size) tensor of log-probabilities.
        '''
        # Flatten the context embeddings into a single row vector
        embeds = self.embeddings(inputs).view((1, -1))
        out = F.relu(self.linear1(embeds))
        out = self.linear2(out)
        # Normalise over the vocabulary dimension explicitly. Omitting `dim`
        # relies on a deprecated implicit choice and raises a warning.
        log_probs = F.log_softmax(out, dim=-1)
        return log_probs

In [44]:
# Negative log-likelihood loss, applied to the log-probabilities the model returns
loss_function = nn.NLLLoss()
# NOTE: this rebinds `model` (along with `loss_function` and `optimizer`), so the
# bag-of-words classifier created earlier is no longer reachable under that name
model = NGramLanguageModeler(len(vocab), EMBEDDING_DIM, CONTEXT_SIZE)
# Stochastic gradient descent over the language model's parameters, learning rate 0.001
optimizer = optim.SGD(model.parameters(), lr=0.001)

In [47]:
# Total training loss of each epoch, in order
losses = []

# Train
for epoch in range(10):

    # Running sum of the per-example losses within this epoch
    total_loss = torch.Tensor([0])

    for context, target in trigrams:

        # Clear the gradients left over from the previous example
        model.zero_grad()

        # Encode the context words as a tensor of vocabulary indices
        context_idxs = [word_to_idx[w] for w in context]
        context_var = autograd.Variable(torch.LongTensor(context_idxs))

        # Encode the word to predict as a one-element tensor holding its index
        target_var = autograd.Variable(torch.LongTensor([word_to_idx[target]]))

        # Forward pass, then the loss against the true next word
        log_probs = model(context_var)
        loss = loss_function(log_probs, target_var)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        total_loss += loss.data

    losses.append(total_loss)
print(losses)

[
 524.1758
[torch.FloatTensor of size 1]
, 
 521.5498
[torch.FloatTensor of size 1]
, 
 518.9454
[torch.FloatTensor of size 1]
, 
 516.3609
[torch.FloatTensor of size 1]
, 
 513.7955
[torch.FloatTensor of size 1]
, 
 511.2482
[torch.FloatTensor of size 1]
, 
 508.7176
[torch.FloatTensor of size 1]
, 
 506.2034
[torch.FloatTensor of size 1]
, 
 503.7036
[torch.FloatTensor of size 1]
, 
 501.2171
[torch.FloatTensor of size 1]
]
