***The content below is a solution to and a walk-through of the tutorial [Deep Learning for NLP with PyTorch](https://pytorch.org/tutorials/beginner/deep_learning_nlp_tutorial.html).***

<h1>Logistic regression Bag-of-words classifier<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Create-bag-of-words" data-toc-modified-id="Create-bag-of-words-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Create bag-of-words</a></span></li><li><span><a href="#Define-classifier:" data-toc-modified-id="Define-classifier:-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Define classifier:</a></span></li><li><span><a href="#Train-the-network" data-toc-modified-id="Train-the-network-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Train the network</a></span></li><li><span><a href="#Test-the-network" data-toc-modified-id="Test-the-network-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Test the network</a></span></li></ul></div>

In [1]:
import numpy as np
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
# Toy training corpus: each item is (list of whitespace-separated tokens, language label).
trainset = [
    (text.split(), language)
    for text, language in (
        ("me gusta comer en la cafeteria", "SPANISH"),
        ("Give it to me", "ENGLISH"),
        ("No creo que sea una buena idea", "SPANISH"),
        ("No it is not a good idea to get lost at sea", "ENGLISH"),
    )
]

In [3]:
# Held-out sentences in the same (token list, language label) format as the training data.
testset = [
    (text.split(), language)
    for text, language in (
        ("Yo creo que si", "SPANISH"),
        ("it is lost on me", "ENGLISH"),
    )
]

## Create bag-of-words

In [4]:
# Vocabulary: every distinct lower-cased token found in the training AND test
# sentences. np.unique returns the distinct values in sorted order.
bow = np.unique(
    [token.lower() for tokens, _label in trainset + testset for token in tokens]
)
bow

array(['a', 'at', 'buena', 'cafeteria', 'comer', 'creo', 'en', 'get',
       'give', 'good', 'gusta', 'idea', 'is', 'it', 'la', 'lost', 'me',
       'no', 'not', 'on', 'que', 'sea', 'si', 'to', 'una', 'yo'],
      dtype='<U9')

In [5]:
# Map each vocabulary word to its position in `bow`; that position is the
# word's feature index in a bag-of-words vector.
word_to_ix = {word: index for index, word in enumerate(bow)}
word_to_ix

{'a': 0,
 'at': 1,
 'buena': 2,
 'cafeteria': 3,
 'comer': 4,
 'creo': 5,
 'en': 6,
 'get': 7,
 'give': 8,
 'good': 9,
 'gusta': 10,
 'idea': 11,
 'is': 12,
 'it': 13,
 'la': 14,
 'lost': 15,
 'me': 16,
 'no': 17,
 'not': 18,
 'on': 19,
 'que': 20,
 'sea': 21,
 'si': 22,
 'to': 23,
 'una': 24,
 'yo': 25}

In [6]:
# Class index assigned to each language label (used as the NLLLoss target).
label_to_ix = dict(ENGLISH=1, SPANISH=0)

In [7]:
# Model dimensions, both derived from the lookup tables so they cannot drift
# out of sync with the data.
VOCAB_SIZE = len(word_to_ix)   # input features: one per vocabulary word
NUM_LABELS = len(label_to_ix)  # output classes: one per language label

## Define classifier:
$$
\hat{y} = \log \operatorname{softmax}(Ax + b)
$$
where $x$ is the BoW vector of a sentence and $\hat{y}$ holds the log-probability of each label.

In [8]:
class BoWClassifier(nn.Module):
    """Logistic-regression classifier over bag-of-words vectors.

    A single affine layer followed by log-softmax, i.e. ``log softmax(Ax + b)``.

    Args:
        vocab_size: Number of input features (length of one BoW vector).
        num_labels: Number of output classes.
    """

    def __init__(self, vocab_size=VOCAB_SIZE, num_labels=NUM_LABELS):
        super().__init__()
        self.linear = nn.Linear(vocab_size, num_labels)

    def forward(self, x):
        """Return log-probabilities for ``x``, normalised over dim=1."""
        return F.log_softmax(self.linear(x), dim=1)

In [9]:
# Seed PyTorch's RNG before the layer is created so the random weight
# initialisation, and therefore the trained model, is the same on every
# Restart & Run All.
# NOTE: the probabilities recorded at the end of this notebook came from an
# unseeded run, so a fresh run may print different numbers.
torch.manual_seed(0)
model = BoWClassifier()

## Train the network

In [10]:
# The model's forward pass already ends in log_softmax, so it is paired with
# NLLLoss (negative log likelihood), which does not apply log-softmax itself.
# CrossEntropyLoss would include the log-softmax step.
criterion = nn.NLLLoss()
# Plain stochastic gradient descent over all model parameters.
optimizer = optim.SGD(params=model.parameters(), lr=0.1)

In [11]:
def sentence_to_bow(sentence, word_to_ix=word_to_ix):
    """Convert a tokenised sentence into a bag-of-words count vector.

    Args:
        sentence: Iterable of word strings; each is lower-cased before lookup.
        word_to_ix: Mapping from lower-cased word to feature index.

    Returns:
        Float tensor of shape (1, len(word_to_ix)) whose entry i is the number
        of times the word with index i occurs in ``sentence``.

    Raises:
        KeyError: If a word is not present in ``word_to_ix``.
    """
    bow_vec = torch.zeros(len(word_to_ix))
    for word in sentence:
        # Accumulate instead of assigning 1 so a repeated word keeps its count.
        bow_vec[word_to_ix[word.lower()]] += 1
    # Add a leading batch dimension of size 1.
    return bow_vec.view(1, -1)
    
def make_target(label, label_to_ix=label_to_ix):
    """Return the class index of ``label`` as a one-element int64 tensor."""
    class_index = label_to_ix[label]
    return torch.tensor([class_index], dtype=torch.long)

In [12]:
n_epoch = 100  # number of full passes over trainset

for _ in range(n_epoch):
    # One SGD update per training sentence.
    for tokens, language in trainset:
        # Gradients accumulate across backward() calls, so reset them first.
        model.zero_grad()

        log_probs = model(sentence_to_bow(tokens))
        loss = criterion(log_probs, make_target(language))

        loss.backward()
        optimizer.step()

## Test the network

In [18]:
# Print the predicted class probabilities for each test sentence.
# Column order follows the class indices: SPANISH = 0, ENGLISH = 1.
print("* prediction probability *")
print(f"{'  SPANISH':<12}{' ENGLISH':<11}")
with torch.no_grad():  # inference only, no gradient tracking needed
    for tokens, _label in testset:
        log_probs = model(sentence_to_bow(tokens))
        # The model outputs log-probabilities; exp() turns them into probabilities.
        probabilities = torch.exp(log_probs[0]).numpy()
        print(probabilities, " ".join(tokens))

* prediction probability *
  SPANISH    ENGLISH   
[0.86731446 0.13268559] Yo creo que si
[0.06164475 0.93835527] it is lost on me
