[View in Colaboratory](https://colab.research.google.com/github/raymondhs/pytorch-category-suggestion/blob/master/category.ipynb)

# Deep Learning with PyTorch

## Installation

In [1]:
# Install the two third-party packages this notebook imports.
# NOTE(review): versions are unpinned. The notebook uses `torchtext.data.Field`,
# which presumably requires an older torchtext release -- confirm and pin.
!pip3 install torch torchtext



In [0]:
# Clone the companion repository unless its directory is already present.
! [ -d pytorch-category-suggestion ] || git clone --recursive https://github.com/raymondhs/pytorch-category-suggestion/
# Copy the repository's data/ folder into the working directory unless ./data already exists.
! [ -d data ] || cp -r pytorch-category-suggestion/data .

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim

from torchtext import data


# Seed PyTorch's random number generator so that randomly initialised
# parameters are the same on every run.
torch.manual_seed(1)

<torch._C.Generator at 0x7f104733a7b0>

## Data Processing



*   Define Field objects corresponding to Product Title and Category



In [0]:
# Product title: a Field with default settings. The sample printed after
# loading shows each title as a list of whitespace-separated tokens.
TITLE = data.Field()
# Category: a label field. The same sample shows a label such as
# "Men Clothes" kept as one unit rather than split into words.
LABEL = data.LabelField()

* Load datasets

In [0]:
# Read the two CSV files under data/category. Columns are mapped in order:
# first column -> `title` (TITLE field), second column -> `label` (LABEL field).
# The held-out file is passed through the `test=` keyword so the keyword
# matches the variable it is unpacked into; `splits` returns only the
# datasets that were requested, in (train, validation, test) order.
train, test = data.TabularDataset.splits(
    path='data/category', format='csv',
    train='train.csv', test='test.csv',
    fields=[('title', TITLE), ('label', LABEL)])

In [6]:
# Sanity check: number of examples in each split, plus one training example
# shown as "title tokens => category label".
print('# train', len(train))
print('# test', len(test))
print(train[99].title, '=>', train[99].label)

# train 1200
# test 300
['Combat', 'Trousers', 'Military', 'Pants', 'Men', 'SWAT', 'Army'] => Men Clothes


* Build vocabulary

In [0]:
# Build both vocabularies from the training split only, so the test split
# does not contribute any entries.
TITLE.build_vocab(train)
LABEL.build_vocab(train)

In [8]:
# Report the size of the title vocabulary and the number of distinct labels;
# these become the embedding-table size and the output dimension of the model.
print("# unique words", len(TITLE.vocab))
print("# labels", len(LABEL.vocab))

# unique words 1493
# labels 2


In [9]:
# Ten most frequent title tokens with their counts. Counting is
# case-sensitive: 'Bag' and 'bag' appear as separate entries in the output.
TITLE.vocab.freqs.most_common(10)

[('Bag', 369),
 ('Fashion', 219),
 ('Men', 203),
 ('Women', 197),
 ('Shoulder', 176),
 ('Backpack', 131),
 ('Long', 125),
 ('Casual', 107),
 ('Mens', 98),
 ('bag', 85)]

* Create dataset iterator

In [0]:
# One iterator per split, each yielding batches of a single example
# (batch_sizes=(1, 1)), so no padding is needed inside a batch.
# repeat=False: a `for` loop over an iterator ends after one pass, which is
# what the epoch loop below relies on.
# sort=False: examples are not sorted before batching.
train_iter, test_iter = data.Iterator.splits(
    (train, test), batch_sizes=(1, 1),
    repeat=False, sort=False)

In [11]:
# Peek at one batch to check tensor shapes: `.title` is (num_words, batch_size)
# and `.label` is (batch_size,), as shown in the output below.
next(iter(train_iter))


[torchtext.data.batch.Batch of size 1]
	[.title]:[torch.LongTensor of size 6x1]
	[.label]:[torch.LongTensor of size 1]

## FastText

Let's implement FastText in PyTorch.

Joulin, A., Grave, E., Bojanowski, P., & Mikolov, T. (2016). Bag of tricks for efficient text classification. arXiv preprint arXiv:1607.01759.

![fasttext](https://github.com/raymondhs/pytorch-category-suggestion/raw/master/fig/fasttext.png)

In [0]:
class FastTextClassifier(nn.Module):
    """Bag-of-words text classifier in the style of FastText.

    Each word index is mapped to a vector, the vectors of one title are
    averaged into a single vector, and a linear layer followed by
    log-softmax turns that vector into per-class log-probabilities.

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each word vector.
        output_dim: number of classes to score.
    """

    def __init__(self, vocab_size, embedding_dim, output_dim):
        super().__init__()

        # Word index -> dense vector lookup table.
        self.embedding = nn.Embedding(num_embeddings=vocab_size,
                                      embedding_dim=embedding_dim)

        # Affine map from the averaged vector to one score per class.
        self.linear = nn.Linear(in_features=embedding_dim,
                                out_features=output_dim)

        # Normalises the class scores (dim=1) into log-probabilities.
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return log-probabilities of shape (batch_size, output_dim).

        Args:
            x: word indices of shape (num_words, batch_size).
        """
        # Transpose to (batch_size, num_words) before the lookup, giving
        # word vectors of shape (batch_size, num_words, embedding_dim).
        word_vectors = self.embedding(x.t())

        # Collapse the word axis (dim=1) by averaging. Every position is
        # weighted equally; nothing is masked out.
        title_vector = word_vectors.mean(dim=1)

        class_scores = self.linear(title_vector)
        return self.log_softmax(class_scores)

## Training

* Create an instance of our FastText module

In [0]:
# Sizes taken from the vocabularies: one embedding row per known token and
# one output score per category.
vocab_size = len(TITLE.vocab)
num_labels = len(LABEL.vocab)

# Hyperparameter: dimensionality of each word vector.
hidden_size = 20

model = FastTextClassifier(
    vocab_size=vocab_size,
    embedding_dim=hidden_size,
    output_dim=num_labels,
)

* Define a training objective:
  * Minimize a loss function called the negative log-likelihood (NLL)


In [0]:
# Negative log-likelihood loss. It takes log-probabilities as input, which is
# what the classifier's final LogSoftmax layer produces.
loss_function = nn.NLLLoss()

* Define an optimizer:
  * Stochastic gradient descent (SGD)
  * Hyperparameter: Learning Rate

In [0]:
# Plain stochastic gradient descent over every model parameter.
# lr=0.1 is the learning rate (a hyperparameter).
optimizer = optim.SGD(model.parameters(), lr=0.1)

* Let's train for 100 epochs!

In [16]:
# We pass over the training data many times. 100 epochs is affordable here
# only because the training set is small; on larger datasets somewhere
# between 5 and 30 epochs is usually reasonable.

# Train for 100 epochs
# 1 epoch = 1 full pass of training data
for epoch in range(100):
    # Running sum of the per-batch losses for this epoch, kept as a plain
    # Python float.
    total_loss = 0.0
    for instance in train_iter:
        # Step 1. PyTorch accumulates gradients across backward() calls,
        # so clear them before each instance.
        model.zero_grad()

        # Step 2. Run the forward pass.
        log_probs = model(instance.title)

        # Step 3. Compute the loss and its gradients, then update the
        # parameters by calling optimizer.step().
        loss = loss_function(log_probs, instance.label)
        loss.backward()
        optimizer.step()

        # .item() extracts the value as a Python number. Adding the tensor
        # itself would keep every step's autograd history referenced until
        # the end of the epoch.
        total_loss += loss.item()

    if epoch % 10 == 0:
        print("loss at epoch {} = {:.4f}".format(epoch, total_loss))

# Summed loss of the last epoch.
print("final loss = {:.4f}".format(total_loss))

loss at epoch 0 = 497.9745
loss at epoch 10 = 38.0995
loss at epoch 20 = 6.7457
loss at epoch 30 = 2.5938
loss at epoch 40 = 1.5151
loss at epoch 50 = 1.0455
loss at epoch 60 = 0.7884
loss at epoch 70 = 0.6276
loss at epoch 80 = 0.5184
loss at epoch 90 = 0.4399
final loss = 0.3863


## Inference

In [17]:
# Evaluate on the test split. Gradients are not needed for inference, so
# autograd tracking is switched off for the whole block.
with torch.no_grad():
    num_correct = 0
    for instance in test_iter:
        log_probs = model(instance.title)
        # Predicted class = index of the highest log-probability per example.
        prediction = torch.argmax(log_probs, dim=1)
        # Count matches element-wise. Using the comparison directly in an
        # `if` only works when the batch holds exactly one example.
        num_correct += (prediction == instance.label).sum().item()
    # Percentage of test examples classified correctly.
    accuracy = num_correct*100/len(test)
    print("Acc.: {:.2f}%".format(accuracy))

Acc.: 94.67%


  return Variable(arr, volatile=not train)
