<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Imports-&amp;-Inits" data-toc-modified-id="Imports-&amp;-Inits-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Imports &amp; Inits</a></span></li><li><span><a href="#Data-&amp;-Model" data-toc-modified-id="Data-&amp;-Model-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Data &amp; Model</a></span></li><li><span><a href="#Going-through-the-model" data-toc-modified-id="Going-through-the-model-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Going through the model</a></span></li></ul></div>

# AG News Classifier with ConvNet
A convolutional text classifier that assigns AG News titles to news categories.

## Imports & Inits

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pdb
import pandas as pd
import numpy as np
import torch

from torch import nn
from torch.nn import functional as F
from torch import optim
from torch.utils.data import DataLoader
from pathlib import Path

from ignite.engine import Events, create_supervised_evaluator
from ignite.metrics import Accuracy, Loss
from ignite.contrib.handlers import ProgressBar

In [3]:
from consts import consts
from ag.data import NewsDataset, DataContainer
consts

Namespace(batch_size=256, checkpointer_name='classifier', checkpointer_prefix='cbow', cw_file=PosixPath('../data/ag_news/work_dir/class_weights.pth'), device='cuda:3', dropout_p=0.1, early_stopping_criteria=5, embedding_size=100, glove_path=PosixPath('../pretrained_path/glove6B/glove.6B.100d.txt'), hidden_dim=100, learning_rate=0.001, metric_file=PosixPath('../data/ag_news/work_dir/metrics.csv'), model_dir=PosixPath('../data/ag_news/work_dir/models'), n_channels=100, num_epochs=100, path=PosixPath('../data/ag_news'), proc_dataset_csv=PosixPath('../data/ag_news/news_with_splits.csv'), save_every=2, save_total=5, use_glove=False, vectorizer_file=PosixPath('../data/ag_news/work_dir/vectorizer.json'), work_dir=PosixPath('../data/ag_news/work_dir'))

## Data & Model

In [4]:
# Load the pre-processed news table; the preview below shows `category`, `split` and `title` columns.
# NOTE(review): the preview also shows an `Unnamed: 0` column, i.e. a saved index read back
# as data — consider `index_col=0` if nothing downstream relies on that column.
df = pd.read_csv(consts.proc_dataset_csv)
df.head()

Unnamed: 0,category,split,title
0,Business,train,"Jobs, tax cuts key issues for Bush"
1,Business,train,Jarden Buying Mr. Coffee #39;s Maker
2,Business,train,Retail sales show festive fervour
3,Business,train,Intervoice's Customers Come Calling
4,Business,train,Boeing Expects Air Force Contract


In [5]:
# Wrap the dataframe in the project's data container.
# NOTE(review): is_load=True presumably restores the saved vectorizer — confirm in ag.data.
dc = DataContainer(
  df,
  NewsDataset,
  consts.vectorizer_file,
  consts.batch_size,
  is_load=True,
)

# Reuse cached class weights when the file exists; otherwise build inverse-frequency
# weights, ordered by cat_vocab.lookup_token(category), and cache them for next time.
try:
  class_weights = torch.load(consts.cw_file)
except FileNotFoundError:
  cat_vocab = dc.cat_vocab
  class_counts = df['category'].value_counts().to_dict()
  sorted_counts = sorted(
    class_counts.items(),
    key=lambda item: cat_vocab.lookup_token(item[0]),
  )
  freq = [pair[1] for pair in sorted_counts]
  class_weights = 1.0 / torch.tensor(freq, dtype=torch.float32)
  torch.save(class_weights, consts.cw_file)

In [None]:
class NewsClassifier(nn.Module):
  """1-D convolutional classifier over sequences of token ids.

  Token ids are embedded, passed through four Conv1d + ELU layers, averaged
  over the remaining sequence dimension and classified by a two-layer MLP.

  Args:
    emb_sz: embedding dimension (also the input channel count of the first conv).
    vocab_size: number of rows in the embedding table.
    n_channels: number of output channels of every conv layer.
    hidden_dim: width of the hidden fully-connected layer.
    n_classes: number of output classes.
    dropout_p: dropout probability used before and after the first linear layer.
    pretrained: optional numpy array of shape (vocab_size, emb_sz) used as the
      initial embedding weights.
    freeze_pretrained: when True (and `pretrained` is given), the embedding
      weights are excluded from gradient updates.
    padding_idx: index of the padding token in the embedding table.
  """

  def __init__(self, emb_sz, vocab_size, n_channels, hidden_dim, n_classes, dropout_p,
               pretrained=None, freeze_pretrained=False, padding_idx=0):
    super(NewsClassifier, self).__init__()

    # `is not None` rather than truthiness: the truth value of a multi-element
    # numpy array is ambiguous and raises ValueError.
    if pretrained is not None:
      pretrained_emb = torch.from_numpy(pretrained).float()
      self.emb = nn.Embedding(vocab_size, emb_sz, padding_idx, _weight=pretrained_emb)
      if freeze_pretrained:
        self.emb.weight.requires_grad = False
    else:
      self.emb = nn.Embedding(vocab_size, emb_sz, padding_idx)

    # Unpadded convs: every layer shortens the sequence, so inputs must be long
    # enough to survive all four layers (e.g. length 20 -> 18 -> 8 -> 3 -> 1).
    self.convnet = nn.Sequential(
      nn.Conv1d(in_channels=emb_sz, out_channels=n_channels, kernel_size=3),
      nn.ELU(),
      nn.Conv1d(in_channels=n_channels, out_channels=n_channels, kernel_size=3, stride=2),
      nn.ELU(),
      nn.Conv1d(in_channels=n_channels, out_channels=n_channels, kernel_size=3, stride=2),
      nn.ELU(),
      nn.Conv1d(in_channels=n_channels, out_channels=n_channels, kernel_size=3),
      nn.ELU()
    )

    self.dropout = nn.Dropout(p=dropout_p)
    self.relu = nn.ReLU()
    self.fc1 = nn.Linear(in_features=n_channels, out_features=hidden_dim)
    self.fc2 = nn.Linear(in_features=hidden_dim, out_features=n_classes)
    self.softmax = nn.Softmax(dim=1)

  def forward(self, x_in, apply_softmax=False):
    """Compute class scores for a batch of token-id sequences.

    Args:
      x_in: integer tensor of token ids, shape (batch, seq_len).
      apply_softmax: when True, return probabilities instead of raw scores
        (leave False when the output feeds nn.CrossEntropyLoss).

    Returns:
      Tensor of shape (batch, n_classes).
    """
    # embed and permute so features are channels
    # conv1d (batch, channels, input)
    x_emb = self.emb(x_in).permute(0, 2, 1)
    features = self.convnet(x_emb)

    # average over whatever sequence length is left and remove the extra dimension
    remaining_size = features.size(dim=2)
    features = F.avg_pool1d(features, remaining_size).squeeze(dim=2)
    features = self.dropout(features)

    # mlp classifier
    hidden_vector = self.fc1(features)
    hidden_vector = self.dropout(hidden_vector)
    hidden_vector = self.relu(hidden_vector)
    prediction_vector = self.fc2(hidden_vector)

    if apply_softmax:
      prediction_vector = self.softmax(prediction_vector)

    return prediction_vector

In [None]:
# Build the ConvNet classifier from the hyper-parameters in `consts` and the
# vocabulary / category counts exposed by the data container.
classifier = NewsClassifier(
  emb_sz=consts.embedding_size,
  vocab_size=dc.vocab_size,
  n_channels=consts.n_channels,
  hidden_dim=consts.hidden_dim,
  n_classes=dc.n_cats,
  dropout_p=consts.dropout_p,
)
# Cross-entropy with per-class rescaling weights.
loss_fn = nn.CrossEntropyLoss(weight=class_weights)
classifier

In [None]:
# Smoke test: push one training batch through the classifier and the loss.
# A fresh iterator is created here because `itr` is only defined in a later
# cell, so relying on it breaks a top-to-bottom run on a fresh kernel.
x, y = next(iter(dc.train_dl))
y_pred = classifier(x)
loss_fn(y_pred, y)

## Going through the model

In [6]:
# Stand-alone layers used below to trace tensor shapes step by step.
# Note: conv1s uses padding=1, whereas the convs inside NewsClassifier.convnet
# are unpadded, so the sequence lengths traced here differ from the model's.
emb = nn.Embedding(num_embeddings=dc.vocab_size, embedding_dim=consts.embedding_size)
conv1s = nn.Sequential(
  nn.Conv1d(consts.embedding_size, consts.n_channels, kernel_size=3, padding=1),
  nn.ELU(),
)
conv2s = nn.Sequential(
  nn.Conv1d(consts.n_channels, consts.n_channels, kernel_size=3, stride=2),
  nn.ELU(),
)
dropout = nn.Dropout(consts.dropout_p)
relu = nn.ReLU()
fc1 = nn.Linear(consts.n_channels, consts.hidden_dim)
fc2 = nn.Linear(consts.hidden_dim, dc.n_cats)
softmax = nn.Softmax(dim=1)

In [7]:
# Create an iterator over dc.train_dl so batches can be pulled one at a time with next().
itr = iter(dc.train_dl)

In [8]:
# Pull one batch; the printed shapes below are (batch, seq_len) for x and (batch,) for y.
x,y = next(itr)
print(x.shape, y.shape)

torch.Size([256, 20]) torch.Size([256])


In [9]:
print(emb)
# Embed every token id: (batch, seq_len) -> (batch, seq_len, emb_dim).
t = emb(x)
print(t.shape)
# Conv1d takes (batch, channels, length), so move the embedding dimension to dim 1.
t = t.permute(0,2,1)
print(t.shape)

Embedding(3566, 100)
torch.Size([256, 20, 100])
torch.Size([256, 100, 20])


In [10]:
# Kernel 3 with padding 1 and stride 1 leaves the sequence length unchanged (20 -> 20).
print(conv1s)
t = conv1s(t)
print(t.shape)

Sequential(
  (0): Conv1d(100, 100, kernel_size=(3,), stride=(1,), padding=(1,))
  (1): ELU(alpha=1.0)
)
torch.Size([256, 100, 20])


In [11]:
# Unpadded kernel-3, stride-2 conv: length 20 -> floor((20 - 3) / 2) + 1 = 9.
print(conv2s)
t = conv2s(t)
print(t.shape)

Sequential(
  (0): Conv1d(100, 100, kernel_size=(3,), stride=(2,))
  (1): ELU(alpha=1.0)
)
torch.Size([256, 100, 9])


In [12]:
# The same conv2s module (shared weights) is applied a second time: length 9 -> 4.
# `t` is overwritten in place, so re-running this cell alone shrinks it further.
print(conv2s)
t = conv2s(t)
print(t.shape)

Sequential(
  (0): Conv1d(100, 100, kernel_size=(3,), stride=(2,))
  (1): ELU(alpha=1.0)
)
torch.Size([256, 100, 4])


In [13]:
# conv1s (padded, stride 1) applied again: the length stays at 4.
print(conv1s)
t = conv1s(t)
print(t.shape)

Sequential(
  (0): Conv1d(100, 100, kernel_size=(3,), stride=(1,), padding=(1,))
  (1): ELU(alpha=1.0)
)
torch.Size([256, 100, 4])


In [14]:
# Average over the whole remaining sequence dimension, as NewsClassifier.forward does.
# A hard-coded window of 3 would only average the first 3 of the 4 positions and
# silently ignore the last one.
p = F.avg_pool1d(t, t.size(dim=2))
print(p.shape)
# Element-wise comparison of p against t (p broadcasts over the last dimension).
torch.all(p==t)

torch.Size([256, 100, 1])


tensor(0, dtype=torch.uint8)

In [15]:
# Drop the length-1 pooled dimension: (batch, channels, 1) -> (batch, channels).
p = p.squeeze(dim=2)
print(p.shape)

torch.Size([256, 100])


In [16]:
# First linear layer followed by dropout; the shape stays (batch, hidden_dim).
# NewsClassifier.forward also applies dropout before fc1 and relu after this dropout;
# both are skipped in this walkthrough and neither would change the shape.
print(fc1)
p = fc1(p)
p = dropout(p)
print(p.shape)

Linear(in_features=100, out_features=100, bias=True)
torch.Size([256, 100])


In [17]:
# Final linear layer maps the hidden vector to one score per category (dc.n_cats outputs).
print(fc2)
p = fc2(p)
print(p.shape)

Linear(in_features=100, out_features=4, bias=True)
torch.Size([256, 4])
