In [1]:

## ALL IMPORTS FOR A NEW NOTEBOOK

import os, sys, random, math
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import seaborn as sns
import itertools as it
import scipy
import glob
import matplotlib
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset,DataLoader
from torch.optim import Optimizer
import torchvision.transforms.transforms as txf
import torch.optim.lr_scheduler as lr_scheduler
from collections import OrderedDict

from sklearn import metrics
from sklearn import preprocessing as pp
from sklearn import model_selection as ms

import torch_utils
from tqdm.notebook import tqdm_notebook as tqdm
import time

font = {'size'   : 20}

matplotlib.rc('font', **font)


In [2]:
from torchtext import data, datasets
SEED = 947
torch_utils.seed_everything(SEED)

In [3]:
TEXT = data.Field(tokenize="spacy")
LABEL = data.LabelField()

In [4]:
%%time
train_data, test_data = datasets.TREC.splits(TEXT, LABEL, fine_grained=True)
train_data, valid_data = train_data.split(split_ratio=0.8, random_state=random.seed(SEED))

CPU times: user 1.46 s, sys: 51.5 ms, total: 1.52 s
Wall time: 563 ms


In [5]:
len(train_data), len(valid_data), len(test_data)

(4362, 1090, 500)

In [6]:
vars(train_data[5])

{'text': ['What', 'are', 'Bellworts', '?'], 'label': 'DESC:def'}

In [32]:
MAX_VOCAB_SIZE = 25000
TEXT.build_vocab(
    train_data,
    max_size=MAX_VOCAB_SIZE,
    vectors="glove.6B.200d",
    unk_init=torch.Tensor.normal_
)

LABEL.build_vocab(train_data)

In [33]:
LABEL.vocab.stoi

defaultdict(None,
            {'HUM:ind': 0,
             'LOC:other': 1,
             'DESC:def': 2,
             'NUM:count': 3,
             'DESC:desc': 4,
             'DESC:manner': 5,
             'ENTY:other': 6,
             'NUM:date': 7,
             'DESC:reason': 8,
             'ENTY:cremat': 9,
             'HUM:gr': 10,
             'LOC:country': 11,
             'LOC:city': 12,
             'ENTY:animal': 13,
             'ENTY:food': 14,
             'ENTY:dismed': 15,
             'ENTY:termeq': 16,
             'NUM:period': 17,
             'ABBR:exp': 18,
             'NUM:money': 19,
             'LOC:state': 20,
             'ENTY:event': 21,
             'ENTY:sport': 22,
             'NUM:other': 23,
             'HUM:desc': 24,
             'ENTY:product': 25,
             'ENTY:color': 26,
             'ENTY:substance': 27,
             'ENTY:techmeth': 28,
             'NUM:dist': 29,
             'ENTY:word': 30,
             'ENTY:veh': 31,
             

In [34]:
BATCH_SIZE = 128
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_sizes=(BATCH_SIZE, BATCH_SIZE, BATCH_SIZE),
    device=device
)

In [35]:
class Sentimental2DCNN(nn.Module):
    def __init__(self, vocab_size, embed_size, n_filters, filter_sizes, output_dim, dropout, pad_idx):
        super(Sentimental2DCNN, self).__init__()
        
        self.embedding = nn.Embedding(vocab_size, embed_size, padding_idx=pad_idx)
        self.convs = nn.ModuleList([
            nn.Conv2d(
                in_channels=1, 
                out_channels=n_filters, 
                kernel_size=(fs, embed_size)
            ) for fs in filter_sizes])
        self.fc = nn.Linear(len(filter_sizes)*n_filters, output_dim)
        self.dropout = nn.Dropout(dropout)
    
    def forward(self, text):
        # sent_len x batch
        text = text.permute(1,0)
        # batch x sent_len
        embedded = self.embedding(text)
        # batch x sent_len x embedding_size
        embedded = embedded.unsqueeze(dim=1)
        # batch x 1 x sent_len x embedding_size
        conved = [F.leaky_relu(conv(embedded)).squeeze(dim=3) for conv in self.convs]
        # batch x n_filters x sent_len-filter_size[n]+1
        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(dim=2) for conv in conved]
        # batch x n_filters
        cat = torch.cat(pooled, dim=1)
        # batch x n_filters*len(filter_sizes)
        return self.fc(cat)

In [36]:
def categorical_accuracy(preds, y):
    max_preds = preds.argmax(dim=1, keepdim=True)
    correct = max_preds.squeeze(dim=1).eq(y)
    return correct.sum()/torch.FloatTensor([y.shape[0]])

In [37]:
def train(model, iterator, optimizer, criterion):
    l,a = 0,0
    model.train()
    for batch in iterator:
        optimizer.zero_grad()
        preds = model(batch.text)
        loss = criterion(preds, batch.label)
        accr = categorical_accuracy(preds, batch.label)
        loss.backward()
        optimizer.step()
        l+=loss.item()
        a+=accr.item()
    return l/(len(iterator)), a/len(iterator)

In [67]:
def evaluate(model, iterator, criterion):
    l,a = 0,0
    model.eval()
    with torch.no_grad():
        for batch in iterator:
            preds = model(batch.text)
            loss = criterion(preds, batch.label)
            accr = categorical_accuracy(preds, batch.label)
            l+=loss.item()
            a+=accr.item()
    return l/(len(iterator)), a/len(iterator)

In [68]:
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 200
N_FILTERS = 100
FILTER_SIZES = [2,3,4]
OUTPUT_DIM = len(LABEL.vocab)
DROPOUT = 0.45
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]
PRETRAINED_EMBEDDINGS = TEXT.vocab.vectors
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [77]:
model = Sentimental2DCNN(INPUT_DIM,EMBEDDING_DIM,N_FILTERS,FILTER_SIZES,OUTPUT_DIM,DROPOUT,PAD_IDX)
model.embedding.weight.data.copy_(PRETRAINED_EMBEDDINGS)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model = model.to(device)
torch_utils.clear_cuda()

In [78]:
optimizer = optim.Adamax(model.parameters())
criterion = nn.CrossEntropyLoss().to(device)

In [None]:
N_EPOCHS = 200
ea = torch_utils.EarlyStopping(verbose=True, patience=25)

sch = optim.lr_scheduler.ReduceLROnPlateau(optimizer,factor=0.5, patience=3)
history = pd.DataFrame()

for e in range(N_EPOCHS):
    st = time.time()
    tl, ta = train(model, train_iterator, optimizer, criterion)
    vl, va = evaluate(model, valid_iterator, criterion)
    
    history = torch_utils.print_epoch_stat(e, time.time()-st, history, tl, ta, vl, va)
    print("LR: {}".format(torch_utils.get_lr(optimizer)))
    ea(vl, model)
    sch.step(vl)
    if ea.early_stop:
        print("STOPPING EARLY!!")
        break



EPOCH 1 Completed, Time Taken: 0:00:00.541681
	Train Loss 	2.64627338
	Train Accuracy 	36.8973214%
	Valid Loss 	2.18850329
	Valid Accuracy 	44.1287878%
LR: 0.002
Found better solution (inf --> 2.188503).  Saving model ...


EPOCH 2 Completed, Time Taken: 0:00:00.418259
	Train Loss 	1.94859985
	Train Accuracy 	50.1383929%
	Valid Loss 	1.96846618
	Valid Accuracy 	49.2503156%
LR: 0.002
Found better solution (2.188503 --> 1.968466).  Saving model ...


EPOCH 3 Completed, Time Taken: 0:00:00.422086
	Train Loss 	1.64698013
	Train Accuracy 	59.1607143%
	Valid Loss 	1.80062058
	Valid Accuracy 	55.2346382%
LR: 0.002
Found better solution (1.968466 --> 1.800621).  Saving model ...


EPOCH 4 Completed, Time Taken: 0:00:00.418046
	Train Loss 	1.41229984
	Train Accuracy 	66.7767857%
	Valid Loss 	1.6665246
	Valid Accuracy 	57.6704545%
LR: 0.002
Found better solution (1.800621 --> 1.666525).  Saving model ...


EPOCH 5 Completed, Time Taken: 0:00:00.385416
	Train Loss 	1.23387858
	Train Accuracy 	7

In [None]:
l, a = evaluate(model, test_iterator, criterion)
print(l, 100.0*a)

In [None]:
model.load_state_dict(torch.load("checkpoint.pt", map_location=device))

In [None]:
l, a = evaluate(model, test_iterator, criterion)
print(l, 100.0*a)

In [None]:
ax = history["train_loss"].plot()
history["valid_loss"].plot(ax=ax)
plt.legend()

In [None]:
ax = history["train_accuracy"].plot()
history["valid_accuracy"].plot(ax=ax)
plt.legend()