# VDCNN Implementation In Torch  

In [1]:
# importing required libraries
import torch 
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchtext import datasets
from torchtext import data
import nltk
import pyprind
import math

import re
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from string import punctuation
from sklearn.metrics import roc_auc_score

In [2]:
SENT_LENGTH = 1024

In [3]:
def tokenizer(text):
    s = 'abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:’"/|_#$%ˆ&*˜‘+=<>()[]{} '
    return [l for l in list(text.lower()) if l in s]

In [4]:
text_field = data.Field(
    sequential=True,
    use_vocab=True,
#     init_token="<ios>",
#     eos_token="<eos>",
    fix_length=SENT_LENGTH,
    tokenize=tokenizer,
    batch_first=True
)
label_field = data.Field(
    sequential=False,
    use_vocab=False,
    is_target=True
)

In [5]:
csv_fields = [
    ("id", None),
    ("comment_text", text_field),
    ("toxic", label_field),
    ("severe_toxic", label_field), ("threat", label_field),
    ("obscene", label_field), ("insult", label_field),
    ("identity_hate", label_field)
]

In [6]:
trainds, valds = data.TabularDataset.splits(
    path="data/toxic_competition_data/",
    format="csv",
    train="train_torch.csv",
    validation="test_torch.csv",
    fields=csv_fields,
    skip_header=True
)

In [7]:
len(trainds), len(valds)

(143613, 15958)

In [9]:
print("Sentence/phrase ----------------- Label")
for i in range(10):
    print("{} ------------- {}, {}, {}, {}, {}, {}".format("".join(trainds.examples[i].comment_text),
                                       trainds.examples[i].toxic, trainds.examples[i].insult, trainds.examples[i].identity_hate,
                                      trainds.examples[i].obscene, trainds.examples[i].severe_toxic, trainds.examples[i].threat))
    break

Sentence/phrase ----------------- Label
hello, thanks message. per wp:noneng, sources languages english also used. let check last source mention, supports state ownership claim, ill add article. regards.  ------------- 0, 0, 0, 0, 0, 0


In [12]:
text_field.build_vocab(trainds)
label_field.build_vocab(trainds)

In [17]:
text_field.vocab.freqs.most_common(10)

[(' ', 5183323),
 ('e', 3987297),
 ('i', 2626000),
 ('a', 2492015),
 ('t', 2366524),
 ('s', 2201455),
 ('n', 2166833),
 ('o', 2041735),
 ('r', 2019890),
 ('l', 1648374)]

In [18]:
BATCH_SIZE = 32

In [21]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

traindl, valdl = data.BucketIterator.splits(
    datasets=(trainds, valds),
    batch_sizes=(BATCH_SIZE, BATCH_SIZE),
    sort_key= lambda x: x.comment_text,
    repeat=False,
    device=device
)
len(traindl), len(valdl)

cuda


(4488, 499)

In [22]:
batch = next(traindl.__iter__()); batch


[torchtext.data.batch.Batch of size 32]
	[.comment_text]:[torch.cuda.LongTensor of size 32x1024 (GPU 0)]
	[.toxic]:[torch.cuda.LongTensor of size 32 (GPU 0)]
	[.severe_toxic]:[torch.cuda.LongTensor of size 32 (GPU 0)]
	[.threat]:[torch.cuda.LongTensor of size 32 (GPU 0)]
	[.obscene]:[torch.cuda.LongTensor of size 32 (GPU 0)]
	[.insult]:[torch.cuda.LongTensor of size 32 (GPU 0)]
	[.identity_hate]:[torch.cuda.LongTensor of size 32 (GPU 0)]

In [23]:
class BatchWrapper:
    def __init__(self, dl, x_var, y_vars):
        self.dl, self.x_var, self.y_vars = dl, x_var, y_vars # we pass in the list of attributes for x and y
    
    def __iter__(self):
        for batch in self.dl:
            x = getattr(batch, self.x_var) # we assume only one input in this wrapper
            
            if self.y_vars is not None: # we will concatenate y into a single tensor
                y = torch.cat([getattr(batch, feat).unsqueeze(1) for feat in self.y_vars], dim=1).float()
            else:
                y = torch.zeros((1))

            yield (x, y)
    
    def __len__(self):
        return len(self.dl)

In [24]:
train_dl = BatchWrapper(traindl, "comment_text", ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"])
valid_dl = BatchWrapper(valdl, "comment_text", ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"])

In [25]:
next(train_dl.__iter__())

(tensor([[27,  2, 12,  ...,  1,  1,  1],
         [23,  3,  6,  ...,  1,  1,  1],
         [ 6, 10, 20,  ...,  1,  1,  1],
         ...,
         [12, 11,  5,  ...,  1,  1,  1],
         [ 7, 15, 10,  ...,  1,  1,  1],
         [27,  2, 22,  ...,  1,  1,  1]], device='cuda:0'),
 tensor([[0., 0., 0., 0., 0., 0.],
         [1., 1., 0., 1., 1., 0.],
         [0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0.],
         [1., 0., 0., 1., 1., 0.],
         [0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0.],
         [0., 0., 0.,

In [1]:
class ConvolutionalBlockRes(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, padding=1, shortcut=False, pool_type="max_pool"):
        super().__init__()
        self.shortcut = shortcut
        self.pool_type = pool_type
        self.conv_1 = nn.Conv1d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, padding=1)
        self.batch_norm_1 = nn.BatchNorm1d(out_channels)
        self.conv_2 = nn.Conv1d(in_channels=out_channels, out_channels=out_channels, kernel_size=kernel_size, padding=1)
        self.batch_norm_2 = nn.BatchNorm1d(out_channels)

        if shortcut is True:
            self.conv_res = nn.Conv1d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=2)
            self.batch_norm_res = nn.BatchNorm1d(out_channels)
    
    def forward(self, x):
        out = self.conv_1(x)
        out = F.relu(self.batch_norm_1(out))

        out = self.conv_2(out)
        out = F.relu(self.batch_norm_2(out))
        
        # downsampled
        if self.pool_type == "k_max":
            k_ = math.ceil(out.shape[2]/2.0)
            out = downsample_k_max_pool(out, k=k_, dim=2)[0]
        else:
            out = downsample_max_pool(out, 3, 2)

        if self.shortcut is True:
            residual = self.conv_res(x)
            residual = F.relu(self.batch_norm_res(residual))
            out = out + residual
        return out

class ConvolutionalIdentityBlock(nn.Module):
    def __init__(self, in_channels, kernel_size, padding=1, shortcut=False):
        super().__init__()
        
        self.shortcut = shortcut
        self.conv_1 = nn.Conv1d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, padding=1)
        self.batch_norm_1 = nn.BatchNorm1d(in_channels)
        self.conv_2 = nn.Conv1d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, padding=1)
        self.batch_norm_2 = nn.BatchNorm1d(in_channels)
    
    def forward(self, x):
        out = self.conv_1(x)
        out = F.relu(self.batch_norm_1(out))

        out = self.conv_2(out)
        out = F.relu(self.batch_norm_2(out))

        if self.shortcut is True:
            out = out + x
        else:
            out = out

        return out

def downsample_max_pool(x, kernel_size, stride):
    pool = nn.MaxPool1d(kernel_size=kernel_size, stride=stride, padding=1)
    return pool(x)


def downsample_k_max_pool(inp, k, dim):
    return inp.topk(k, dim)

NameError: name 'nn' is not defined

In [31]:
class VDCNN(nn.Module):
    def __init__(self, embedding_dim, vocab_size, n_classes):
        super().__init__()
        self.embedding = nn.Embedding(embedding_dim=embedding_dim, num_embeddings=vocab_size)
        
        self.conv_64 = nn.Conv1d(in_channels=embedding_dim, out_channels=64, kernel_size=3, padding=1)
        
        self.id_64 = ConvolutionalIdentityBlock(64, kernel_size=3, padding=1, shortcut=False)
        
        self.res_128 = ConvolutionalBlockRes(in_channels=64, out_channels=128, kernel_size=3, padding=1, shortcut=False, pool_type="k_max")
        
        self.id_128 = ConvolutionalIdentityBlock(128, kernel_size=3, padding=1, shortcut=False)
        
        self.res_256 = ConvolutionalBlockRes(in_channels=128, out_channels=256, kernel_size=3, padding=1, shortcut=False, pool_type="k_max")

        self.id_256 = ConvolutionalIdentityBlock(256, kernel_size=3, padding=1, shortcut=False)
        
        self.res_512 = ConvolutionalBlockRes(in_channels=256, out_channels=512, kernel_size=3, padding=1, shortcut=False, pool_type="k_max")
        
        self.id_512 = ConvolutionalIdentityBlock(512, kernel_size=3, padding=1, shortcut=False)
        
        self.linear_1 = nn.Linear(8*512, 2048)
        self.linear_2 = nn.Linear(2048, 2048)
        self.linear_3 = nn.Linear(2048, n_classes)
        
    def forward(self, inp):
        
        # [batch_size, sent_length]
        embedded = self.embedding(inp)
#         print(embedded.shape)
        
        # [batch_size, sent_lenght, emb_dim]
        embedded = embedded.permute(0, 2, 1)
#         print(embedded.shape)
        
        # [batch_size, emb_dim, sent_length]
        out = self.conv_64(embedded)
#         print(out.shape)
        
        # [batch_size, 64, sent_length]
        out = self.id_64(out)
#         print(out.shape)
        
#         # [batch_size, 64, sent_length]
        out = self.res_128(out)
#         print(out.shape)
        
#         # [batch_size, 128, sent_length/2]
        out = self.id_128(out)
#         print(out.shape)
        
#         # [batch_size, 128, sent_length/2]
        out = self.res_256(out)
#         print(out.shape)
        
#         # [batch_size, 256, sent_length/4]
        out = self.id_256(out)
#         print(out.shape)
        
#         # [batch_size, 256, sent_length/4]
        out = self.res_512(out)
#         print(out.shape)
        
#         # [batch_size, 512, sent_length/8]
        out = self.id_512(out)
#         print(out.shape)
        
#         # [batch_size, 512, sent_length/8]
        out = downsample_k_max_pool(out, k=8, dim=2)[0]
#         return k_max_pooled
#         print(out.shape)
        
        out = out.reshape(out.shape[0], -1)
#         print(out.shape)
        # [batch_size, 512, 8]
        out = self.linear_1(out)
        
        # [batch_size, 4096]
        out = self.linear_2(out)
#         print(out.shape)

        # [batch_size, 512, 2048
        out = self.linear_3(out)
#         print(out.shape)
        
        # [batch_size, n_class]
        return out

In [32]:
embedding_dim = 16
vocab_size = len(text_field.vocab.stoi)
n_classes = 6

model = VDCNN(embedding_dim, vocab_size, n_classes)

In [33]:
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print("Number of trainable parameters in the model are : {}".format(params))

Number of trainable parameters in the model are : 16251910


In [21]:
criterion = nn.BCEWithLogitsLoss()

optimizer = optim.Adam(model.parameters(), lr = 0.01)

model.to(device)
criterion.to(device)

BCEWithLogitsLoss()

In [22]:
print(model)

VDCNN(
  (embedding): Embedding(68, 16)
  (conv_64): Conv1d(16, 64, kernel_size=(3,), stride=(1,), padding=(1,))
  (id_64): ConvolutionalIdentityBlock(
    (conv_1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
    (batch_norm_1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv_2): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
    (batch_norm_2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (res_128): ConvolutionalBlockRes(
    (conv_1): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=(1,))
    (batch_norm_1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv_2): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
    (batch_norm_2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv_res): Conv1d(64, 128, kernel_size=(1,), stride=(2,))
    (batch_norm_res): BatchNorm1d(128, eps=1

In [23]:
def binary_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """
    preds = torch.round(torch.sigmoid(preds))
    correct = (preds == y).float()
    acc = correct.sum()/float(len(correct))
    return acc / 6

def roc_auc_score_FIXED(y_true, y_pred):
    if len(np.unique(y_true)) == 1: # bug in roc_auc_score
        return 0.5
    return roc_auc_score(y_true, y_pred)

def get_avg_roc_value(y, output):
    out = torch.sigmoid(output)
    out = out.cpu().detach().numpy()
    y = y.cpu().detach().numpy()
    
    # dividing the predictions according to the siz classes
    roc_list = []
    for i in range(6):
        roc = roc_auc_score_FIXED(y[:, i], out[:, i])
        roc_list.append(roc)
    
    # average 
    return sum(roc_list) / float(6)

def get_avg_roc_value_2(y_fin, output_fin):
    n = len(y_fin)
    out_list = [[], [], [], [], [], []]
    y_list = [[], [], [], [], [], []]
    for i in range(n):
        for j in range(6):
            out_list[j].extend(list(output_fin[i][:, j]))
            y_list[j].extend(list(y_fin[i][:, j]))
            
    roc_list = []
    for i in range(6):
        roc_list.append(roc_auc_score_FIXED(y_list[i], out_list[i]))
    
    return sum(roc_list) / 6

In [29]:
def train(model, iterator, optimizer, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    epoch_roc = 0
    all_y = []
    all_out_list = []
    
    model.train()
    bar = pyprind.ProgBar(len(iterator), bar_char='█')
    for x, y in iterator:
        optimizer.zero_grad()
        outputs = model(x).squeeze(1)
        loss = criterion(outputs, y)
        acc = binary_accuracy(outputs, y)
        roc = get_avg_roc_value(y, outputs)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        epoch_roc += roc
        
        all_out_list.append(torch.sigmoid(outputs).cpu().detach().numpy())
        all_y.append(y.cpu().detach().numpy())
        
        bar.update()
    roc_main = get_avg_roc_value_2(all_y, all_out_list)
    return epoch_loss / len(iterator), epoch_acc / len(iterator), epoch_roc / len(iterator), roc_main

In [30]:
def evaluate(model, iterator, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    epoch_roc = 0
    
    all_y = []
    all_out_list = []
    model.eval()
    
    with torch.no_grad():
        bar = pyprind.ProgBar(len(iterator), bar_char='█')
        for x, y in iterator:
            outputs = model(x).squeeze(1)
            loss = criterion(outputs, y)
            acc = binary_accuracy(outputs, y)
            roc = get_avg_roc_value(y, outputs)
            epoch_loss += loss.item()
            epoch_acc += acc.item()
            epoch_roc += roc
            
            all_out_list.append(torch.sigmoid(outputs).cpu().detach().numpy())
            all_y.append(y.cpu().detach().numpy())
            
            bar.update()
    roc_main = get_avg_roc_value_2(all_y, all_out_list)
    return epoch_loss / len(iterator), epoch_acc / len(iterator), epoch_roc / len(iterator), roc_main

In [31]:
# import math

# def sigmoid(x):
#     return 1 / (1 + math.exp(-x))

In [32]:
MODEL_PATH = "data/vdcnn_toxic_model.tar"
def save_checkpoint(state, is_best, filename):
    """Save checkpoint if a new best is achieved"""
    if is_best:
        print ("=> Saving a new best")
        torch.save(state, filename)  # save checkpoint
    else:
        print ("=> Validation loss did not improve")
    return

In [33]:
N_EPOCHS = 1
base_dev_acc = 0.0
for epoch in range(N_EPOCHS):

    train_loss, train_acc, train_roc, train_roc_main = train(model, train_dl, optimizer, criterion)
    valid_loss, valid_acc, valid_roc, valid_roc_main = evaluate(model, valid_dl, criterion)
    
    print(f'| Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | Train ROC: {train_roc*100:.2f} | Train Acc: {train_acc*100:.2f}%')
    print(f'| Epoch: {epoch+1:02} | Val. Loss: {valid_loss:.3f} | Val. ROC: {valid_roc*100:.2f} | Val. Acc: {valid_acc*100:.2f}% |')
    print(f'| Train Main ROC: {train_roc_main*100:.2f} | Val. Main ROC: {valid_roc_main*100:.2f} ')
    is_best = False
    if base_dev_acc < valid_acc:
        is_best = True,
        base_dev_acc = valid_acc
    
    save_checkpoint({
        'epoch': epoch + 1,
        'state_dict': model.state_dict(),
        'best_loss': valid_loss,
        'best_dev_accuracy': valid_acc
    }, is_best, MODEL_PATH)

0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 01:26:29
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:04:53


| Epoch: 01 | Train Loss: 9.366 | Train ROC: 56.68 | Train Acc: 95.94%
| Epoch: 01 | Val. Loss: 0.138 | Val. ROC: 54.31 | Val. Acc: 96.38% |
| Train Main ROC: 58.06 | Val. Main ROC: 67.10 
=> Saving a new best


In [None]:
test_loss, test_acc = evaluate(model, test_iterator, criterion)

print(f'| Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}% |')

In [78]:
def predict_sentiment(sentence):
    tokenized = tokenize(sentence)
    indexed = [TEXT.vocab.stoi[t] for t in tokenized]
    tensor = torch.LongTensor(indexed).to(device)
    
    tensor = tensor.unsqueeze(1)
#     print(tensor.shape)
    prediction = model(tensor).squeeze(1)
#     print(prediction)
    preds, ind= torch.round(torch.sigmoid(tensor))
#     print(preds)
    return preds, ind

In [79]:
# text = "My voice range is A2-C5. My chest voice goes up to F4. Included sample in my higher chest range. What is my voice type?"
# predict_sentiment(text)