# VDCNN Implementation in PyTorch

In [1]:
# importing required libraries
import torch 
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchtext import datasets
from torchtext import data
import nltk
import pyprind
import math

import re
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from string import punctuation
from sklearn.metrics import roc_auc_score

In [2]:
# Ask PyTorch's CUDA caching allocator to release unused cached GPU memory before the run starts.
torch.cuda.empty_cache()

In [3]:
# Fixed sequence length (in characters) that every comment is padded/truncated to;
# passed as `fix_length` to the text field below.
SENT_LENGTH = 1024

In [4]:
def tokenizer(text):
    """Character-level tokenizer.

    Lower-cases ``text`` and returns its characters as a list, keeping only
    those that appear in the allowed alphabet (letters, digits, a fixed set of
    punctuation marks and the space character). Everything else is dropped.
    """
    allowed = 'abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:’"/|_#$%ˆ&*˜‘+=<>()[]{} '
    kept = []
    for ch in text.lower():
        if ch in allowed:
            kept.append(ch)
    return kept

In [5]:
# Character-level input field: each comment is split by `tokenizer`, mapped through a
# vocabulary, and padded/truncated to SENT_LENGTH tokens with the batch dimension first.
text_field = data.Field(
    sequential=True,
    use_vocab=True,
#     init_token="<ios>",
#     eos_token="<eos>",
    fix_length=SENT_LENGTH,
    tokenize=tokenizer,
    batch_first=True
)
# Target field: a single numeric value per example, used as-is (no vocabulary) and
# converted to a float tensor.
label_field = data.Field(
    sequential=False,
    use_vocab=False,
    is_target=True,
    dtype=torch.float
)

In [6]:
# Column-to-field mapping for the CSV files, in column order.
# Columns mapped to None are skipped, so only `comment_text` (input) and
# `toxic` (target) are loaded.
csv_fields = [
    ("id", None),
    ("comment_text", text_field),
    ("toxic", label_field),
    ("severe_toxic", None), ("threat", None),
    ("obscene", None), ("insult", None),
    ("identity_hate", None)
]

In [7]:
# Load the training and validation splits from CSV using the field mapping above.
# Note that the file named `test_torch.csv` is used as the validation set here.
trainds, valds = data.TabularDataset.splits(
    path="data/toxic_competition_data/",
    format="csv",
    train="train_torch.csv",
    validation="test_torch.csv",
    fields=csv_fields,
    skip_header=True
)

In [8]:
# Number of examples in the training and validation datasets.
len(trainds), len(valds)

(143613, 15958)

In [9]:
# Raw label of one example; at this stage it is still the string read from the CSV.
trainds.examples[1].toxic

'0'

In [10]:
# Show a sample comment (characters re-joined into a string) next to its label.
# NOTE(review): the unconditional `break` means only the first example is printed,
# despite the `range(10)`; confirm whether one or ten samples were intended.
print("Sentence/phrase ----------------- Label")
for i in range(10):
    print("{} ------------- {}".format("".join(trainds.examples[i].comment_text), trainds.examples[i].toxic))
    break

Sentence/phrase ----------------- Label
hey isambard, thanks comment. see section oed, merriam-webster dictionary.com less agree definition. right cite article - ill that. thanks again. ------------- 0


In [11]:
# Build the character vocabulary from the training set only.
text_field.build_vocab(trainds)
# NOTE(review): `label_field` is declared with use_vocab=False, so this call looks
# unnecessary; confirm before removing.
label_field.build_vocab(trainds)

In [12]:
# Ten most frequent characters in the training vocabulary, with their counts.
text_field.vocab.freqs.most_common(10)

[(' ', 5179297),
 ('e', 3978663),
 ('i', 2621386),
 ('a', 2491486),
 ('t', 2362920),
 ('s', 2196218),
 ('n', 2164892),
 ('o', 2040191),
 ('r', 2016075),
 ('l', 1646713)]

In [13]:
# Mini-batch size used for both the training and validation iterators.
BATCH_SIZE = 64

In [14]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# Batch iterators over both datasets; batches are created directly on `device`
# and each iterator makes a single pass (repeat=False).
# NOTE(review): `sort_key` returns the token list itself, so examples are ordered by
# comparing the token lists rather than by their length (e.g. `len(x.comment_text)`);
# confirm this is intended. Every sequence is padded to SENT_LENGTH regardless.
traindl, valdl = data.BucketIterator.splits(
    datasets=(trainds, valds),
    batch_sizes=(BATCH_SIZE, BATCH_SIZE),
    sort_key= lambda x: x.comment_text,
    repeat=False,
    device=device
)
# Number of batches per epoch for training and validation.
len(traindl), len(valdl)

cuda


(2244, 250)

In [15]:
# Pull one batch from the training iterator and display its summary as a sanity check.
batch = next(traindl.__iter__()); batch

    Found GPU0 GeForce GT 755M which is of cuda capability 3.0.
    PyTorch no longer supports this GPU because it is too old.
    



[torchtext.data.batch.Batch of size 64]
	[.comment_text]:[torch.cuda.LongTensor of size 64x1024 (GPU 0)]
	[.toxic]:[torch.cuda.FloatTensor of size 64 (GPU 0)]

In [16]:
class BatchWrapper:
    """Adapt an iterator of attribute-style batches into ``(x, y)`` tuples.

    Parameters
    ----------
    dl : iterable with ``__len__``
        Underlying batch iterator; each batch exposes its tensors as attributes.
    x_var : str
        Name of the batch attribute holding the model input (single input only).
    y_vars : str, list/tuple of str, or None
        * ``str``  - name of a single target attribute; it is yielded unchanged.
        * sequence - names of several target attributes; each is given a trailing
          dimension and they are concatenated column-wise into one float tensor
          with one column per name.
        * ``None`` - no targets; a placeholder ``torch.zeros((1))`` is yielded.
    """

    def __init__(self, dl, x_var, y_vars):
        self.dl, self.x_var, self.y_vars = dl, x_var, y_vars

    def __iter__(self):
        """Yield one ``(x, y)`` tuple per batch of the wrapped iterator."""
        for batch in self.dl:
            x = getattr(batch, self.x_var)  # we assume only one input in this wrapper

            if self.y_vars is None:
                # No targets configured: yield a dummy tensor so callers can still unpack.
                y = torch.zeros((1))
            elif isinstance(self.y_vars, str):
                # Single target attribute: pass it through untouched.
                y = getattr(batch, self.y_vars)
            else:
                # Several target attributes: stack them as columns of one float tensor.
                y = torch.cat(
                    [getattr(batch, feat).unsqueeze(1) for feat in self.y_vars], dim=1
                ).float()

            yield (x, y)

    def __len__(self):
        """Number of batches in the wrapped iterator."""
        return len(self.dl)

In [17]:
# Wrap both iterators so they yield (comment_text, toxic) tuples.
train_dl = BatchWrapper(traindl, "comment_text", "toxic")
valid_dl = BatchWrapper(valdl, "comment_text", "toxic")

In [18]:
# Display one (x, y) pair from the wrapped training iterator as a sanity check.
next(train_dl.__iter__())

(tensor([[18,  5, 18,  ...,  1,  1,  1],
         [18,  4,  2,  ...,  1,  1,  1],
         [35,  4, 17,  ...,  1,  1,  1],
         ...,
         [16,  9, 30,  ...,  1,  1,  1],
         [ 8,  9,  6,  ...,  1,  1,  1],
         [ 5, 10,  6,  ...,  1,  1,  1]], device='cuda:0'),
 tensor([0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
         0., 1., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0'))

In [19]:
class ConvolutionalBlockRes(nn.Module):
    """Two Conv1d -> BatchNorm -> ReLU layers with an optional downsampling shortcut.

    Parameters
    ----------
    in_channels, out_channels : int
        Channel counts of the block input and output.
    kernel_size : int
        Kernel size of both convolutions.
    padding : int, default 1
        Padding applied by both convolutions. (Previously this argument was
        accepted but ignored in favour of a hard-coded 1.)
    shortcut : bool, default False
        When false the block simply returns the output of the two convolutions.
        When true the temporal axis is halved by pooling and a strided 1x1
        convolution of the input (followed by BatchNorm and ReLU) is added to it.
    pool_type : str, default "max_pool"
        ``"k_max"`` keeps the ``ceil(L / 2)`` largest activations per channel;
        any other value uses max pooling with kernel 3 and stride 2.

    Note: with ``shortcut`` enabled, the pooled branch and the shortcut branch
    only have matching lengths when the convolutions preserve the sequence
    length (e.g. ``kernel_size=3, padding=1``).
    """

    def __init__(self, in_channels, out_channels, kernel_size, padding=1, shortcut=False, pool_type="max_pool"):
        super().__init__()
        self.shortcut = shortcut
        self.pool_type = pool_type

        self.conv_1 = nn.Conv1d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, padding=padding)
        self.batch_norm_1 = nn.BatchNorm1d(out_channels)

        self.conv_2 = nn.Conv1d(in_channels=out_channels, out_channels=out_channels, kernel_size=kernel_size, padding=padding)
        self.batch_norm_2 = nn.BatchNorm1d(out_channels)

        if shortcut:
            # 1x1 convolution with stride 2: matches both the channel count and the
            # halved length of the pooled main branch.
            self.conv_res = nn.Conv1d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=2)
            self.batch_norm_res = nn.BatchNorm1d(out_channels)

    def forward(self, x):
        """Apply the block to ``x`` (channels on dim 1, sequence on dim 2)."""
        out_1 = self.conv_1(x)
        out_1 = F.relu(self.batch_norm_1(out_1))

        out = self.conv_2(out_1)
        out = F.relu(self.batch_norm_2(out))

        if not self.shortcut:
            return out

        residual = self.conv_res(x)
        residual = F.relu(self.batch_norm_res(residual))
        if self.pool_type == "k_max":
            # Keep half of the positions (rounded up) so the length matches `residual`.
            k_ = math.ceil(out.shape[2]/2.0)
            downsampled = downsample_k_max_pool(out, k=k_, dim=2)[0]
        else:
            downsampled = downsample_max_pool(out, 3, 2)
        return downsampled + residual

class ConvolutionalIdentityBlock(nn.Module):
    """Two channel-preserving Conv1d -> BatchNorm -> ReLU layers with an optional identity skip.

    Parameters
    ----------
    in_channels : int
        Channel count of both the input and the output.
    kernel_size : int
        Kernel size of both convolutions.
    padding : int, default 1
        Padding applied by both convolutions. (Previously this argument was
        accepted but ignored in favour of a hard-coded 1.)
    shortcut : bool, default False
        When true the block input is added to the output; this requires the
        convolutions to preserve the sequence length.
    """

    def __init__(self, in_channels, kernel_size, padding=1, shortcut=False):
        super().__init__()

        self.shortcut = shortcut

        self.conv_1 = nn.Conv1d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, padding=padding)
        self.batch_norm_1 = nn.BatchNorm1d(in_channels)

        self.conv_2 = nn.Conv1d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, padding=padding)
        self.batch_norm_2 = nn.BatchNorm1d(in_channels)

    def forward(self, x):
        """Apply the block to ``x`` (channels on dim 1, sequence on dim 2)."""
        out_1 = self.conv_1(x)
        out_1 = F.relu(self.batch_norm_1(out_1))

        out = self.conv_2(out_1)
        out = F.relu(self.batch_norm_2(out))

        if self.shortcut:
            # Identity skip connection.
            out = out + x

        return out

def downsample_max_pool(x, kernel_size, stride):
    """Max-pool ``x`` along its last dimension with the given kernel size and stride.

    One element of implicit padding is always applied on each side.
    """
    return F.max_pool1d(x, kernel_size=kernel_size, stride=stride, padding=1)


def downsample_k_max_pool(inp, k, dim):
    """Return the ``k`` largest entries of ``inp`` along ``dim``.

    The result is the ``(values, indices)`` pair produced by ``topk``; values
    come back ordered from largest to smallest, not in their original positions.
    """
    top = torch.topk(inp, k, dim)
    return top

In [20]:
class VDCNN(nn.Module):
    """Character-level convolutional classifier (reduced VDCNN).

    Active pipeline: embedding -> Conv1d to 64 channels -> one 64-channel
    identity block -> top-3 activations per channel -> three fully connected
    layers with dropout. The deeper 128/256/512-channel stages are present only
    as commented-out code.

    Parameters
    ----------
    embedding_dim : int
        Size of each character embedding.
    vocab_size : int
        Number of rows in the embedding table.
    n_classes : int
        Number of output logits per example.
    """
    def __init__(self, embedding_dim, vocab_size, n_classes):
        super().__init__()
        self.embedding = nn.Embedding(embedding_dim=embedding_dim, num_embeddings=vocab_size)
        
        # First temporal convolution: embedding_dim -> 64 channels, length preserved.
        self.conv_64 = nn.Conv1d(in_channels=embedding_dim, out_channels=64, kernel_size=3, padding=1)
        
        self.id_64 = ConvolutionalIdentityBlock(64, kernel_size=3, padding=1, shortcut=False)
        
        # Deeper stages, currently disabled. Re-enabling them changes the channel
        # count reaching the classifier, so `linear_1`'s input size must change too.
#         self.res_128 = ConvolutionalBlockRes(in_channels=64, out_channels=128, kernel_size=3, padding=1, shortcut=False, pool_type="max_pool")
        
#         self.id_128 = ConvolutionalIdentityBlock(128, kernel_size=3, padding=1, shortcut=False)
        
#         self.res_256 = ConvolutionalBlockRes(in_channels=128, out_channels=256, kernel_size=3, padding=1, shortcut=True, pool_type="max_pool")

#         self.id_256 = ConvolutionalIdentityBlock(256, kernel_size=3, padding=1, shortcut=True)
        
#         self.res_512 = ConvolutionalBlockRes(in_channels=256, out_channels=512, kernel_size=3, padding=1, shortcut=True, pool_type="max_pool")
        
#         self.id_512 = ConvolutionalIdentityBlock(512, kernel_size=3, padding=1, shortcut=True)
        
        self.drop1 = nn.Dropout(0.3)
        self.drop2 = nn.Dropout(0.3)
        
        # 3*64 = (k=3 activations kept per channel in forward) * (64 channels).
        self.linear_1 = nn.Linear(3*64, 2048)
        self.linear_2 = nn.Linear(2048, 2048)
        self.linear_3 = nn.Linear(2048, n_classes)
        
    def forward(self, inp):
        """Map a batch of character indices to one row of ``n_classes`` logits per example.

        The shape comments below describe the tensor at that point, assuming
        ``inp`` is ``[batch_size, sent_length]``.
        """
        
        # inp: [batch_size, sent_length]
        embedded = self.embedding(inp)
        
        # [batch_size, sent_length, emb_dim] -> move channels in front of the sequence axis for Conv1d
        embedded = embedded.permute(0, 2, 1)
        
        # [batch_size, emb_dim, sent_length]
        out = self.conv_64(embedded)
        
        # [batch_size, 64, sent_length]
        out = self.id_64(out)
        
        # Disabled deeper stages (see __init__):
#         # [batch_size, 64, sent_length]
#         out = self.res_128(out)
        
#         # [batch_size, 128, sent_length/2]
#         out = self.id_128(out)
        
#         # [batch_size, 128, sent_length/2]
#         out = self.res_256(out)
        
#         # [batch_size, 256, sent_length/4]
#         out = self.id_256(out)
        
#         # [batch_size, 256, sent_length/4]
#         out = self.res_512(out)
        
#         # [batch_size, 512, sent_length/8]
#         out = self.id_512(out)
        
        # [batch_size, 64, sent_length] -> keep the 3 largest activations of each channel
        out = downsample_k_max_pool(out, k=3, dim=2)[0]
        
        # [batch_size, 64, 3] -> flatten per example
        out = out.reshape(out.shape[0], -1)
        # [batch_size, 192]
        out = F.relu(self.linear_1(out))
        out = self.drop1(out)
        
        # [batch_size, 2048]
        out = F.relu(self.linear_2(out))
        out = self.drop2(out)

        # [batch_size, 2048]
        out = self.linear_3(out)
        
        # [batch_size, n_classes] raw logits (no sigmoid applied here)
        return out

In [21]:
# Model hyper-parameters.
embedding_dim = 16
# One embedding row per entry of the character vocabulary.
vocab_size = len(text_field.vocab.stoi)
# A single output logit: the binary `toxic` label.
n_classes = 1

model = VDCNN(embedding_dim, vocab_size, n_classes)

In [22]:
# Count the scalar weights of every parameter tensor that requires gradients.
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print("Number of trainable parameters in the model are : {}".format(params))

Number of trainable parameters in the model are : 4622849


In [23]:
# Binary cross-entropy computed on raw logits (the sigmoid is applied inside the loss).
criterion = nn.BCEWithLogitsLoss()

optimizer = optim.Adam(model.parameters(), lr = 0.01)

# Move the model and the loss module to the selected device.
model.to(device)
criterion.to(device)

BCEWithLogitsLoss()

In [24]:
# Print the layer structure of the instantiated model.
print(model)

VDCNN(
  (embedding): Embedding(68, 16)
  (conv_64): Conv1d(16, 64, kernel_size=(3,), stride=(1,), padding=(1,))
  (id_64): ConvolutionalIdentityBlock(
    (conv_1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
    (batch_norm_1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv_2): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
    (batch_norm_2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (drop1): Dropout(p=0.3)
  (drop2): Dropout(p=0.3)
  (linear_1): Linear(in_features=192, out_features=2048, bias=True)
  (linear_2): Linear(in_features=2048, out_features=2048, bias=True)
  (linear_3): Linear(in_features=2048, out_features=1, bias=True)
)


In [25]:
def binary_accuracy(preds, y):
    """
    Fraction of correct predictions in the batch (e.g. 0.8 for 8 out of 10, not 8).

    ``preds`` are raw logits; they are passed through a sigmoid and rounded to
    0/1 before being compared element-wise with the labels ``y``.
    """
    hard_labels = torch.sigmoid(preds).round()
    hits = hard_labels.eq(y).float()
    return hits.sum() / float(len(hits))

def roc_auc_score_FIXED(y_true, y_pred):
    """ROC-AUC that falls back to 0.5 when ``y_true`` contains a single distinct value.

    With only one class present the AUC is undefined, so ``roc_auc_score`` is
    not called in that case and the chance-level value 0.5 is returned instead.
    """
    distinct_labels = np.unique(y_true)
    if len(distinct_labels) != 1:
        return roc_auc_score(y_true, y_pred)
    return 0.5

def get_avg_roc_value(y, output):
    """ROC-AUC of one batch.

    ``output`` holds raw logits and ``y`` the matching labels; the logits are
    turned into probabilities with a sigmoid, both tensors are converted to
    NumPy arrays on the CPU, and the score comes from ``roc_auc_score_FIXED``.
    """
    probs = torch.sigmoid(output).cpu().detach().numpy()
    labels = y.cpu().detach().numpy()
    return roc_auc_score_FIXED(labels, probs)

def get_avg_roc_value_2(y_fin, output_fin):
    """ROC-AUC over all batches pooled together.

    ``y_fin`` and ``output_fin`` are parallel sequences of per-batch labels and
    per-batch scores. Both are flattened, batch by batch, into two long lists
    which are scored once with ``roc_auc_score_FIXED``.
    """
    batch_ids = range(len(y_fin))
    flat_scores = [score for i in batch_ids for score in output_fin[i]]
    flat_labels = [label for i in batch_ids for label in y_fin[i]]
    return roc_auc_score_FIXED(flat_labels, flat_scores)

In [26]:
def train(model, iterator, optimizer, criterion):
    """Run one training epoch over ``iterator``.

    Returns a 4-tuple: mean batch loss, mean batch accuracy, mean of the
    per-batch ROC-AUC values, and the ROC-AUC computed once over the
    predictions of the whole epoch.
    """
    n_batches = len(iterator)
    loss_total, acc_total, roc_total = 0, 0, 0
    label_chunks, prob_chunks = [], []

    model.train()
    bar = pyprind.ProgBar(n_batches, bar_char='█')
    for x, y in iterator:
        optimizer.zero_grad()
        logits = model(x).squeeze(1)
        batch_loss = criterion(logits, y)
        batch_acc = binary_accuracy(logits, y)
        batch_roc = get_avg_roc_value(y, logits)
        batch_loss.backward()
        optimizer.step()

        loss_total += batch_loss.item()
        acc_total += batch_acc.item()
        roc_total += batch_roc

        # Keep probabilities and labels so the epoch-level ROC-AUC can be computed at the end.
        prob_chunks.append(torch.sigmoid(logits).cpu().detach().numpy())
        label_chunks.append(y.cpu().detach().numpy())

        bar.update()

    roc_main = get_avg_roc_value_2(label_chunks, prob_chunks)
    return loss_total / n_batches, acc_total / n_batches, roc_total / n_batches, roc_main

In [27]:
def evaluate(model, iterator, criterion):
    """Evaluate ``model`` over ``iterator`` without updating weights.

    The model is switched to eval mode and gradients are disabled. Returns a
    4-tuple: mean batch loss, mean batch accuracy, mean of the per-batch
    ROC-AUC values, and the ROC-AUC computed once over all predictions.
    """
    n_batches = len(iterator)
    loss_total, acc_total, roc_total = 0, 0, 0
    label_chunks, prob_chunks = [], []

    model.eval()

    with torch.no_grad():
        bar = pyprind.ProgBar(n_batches, bar_char='█')
        for x, y in iterator:
            logits = model(x).squeeze(1)
            batch_loss = criterion(logits, y)
            batch_acc = binary_accuracy(logits, y)
            batch_roc = get_avg_roc_value(y, logits)

            loss_total += batch_loss.item()
            acc_total += batch_acc.item()
            roc_total += batch_roc

            # Keep probabilities and labels for the pooled ROC-AUC.
            prob_chunks.append(torch.sigmoid(logits).cpu().detach().numpy())
            label_chunks.append(y.cpu().detach().numpy())

            bar.update()

    roc_main = get_avg_roc_value_2(label_chunks, prob_chunks)
    return loss_total / n_batches, acc_total / n_batches, roc_total / n_batches, roc_main

In [28]:
# import math

# def sigmoid(x):
#     return 1 / (1 + math.exp(-x))

In [29]:
# File the best checkpoint is written to by the training loops below.
MODEL_PATH = "data/vdcnn_just_toxic_model_128.tar"
def save_checkpoint(state, is_best, filename):
    """Write ``state`` to ``filename`` only when ``is_best`` is truthy.

    A short status message is printed in both cases; nothing is returned.
    """
    if not is_best:
        print ("=> Validation roc did not improve")
        return
    print ("=> Saving a new best")
    torch.save(state, filename)  # save checkpoint

In [30]:
# Train for N_EPOCHS epochs and checkpoint the model whenever the pooled
# validation ROC-AUC improves on the best value seen so far.
N_EPOCHS = 20
base_dev_roc = 0  # best pooled validation ROC-AUC so far
for epoch in range(N_EPOCHS):

    train_loss, train_acc, train_roc, train_roc_main = train(model, train_dl, optimizer, criterion)
    valid_loss, valid_acc, valid_roc, valid_roc_main = evaluate(model, valid_dl, criterion)
    
    print(f'| Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | Train ROC: {train_roc*100:.2f} | Train Acc: {train_acc*100:.2f}%')
    print(f'| Epoch: {epoch+1:02} | Val. Loss: {valid_loss:.3f} | Val. ROC: {valid_roc*100:.2f} | Val. Acc: {valid_acc*100:.2f}% |')
    print(f'| Train Main ROC: {train_roc_main*100:.2f} | Val. Main ROC: {valid_roc_main*100:.2f} ')
    # A plain bool: the earlier `is_best = True,` created the tuple `(True,)`,
    # which only behaved correctly because a non-empty tuple is truthy.
    is_best = bool(base_dev_roc < valid_roc_main)
    if is_best:
        base_dev_roc = valid_roc_main
    
    # The stored loss/accuracy are those of the current epoch's validation pass.
    save_checkpoint({
        'epoch': epoch + 1,
        'state_dict': model.state_dict(),
        'best_loss': valid_loss,
        'best_dev_accuracy': valid_acc
    }, is_best, MODEL_PATH)

0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:10:33
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 01 | Train Loss: 0.326 | Train ROC: 64.00 | Train Acc: 90.32%
| Epoch: 01 | Val. Loss: 0.293 | Val. ROC: 67.30 | Val. Acc: 90.58% |
| Train Main ROC: 62.14 | Val. Main ROC: 73.17 
=> Saving a new best



Total time elapsed: 00:00:43
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:11:07
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 02 | Train Loss: 0.289 | Train ROC: 72.66 | Train Acc: 90.34%
| Epoch: 02 | Val. Loss: 0.278 | Val. ROC: 69.46 | Val. Acc: 90.58% |
| Train Main ROC: 71.73 | Val. Main ROC: 75.21 
=> Saving a new best



Total time elapsed: 00:00:43
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:11:07
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 03 | Train Loss: 0.284 | Train ROC: 74.16 | Train Acc: 90.32%
| Epoch: 03 | Val. Loss: 0.275 | Val. ROC: 70.45 | Val. Acc: 90.58% |
| Train Main ROC: 73.40 | Val. Main ROC: 76.93 
=> Saving a new best



Total time elapsed: 00:00:43
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:11:07
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 04 | Train Loss: 0.281 | Train ROC: 75.61 | Train Acc: 90.28%
| Epoch: 04 | Val. Loss: 0.268 | Val. ROC: 70.12 | Val. Acc: 90.58% |
| Train Main ROC: 74.54 | Val. Main ROC: 77.25 
=> Saving a new best



Total time elapsed: 00:00:43
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:11:07
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 05 | Train Loss: 0.279 | Train ROC: 75.51 | Train Acc: 90.29%
| Epoch: 05 | Val. Loss: 0.273 | Val. ROC: 67.98 | Val. Acc: 90.58% |
| Train Main ROC: 74.75 | Val. Main ROC: 75.65 
=> Validation roc did not improve



Total time elapsed: 00:00:43
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:11:07
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 06 | Train Loss: 0.279 | Train ROC: 74.81 | Train Acc: 90.32%
| Epoch: 06 | Val. Loss: 0.274 | Val. ROC: 70.13 | Val. Acc: 90.58% |
| Train Main ROC: 74.42 | Val. Main ROC: 76.90 
=> Validation roc did not improve



Total time elapsed: 00:00:43
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:11:07
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 07 | Train Loss: 0.279 | Train ROC: 74.99 | Train Acc: 90.31%
| Epoch: 07 | Val. Loss: 0.270 | Val. ROC: 70.89 | Val. Acc: 90.58% |
| Train Main ROC: 74.37 | Val. Main ROC: 77.33 
=> Saving a new best



Total time elapsed: 00:00:43
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:11:06
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 08 | Train Loss: 0.278 | Train ROC: 75.34 | Train Acc: 90.37%
| Epoch: 08 | Val. Loss: 0.279 | Val. ROC: 70.46 | Val. Acc: 90.58% |
| Train Main ROC: 74.86 | Val. Main ROC: 76.77 
=> Validation roc did not improve



Total time elapsed: 00:00:43
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:11:06
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 09 | Train Loss: 0.277 | Train ROC: 75.45 | Train Acc: 90.35%
| Epoch: 09 | Val. Loss: 0.282 | Val. ROC: 71.61 | Val. Acc: 90.58% |
| Train Main ROC: 75.02 | Val. Main ROC: 77.96 
=> Saving a new best



Total time elapsed: 00:00:43
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:11:06
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 10 | Train Loss: 0.276 | Train ROC: 75.99 | Train Acc: 90.39%
| Epoch: 10 | Val. Loss: 0.267 | Val. ROC: 71.17 | Val. Acc: 90.58% |
| Train Main ROC: 75.31 | Val. Main ROC: 77.79 
=> Validation roc did not improve



Total time elapsed: 00:00:43
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:11:06
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 11 | Train Loss: 0.275 | Train ROC: 75.60 | Train Acc: 90.39%
| Epoch: 11 | Val. Loss: 0.270 | Val. ROC: 71.97 | Val. Acc: 90.58% |
| Train Main ROC: 75.26 | Val. Main ROC: 78.39 
=> Saving a new best



Total time elapsed: 00:00:43
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:11:07
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 12 | Train Loss: 0.274 | Train ROC: 76.32 | Train Acc: 90.45%
| Epoch: 12 | Val. Loss: 0.265 | Val. ROC: 70.67 | Val. Acc: 90.58% |
| Train Main ROC: 75.88 | Val. Main ROC: 78.11 
=> Validation roc did not improve



Total time elapsed: 00:00:43
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:11:06
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 13 | Train Loss: 0.273 | Train ROC: 76.34 | Train Acc: 90.54%
| Epoch: 13 | Val. Loss: 0.262 | Val. ROC: 70.27 | Val. Acc: 90.81% |
| Train Main ROC: 75.74 | Val. Main ROC: 77.73 
=> Validation roc did not improve



Total time elapsed: 00:00:43
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:11:06
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 14 | Train Loss: 0.272 | Train ROC: 76.54 | Train Acc: 90.56%
| Epoch: 14 | Val. Loss: 0.260 | Val. ROC: 72.62 | Val. Acc: 90.58% |
| Train Main ROC: 76.01 | Val. Main ROC: 79.18 
=> Saving a new best



Total time elapsed: 00:00:43
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:11:07
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 15 | Train Loss: 0.270 | Train ROC: 76.67 | Train Acc: 90.66%
| Epoch: 15 | Val. Loss: 0.266 | Val. ROC: 69.82 | Val. Acc: 90.97% |
| Train Main ROC: 76.29 | Val. Main ROC: 77.60 
=> Validation roc did not improve



Total time elapsed: 00:00:43
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:11:07
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 16 | Train Loss: 0.270 | Train ROC: 77.05 | Train Acc: 90.63%
| Epoch: 16 | Val. Loss: 0.265 | Val. ROC: 69.55 | Val. Acc: 90.58% |
| Train Main ROC: 76.43 | Val. Main ROC: 77.22 
=> Validation roc did not improve



Total time elapsed: 00:00:43
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:11:07
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 17 | Train Loss: 0.269 | Train ROC: 77.65 | Train Acc: 90.70%
| Epoch: 17 | Val. Loss: 0.266 | Val. ROC: 71.52 | Val. Acc: 90.58% |
| Train Main ROC: 76.98 | Val. Main ROC: 78.36 
=> Validation roc did not improve



Total time elapsed: 00:00:43
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:11:07
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 18 | Train Loss: 0.270 | Train ROC: 76.67 | Train Acc: 90.72%
| Epoch: 18 | Val. Loss: 0.265 | Val. ROC: 70.01 | Val. Acc: 90.96% |
| Train Main ROC: 76.16 | Val. Main ROC: 77.27 
=> Validation roc did not improve



Total time elapsed: 00:00:43
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:11:07
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 19 | Train Loss: 0.268 | Train ROC: 77.58 | Train Acc: 90.80%
| Epoch: 19 | Val. Loss: 0.272 | Val. ROC: 70.29 | Val. Acc: 90.58% |
| Train Main ROC: 76.75 | Val. Main ROC: 77.81 
=> Validation roc did not improve



Total time elapsed: 00:00:43
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:11:07
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 20 | Train Loss: 0.267 | Train ROC: 78.22 | Train Acc: 90.76%
| Epoch: 20 | Val. Loss: 0.269 | Val. ROC: 72.52 | Val. Acc: 90.58% |
| Train Main ROC: 77.51 | Val. Main ROC: 79.32 
=> Saving a new best



Total time elapsed: 00:00:43


In [31]:
# Continue training the same model for 10 more epochs.
N_EPOCHS = 10
# Best pooled validation ROC-AUC of the previous run, entered by hand (rounded),
# so that the existing checkpoint is only replaced by a strictly better one.
base_dev_roc = 0.7932
for epoch in range(N_EPOCHS):

    train_loss, train_acc, train_roc, train_roc_main = train(model, train_dl, optimizer, criterion)
    valid_loss, valid_acc, valid_roc, valid_roc_main = evaluate(model, valid_dl, criterion)
    
    # Epoch numbers restart at 1 in this cell.
    print(f'| Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | Train ROC: {train_roc*100:.2f} | Train Acc: {train_acc*100:.2f}%')
    print(f'| Epoch: {epoch+1:02} | Val. Loss: {valid_loss:.3f} | Val. ROC: {valid_roc*100:.2f} | Val. Acc: {valid_acc*100:.2f}% |')
    print(f'| Train Main ROC: {train_roc_main*100:.2f} | Val. Main ROC: {valid_roc_main*100:.2f} ')
    # A plain bool: the earlier `is_best = True,` created the tuple `(True,)`,
    # which only behaved correctly because a non-empty tuple is truthy.
    is_best = bool(base_dev_roc < valid_roc_main)
    if is_best:
        base_dev_roc = valid_roc_main
    
    # The stored loss/accuracy are those of the current epoch's validation pass.
    save_checkpoint({
        'epoch': epoch + 1,
        'state_dict': model.state_dict(),
        'best_loss': valid_loss,
        'best_dev_accuracy': valid_acc
    }, is_best, MODEL_PATH)

0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:11:17
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 01 | Train Loss: 0.266 | Train ROC: 77.98 | Train Acc: 90.81%
| Epoch: 01 | Val. Loss: 0.262 | Val. ROC: 72.08 | Val. Acc: 90.58% |
| Train Main ROC: 77.41 | Val. Main ROC: 79.04 
=> Validation roc did not improve



Total time elapsed: 00:00:43
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:11:06
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 02 | Train Loss: 0.265 | Train ROC: 78.25 | Train Acc: 90.85%
| Epoch: 02 | Val. Loss: 0.266 | Val. ROC: 72.65 | Val. Acc: 90.58% |
| Train Main ROC: 77.86 | Val. Main ROC: 79.32 
=> Validation roc did not improve



Total time elapsed: 00:00:43
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:11:07
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 03 | Train Loss: 0.265 | Train ROC: 78.10 | Train Acc: 90.92%
| Epoch: 03 | Val. Loss: 0.257 | Val. ROC: 70.41 | Val. Acc: 91.11% |
| Train Main ROC: 77.41 | Val. Main ROC: 78.14 
=> Validation roc did not improve



Total time elapsed: 00:00:45
0% [███████                       ] 100% | ETA: 00:08:31

KeyboardInterrupt: 

In [None]:
# `evaluate` returns four values (loss, accuracy, mean per-batch ROC-AUC, pooled
# ROC-AUC), so all four are unpacked. No `test_iterator` is defined in this notebook;
# the held-out split loaded from `test_torch.csv` is exposed as `valid_dl`.
test_loss, test_acc, test_roc, test_roc_main = evaluate(model, valid_dl, criterion)

print(f'| Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}% |')

In [78]:
def predict_sentiment(sentence):
    """Classify a single raw string with the trained model.

    The sentence goes through the same pipeline as the training data: it is
    split into characters by ``tokenizer`` and then padded/truncated,
    numericalized and turned into a one-example batch by ``text_field``.

    Parameters
    ----------
    sentence : str
        Raw text to classify.

    Returns
    -------
    (preds, probs) : tuple of tensors, one element each
        ``preds`` is the rounded 0/1 prediction and ``probs`` the sigmoid
        probability it was derived from.
    """
    tokens = tokenizer(sentence)
    # Field.process applies fix_length padding, the vocabulary lookup and
    # batch_first layout, giving a [1, SENT_LENGTH] index tensor on `device`.
    batch = text_field.process([tokens], device=device)

    # Inference only: disable dropout / use BatchNorm running statistics, no gradients.
    model.eval()
    with torch.no_grad():
        logits = model(batch).squeeze(1)

    probs = torch.sigmoid(logits)
    preds = torch.round(probs)
    return preds, probs

In [79]:
# text = "My voice range is A2-C5. My chest voice goes up to F4. Included sample in my higher chest range. What is my voice type?"
# predict_sentiment(text)