# VDCNN Implementation In Torch  

In [1]:
# importing required libraries
import torch 
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchtext import datasets
from torchtext import data
import nltk
import pyprind
import math

In [2]:
# s = r'''abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:’"/|$%ˆ&*˜‘+=<>()[]{}'''
# nltk.regexp_tokenize("I love,  this ship", s)

In [3]:
def tokenize(text):
    s = 'abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:’"/|$%ˆ&*˜‘+=<>()[]{} '
    return [l for l in list(text.lower()) if l in s]

In [4]:
TEXT = data.Field(tokenize=tokenize, fix_length=1014)
LABEL = data.LabelField(dtype=torch.float)

In [5]:
train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)

In [6]:
print(f'Number of training examples: {len(train_data)}')
print(f'Number of testing examples: {len(test_data)}')

Number of training examples: 25000
Number of testing examples: 25000


In [7]:
TEXT.build_vocab(train_data)
LABEL.build_vocab(train_data)

In [8]:
# TEXT.vocab.stoi
LABEL.vocab.freqs

Counter({'pos': 12500, 'neg': 12500})

In [9]:
BATCH_SIZE = 64

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

train_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, test_data), 
    batch_size=BATCH_SIZE,
    device=device
)

cuda


In [10]:
for batch in train_iterator:
    te, y = batch.text, batch.label
    break

    Found GPU0 GeForce GT 755M which is of cuda capability 3.0.
    PyTorch no longer supports this GPU because it is too old.
    


In [11]:
te.shape[0]

1014

In [None]:
class ConvolutionalBlockRes(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, padding=1, shortcut=False, pool_type="max_pool"):
        super().__init__()
        self.shortcut = shortcut
        self.pool_type = pool_type
        self.conv_1 = nn.Conv1d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, padding=1)
        self.batch_norm_1 = nn.BatchNorm1d(out_channels)
        self.conv_2 = nn.Conv1d(in_channels=out_channels, out_channels=out_channels, kernel_size=kernel_size, padding=1)
        self.batch_norm_2 = nn.BatchNorm1d(out_channels)

        if shortcut is True:
            self.conv_res = nn.Conv1d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=2)
            self.batch_norm_res = nn.BatchNorm1d(out_channels)
    
    def forward(self, x):
        out = self.conv_1(x)
        out = F.relu(self.batch_norm_1(out))

        out = self.conv_2(out)
        out = F.relu(self.batch_norm_2(out))
        
        # downsampled
        if self.pool_type == "k_max":
            k_ = math.ceil(out.shape[2]/2.0)
            out = downsample_k_max_pool(out, k=k_, dim=2)[0]
        else:
            out = downsample_max_pool(out, 3, 2)

        if self.shortcut is True:
            residual = self.conv_res(x)
            residual = F.relu(self.batch_norm_res(residual))
            out = out + residual
        return out

class ConvolutionalIdentityBlock(nn.Module):
    def __init__(self, in_channels, kernel_size, padding=1, shortcut=False):
        super().__init__()
        
        self.shortcut = shortcut
        self.conv_1 = nn.Conv1d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, padding=1)
        self.batch_norm_1 = nn.BatchNorm1d(in_channels)
        self.conv_2 = nn.Conv1d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, padding=1)
        self.batch_norm_2 = nn.BatchNorm1d(in_channels)
    
    def forward(self, x):
        out = self.conv_1(x)
        out = F.relu(self.batch_norm_1(out))

        out = self.conv_2(out)
        out = F.relu(self.batch_norm_2(out))

        if self.shortcut is True:
            out = out + x
        else:
            out = out

        return out

def downsample_max_pool(x, kernel_size, stride):
    pool = nn.MaxPool1d(kernel_size=kernel_size, stride=stride, padding=1)
    return pool(x)


def downsample_k_max_pool(inp, k, dim):
    return inp.topk(k, dim)

In [None]:
class VDCNN(nn.Module):
    def __init__(self, embedding_dim, vocab_size, n_classes):
        super().__init__()
        self.embedding = nn.Embedding(embedding_dim=embedding_dim, num_embeddings=vocab_size)
        
        self.conv_64 = nn.Conv1d(in_channels=embedding_dim, out_channels=64, kernel_size=3, padding=1)
        
        self.id_64 = ConvolutionalIdentityBlock(64, kernel_size=3, padding=1, shortcut=True)
        
        self.res_128 = ConvolutionalBlockRes(in_channels=64, out_channels=128, kernel_size=3, padding=1, shortcut=True, pool_type="k_max")
        
        self.id_128 = ConvolutionalIdentityBlock(128, kernel_size=3, padding=1, shortcut=True)
        
        self.res_256 = ConvolutionalBlockRes(in_channels=128, out_channels=256, kernel_size=3, padding=1, shortcut=True, pool_type="k_max")

        self.id_256 = ConvolutionalIdentityBlock(256, kernel_size=3, padding=1, shortcut=True)
        
        self.res_512 = ConvolutionalBlockRes(in_channels=256, out_channels=512, kernel_size=3, padding=1, shortcut=True, pool_type="k_max")
        
        self.id_512 = ConvolutionalIdentityBlock(512, kernel_size=3, padding=1, shortcut=True)
        
        self.linear_1 = nn.Linear(8*512, 2048)
        self.linear_2 = nn.Linear(2048, 2048)
        self.linear_3 = nn.Linear(2048, n_classes)
        
    def forward(self, inp):
        
        # [sent_length, batch_size]
        inp = inp.permute(1, 0)
        
        # [batch_size, sent_length]
        embedded = self.embedding(inp)
        
        # [batch_size, sent_lenght, emb_dim]
        embedded = embedded.permute(0, 2, 1)
        
        # [batch_size, emb_dim, sent_length]
        out = self.conv_64(embedded)
#         print(out.shape)
        
        # [batch_size, 64, sent_length]
        out = self.id_64(out)
#         print(out.shape)
        
#         # [batch_size, 64, sent_length]
        out = self.res_128(out)
#         print(out.shape)
        
#         # [batch_size, 128, sent_length/2]
        out = self.id_128(out)
#         print(out.shape)
        
#         # [batch_size, 128, sent_length/2]
        out = self.res_256(out)
#         print(out.shape)
        
#         # [batch_size, 256, sent_length/4]
        out = self.id_256(out)
#         print(out.shape)
        
#         # [batch_size, 256, sent_length/4]
        out = self.res_512(out)
#         print(out.shape)
        
#         # [batch_size, 512, sent_length/8]
        out = self.id_512(out)
#         print(out.shape)
        
#         # [batch_size, 512, sent_length/8]
        out = downsample_k_max_pool(out, k=8, dim=2)[0]
#         return k_max_pooled
#         print(out.shape)
        
        out = out.reshape(out.shape[0], -1)
#         print(out.shape)
        # [batch_size, 512, 8]
        out = self.linear_1(out)
        
        # [batch_size, 4096]
        out = self.linear_2(out)
#         print(out.shape)

        # [batch_size, 512, 2048
        out = self.linear_3(out)
#         print(out.shape)
        
        # [batch_size, n_class]
        
        return out

In [20]:
a = model(te)
# a.shape

# a = a.squeeze(1)
# a

# a.shape

# y

# preds = torch.round(torch.sigmoid(a))
# preds
# #     correct = (preds == y).float()
# #     acc = correct.sum()/float(len(correct))

# correct = (preds == y).float()
# correct.sum()/len(correct)

# binary_accuracy(a, y)

NameError: name 'model' is not defined

In [None]:
embedding_dim = 16
vocab_size = len(TEXT.vocab.stoi)
n_classes = 1

model = VDCNN(embedding_dim, vocab_size, n_classes)

In [None]:
criterion = nn.BCEWithLogitsLoss()

optimizer = optim.SGD(model.parameters(), lr = 0.01, momentum=0.9)

model.to(device)
criterion.to(device)

BCEWithLogitsLoss()

In [None]:
print(model)

VDCNN(
  (embedding): Embedding(65, 16)
  (conv_64): Conv1d(16, 64, kernel_size=(3,), stride=(1,), padding=(1,))
  (id_64): ConvolutionalIdentityBlock(
    (conv_1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
    (batch_norm_1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv_2): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
    (batch_norm_2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (res_128): ConvolutionalBlockRes(
    (conv_1): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=(1,))
    (batch_norm_1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv_2): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
    (batch_norm_2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv_res): Conv1d(64, 128, kernel_size=(1,), stride=(2,))
    (batch_norm_res): BatchNorm1d(128, eps=1

In [None]:
def binary_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """

    preds = torch.round(torch.sigmoid(preds))
    correct = (preds == y).float()
    acc = correct.sum()/float(len(correct))
    return acc

In [None]:
def train(model, iterator, optimizer, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.train()
    bar = pyprind.ProgBar(len(iterator), bar_char='█')
#     bar = pyprind.ProgBar(100, bar_char='█')

    for i, batch in enumerate(iterator):
        
        optimizer.zero_grad()
                
        predictions = model(batch.text).squeeze(1)
#         print(predictions.shape, batch.Label.shape, model(batch.Text).shape)
        loss = criterion(predictions, batch.label)
#         print(loss.shape)
        acc = binary_accuracy(predictions, batch.label)
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        bar.update()
        
#         if i > 100:
#             break
#         break
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [None]:
def evaluate(model, iterator, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.eval()
    
    with torch.no_grad():
        bar = pyprind.ProgBar(len(iterator), bar_char='█')
#         bar = pyprind.ProgBar(100, bar_char='█')
        for i, batch in enumerate(iterator):

            predictions = model(batch.text).squeeze(1)
            
            loss = criterion(predictions, batch.label)
            
            acc = binary_accuracy(predictions, batch.label)

            epoch_loss += loss.item()
            epoch_acc += acc.item()
            bar.update()
#             if i > 100:
#                 break
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [None]:
# te.shape

# out = model(te).squeeze(1)
# out.shape

# loss = criterion(out, y)
# loss.shape

# loss

# sig = torch.sigmoid(out)
# sig

# preds = torch.round(sig)
# preds.shape

# preds

# correct = (preds == y).float()
# correct

In [None]:
N_EPOCHS = 50

for epoch in range(N_EPOCHS):

    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, test_iterator, criterion)
    
    print(f'| Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}% | Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}% |')

0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:13:33
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 01 | Train Loss: 0.745 | Train Acc: 50.25% | Val. Loss: 0.692 | Val. Acc: 51.19% |



Total time elapsed: 00:06:31
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:13:27
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 02 | Train Loss: 0.592 | Train Acc: 65.65% | Val. Loss: 0.679 | Val. Acc: 69.44% |



Total time elapsed: 00:06:33
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:13:28
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 03 | Train Loss: 0.426 | Train Acc: 80.20% | Val. Loss: 0.777 | Val. Acc: 66.37% |



Total time elapsed: 00:06:34
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:13:28
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 04 | Train Loss: 0.384 | Train Acc: 82.83% | Val. Loss: 0.480 | Val. Acc: 77.60% |



Total time elapsed: 00:06:35
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:13:29
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 05 | Train Loss: 0.351 | Train Acc: 84.64% | Val. Loss: 0.400 | Val. Acc: 81.86% |



Total time elapsed: 00:06:34
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:13:29
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 06 | Train Loss: 0.337 | Train Acc: 85.43% | Val. Loss: 0.371 | Val. Acc: 83.31% |



Total time elapsed: 00:06:36
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:13:29
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 07 | Train Loss: 0.312 | Train Acc: 86.55% | Val. Loss: 0.370 | Val. Acc: 83.40% |



Total time elapsed: 00:06:35
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:13:29
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 08 | Train Loss: 0.297 | Train Acc: 87.42% | Val. Loss: 0.388 | Val. Acc: 82.45% |



Total time elapsed: 00:06:34
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:13:30
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 09 | Train Loss: 0.282 | Train Acc: 88.23% | Val. Loss: 0.378 | Val. Acc: 83.47% |



Total time elapsed: 00:06:35
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:13:30
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 10 | Train Loss: 0.270 | Train Acc: 88.87% | Val. Loss: 0.541 | Val. Acc: 75.53% |



Total time elapsed: 00:06:36
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:13:31
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 11 | Train Loss: 0.252 | Train Acc: 89.54% | Val. Loss: 0.371 | Val. Acc: 83.08% |



Total time elapsed: 00:06:36
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:13:30
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 12 | Train Loss: 0.243 | Train Acc: 90.15% | Val. Loss: 0.399 | Val. Acc: 82.74% |



Total time elapsed: 00:06:36
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:13:31
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 13 | Train Loss: 0.229 | Train Acc: 90.63% | Val. Loss: 0.410 | Val. Acc: 83.07% |



Total time elapsed: 00:06:36
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:13:30
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 14 | Train Loss: 0.212 | Train Acc: 91.50% | Val. Loss: 0.432 | Val. Acc: 82.35% |



Total time elapsed: 00:06:36
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:13:30
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 15 | Train Loss: 0.201 | Train Acc: 91.95% | Val. Loss: 0.439 | Val. Acc: 82.90% |



Total time elapsed: 00:06:35
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:13:31
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 16 | Train Loss: 0.191 | Train Acc: 92.29% | Val. Loss: 0.473 | Val. Acc: 82.05% |



Total time elapsed: 00:06:36
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:13:31
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 17 | Train Loss: 0.179 | Train Acc: 92.91% | Val. Loss: 0.571 | Val. Acc: 79.75% |



Total time elapsed: 00:06:37
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:13:31
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 18 | Train Loss: 0.172 | Train Acc: 93.20% | Val. Loss: 0.434 | Val. Acc: 82.11% |



Total time elapsed: 00:06:37
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:13:31
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 19 | Train Loss: 0.157 | Train Acc: 93.75% | Val. Loss: 0.546 | Val. Acc: 83.01% |



Total time elapsed: 00:06:38
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:13:31
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 20 | Train Loss: 0.151 | Train Acc: 94.02% | Val. Loss: 0.541 | Val. Acc: 80.87% |



Total time elapsed: 00:06:35
0% [██████████████████████████████] 100% | ETA: 00:00:00
Total time elapsed: 00:13:31
0% [██████████████████████████████] 100% | ETA: 00:00:00

| Epoch: 21 | Train Loss: 0.139 | Train Acc: 94.64% | Val. Loss: 0.579 | Val. Acc: 83.00% |



Total time elapsed: 00:06:37
0% [██                            ] 100% | ETA: 00:12:41

In [None]:
test_loss, test_acc = evaluate(model, test_iterator, criterion)

print(f'| Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}% |')

In [78]:
def predict_sentiment(sentence):
    tokenized = tokenize(sentence)
    indexed = [TEXT.vocab.stoi[t] for t in tokenized]
    tensor = torch.LongTensor(indexed).to(device)
    
    tensor = tensor.unsqueeze(1)
#     print(tensor.shape)
    prediction = model(tensor).squeeze(1)
#     print(prediction)
    preds, ind= torch.round(torch.sigmoid(tensor))
#     print(preds)
    return preds, ind

In [79]:
# text = "My voice range is A2-C5. My chest voice goes up to F4. Included sample in my higher chest range. What is my voice type?"
# predict_sentiment(text)