In [1]:
# utils 
import torch

# data 
from torchtext import datasets
from torchtext.data import Field, LabelField, BucketIterator

# model
import torch.nn as nn
import torch.nn.functional as F

# training
import torch.optim as optim
import tqdm

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
device

device(type='cuda', index=0)

### Data Preparation

In [4]:
# creating text and label fields
TEXT = Field(lower=True, tokenize="spacy", batch_first=True)
LABEL = LabelField(batch_first=True, dtype=torch.float)

In [5]:
# download the dataset
train, val = datasets.IMDB.splits(text_field=TEXT, label_field=LABEL)

In [6]:
# build the vocabulary
TEXT.build_vocab(train)
LABEL.build_vocab(train)

In [7]:
# data loaders
BATCH_SIZE = 128
train_loader, val_loader = BucketIterator.splits(
    datasets=(train, val),
    batch_sizes=(BATCH_SIZE, BATCH_SIZE),
    device=device
    
)

In [8]:
for batch in train_loader:
  print(type(batch))
  break

<class 'torchtext.data.batch.Batch'>


### Model Building

In [12]:
class Model(nn.Module):
    def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim, 
                 dropout, pad_idx):
        
        super(Model, self).__init__()
                
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx = pad_idx)
        
        self.convs = nn.ModuleList([
                                    nn.Conv2d(in_channels = 1, 
                                              out_channels = n_filters, 
                                              kernel_size = (fs, embedding_dim)) 
                                    for fs in filter_sizes
                                    ])
        
        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, text):
                
        #text = [batch size, sent len]
        
        embedded = self.embedding(text)
                
        #embedded = [batch size, sent len, emb dim]
        
        embedded = embedded.unsqueeze(1)
        
        #embedded = [batch size, 1, sent len, emb dim]
        
        conved = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs]
            
        #conved_n = [batch size, n_filters, sent len - filter_sizes[n] + 1]
                
        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
        
        #pooled_n = [batch size, n_filters]
        
        cat = self.dropout(torch.cat(pooled, dim = 1))

        #cat = [batch size, n_filters * len(filter_sizes)]
            
        return self.fc(cat)

In [62]:
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
N_FILTERS = 100
FILTER_SIZES = [3,4,5]
OUTPUT_DIM = 1
DROPOUT = 0.5
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

In [63]:
model = Model(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX).to(device)

In [64]:
out = model(batch.text)

In [65]:
print(out.shape)

torch.Size([128, 1])


### Training

In [66]:
def accuracy(y, y_):
  correct = 0
  for i in range(y.shape[0]):
    if y[i]==y_[i]:
      correct += 1
  return correct/y.shape[0]

In [67]:
def eval(model, data, criterion):
  losses = []
  acc = []
  
  with torch.no_grad():
    for batch in data:
      text, label = batch.text, batch.label
      outputs = model(text)
      l = criterion(outputs.view(-1), label.view(-1))
      a = accuracy(label, outputs.view(-1) > 0.5)
      losses.append(l.item())
      acc.append(a)
    
  return sum(losses)/len(losses), sum(acc)/len(acc)


In [68]:
lr = 1e-3
EPOCHS = 10

In [69]:
# optimizer and criterion
criterion = nn.MSELoss().to(device)
optimizer = optim.Adam(params=model.parameters(), lr=lr)

In [70]:
epoch_progress = tqdm.tqdm(total=EPOCHS, desc="Epoch", position=0)
total_steps = len(train_loader)
steps = 0
for epoch in range(EPOCHS):

  epoch_loss = []
  epoch_acc = []
  step_progress = tqdm.tqdm(total=len(train_loader), desc="Epoch", position=0)
  for batch in train_loader:
    text, label = batch.text, batch.label
    outputs = model(text)

    optimizer.zero_grad()
    loss = criterion(outputs.view(-1), label.view(-1))
    loss.backward()
    optimizer.step()

    acc = accuracy(label, outputs.view(-1) > 0.5)

    if steps%100==0:
      print(f'Step {steps}/{len(train_loader)*EPOCHS} | Train_loss {loss.item():.4f} | Train_acc {acc:.4f}')


    epoch_loss.append(loss.item())
    epoch_acc.append(acc)
    steps +=1 
    step_progress.update(1)



  avg_loss = sum(epoch_loss)/len(epoch_loss)
  avg_acc = sum(epoch_acc)/len(epoch_acc)

  v_loss, v_acc = eval(model, val_loader, criterion)

  print(f'Epoch {epoch}/{EPOCHS} | Train_loss {avg_loss:.4f} | Train_acc {avg_acc:.4f} | Val_loss {v_loss:.4f} | Val_acc {v_acc:.4f}')
  epoch_progress.update(1)


Epoch:   1%|          | 2/196 [00:00<00:34,  5.66it/s]

Step 0/1960 | Train_loss 1.1310 | Train_acc 0.5078


Epoch:  52%|█████▏    | 102/196 [00:11<00:09,  9.67it/s]

Step 100/1960 | Train_loss 0.3305 | Train_acc 0.5703


Epoch:   1%|          | 1/196 [00:00<00:26,  7.41it/s]

Epoch 0/10 | Train_loss 0.4283 | Train_acc 0.5633 | Val_loss 0.2315 | Val_acc 0.6263


Epoch:   3%|▎         | 6/196 [00:00<00:21,  8.80it/s]

Step 200/1960 | Train_loss 0.2332 | Train_acc 0.6719


Epoch:  55%|█████▍    | 107/196 [00:11<00:09,  9.40it/s]

Step 300/1960 | Train_loss 0.1934 | Train_acc 0.7266


Epoch:   1%|          | 1/196 [00:00<00:36,  5.32it/s]

Epoch 1/10 | Train_loss 0.2066 | Train_acc 0.6800 | Val_loss 0.1987 | Val_acc 0.6946


Epoch:   5%|▌         | 10/196 [00:01<00:21,  8.77it/s]

Step 400/1960 | Train_loss 0.1833 | Train_acc 0.7578


Epoch:  57%|█████▋    | 111/196 [00:12<00:08,  9.90it/s]

Step 500/1960 | Train_loss 0.1541 | Train_acc 0.7969


Epoch:   1%|          | 1/196 [00:00<00:28,  6.73it/s]

Epoch 2/10 | Train_loss 0.1836 | Train_acc 0.7274 | Val_loss 0.1820 | Val_acc 0.7293


Epoch:   8%|▊         | 15/196 [00:01<00:18,  9.58it/s]

Step 600/1960 | Train_loss 0.1386 | Train_acc 0.7734


Epoch:  58%|█████▊    | 114/196 [00:12<00:08,  9.79it/s]

Step 700/1960 | Train_loss 0.1790 | Train_acc 0.7031


Epoch:   1%|          | 1/196 [00:00<00:28,  6.77it/s]

Epoch 3/10 | Train_loss 0.1682 | Train_acc 0.7606 | Val_loss 0.1821 | Val_acc 0.7325


Epoch:   9%|▉         | 18/196 [00:01<00:19,  9.33it/s]

Step 800/1960 | Train_loss 0.1728 | Train_acc 0.7500


Epoch:  61%|██████    | 119/196 [00:12<00:07,  9.89it/s]

Step 900/1960 | Train_loss 0.1778 | Train_acc 0.7812


Epoch:   1%|          | 1/196 [00:00<00:24,  7.95it/s]

Epoch 4/10 | Train_loss 0.1566 | Train_acc 0.7843 | Val_loss 0.1624 | Val_acc 0.7697


Epoch:  11%|█         | 22/196 [00:02<00:19,  9.07it/s]

Step 1000/1960 | Train_loss 0.1552 | Train_acc 0.7578


Epoch:  63%|██████▎   | 123/196 [00:13<00:07,  9.75it/s]

Step 1100/1960 | Train_loss 0.1625 | Train_acc 0.7812


Epoch:   1%|          | 1/196 [00:00<00:30,  6.41it/s]

Epoch 5/10 | Train_loss 0.1422 | Train_acc 0.8124 | Val_loss 0.1539 | Val_acc 0.7879


Epoch:  13%|█▎        | 26/196 [00:03<00:19,  8.87it/s]

Step 1200/1960 | Train_loss 0.1225 | Train_acc 0.8750


Epoch:  64%|██████▍   | 126/196 [00:14<00:07,  9.31it/s]

Step 1300/1960 | Train_loss 0.1173 | Train_acc 0.8828


Epoch:   1%|          | 1/196 [00:00<00:25,  7.63it/s]

Epoch 6/10 | Train_loss 0.1316 | Train_acc 0.8317 | Val_loss 0.1519 | Val_acc 0.7910


Epoch:  15%|█▌        | 30/196 [00:03<00:21,  7.75it/s]

Step 1400/1960 | Train_loss 0.1233 | Train_acc 0.8516


Epoch:  66%|██████▋   | 130/196 [00:14<00:06,  9.44it/s]

Step 1500/1960 | Train_loss 0.1123 | Train_acc 0.8828


Epoch:   1%|          | 1/196 [00:00<00:28,  6.77it/s]

Epoch 7/10 | Train_loss 0.1222 | Train_acc 0.8502 | Val_loss 0.1443 | Val_acc 0.8063


Epoch:  17%|█▋        | 34/196 [00:03<00:17,  9.38it/s]

Step 1600/1960 | Train_loss 0.1109 | Train_acc 0.8672


Epoch:  68%|██████▊   | 134/196 [00:14<00:06,  9.09it/s]

Step 1700/1960 | Train_loss 0.1125 | Train_acc 0.8594


Epoch:   1%|          | 1/196 [00:00<00:31,  6.29it/s]

Epoch 8/10 | Train_loss 0.1101 | Train_acc 0.8711 | Val_loss 0.1389 | Val_acc 0.8188


Epoch:  19%|█▉        | 38/196 [00:04<00:17,  9.01it/s]

Step 1800/1960 | Train_loss 0.1200 | Train_acc 0.8750


Epoch:  70%|███████   | 138/196 [00:15<00:06,  9.25it/s]

Step 1900/1960 | Train_loss 0.0963 | Train_acc 0.8906


Epoch: 100%|██████████| 10/10 [09:03<00:00, 54.37s/it]

Epoch 9/10 | Train_loss 0.0994 | Train_acc 0.8972 | Val_loss 0.1369 | Val_acc 0.8222


In [71]:
eval(model, val_loader, criterion)

(0.13683705818744338, 0.823126594387755)