In [1]:
# utils 
import torch

# data 
from torchtext.datasets import imdb
from torchtext.data import Field, BucketIterator

# model 
import torch.nn as nn
import torch.nn.functional as F

# training 
import torch.optim as optim
import tqdm

In [2]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [3]:
# Show which device was selected in the previous cell.
device

device(type='cuda', index=0)

### Data Preparation

In [4]:
# Review text: lower-cased, tokenized with spaCy, batch dimension first.
text = Field(tokenize="spacy", lower=True, batch_first=True)

# Sentiment label, declared as the target field; also batch-first.
# NOTE(review): this is a plain Field rather than a dedicated label field, so the
# label vocabulary presumably also contains special tokens — confirm against the
# n_classes value used by the model.
label = Field(batch_first=True, is_target=True)

In [5]:
# Download the IMDB dataset and build the two splits using the fields declared above.
# NOTE(review): IMDB.splits presumably returns the (train, test) pair, in which
# case `val` is the official test split — confirm before reporting metrics.
train, val = imdb.IMDB.splits(text_field=text, label_field=label)

downloading aclImdb_v1.tar.gz


aclImdb_v1.tar.gz: 100%|██████████| 84.1M/84.1M [00:08<00:00, 9.68MB/s]


In [6]:
# Vocabularies are built from the training split only.
# Every label value is kept; text tokens need a frequency of at least 2.
label.build_vocab(train)
text.build_vocab(train, min_freq=2)

In [7]:
# Batch size shared by the training and validation iterators.
BATCH_SIZE = 64

# One BucketIterator per split; batches are created directly on `device`.
train_loader, val_loader = BucketIterator.splits(
    (train, val),
    batch_sizes=(BATCH_SIZE,) * 2,
    device=device,
)

In [None]:
# Pull a single batch from the training iterator and unpack it into inputs and targets.
x, y = next(iter(train_loader))

In [None]:
# Shapes of the sampled batch: inputs first, then targets.
print(f"{x.shape} {y.shape}")

torch.Size([64, 989]) torch.Size([64, 1])


In [None]:
# Peek at the first batch only, this time through its named attributes.
for batch in train_loader:
    print(batch.text.shape, batch.label.shape, sep="\n")
    break

torch.Size([64, 1607])
torch.Size([64, 1])


In [None]:
# Number of batches in one pass over the training iterator.
len(train_loader)

391

### Model

In [8]:
class GRU(nn.Module):
    """Bidirectional multi-layer GRU classifier over token-id sequences.

    Token ids are embedded and run through the GRU; the final hidden states of
    all layers and both directions are averaged into one vector per example,
    which a linear layer maps to class scores.

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each token embedding.
        hidden_size: GRU hidden size per direction.
        n_classes: number of scores produced per example.
        dropout: dropout applied between stacked GRU layers.
        num_layers: number of stacked GRU layers.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_size, n_classes = 4, dropout = 0.15, num_layers = 4):

        super(GRU, self).__init__()

        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)
        self.gru = nn.GRU(
            input_size=embedding_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout,
            bidirectional=True,
            batch_first=True,
        )

        # forward() averages over the layer/direction axis, so the classifier
        # input keeps size hidden_size (not 2 * hidden_size).
        self.fc = nn.Linear(in_features=hidden_size, out_features=n_classes)

    def forward(self, x, hidden=None):
        """Compute unnormalized class scores (logits) for a batch of sequences.

        Args:
            x: integer tensor of token ids, batch dimension first.
            hidden: optional initial hidden state of shape
                [2 * num_layers, batch, hidden_size]; None lets the GRU start
                from zeros.

        Returns:
            Tensor of shape [batch, n_classes] holding raw logits. No softmax is
            applied here because F.cross_entropy applies log-softmax itself;
            normalizing twice flattens the loss and its gradients. Use
            F.softmax(logits, dim=1) when probabilities are needed.
        """
        embedded = self.embedding(x)

        # NOTE(review): the sequence is not packed, so if x is padded the final
        # hidden state also reflects the padding positions.
        _, hidden = self.gru(embedded, hidden)

        # hidden is [2 * num_layers, batch, hidden_size]; average that first axis.
        hidden_mean = hidden.mean(dim=0)

        return self.fc(hidden_mean)
        

### Training 

In [11]:
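# Requires pytorch-lightning. If it is missing, run `%pip install pytorch-lightning`.
# NOTE(review): the Trainer arguments used below appear to belong to an older (pre-1.0) release — pin the version accordingly.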

In [12]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint

In [18]:
def accuracy(y, y_):
    """Fraction of predictions in ``y_`` that equal the targets in ``y``.

    Both tensors are flattened before comparing, so targets shaped
    ``[batch, 1]`` and predictions shaped ``[batch]`` are matched element by
    element rather than broadcast against each other. The comparison is a
    single vectorized operation instead of a Python loop over samples.

    Args:
        y: tensor of target class indices.
        y_: tensor of predicted class indices with as many elements as ``y``.

    Returns:
        A float32 tensor of shape ``[1]`` on the same device as the inputs.
        An empty batch yields NaN.

    Raises:
        ValueError: if ``y`` and ``y_`` hold a different number of elements.
    """
    targets = y.reshape(-1)
    predictions = y_.reshape(-1)
    if targets.numel() != predictions.numel():
        raise ValueError(
            "accuracy expects as many predictions as targets, got "
            f"{predictions.numel()} and {targets.numel()}"
        )
    # reshape(1) keeps the one-element tensor shape callers already stack.
    return (targets == predictions).float().mean().reshape(1)

In [19]:
class Model(pl.LightningModule):
    """LightningModule that wraps the GRU classifier with its train/validation logic.

    Batches come from the notebook-level ``train_loader`` and ``val_loader``
    iterators and are expected to expose ``.text`` and ``.label`` attributes.

    Args:
        vocab_size, embedding_dim, hidden_size, n_classes, dropout, num_layers:
            forwarded unchanged to ``GRU``.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_size, n_classes = 4, dropout = 0.15, num_layers = 4):

        super(Model, self).__init__()
        # initialize model
        self.model = GRU(
            vocab_size=vocab_size,
            embedding_dim=embedding_dim,
            hidden_size=hidden_size,
            n_classes=n_classes,
            dropout=dropout,
            num_layers=num_layers
        )

    def forward(self, x):
        """Return the wrapped GRU's output for a batch of token ids."""
        return self.model(x)

    def configure_optimizers(self):
        """Adam over all parameters with learning rate 1e-3."""
        return optim.Adam(params=self.parameters(), lr=1e-3)

    def train_dataloader(self):
        """Return the notebook-level training iterator."""
        return train_loader

    def val_dataloader(self):
        """Return the notebook-level validation iterator."""
        return val_loader

    def _loss_and_accuracy(self, batch):
        """Compute cross-entropy loss and accuracy for one batch (shared by both steps)."""
        x, y = batch.text, batch.label
        outputs = self(x)
        # F.cross_entropy applies log-softmax internally.
        # NOTE(review): confirm the wrapped model does not already normalize its scores.
        loss = F.cross_entropy(outputs, y.view(-1))
        acc = accuracy(y, torch.argmax(outputs, dim=1))
        return loss, acc

    def training_step(self, batch, batch_idx):
        """Return the loss, the accuracy and the values to log for one training batch."""
        loss, acc = self._loss_and_accuracy(batch)
        tensorboard_logs = {'train_loss': loss, 'train_acc': acc}
        return {'loss': loss, 'acc': acc, 'log': tensorboard_logs}

    def validation_step(self, batch, batch_idx):
        """Return the loss and accuracy for one validation batch."""
        loss, acc = self._loss_and_accuracy(batch)
        return {"val_loss": loss, "val_acc": acc}

    def validation_epoch_end(self, outputs):
        """Average the per-batch validation metrics and log both of them.

        The averages are unweighted means over batches, so a smaller final
        batch counts as much as a full one.
        """
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        # Previously computed but wrapped in a list and discarded; now reported.
        avg_acc = torch.stack([x['val_acc'] for x in outputs]).mean()
        tensorboard_logs = {'val_loss': avg_loss, 'val_acc': avg_acc}
        return {'val_loss': avg_loss, 'val_acc': avg_acc, 'log': tensorboard_logs}

In [20]:
# The vocabulary built from the training text fixes the embedding table size;
# n_classes, dropout and num_layers keep the Model defaults.
model = Model(
    hidden_size=64,
    embedding_dim=100,
    vocab_size=len(text.vocab),
)

In [28]:
# Checkpoint callback writing under the "./model.pth" path.
checkpoint = ModelCheckpoint(filepath="./model.pth")

# Early stopping watches val_loss with a minimum improvement threshold of 0.05.
early_stopping = EarlyStopping(min_delta=0.05, monitor='val_loss')


In [29]:
# Train for at most 5 epochs with the checkpoint and early-stopping callbacks.
# GPU 0 is requested only when CUDA is actually available, mirroring how
# `device` is chosen for the data iterators; otherwise Lightning runs on the CPU
# instead of failing on a machine without a GPU.
trainer = pl.Trainer(
    max_epochs=5,
    gpus=[0] if torch.cuda.is_available() else None,
    show_progress_bar=True,
    checkpoint_callback=checkpoint,
    early_stop_callback=early_stopping
  )

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0]


In [31]:
# Run training; batches are supplied by the dataloader hooks defined on the model.
trainer.fit(model)


  | Name  | Type | Params
-------------------------------
0 | model | GRU  | 5 M   


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…



HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…




1