In [1]:
import torch
from torchtext import data
from torchtext import datasets
import spacy

In [2]:
# Seed PyTorch's RNG up front so runs are repeatable.
SEED = 42
torch.manual_seed(SEED)

# Review text: tokenised with spaCy. include_lengths=True makes each batch's
# `text` attribute a (token_ids, lengths) pair, which the training loop unpacks.
text_field = data.Field(
    tokenize='spacy',
    include_lengths=True,
)

# Labels are stored as floats because they are fed to BCEWithLogitsLoss.
label_field = data.LabelField(
    dtype=torch.float,
)

In [3]:
# Load the IMDB train/test splits, processed through the two fields above.
train_data, test_data = datasets.IMDB.splits(
    text_field=text_field,
    label_field=label_field,
)

In [4]:
import random

# random.seed() returns None, so it cannot be used as the `random_state`
# argument. Seed the RNG first, then hand split() an explicit RNG state so the
# train/validation partition is reproducible by construction.
random.seed(SEED)
train_data, valid_data = train_data.split(random_state=random.getstate())

In [10]:
# Vocabulary cap and the name of the pretrained vectors to attach to it.
MAX_VOCAB_SIZE = 5_000
EMBEDDINGS_FILE = 'glove.6B.50d'

# Build the text vocabulary from the training split only; `unk_init` is set to
# an in-place normal initialiser.
text_field.build_vocab(
    train_data,
    max_size=MAX_VOCAB_SIZE,
    vectors=EMBEDDINGS_FILE,
    unk_init=torch.Tensor.normal_,
)

# The label vocabulary is likewise derived from the training split.
label_field.build_vocab(train_data)

In [11]:
from argparse import Namespace

# Single container for every setting used by the model and the training loop.
args = Namespace(
    # Data and Path hyper parameters
    embeddings_file='imdb_embeddings.pkl',
    model_state_file='imdb_model.torch',
    log_file='imdb.log',
    train_state_file='train_state.json',
    save_dir='.save/imdb/',
    PAD_IDX=text_field.vocab.stoi[text_field.pad_token],
    UNK_IDX=text_field.vocab.stoi[text_field.unk_token],
    # Model hyper parameters
    input_dim=len(text_field.vocab),
    # Read the width from the loaded vectors instead of hard-coding 50, so it
    # cannot drift from whatever EMBEDDINGS_FILE selects.
    embedding_dim=text_field.vocab.vectors.shape[1],
    hidden_dim=100,
    output_dim=1,
    num_layers=1,
    # Training hyper parameter
    # Reuse the notebook-level SEED rather than repeating the literal.
    seed=SEED,
    learning_rate=0.001,
    dropout_p=0.,
    batch_size=64,
    num_epochs=5,
    early_stopping_criteria=5,
    # Runtime option
    reload_from_files=True,
    expand_filepaths_to_save_dir=True,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
)

# One bucketing iterator per split, all sharing the batch size and device.
# sort_within_batch=True is requested for every iterator.
dataset_splits = (train_data, valid_data, test_data)
train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    dataset_splits,
    batch_size=args.batch_size,
    sort_within_batch=True,
    device=args.device,
)


# Take the vocabulary's pretrained vectors and blank out the rows of the two
# special tokens (unknown first, then padding). This writes into the same
# tensor object held by text_field.vocab.
pretrained_embeddings = text_field.vocab.vectors
for special_idx in (args.UNK_IDX, args.PAD_IDX):
    pretrained_embeddings[special_idx] = torch.zeros(args.embedding_dim)

In [12]:
from models import BiLSTM

# Build the classifier and move it to the device the iterators place their
# batches on; without this the model stays on the CPU while batches may be on
# CUDA, which fails at the first forward pass.
model = BiLSTM(
    args.embedding_dim,
    args.hidden_dim,
    args.output_dim,
    args.num_layers,
    pretrained_embeddings,
    args.dropout_p,
    args.PAD_IDX
).to(args.device)

In [13]:
import torch.nn as nn
import torch.optim as optim

# Binary cross-entropy computed on raw logits (the sigmoid is part of the loss).
loss_func = nn.BCEWithLogitsLoss()

# Adam over every model parameter, using the configured learning rate.
optimizer = optim.Adam(
    model.parameters(),
    lr=args.learning_rate,
)

In [14]:
import torch
from data_utils import (json_dump, generate_batches)


def make_train_state(args):
    """Create the initial bookkeeping dictionary for a training run.

    :param args: main arguments; `learning_rate` and `model_state_file` are read
    :returns:
        a fresh train_state dictionary with empty metric histories
    """
    state = dict(
        stop_early=False,
        early_stopping_step=0,
        early_stopping_best_val=1e8,
        learning_rate=args.learning_rate,
        epoch_index=0,
    )

    # Every per-epoch history starts as its own empty list.
    for metric_name in ('train_loss', 'train_acc', 'val_loss', 'val_acc'):
        state[metric_name] = []

    # -1 marks the test metrics as "not evaluated yet".
    state.update(
        test_loss=-1,
        test_acc=-1,
        model_filename=args.model_state_file,
    )
    return state


def update_train_state(args, model, train_state):
    """Handle the training state updates.

    Components:
     - Early Stopping: Prevent overfitting.
     - Model Checkpoint: Model is saved if the model is better

    The best validation loss seen so far is kept in
    train_state['early_stopping_best_val']; it is updated every time a
    checkpoint is written, so later epochs are compared against the best
    epoch rather than against the initial sentinel value.

    :param args: main arguments; `early_stopping_criteria` is read
    :param model: model to train; its state_dict is what gets checkpointed
    :param train_state: a dictionary representing the training state values
    :returns:
        a new train_state (the same dictionary, updated in place)
    """
    val_losses = train_state['val_loss']

    # Save one model at least
    if train_state['epoch_index'] == 0:
        torch.save(model.state_dict(), train_state['model_filename'])
        # The checkpoint on disk now corresponds to this epoch, so its loss
        # becomes the reference the following epochs have to beat.
        if val_losses:
            train_state['early_stopping_best_val'] = val_losses[-1]
        train_state['stop_early'] = False

    # Save model if performance improved
    elif train_state['epoch_index'] >= 1:
        loss_t = val_losses[-1]

        # If loss did not improve on the best value so far
        if loss_t >= train_state['early_stopping_best_val']:
            # Update step
            train_state['early_stopping_step'] += 1
        # Loss decreased
        else:
            # Save the best model and remember the loss it achieved
            torch.save(model.state_dict(), train_state['model_filename'])
            train_state['early_stopping_best_val'] = loss_t

            # Reset early stopping step
            train_state['early_stopping_step'] = 0

        # Stop early ?
        train_state['stop_early'] = \
            train_state['early_stopping_step'] >= args.early_stopping_criteria

    return train_state


def dump_train_state_to_json(train_state, path):
    """Write a summary of the training run through `json_dump`.

    The summary holds the number of epochs (last epoch index + 1) followed by
    the train/val/test loss and accuracy entries copied from `train_state`.

    :param train_state: a dictionary representing the training state values
    :param path: destination handed to `json_dump`
    """
    copied_keys = ('train_loss', 'train_acc',
                   'val_loss', 'val_acc',
                   'test_loss', 'test_acc')

    summary = {'epochs': train_state['epoch_index'] + 1}
    for key in copied_keys:
        summary[key] = train_state[key]

    json_dump(summary, path)


def compute_accuracy_binary(y_pred, y_target):
    """Accuracy (in percent) of binary logits against 0/1 targets.

    A logit counts as class 1 when its sigmoid exceeds 0.5. Both tensors are
    flattened before the comparison, so a 0-d prediction (a batch of one after
    `.squeeze()`) and an (N, 1) prediction paired with an (N,) target are
    scored element by element instead of failing or being broadcast into an
    N x N comparison.

    :param y_pred: tensor of raw (pre-sigmoid) scores
    :param y_target: tensor of targets with the same number of elements
    :returns:
        percentage of matching predictions, or 0.0 for an empty batch
    """
    y_target = y_target.detach().cpu().reshape(-1)
    y_pred_indices = (torch.sigmoid(y_pred.detach()) > 0.5).cpu().long().reshape(-1)

    n_total = y_pred_indices.numel()
    if n_total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    n_correct = torch.eq(y_pred_indices, y_target).sum().item()
    return n_correct / n_total * 100


def compute_accuracy(y_pred, y_target):
    """Accuracy (in percent) of multi-class scores against class indices.

    The predicted class of each row is the position of its largest score
    along dim 1.

    :param y_pred: 2-d tensor of per-class scores, one row per example
    :param y_target: tensor of target class indices, one per row
    :returns:
        percentage of rows whose predicted class equals the target
    """
    predicted_classes = y_pred.max(dim=1)[1]
    hits = (predicted_classes == y_target).sum().item()
    total = len(predicted_classes)
    return hits / total * 100





def run(args, model, loss_func, optimizer, iterator):
    """Train `model` for up to `args.num_epochs` epochs with validation.

    Each epoch runs one pass over iterator['train'] (with gradient updates)
    and one pass over iterator['valid'] (no gradient tracking), records the
    running-mean loss/accuracy of both passes in the train state, and then
    lets `update_train_state` checkpoint the model and decide on early
    stopping.

    :param args: main arguments; `num_epochs` is read here, the rest is
        consumed by `make_train_state` / `update_train_state`
    :param model: model to train; called as model(x_in, lengths)
    :param loss_func: callable taking (predictions, labels), returning a loss
    :param optimizer: optimizer over the model's parameters
    :param iterator: mapping with 'train' and 'valid' entries, each yielding
        batches that expose `.text` as an (inputs, lengths) pair and `.label`
    :returns:
        the train_state dictionary holding the per-epoch histories
    """

    train_state = make_train_state(args)

    for epoch_index in range(args.num_epochs):
        train_state['epoch_index'] = epoch_index

        print('Training started...')

        running_loss = 0.0
        running_acc = 0.0
        model.train()

        for batch_index, batch in enumerate(iterator['train']):
            # 5 step training routine

            # --------------------------------------
            # 1) zero the gradients
            print('Zero the gradients...')
            optimizer.zero_grad()

            # 2) compute the output
            print('Compute the output...')
            x_in, lengths = batch.text
            # NOTE(review): .squeeze() removes every size-1 dimension, so a
            # batch containing a single example loses its batch dimension too.
            y_pred = model(x_in, lengths).squeeze()

            # 3) compute the loss
            print('Compute the loss...')
            loss = loss_func(y_pred, batch.label)
            loss_t = loss.item()
            # incremental mean over the batches seen so far in this epoch
            running_loss += (loss_t - running_loss) / (batch_index + 1)

            # 4) use loss to produce gradients
            print('Backward...')
            loss.backward()

            # 5) use optimizer to take gradient step
            print('Gradient step...')
            optimizer.step()
            # -----------------------------------------

            # compute the accuracy
            print('Computing accuracy...')
            acc_t = compute_accuracy_binary(y_pred, batch.label)
            running_acc += (acc_t - running_acc) / (batch_index + 1)
            print(f'Epoch {epoch_index+1}/{args.num_epochs} | '
                        f'batch index: {batch_index+1} | '
                        f'train_loss = {running_loss}; train_acc = {running_acc}\n')

        train_state['train_loss'].append(running_loss)
        train_state['train_acc'].append(running_acc)
        print(f'Epoch {epoch_index+1}/{args.num_epochs} | '
                    f'train_loss = {running_loss}; train_acc = {running_acc}\n')

        # Iterate over val dataset

        # setup: set loss and acc to 0; set eval mode on
        running_loss = 0.
        running_acc = 0.
        model.eval()

        # No parameter update happens here, so skip gradient tracking.
        with torch.no_grad():
            # The loop variable must be `batch`: the body reads batch.text and
            # batch.label, and any other name would silently re-score the last
            # training batch instead of the validation data.
            for batch_index, batch in enumerate(iterator['valid']):

                # compute the output
                x_in, lengths = batch.text
                y_pred = model(x_in, lengths).squeeze()

                # compute the loss
                loss = loss_func(y_pred, batch.label)
                loss_t = loss.item()
                running_loss += (loss_t - running_loss) / (batch_index + 1)

                # compute the accuracy
                acc_t = compute_accuracy_binary(y_pred, batch.label)
                running_acc += (acc_t - running_acc) / (batch_index + 1)

        train_state['val_loss'].append(running_loss)
        train_state['val_acc'].append(running_acc)
        print(f'Epoch {epoch_index+1}/{args.num_epochs} | '\
                    f'val_loss = {running_loss}; val_acc = {running_acc}\n')

        train_state = update_train_state(args=args, model=model,
                                         train_state=train_state)

        if train_state['stop_early']:
            break

    # Hand the collected histories back so they are not lost after training.
    return train_state

In [None]:
# Bundle the iterators under their split names; run() reads the 'train' and
# 'valid' entries.
iterator = {
    'train': train_iterator,
    'test': test_iterator,
    'valid': valid_iterator,
}
run(args, model, loss_func, optimizer, iterator)

Training started...
Zero the gradients...
Compute the output...
Compute the loss...
Backward...
