In [None]:
# Print the GPU attached to this runtime (model, driver/CUDA version, memory use).
!nvidia-smi

Sat Sep 19 00:06:36 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.66       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   44C    P8     9W /  70W |      0MiB / 15079MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
!pip install tensorboardX

Collecting tensorboardX
[?25l  Downloading https://files.pythonhosted.org/packages/af/0c/4f41bcd45db376e6fe5c619c01100e9b7531c55791b7244815bac6eac32c/tensorboardX-2.1-py2.py3-none-any.whl (308kB)
[K     |█                               | 10kB 25.4MB/s eta 0:00:01[K     |██▏                             | 20kB 2.8MB/s eta 0:00:01[K     |███▏                            | 30kB 3.8MB/s eta 0:00:01[K     |████▎                           | 40kB 4.1MB/s eta 0:00:01[K     |█████▎                          | 51kB 3.4MB/s eta 0:00:01[K     |██████▍                         | 61kB 3.8MB/s eta 0:00:01[K     |███████▍                        | 71kB 4.1MB/s eta 0:00:01[K     |████████▌                       | 81kB 4.4MB/s eta 0:00:01[K     |█████████▌                      | 92kB 4.8MB/s eta 0:00:01[K     |██████████▋                     | 102kB 4.5MB/s eta 0:00:01[K     |███████████▊                    | 112kB 4.5MB/s eta 0:00:01[K     |████████████▊                   | 122kB 4.

In [None]:
from google.colab import drive
# Mount Google Drive at /content/gdrive. The relative 'gdrive/My Drive/...' paths used
# below only resolve if the working directory is /content -- TODO confirm for your runtime.
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


**Data Loader**

In [None]:
import torch
import torch.utils.data as data
import numpy as np
import json

In [None]:
# Character vocabulary: token -> integer id, plus the inverse lookup used for decoding.
with open('gdrive/My Drive/MemeGenerator/dataset_final_m2/char2idx.json', 'r', encoding = 'UTF-8') as json_file:
    char2idx = json.load(json_file)
idx2char = dict(zip(char2idx.values(), char2idx.keys()))

# Image vocabulary: image name -> integer id, plus the inverse lookup.
with open('gdrive/My Drive/MemeGenerator/dataset_final_m2/img2idx.json', 'r', encoding = 'UTF-8') as json_file:
    img2idx = json.load(json_file)
idx2img = dict(zip(img2idx.values(), img2idx.keys()))

In [None]:
class MemeDataset(data.Dataset):
    """Meme dataset backed by a pre-processed ``.npz`` archive.

    The archive must contain the arrays ``source``, ``label``, ``img`` and
    ``ids``; all four are converted to ``torch.long`` tensors and indexed by
    the same position.

    Each item is the tuple ``(source[i], label[i], img[i], ids[i])``.

    Args:
        data_path (str): Path to .npz file containing pre-processed dataset.
    """
    def __init__(self, data_path):
        super(MemeDataset, self).__init__()

        # NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which
        # can execute arbitrary code -- only point this at archives you created yourself.
        # The context manager closes the underlying zip file as soon as the arrays
        # have been read into memory.
        with np.load(data_path, allow_pickle=True) as dataset:
            self.source = torch.from_numpy(dataset['source']).long()
            self.label = torch.from_numpy(dataset['label']).long()
            self.img = torch.from_numpy(dataset['img']).long()
            self.ids = torch.from_numpy(dataset['ids']).long()

        # Positions that __getitem__ may serve (currently every example).
        self.valid_idxs = list(range(len(self.ids)))

    def __getitem__(self, idx):
        """Return ``(source, label, img, id)`` tensors for the idx-th valid example."""
        idx = self.valid_idxs[idx]
        example = (self.source[idx],
                   self.label[idx],
                   self.img[idx],
                   self.ids[idx])
        return example

    def __len__(self):
        """Number of examples served by this dataset."""
        return len(self.valid_idxs)

In [None]:
# Train and dev data loaders (only the training split is shuffled)
train_dataset = MemeDataset('gdrive/My Drive/MemeGenerator/dataset_final_m2/train.npz')
train_loader = data.DataLoader(
    train_dataset, batch_size=32, shuffle=True, num_workers=4)

dev_dataset = MemeDataset('gdrive/My Drive/MemeGenerator/dataset_final_m2/test.npz')
dev_loader = data.DataLoader(
    dev_dataset, batch_size=32, shuffle=False, num_workers=4)

**Modelling**

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data as data
from torch.optim.lr_scheduler import StepLR, ExponentialLR

import numpy as np
import tqdm
from collections import OrderedDict
from json import dumps
import random
import os
import logging
import queue
import shutil
import string
import json

# Seed Python, NumPy and PyTorch (including every CUDA device) with the same value
# so that weight initialisation and shuffling are repeatable across runs.
random.seed(224)
np.random.seed(224)
torch.manual_seed(224)
torch.cuda.manual_seed_all(224)

from tensorboardX import SummaryWriter

In [None]:
class AverageMeter:
    """Running (sample-weighted) average of a scalar.

    Adapted from:
        > https://github.com/pytorch/examples/blob/master/imagenet/main.py
    """
    def __init__(self):
        self.avg = self.sum = self.count = 0

    def reset(self):
        """Forget everything seen so far."""
        self.__init__()

    def update(self, val, num_samples=1):
        """Fold a new batch-level average into the meter.

        Args:
            val (float): Average value over the new samples.
            num_samples (int): How many samples `val` was averaged over.
        """
        weighted_total = val * num_samples
        self.count += num_samples
        self.sum += weighted_total
        self.avg = self.sum / self.count


class EMA:
    """Maintains an exponential moving average ("shadow" copy) of every
    trainable parameter of a model.

    Args:
        model (torch.nn.Module): Model with parameters whose EMA will be kept.
        decay (float): Decay rate for exponential moving average.
    """
    def __init__(self, model, decay):
        self.decay = decay
        self.shadow = {}
        self.original = {}

        # Start every shadow value at the parameter's current value.
        for name, param in model.named_parameters():
            if not param.requires_grad:
                continue
            self.shadow[name] = param.data.clone()

    def __call__(self, model, num_updates):
        """Blend the model's current parameter values into the shadow copy.

        The effective decay is capped by (1 + num_updates) / (10 + num_updates),
        so early updates weight the live parameters more heavily.
        """
        warmup_cap = (1.0 + num_updates) / (10.0 + num_updates)
        decay = min(self.decay, warmup_cap)
        for name, param in model.named_parameters():
            if not param.requires_grad:
                continue
            assert name in self.shadow
            blended = (1.0 - decay) * param.data + decay * self.shadow[name]
            self.shadow[name] = blended.clone()

    def assign(self, model):
        """Swap the averaged values into the model, remembering the live
        values so that `resume` can put them back.

        Args:
            model (torch.nn.Module): Model to assign parameter values.
        """
        for name, param in model.named_parameters():
            if not param.requires_grad:
                continue
            assert name in self.shadow
            self.original[name] = param.data.clone()
            param.data = self.shadow[name]

    def resume(self, model):
        """Put back the parameter values that were saved by the most recent
        call to `assign`.

        Args:
            model (torch.nn.Module): Model to assign parameter values.
        """
        for name, param in model.named_parameters():
            if not param.requires_grad:
                continue
            assert name in self.shadow
            param.data = self.original[name]


class CheckpointSaver:
    """Class to save and load model checkpoints.

    Save the best checkpoints as measured by a metric value passed into the
    `save` method. Overwrite checkpoints with better checkpoints once
    `max_checkpoints` have been saved.

    Args:
        save_dir (str): Directory to save checkpoints.
        max_checkpoints (int): Maximum number of checkpoints to keep before
            overwriting old ones.
        metric_name (str): Name of metric used to determine best model.
        maximize_metric (bool): If true, best checkpoint is that which maximizes
            the metric value passed in via `save`. Otherwise, best checkpoint
            minimizes the metric.
        log (logging.Logger): Optional logger for printing information.
    """
    def __init__(self, save_dir, max_checkpoints, metric_name,
                 maximize_metric=False, log=None):
        super(CheckpointSaver, self).__init__()

        self.save_dir = save_dir
        self.max_checkpoints = max_checkpoints
        self.metric_name = metric_name
        self.maximize_metric = maximize_metric
        self.best_val = None
        # Min-heap of (priority, path): the entry popped first is the worst checkpoint.
        self.ckpt_paths = queue.PriorityQueue()
        self.log = log
        self._print(f"Saver will {'max' if maximize_metric else 'min'}imize {metric_name}...")

    def is_best(self, metric_val):
        """Check whether `metric_val` is the best seen so far.

        Args:
            metric_val (float): Metric value to compare to prior checkpoints.

        Returns:
            bool: False when no metric was reported, True for the first reported
            metric, otherwise whether `metric_val` strictly improves on the best.
        """
        if metric_val is None:
            # No metric reported
            return False

        if self.best_val is None:
            # No checkpoint saved yet
            return True

        if self.maximize_metric:
            return self.best_val < metric_val
        return self.best_val > metric_val

    def _print(self, message):
        """Print a message if logging is enabled."""
        if self.log is not None:
            self.log.info(message)

    def save(self, step, model, metric_val, device):
        """Save model parameters to disk.

        Writes `step_<step>.pth.tar` into `save_dir`, copies it to
        `best.pth.tar` when `metric_val` is the best so far, and deletes the
        worst-ranked checkpoint once more than `max_checkpoints` are tracked.

        Args:
            step (int): Total number of examples seen during training so far.
            model (torch.nn.DataParallel): Model to save.
            metric_val (float): Determines whether checkpoint is best so far.
                May be None ("no metric reported"); such a checkpoint is never
                the best and is the first candidate for removal.
            device (torch.device): Device where model resides.
        """
        ckpt_dict = {
            'model_name': model.__class__.__name__,
            'model_state': model.cpu().state_dict(),
            'step': step
        }
        # model.cpu() moved the live model; put it back where training expects it.
        model.to(device)

        checkpoint_path = os.path.join(self.save_dir,
                                       f'step_{step}.pth.tar')
        torch.save(ckpt_dict, checkpoint_path)
        self._print(f'Saved checkpoint: {checkpoint_path}')

        if self.is_best(metric_val):
            # Save the best model
            self.best_val = metric_val
            best_path = os.path.join(self.save_dir, 'best.pth.tar')
            shutil.copy(checkpoint_path, best_path)
            self._print(f'New best checkpoint at step {step}...')

        # Add checkpoint path to priority queue (lowest priority removed first)
        if metric_val is None:
            # Consistent with is_best(): an unscored checkpoint ranks below all others
            # instead of crashing on `-None`.
            priority_order = float('-inf')
        elif self.maximize_metric:
            priority_order = metric_val
        else:
            priority_order = -metric_val

        self.ckpt_paths.put((priority_order, checkpoint_path))

        # Remove a checkpoint if more than max_checkpoints have been saved
        if self.ckpt_paths.qsize() > self.max_checkpoints:
            _, worst_ckpt = self.ckpt_paths.get()
            try:
                os.remove(worst_ckpt)
                self._print(f'Removed checkpoint: {worst_ckpt}')
            except OSError:
                # Avoid crashing if checkpoint has been removed or protected
                pass

def load_model(model, checkpoint_path, gpu_ids, return_step=True):
    """Load model parameters from disk.

    Args:
        model (torch.nn.DataParallel): Load parameters into this model.
        checkpoint_path (str): Path to checkpoint to load. The checkpoint is
            expected to be a dict with the keys 'model_state' and 'step'.
        gpu_ids (list): GPU IDs for DataParallel. The checkpoint tensors are
            mapped to the first GPU listed, or to the CPU when the list is
            empty or None.
        return_step (bool): Also return the step at which checkpoint was saved.

    Returns:
        model (torch.nn.DataParallel): Model loaded from checkpoint.
        step (int): Step at which checkpoint was saved. Only if `return_step`.
    """
    # The conditional must wrap the whole device string: formatting it inside
    # the f-string produced the invalid device "cuda:cpu" for CPU-only loads.
    device = f"cuda:{gpu_ids[0]}" if gpu_ids else 'cpu'
    ckpt_dict = torch.load(checkpoint_path, map_location=device)

    # Build model, load parameters
    model.load_state_dict(ckpt_dict['model_state'])

    if return_step:
        step = ckpt_dict['step']
        return model, step

    return model

def get_logger(log_dir, name):
    """Get a `logging.Logger` instance that prints to the console
    and an auxiliary file.

    Calling this again with the same `name` (e.g. when a notebook cell is
    re-run) replaces the handlers installed by the earlier call, so each
    message is still emitted once per destination.

    Args:
        log_dir (str): Directory in which to create the log file (`log.txt`).
        name (str): Name to identify the logs.

    Returns:
        logger (logging.Logger): Logger instance for logging events.
    """
    class StreamHandlerWithTQDM(logging.Handler):
        """Let `logging` print without breaking `tqdm` progress bars.

        See Also:
            > https://stackoverflow.com/questions/38543506
        """
        def emit(self, record):
            try:
                msg = self.format(record)
                tqdm.tqdm.write(msg)
                self.flush()
            except Exception:
                # KeyboardInterrupt / SystemExit are not Exception subclasses,
                # so they still propagate to the caller.
                self.handleError(record)

    # Create logger
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    # logging.getLogger returns the same object for the same name; drop (and
    # close) handlers from a previous call so messages are not duplicated and
    # the old log file handle is released.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    # Log everything (i.e., DEBUG level and above) to a file
    log_path = os.path.join(log_dir, 'log.txt')
    file_handler = logging.FileHandler(log_path)
    file_handler.setLevel(logging.DEBUG)

    # Log everything except DEBUG level (i.e., INFO level and above) to console
    console_handler = StreamHandlerWithTQDM()
    console_handler.setLevel(logging.INFO)

    # Create format for the logs
    file_formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                       datefmt='%m.%d.%y %H:%M:%S')
    file_handler.setFormatter(file_formatter)
    console_formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                          datefmt='%m.%d.%y %H:%M:%S')
    console_handler.setFormatter(console_formatter)

    # add the handlers to the logger
    logger.addHandler(file_handler)
    logger.addHandler(console_handler)

    return logger

In [None]:
class MemeGeneratorLSTM(nn.Module):
    """Single-step character decoder conditioned on an image id.

    At every step the embedding of the image id is concatenated with the
    embedding of the current character, projected back to `embedding_dim`,
    passed through an LSTM and mapped to logits over the character vocabulary.
    Vocabulary sizes are read from the notebook-level `char2idx` / `img2idx`.
    """
    def __init__(self):
        super(MemeGeneratorLSTM, self).__init__()
        self.embedding_dim = 128
        self.img_embedding = 32
        self.seqlen = 1
        self.num_classes = len(char2idx)

        self.lstm_hidden_size = 1024
        self.lstm_layer_size = 1
        self.lstm_num_directions = 1

        # Embedding Layer for Images
        self.embedding_img = nn.Embedding(len(img2idx), self.img_embedding)
        # Embedding Layer for character embeddings
        self.embedding_layer = nn.Embedding(len(char2idx), self.embedding_dim, padding_idx = char2idx['<pad>'])

        # project to embedding dim
        self.project_down = nn.Linear(self.img_embedding + self.embedding_dim, self.embedding_dim)

        # LSTM layer
        self.lstm_layer = nn.LSTM(input_size = self.embedding_dim, hidden_size = self.lstm_hidden_size, num_layers=self.lstm_layer_size, bidirectional=False, batch_first = True)
        # fc layer
        self.fc = nn.Linear(self.lstm_hidden_size, self.num_classes)

    def forward(self, input_img, x, prev_state_h, prev_state_c):
        """Run one decoding step.

        Args:
            input_img: image ids, shape (batch_size,).
            x: current input character ids, shape (batch_size, 1).
            prev_state_h: LSTM hidden state,
                shape (num_layers * num_directions, batch_size, hidden_size).
            prev_state_c: LSTM cell state, same shape as `prev_state_h`.

        Returns:
            out: logits over the vocabulary, shape (batch_size, num_classes).
            hn, cn: updated LSTM hidden and cell states.
        """
        # image embeddings (batch_size, 1, img_embedding_dim)
        img_out = self.embedding_img(torch.unsqueeze(input_img, dim=1))

        # character embeddings (batch_size, 1, embedding_dim)
        text_out = self.embedding_layer(x)

        # concatenate along the feature axis: (batch_size, 1, img_emb + text_emb)
        cat = torch.cat((img_out, text_out), dim=2)

        # project down to (batch_size, 1, embedding_dim)
        embedding_out = self.project_down(cat)

        # seq_len is 1 here: the caller feeds one character at a time and
        # threads (hn, cn) through successive calls.
        lstm_out, (hn, cn) = self.lstm_layer(embedding_out, (prev_state_h, prev_state_c))

        # (batch_size, 1, hidden_size) -> (batch_size, hidden_size)
        output = torch.squeeze(lstm_out, dim = 1)

        # output shape == (batch_size, vocab)
        out = self.fc(output)
        return out, hn, cn

    def init_state(self, batch_size):
        """Return zero-initialised (h0, c0) for a batch, created on the CPU.

        The leading dimension follows the configured number of layers and
        directions instead of a literal 1, so it stays valid if those change.
        """
        num_states = self.lstm_layer_size * self.lstm_num_directions
        return (torch.zeros(num_states, batch_size, self.lstm_hidden_size),
                torch.zeros(num_states, batch_size, self.lstm_hidden_size))

In [None]:
class MemeGeneratorM2(nn.Module):
    """Unrolls `MemeGeneratorLSTM` over a whole sequence.

    In training/evaluation mode the decoder is teacher-forced with `label`.
    In prediction mode (batch size 1 only) the non-padded prefix of `x` is
    fed as a prompt and the model's own predictions are fed back afterwards.
    """
    def __init__(self):
        super(MemeGeneratorM2, self).__init__()
        self.memegeneratorlstm = MemeGeneratorLSTM()

    def forward(self, input_img, x, label, device, prediction_mode = False):
        """Decode a full sequence one character at a time.

        Args:
            input_img: image ids, shape (batch_size,).
            x: input character ids, shape (batch_size, seqlen).
            label: target character ids, shape (batch_size, seqlen); only read
                when `prediction_mode` is False.
            device: device for the initial LSTM state, e.g. "cpu" / "cuda:0".
            prediction_mode (bool): False -> teacher forcing with `label`;
                True -> free-running generation after the prompt in `x`.

        Returns:
            output_tensor: logits, shape (batch_size, seqlen, num_classes).
            predictions_arr: numpy array of argmax ids, shape (batch_size, seqlen).
        """
        batch_size = input_img.size()[0]
        seq_len = x.size()[1]

        # LSTM hidden state initialization
        prev_state_h, prev_state_c = self.memegeneratorlstm.init_state(batch_size)
        prev_state_h = prev_state_h.to(device)
        prev_state_c = prev_state_c.to(device)

        # Per-step results are collected in lists and stacked once at the end;
        # concatenating onto a growing tensor every step is quadratic in seqlen.
        step_logits = []
        step_predictions = []

        lstm_input = torch.unsqueeze(x[:, 0], dim = 1)

        # Length of the prompt = position of the first 0 in x (index 0 is treated
        # as padding here, as elsewhere in this notebook). Only needed -- and only
        # computed -- in prediction mode, so a training batch without any padding
        # no longer fails, and no device sync is forced per training step.
        prompt_len = seq_len
        if prediction_mode:
            pad_positions = (x == 0).nonzero(as_tuple=False)
            if pad_positions.size(0) > 0:
                prompt_len = pad_positions[0][1].item()

        for t in range(seq_len):
            # predictions shape (batch_size, num_classes)
            predictions, prev_state_h, prev_state_c = self.memegeneratorlstm(input_img, lstm_input, prev_state_h, prev_state_c)
            step_logits.append(predictions)

            # greedy choice for this step, shape (batch_size,)
            one_prediction = torch.max(predictions, dim = 1).indices
            step_predictions.append(one_prediction)

            if not prediction_mode:
                # teacher forcing: the target at step t is the next input
                lstm_input = torch.unsqueeze(label[:, t], dim = 1)
            elif t < prompt_len - 1:
                # still inside the prompt (batch size 1): feed its next character.
                # Slicing keeps the input on the same device as x.
                lstm_input = x[0:1, t + 1:t + 2]
            else:
                # past the prompt: feed the model's own prediction back in
                lstm_input = torch.unsqueeze(one_prediction, dim = 1)

        # output_tensor shape (batch_size, seqlen, num_classes)
        output_tensor = torch.stack(step_logits, dim=1)
        # predictions_arr shape (batch_size, seqlen); one device->host copy in total
        predictions_arr = torch.stack(step_predictions, dim=1).detach().cpu().numpy()

        return output_tensor, predictions_arr

In [None]:
def calculate_loss(pred, real, device):
    """Padding-aware cross-entropy, averaged per sequence and then per batch.

    For every sequence the cross-entropy of its non-padding tokens is summed
    and divided by the number of non-padding tokens; the result is the mean of
    those per-sequence values.

    Computed in one vectorised call instead of one `cross_entropy` call per
    token. Besides being far faster this avoids two failure modes of the
    per-token version: an in-place write into a leaf tensor that requires grad
    (raises on CPU), and NaN from mean-reducing a single ignored target on
    current PyTorch versions.

    Args:
        pred: logits, shape (batch_size, seqlen, num_classes).
        real: target ids, shape (batch_size, seqlen); positions equal to
            char2idx["<pad>"] are ignored.
        device: unused; kept for backward compatibility. The loss lives on the
            same device as `pred`.

    Returns:
        Scalar tensor with the averaged loss.
    """
    pad_idx = char2idx["<pad>"]
    batch_size, seq_len, num_classes = pred.size()

    # Per-token loss, shape (batch_size, seqlen); ignored (pad) positions are 0.
    token_loss = F.cross_entropy(pred.reshape(-1, num_classes),
                                 real.reshape(-1),
                                 ignore_index=pad_idx,
                                 reduction="none").reshape(batch_size, seq_len)

    # Number of real tokens per sequence. Clamped so that a sequence made only
    # of padding contributes 0 instead of turning the whole batch loss into NaN.
    tokens_per_seq = (real != pad_idx).sum(dim=1).clamp(min=1)

    seq_loss = token_loss.sum(dim=1) / tokens_per_seq
    return seq_loss.mean()

In [None]:
def evaluate_dict(gold_dict, pred_dict):
    """Character-level accuracy over all non-padding positions.

    Args:
        gold_dict (dict): id -> sequence of true character ids.
        pred_dict (dict): id -> sequence of predicted character ids. Every key
            must also be present in `gold_dict`.

    Returns:
        dict: {'acc': percentage of positions whose true id is not 0 and whose
        prediction matches}. 0.0 when there is no such position at all
        (instead of dividing by zero).
    """
    correct = 0
    total = 0

    for key, predicted in pred_dict.items():
        for truth, guess in zip(gold_dict[key], predicted):
            if truth == 0:
                # id 0 marks padding: not scored
                continue
            total += 1
            if truth == guess:
                correct += 1

    accuracy = 100. * correct / total if total else 0.0
    eval_dict = {'acc': accuracy}

    return eval_dict

In [None]:
def evaluate(model, dev_loader, dev_path, device, idx2char):
    """Evaluates Model.

    Runs the model (teacher-forced) over `dev_loader`, accumulates the
    batch-size-weighted loss and scores the greedy predictions against the
    labels stored in `dev_path`.

    Args:
        model: The model to evaluate. It is switched to eval mode for the
            duration of the call and always put back into train mode.
        dev_loader (loader): data loader yielding (source, label, img, ids).
        dev_path (string): Path to eval npz file (arrays 'label' and 'ids').
        device: "cpu" or "cuda:0" for gpu
        idx2char (dict): dictionary mapping integers to characters (unused here;
            kept so existing callers keep working).

    Returns:
        results (OrderedDict): {'NLL': average loss, 'acc': accuracy in percent}.
        pred_dict (dict): example id -> numpy array of predicted character ids.
    """
    # `import tqdm` alone does not guarantee that the notebook submodule is loaded.
    import tqdm.notebook

    nll_meter = AverageMeter()
    pred_dict = {}

    # gold_dict[id] = labels stored at the same position as that id. Pairing by
    # position (rather than indexing the label array with the id value) matches
    # how the dataset serves (label[i], ids[i]) and stays correct even when ids
    # are not exactly 0..N-1.
    with np.load(dev_path) as eval_data:
        gold_labels = eval_data['label'].astype(np.int64)
        gold_ids = eval_data['ids'].astype(np.int64)
    gold_dict = {int(example_id): labels
                 for example_id, labels in zip(gold_ids, gold_labels)}

    model.eval()
    try:
        with torch.no_grad(), tqdm.notebook.tqdm(total=len(dev_loader.dataset), position=1, leave=True) as progress_bar:
            for source, label, img, ids in dev_loader:
                batch_size = source.size(0)

                # Setup for forward
                source = source.to(device)  # (batchsize, seqlen)
                label = label.to(device)    # (batchsize, seqlen)
                img = img.to(device)        # (batchsize)

                # Forward
                # output_tensor shape (batch_size, seqlen, # classes) -> Tensor
                # prediction_arr shape (batch_size, seqlen) -> Numpy array
                output_tensor, prediction_arr = model(img, source, label, device)

                # calculate loss
                loss = calculate_loss(output_tensor, label, device)
                nll_meter.update(loss.item(), batch_size)

                # remember the greedy prediction of every example in the batch
                for example_id, example_pred in zip(ids, prediction_arr):
                    pred_dict[example_id.item()] = example_pred

                # Log info
                progress_bar.update(batch_size)
                progress_bar.set_postfix(NLL=nll_meter.avg)
    finally:
        # Restore training mode even if evaluation fails half-way.
        model.train()

    # return results
    results = evaluate_dict(gold_dict, pred_dict)
    results = OrderedDict([('NLL', nll_meter.avg),
                           ('acc', results['acc'])])
    return results, pred_dict

In [None]:
def visualize(tbx, pred_dict, eval_path, step, split, num_visuals):
    """Write randomly chosen decoded examples to TensorBoard as text.

    Args:
        tbx (tensorboardX.SummaryWriter): Summary writer.
        pred_dict (dict): dict of predictions of the form id -> pred.
        eval_path (string): Path to eval npz file.
        step (int): Number of examples seen so far during training.
        split (str): Name of data split being visualized.
        num_visuals (int): Number of visuals to select at random from preds.
    """
    if num_visuals <= 0:
        return
    num_visuals = min(num_visuals, len(pred_dict))

    # Special tokens are spelled out so they stay visible in the rendered text.
    token_display = (('<start>', 'START '),
                     ('<end>', ' END '),
                     ('<sep>', ' SEP '),
                     ('<pad>', ' PAD '))

    def decode(encoded):
        """Map a sequence of character ids back to a readable string."""
        text = ''.join(idx2char[char_id] for char_id in encoded)
        for token, shown in token_display:
            text = text.replace(token, shown)
        return text

    # sample the example ids to show (without replacement)
    chosen_ids = np.random.choice(list(pred_dict), size=num_visuals, replace=False)

    # the eval arrays are indexed directly by example id
    eval_data = np.load(eval_path, allow_pickle=True)
    all_labels = eval_data['label']
    all_sources = eval_data['source']
    all_imgs = eval_data['img']

    for rank, example_id in enumerate(chosen_ids, start=1):
        pred_text = decode(pred_dict[example_id])
        source_text = decode(all_sources[example_id])
        label_text = decode(all_labels[example_id])
        img_name = idx2img[all_imgs[example_id]]

        tbl_fmt = (f'- **Source:** {source_text}\n'
                   f'- **Label:** {label_text}\n'
                   f'- **Prediction:** {pred_text}\n'
                   f'- **Img Name:** {img_name}')
        tbx.add_text(tag=f'{split}/{rank}_of_{num_visuals}',
                     text_string=tbl_fmt,
                     global_step=step)

In [None]:
# Everything produced by this run (log file, TensorBoard events, checkpoints) goes here.
save_dir = 'gdrive/My Drive/MemeGenerator/save/train/baseline-03'
os.makedirs(save_dir, exist_ok=True)

log = get_logger(save_dir, 'baseline')
tbx = SummaryWriter(save_dir)
log.info('Using random seed 224 ...')

# create checkpoint saver: keep the 5 checkpoints with the highest dev accuracy.
# maximize_metric is a boolean flag (the metric's name goes in metric_name).
saver = CheckpointSaver(save_dir,
                        max_checkpoints=5,
                        metric_name='acc',
                        maximize_metric=True,
                        log=log)

# create model and train
model = MemeGeneratorM2()
model = model.to('cuda:0')
model.train()

# optimizer and smoothing
optimizer = optim.Adam(model.parameters(), lr = 0.001)
ema = EMA(model, 0.999) # Exponentially Smooth parameters

[09.19.20 00:07:07] Using random seed 224 ...
[09.19.20 00:07:07] Saver will maximize acc...


In [None]:
step = 0                 # number of training examples processed so far
steps_till_eval = 30000  # examples left until the next dev evaluation
device = 'cuda:0'

for epoch in range(1, 11):  # train for 10 epochs
    log.info(f'Starting epoch {epoch}...')
    with torch.enable_grad(), tqdm.tqdm(total=len(train_loader.dataset), position=0, leave=True) as progress_bar:
        for source, label, img, ids in train_loader:
            batch_size = source.size(0)

            # Move the batch to the training device
            source = source.to(device)  # (batchsize, 199): padded input text
            label = label.to(device)    # (batchsize, 199): padded target text
            img = img.to(device)        # (batchsize): image ids
            optimizer.zero_grad()

            # Forward pass (teacher forcing)
            # output_tensor: (batch_size, 199, # classes) tensor of logits
            # prediction_arr: (batch_size, 199) numpy array of argmax ids
            output_tensor, prediction_arr = model(img, source, label, device)

            loss = calculate_loss(output_tensor, label, device)
            loss_val = loss.item()

            # Backward pass, gradient-norm clipping at 1, parameter + EMA update
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1)
            optimizer.step()
            ema(model, step // batch_size)

            # Progress bar and TensorBoard scalars
            step += batch_size
            progress_bar.update(batch_size)
            progress_bar.set_postfix(epoch=epoch, NLL=loss_val)

            tbx.add_scalar('train/NLL', loss_val, step)
            tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step)

            steps_till_eval -= batch_size
            if steps_till_eval > 0:
                continue
            steps_till_eval = 30000

            # Evaluate with the EMA weights, checkpoint them, then restore the live weights
            log.info(f'Evaluating at step {step}...')
            ema.assign(model)
            results, pred_dict = evaluate(model, dev_loader, 'gdrive/My Drive/MemeGenerator/dataset_final_m2/test.npz', device, idx2char)

            saver.save(step, model, results['acc'], device)
            ema.resume(model)

            # Console summary
            results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items())
            log.info(f'Dev {results_str}')

            # TensorBoard: dev metrics plus a sample of decoded predictions
            log.info('Visualizing in TensorBoard...')
            for k, v in results.items():
                tbx.add_scalar(f'dev/{k}', v, step)

            visualize(tbx, pred_dict, eval_path='gdrive/My Drive/MemeGenerator/dataset_final_m2/test.npz', step=step, split='dev', num_visuals=30)

  0%|          | 0/83266 [00:00<?, ?it/s]

[09.19.20 00:07:17] Starting epoch 1...


 36%|███▌      | 30016/83266 [41:01<1:17:09, 11.50it/s, NLL=1.6, epoch=1]

[09.19.20 00:48:19] Evaluating at step 30016...


HBox(children=(FloatProgress(value=0.0, max=4383.0), HTML(value='')))

  self.dropout, self.training, self.bidirectional, self.batch_first)
 36%|███▌      | 30016/83266 [41:20<1:17:09, 11.50it/s, NLL=1.6, epoch=1]




 36%|███▌      | 30016/83266 [42:24<1:17:09, 11.50it/s, NLL=1.6, epoch=1]

[09.19.20 00:49:42] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-03/step_30016.pth.tar
[09.19.20 00:49:42] New best checkpoint at step 30016...
[09.19.20 00:49:42] Dev NLL: 01.52, acc: 55.28
[09.19.20 00:49:42] Visualizing in TensorBoard...


 72%|███████▏  | 60032/83266 [1:23:47<31:53, 12.15it/s, NLL=1.47, epoch=1]

[09.19.20 01:31:05] Evaluating at step 60032...


HBox(children=(FloatProgress(value=0.0, max=4383.0), HTML(value='')))

 72%|███████▏  | 60032/83266 [1:24:00<31:53, 12.15it/s, NLL=1.47, epoch=1]




 72%|███████▏  | 60032/83266 [1:25:06<31:53, 12.15it/s, NLL=1.47, epoch=1]

[09.19.20 01:32:24] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-03/step_60032.pth.tar
[09.19.20 01:32:24] New best checkpoint at step 60032...
[09.19.20 01:32:24] Dev NLL: 01.39, acc: 58.97
[09.19.20 01:32:24] Visualizing in TensorBoard...


100%|██████████| 83266/83266 [1:57:09<00:00, 11.84it/s, NLL=1.14, epoch=1]
  0%|          | 0/83266 [00:00<?, ?it/s]

[09.19.20 02:04:27] Starting epoch 2...


  8%|▊         | 6784/83266 [09:14<1:43:39, 12.30it/s, NLL=1.28, epoch=2]

[09.19.20 02:13:42] Evaluating at step 90050...


HBox(children=(FloatProgress(value=0.0, max=4383.0), HTML(value='')))

  8%|▊         | 6784/83266 [09:30<1:43:39, 12.30it/s, NLL=1.28, epoch=2]




  8%|▊         | 6784/83266 [10:34<1:43:39, 12.30it/s, NLL=1.28, epoch=2]

[09.19.20 02:15:01] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-03/step_90050.pth.tar
[09.19.20 02:15:02] New best checkpoint at step 90050...
[09.19.20 02:15:02] Dev NLL: 01.33, acc: 60.49
[09.19.20 02:15:02] Visualizing in TensorBoard...


 44%|████▍     | 36800/83266 [50:56<1:03:41, 12.16it/s, NLL=1.34, epoch=2]

[09.19.20 02:55:24] Evaluating at step 120066...


HBox(children=(FloatProgress(value=0.0, max=4383.0), HTML(value='')))

 44%|████▍     | 36800/83266 [51:10<1:03:41, 12.16it/s, NLL=1.34, epoch=2]




 44%|████▍     | 36800/83266 [52:18<1:03:41, 12.16it/s, NLL=1.34, epoch=2]

[09.19.20 02:56:45] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-03/step_120066.pth.tar
[09.19.20 02:56:45] New best checkpoint at step 120066...
[09.19.20 02:56:45] Dev NLL: 01.29, acc: 61.46
[09.19.20 02:56:45] Visualizing in TensorBoard...


 80%|████████  | 66816/83266 [1:34:05<24:03, 11.40it/s, NLL=1.25, epoch=2]

[09.19.20 03:38:33] Evaluating at step 150082...


HBox(children=(FloatProgress(value=0.0, max=4383.0), HTML(value='')))

 80%|████████  | 66816/83266 [1:34:20<24:03, 11.40it/s, NLL=1.25, epoch=2]




 80%|████████  | 66816/83266 [1:35:28<24:03, 11.40it/s, NLL=1.25, epoch=2]

[09.19.20 03:39:56] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-03/step_150082.pth.tar
[09.19.20 03:39:56] New best checkpoint at step 150082...
[09.19.20 03:39:56] Dev NLL: 01.27, acc: 62.14
[09.19.20 03:39:56] Visualizing in TensorBoard...


100%|██████████| 83266/83266 [1:59:12<00:00, 11.64it/s, NLL=0.968, epoch=2]
  0%|          | 0/83266 [00:00<?, ?it/s]

[09.19.20 04:03:39] Starting epoch 3...


 16%|█▋        | 13568/83266 [19:02<1:31:16, 12.73it/s, NLL=1.13, epoch=3]

[09.19.20 04:22:42] Evaluating at step 180100...


HBox(children=(FloatProgress(value=0.0, max=4383.0), HTML(value='')))

 16%|█▋        | 13568/83266 [19:20<1:31:16, 12.73it/s, NLL=1.13, epoch=3]




 16%|█▋        | 13568/83266 [20:20<1:31:16, 12.73it/s, NLL=1.13, epoch=3]

[09.19.20 04:24:00] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-03/step_180100.pth.tar
[09.19.20 04:24:00] New best checkpoint at step 180100...
[09.19.20 04:24:00] Removed checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-03/step_30016.pth.tar
[09.19.20 04:24:00] Dev NLL: 01.25, acc: 62.57
[09.19.20 04:24:00] Visualizing in TensorBoard...


 52%|█████▏    | 43584/83266 [1:00:17<53:16, 12.41it/s, NLL=1.39, epoch=3]

[09.19.20 05:03:57] Evaluating at step 210116...


HBox(children=(FloatProgress(value=0.0, max=4383.0), HTML(value='')))

 52%|█████▏    | 43584/83266 [1:00:30<53:16, 12.41it/s, NLL=1.39, epoch=3]




 52%|█████▏    | 43584/83266 [1:01:34<53:16, 12.41it/s, NLL=1.39, epoch=3]

[09.19.20 05:05:14] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-03/step_210116.pth.tar
[09.19.20 05:05:14] New best checkpoint at step 210116...
[09.19.20 05:05:14] Removed checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-03/step_60032.pth.tar
[09.19.20 05:05:14] Dev NLL: 01.24, acc: 62.91
[09.19.20 05:05:14] Visualizing in TensorBoard...


 88%|████████▊ | 73600/83266 [1:43:51<14:12, 11.34it/s, NLL=1.18, epoch=3]

[09.19.20 05:47:31] Evaluating at step 240132...


HBox(children=(FloatProgress(value=0.0, max=4383.0), HTML(value='')))

 88%|████████▊ | 73600/83266 [1:44:10<14:12, 11.34it/s, NLL=1.18, epoch=3]




 88%|████████▊ | 73600/83266 [1:45:18<14:12, 11.34it/s, NLL=1.18, epoch=3]

[09.19.20 05:48:57] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-03/step_240132.pth.tar
[09.19.20 05:48:57] New best checkpoint at step 240132...
[09.19.20 05:48:57] Removed checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-03/step_90050.pth.tar
[09.19.20 05:48:57] Dev NLL: 01.23, acc: 63.13
[09.19.20 05:48:57] Visualizing in TensorBoard...


100%|██████████| 83266/83266 [1:59:26<00:00, 11.62it/s, NLL=0.704, epoch=3]
  0%|          | 0/83266 [00:00<?, ?it/s]

[09.19.20 06:03:05] Starting epoch 4...


 24%|██▍       | 20352/83266 [29:42<1:33:04, 11.27it/s, NLL=1.1, epoch=4]

[09.19.20 06:32:48] Evaluating at step 270150...


HBox(children=(FloatProgress(value=0.0, max=4383.0), HTML(value='')))

 24%|██▍       | 20352/83266 [30:00<1:33:04, 11.27it/s, NLL=1.1, epoch=4]




 24%|██▍       | 20352/83266 [31:09<1:33:04, 11.27it/s, NLL=1.1, epoch=4]

[09.19.20 06:34:15] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-03/step_270150.pth.tar
[09.19.20 06:34:15] New best checkpoint at step 270150...
[09.19.20 06:34:15] Removed checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-03/step_120066.pth.tar
[09.19.20 06:34:15] Dev NLL: 01.23, acc: 63.35
[09.19.20 06:34:15] Visualizing in TensorBoard...


 51%|█████▏    | 42720/83266 [1:03:22<54:38, 12.37it/s, NLL=1.07, epoch=4]