In [None]:
!nvidia-smi

Tue Sep 15 20:21:44 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.66       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   49C    P0    28W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
!pip install tensorboardX



In [None]:
# Mount Google Drive inside the Colab runtime at /content/gdrive so the dataset
# and checkpoint paths used below resolve. force_remount=True is passed so that
# re-running this cell requests a fresh mount.
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


**Data Loader**

In [None]:
import torch
import torch.utils.data as data
import numpy as np
import json
from keras.preprocessing.sequence import pad_sequences

In [None]:
# Load the lookup tables that were serialised as JSON alongside the dataset.
# char2idx maps a character to its integer index.
with open('gdrive/My Drive/MemeGenerator/dataset_final/char2idx.json', 'r', encoding = 'UTF-8') as json_file:
    char2idx = json.load(json_file)

# img2idx maps an image key to its integer index.
with open('gdrive/My Drive/MemeGenerator/dataset_final/img2idx.json', 'r', encoding = 'UTF-8') as json_file:
    img2idx = json.load(json_file)

# Inverse lookups (integer index -> original key), used when decoding predictions.
idx2char = dict(zip(char2idx.values(), char2idx.keys()))
idx2img = dict(zip(img2idx.values(), img2idx.keys()))

In [None]:
class MemeDataset(data.Dataset):
    """Meme dataset backed by a pre-processed ``.npz`` archive.

    The archive is read through the keys ``source``, ``label``, ``img`` and
    ``ids``. Each item of the dataset is a tuple with the following entries
    (in order), all converted to ``torch.long`` tensors:
         source: one row of the pre-padded source sequences, length `max_len`
         label: the label stored at the same position
         img: the image index stored at the same position
         ids: the example id stored at the same position

    Args:
        data_path (str): Path to .npz file containing pre-processed dataset.
        max_len (int): Length that every source sequence is padded (at the
            front) or truncated to by `pad_sequences`. Defaults to 128, the
            value that was previously hard-coded.
    """
    def __init__(self, data_path, max_len=128):
        super(MemeDataset, self).__init__()

        # NOTE(review): allow_pickle=True unpickles arbitrary objects, which can
        # execute code. Only point this at archives you produced yourself.
        # The `with` block closes the underlying zip file once all arrays
        # have been materialised.
        with np.load(data_path, allow_pickle=True) as dataset:
            # Pre-pad the source sequences to a common length.
            padded = pad_sequences(dataset['source'], maxlen=max_len,
                                   dtype='int32', padding='pre')

            self.source = torch.from_numpy(np.array(padded)).long()
            self.label = torch.from_numpy(dataset['label']).long()
            self.img = torch.from_numpy(dataset['img']).long()
            self.ids = torch.from_numpy(dataset['ids']).long()

        # Positions that may be served by __getitem__ (currently every row).
        self.valid_idxs = list(range(len(self.ids)))

    def __getitem__(self, idx):
        """Return the ``(source, label, img, ids)`` tuple at position `idx`."""
        idx = self.valid_idxs[idx]
        example = (self.source[idx],
                   self.label[idx],
                   self.img[idx],
                   self.ids[idx])
        return example

    def __len__(self):
        """Number of examples that can be indexed."""
        return len(self.valid_idxs)

In [None]:
# Train and dev data loaders.
# Both use batches of 256 examples and 4 loader workers; only the training
# loader shuffles its examples.
train_dataset = MemeDataset('gdrive/My Drive/MemeGenerator/dataset_final/train.npz')
train_loader = data.DataLoader(train_dataset, batch_size=256, shuffle=True, num_workers=4)

dev_dataset = MemeDataset('gdrive/My Drive/MemeGenerator/dataset_final/test.npz')
dev_loader = data.DataLoader(dev_dataset, batch_size=256, shuffle=False, num_workers=4)

**Modelling**

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data as data
from torch.optim.lr_scheduler import StepLR, ExponentialLR

import numpy as np
import tqdm
from collections import OrderedDict
from json import dumps
import random
import os
import logging
import queue
import shutil
import string
import json

# Seed every random number generator used in this notebook (Python, NumPy,
# PyTorch CPU and all CUDA devices) with the same value, in that order.
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_rng(224)

from tensorboardX import SummaryWriter

In [None]:
class AverageMeter:
    """Running sample-weighted mean of a scalar across many updates.

    Adapted from:
        > https://github.com/pytorch/examples/blob/master/imagenet/main.py
    """
    def __init__(self):
        # Running mean, weighted sum and number of samples seen so far.
        self.avg = self.sum = self.count = 0

    def reset(self):
        """Discard everything recorded so far."""
        self.__init__()

    def update(self, val, num_samples=1):
        """Fold a new batch-level average into the running mean.

        Args:
            val (float): Mean value observed over the new samples.
            num_samples (int): How many samples `val` was averaged over;
                used as the weight of `val`.
        """
        self.sum += val * num_samples
        self.count += num_samples
        self.avg = self.sum / self.count


class EMA:
    """Tracks an exponential moving average of a model's trainable parameters.

    Only parameters with ``requires_grad=True`` are tracked, keyed by the
    names reported by ``model.named_parameters()``.

    Args:
        model (torch.nn.Module): Model with parameters whose EMA will be kept.
        decay (float): Upper bound on the decay rate of the moving average.
    """
    def __init__(self, model, decay):
        self.decay = decay
        # Values saved by `assign` so that `resume` can put them back.
        self.original = {}
        # Start every average at a copy of the current parameter value.
        self.shadow = {name: param.data.clone()
                       for name, param in model.named_parameters()
                       if param.requires_grad}

    def __call__(self, model, num_updates):
        """Blend the current parameter values into the running averages.

        The rate actually used is ``min(decay, (1 + n) / (10 + n))`` with
        ``n = num_updates``, so early updates weigh the new values more.

        Args:
            model (torch.nn.Module): Model whose parameters are read.
            num_updates (int): Number of updates performed so far.
        """
        rate = min(self.decay, (1.0 + num_updates) / (10.0 + num_updates))
        for name, param in model.named_parameters():
            if not param.requires_grad:
                continue
            assert name in self.shadow
            blended = (1.0 - rate) * param.data + rate * self.shadow[name]
            self.shadow[name] = blended.clone()

    def assign(self, model):
        """Swap the averaged values into the model, remembering the originals.

        Args:
            model (torch.nn.Module): Model to assign parameter values.
        """
        for name, param in model.named_parameters():
            if not param.requires_grad:
                continue
            assert name in self.shadow
            self.original[name] = param.data.clone()
            param.data = self.shadow[name]

    def resume(self, model):
        """Put back the parameter values saved by the last call to `assign`.

        Args:
            model (torch.nn.Module): Model to assign parameter values.
        """
        for name, param in model.named_parameters():
            if not param.requires_grad:
                continue
            assert name in self.shadow
            param.data = self.original[name]


class CheckpointSaver:
    """Class to save model checkpoints and keep only the best few.

    Every call to `save` writes ``step_<step>.pth.tar`` into `save_dir`. The
    checkpoint with the best metric so far is additionally copied to
    ``best.pth.tar``. Once more than `max_checkpoints` step files have been
    written, the one with the worst metric is deleted.

    Args:
        save_dir (str): Directory to save checkpoints.
        max_checkpoints (int): Maximum number of checkpoints to keep before
            deleting the worst one.
        metric_name (str): Name of metric used to determine best model.
        maximize_metric (bool): If true, best checkpoint is that which maximizes
            the metric value passed in via `save`. Otherwise, best checkpoint
            minimizes the metric.
        log (logging.Logger): Optional logger for printing information.
    """
    def __init__(self, save_dir, max_checkpoints, metric_name,
                 maximize_metric=False, log=None):
        super(CheckpointSaver, self).__init__()

        self.save_dir = save_dir
        self.max_checkpoints = max_checkpoints
        self.metric_name = metric_name
        self.maximize_metric = maximize_metric
        self.best_val = None
        # Min-heap of (priority, path); the entry popped first is the worst.
        self.ckpt_paths = queue.PriorityQueue()
        self.log = log
        self._print(f"Saver will {'max' if maximize_metric else 'min'}imize {metric_name}...")

    def is_best(self, metric_val):
        """Check whether `metric_val` is the best seen so far.

        Args:
            metric_val (float): Metric value to compare to prior checkpoints.

        Returns:
            bool: False when no metric is given, True for the first metric
            reported, otherwise whether it strictly improves on `best_val`.
        """
        if metric_val is None:
            # No metric reported
            return False

        if self.best_val is None:
            # No checkpoint saved yet
            return True

        if self.maximize_metric:
            return self.best_val < metric_val
        return self.best_val > metric_val

    def _print(self, message):
        """Print a message if logging is enabled."""
        if self.log is not None:
            self.log.info(message)

    def _priority(self, metric_val):
        """Map a metric to a queue priority where lower means worse."""
        if metric_val is None:
            # A checkpoint without a metric cannot be ranked, so it is the
            # first candidate for removal. Previously this case raised a
            # TypeError (negating or comparing None).
            return float('-inf')
        return metric_val if self.maximize_metric else -metric_val

    def save(self, step, model, metric_val, device):
        """Save model parameters to disk.

        Args:
            step (int): Total number of examples seen during training so far.
            model (torch.nn.DataParallel): Model to save.
            metric_val (float or None): Determines whether checkpoint is best
                so far. None means no metric was reported: the checkpoint is
                still written but never becomes the best one.
            device (torch.device): Device where model resides.
        """
        ckpt_dict = {
            'model_name': model.__class__.__name__,
            # State is captured on CPU; the model is moved back right after.
            'model_state': model.cpu().state_dict(),
            'step': step
        }
        model.to(device)

        checkpoint_path = os.path.join(self.save_dir,
                                       f'step_{step}.pth.tar')
        torch.save(ckpt_dict, checkpoint_path)
        self._print(f'Saved checkpoint: {checkpoint_path}')

        if self.is_best(metric_val):
            # Save the best model
            self.best_val = metric_val
            best_path = os.path.join(self.save_dir, 'best.pth.tar')
            shutil.copy(checkpoint_path, best_path)
            self._print(f'New best checkpoint at step {step}...')

        # Add checkpoint path to priority queue (lowest priority removed first)
        self.ckpt_paths.put((self._priority(metric_val), checkpoint_path))

        # Remove a checkpoint if more than max_checkpoints have been saved
        if self.ckpt_paths.qsize() > self.max_checkpoints:
            _, worst_ckpt = self.ckpt_paths.get()
            try:
                os.remove(worst_ckpt)
                self._print(f'Removed checkpoint: {worst_ckpt}')
            except OSError:
                # Avoid crashing if checkpoint has been removed or protected
                pass

def load_model(model, checkpoint_path, gpu_ids, return_step=True):
    """Load model parameters from disk.

    The checkpoint is expected to be a dict with the keys ``model_state`` and
    ``step``, as written by `CheckpointSaver.save`.

    Args:
        model (torch.nn.DataParallel): Load parameters into this model.
        checkpoint_path (str): Path to checkpoint to load.
        gpu_ids (list): GPU IDs for DataParallel. The first ID selects the
            CUDA device tensors are mapped to; an empty list (or None) maps
            everything to the CPU.
        return_step (bool): Also return the step at which checkpoint was saved.

    Returns:
        model (torch.nn.DataParallel): Model loaded from checkpoint.
        step (int): Step at which checkpoint was saved. Only if `return_step`.
    """
    # The previous f-string produced the invalid device "cuda:cpu" whenever
    # `gpu_ids` was empty; pick the whole device string conditionally instead.
    device = f'cuda:{gpu_ids[0]}' if gpu_ids else 'cpu'
    ckpt_dict = torch.load(checkpoint_path, map_location=device)

    # Load parameters into the already-built model
    model.load_state_dict(ckpt_dict['model_state'])

    if return_step:
        step = ckpt_dict['step']
        return model, step

    return model

def get_logger(log_dir, name):
    """Get a `logging.Logger` instance that prints to the console
    and an auxiliary file.

    Calling this again with the same `name` (e.g. when a notebook cell is
    re-run) replaces the handlers installed by the earlier call instead of
    stacking new ones, so each message is emitted once per destination.

    Args:
        log_dir (str): Directory in which to create the log file ``log.txt``.
        name (str): Name to identify the logs.

    Returns:
        logger (logging.Logger): Logger instance for logging events.
    """
    class StreamHandlerWithTQDM(logging.Handler):
        """Let `logging` print without breaking `tqdm` progress bars.

        See Also:
            > https://stackoverflow.com/questions/38543506
        """
        def emit(self, record):
            try:
                msg = self.format(record)
                tqdm.tqdm.write(msg)
                self.flush()
            except Exception:
                # KeyboardInterrupt / SystemExit are not `Exception`s and
                # therefore still propagate to the caller.
                self.handleError(record)

    # Create logger
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    # `logging.getLogger` returns the same object for the same name, so drop
    # (and close) whatever handlers a previous call attached.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    # Log everything (i.e., DEBUG level and above) to a file
    log_path = os.path.join(log_dir, 'log.txt')
    file_handler = logging.FileHandler(log_path)
    file_handler.setLevel(logging.DEBUG)

    # Log everything except DEBUG level (i.e., INFO level and above) to console
    console_handler = StreamHandlerWithTQDM()
    console_handler.setLevel(logging.INFO)

    # Create format for the logs
    file_formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                       datefmt='%m.%d.%y %H:%M:%S')
    file_handler.setFormatter(file_formatter)
    console_formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                          datefmt='%m.%d.%y %H:%M:%S')
    console_handler.setFormatter(console_formatter)

    # add the handlers to the logger
    logger.addHandler(file_handler)
    logger.addHandler(console_handler)

    return logger

In [None]:
class MemeGenerator(nn.Module):
    """1-D convolutional classifier over an (image index, character sequence) pair.

    The image index and every character index are embedded, concatenated per
    position and projected down to `embedding_dim` channels. Five
    Conv1d -> ReLU -> BatchNorm1d blocks follow (the first four also apply
    MaxPool1d(2) and dropout), then a global max over the sequence axis and
    two linear layers. The output is one unnormalised score per entry of
    `char2idx`.

    Vocabulary sizes and the padding index are read from the module-level
    `char2idx` / `img2idx` dictionaries when the model is constructed.
    """
    def __init__(self):
        super(MemeGenerator, self).__init__()
        self.embedding_dim = 16
        self.img_embedding = 8
        self.num_classes = len(char2idx)

        # Embedding Layer for Images
        self.embedding_img = nn.Embedding(len(img2idx), self.img_embedding)
        # Embedding Layer for character embeddings
        self.embedding_layer = nn.Embedding(len(char2idx), self.embedding_dim, padding_idx = char2idx['<pad>'])

        # project to embedding dim
        self.project_down = nn.Linear(self.img_embedding + self.embedding_dim, self.embedding_dim)

        # convolution block 1 (input channels follow the projected embedding size)
        self.conv1 = nn.Conv1d(in_channels = self.embedding_dim, out_channels = 1024, kernel_size = 5, padding=2)
        self.relu1 = nn.ReLU()
        self.batchnorm1 = nn.BatchNorm1d(1024)
        self.maxpool1 = nn.MaxPool1d(kernel_size=2)
        self.dropout1 = nn.Dropout(p=0.25)

        # convolution block 2
        self.conv2 = nn.Conv1d(in_channels = 1024, out_channels= 1024, kernel_size=5, padding=2)
        self.relu2 = nn.ReLU()
        self.batchnorm2 = nn.BatchNorm1d(1024)
        self.maxpool2 = nn.MaxPool1d(kernel_size=2)  # default value of stride = kernel_size
        self.dropout2 = nn.Dropout(p=0.25)

        # convolution block 3
        self.conv3 = nn.Conv1d(in_channels = 1024, out_channels = 1024, kernel_size = 5, padding=2)
        self.relu3 = nn.ReLU()
        self.batchnorm3 = nn.BatchNorm1d(1024)
        self.maxpool3 = nn.MaxPool1d(kernel_size=2)
        self.dropout3 = nn.Dropout(p=0.25)

        # convolution block 4
        self.conv4 = nn.Conv1d(in_channels = 1024, out_channels = 1024, kernel_size = 5, padding=2)
        self.relu4 = nn.ReLU()
        self.batchnorm4 = nn.BatchNorm1d(1024)
        self.maxpool4 = nn.MaxPool1d(kernel_size=2)
        self.dropout4 = nn.Dropout(p=0.25)

        # convolution block 5 (no pooling; followed by the global max in forward)
        self.conv5 = nn.Conv1d(in_channels = 1024, out_channels = 1024, kernel_size = 5, padding=2)
        self.relu5 = nn.ReLU()
        self.batchnorm5 = nn.BatchNorm1d(1024)

        # final layers
        self.dropout5 = nn.Dropout(p=0.25)
        self.Linear1 = nn.Linear(1024, 1024)
        self.LinearRelu1 = nn.ReLU()
        self.batchnorm6 = nn.BatchNorm1d(1024)
        self.dropout6 = nn.Dropout(p=0.25)
        self.fc = nn.Linear(1024, self.num_classes)


    def forward(self, input_img, x):
        """Compute class scores for a batch.

        Shapes below use L for the sequence length of `x` (128 in this
        notebook). L is read from `x`, so other lengths work as long as at
        least one position survives the four halving max-pools (L >= 16).

        Args:
            input_img (torch.LongTensor): Image indices, shape (batch_size,).
            x (torch.LongTensor): Character indices, shape (batch_size, L).

        Returns:
            torch.Tensor: Unnormalised scores, shape (batch_size, num_classes).
        """
        seq_len = x.size(1)

        # (batch_size,) -> (batch_size, 1) -> (batch_size, L): the same image
        # index is repeated at every position so it can be concatenated with
        # the character embeddings.
        input_img = torch.unsqueeze(input_img, dim=1)
        input_img = input_img.repeat(1, seq_len)
        # image embeddings (batch_size, L, img_embedding)
        img_out = self.embedding_img(input_img)

        # character embeddings (batch_size, L, embedding_dim)
        text_out = self.embedding_layer(x)

        # concatenate image and caption embeddings
        # (batch_size, L, img_embedding + embedding_dim)
        cat = torch.cat((img_out, text_out), dim=2)

        # project down to (batch_size, L, embedding_dim)
        embedding_out = self.project_down(cat)

        # Conv1d expects channels first: (batch_size, embedding_dim, L)
        embedding_out = embedding_out.permute(0, 2, 1)

        # block 1: conv/relu/batchnorm keep (batch_size, 1024, L)
        conv1_out = self.conv1(embedding_out)
        conv1_out = self.relu1(conv1_out)
        batchnorm1_out = self.batchnorm1(conv1_out)
        # max-pool with kernel 2 -> (batch_size, 1024, L/2)
        maxpool1_out = self.maxpool1(batchnorm1_out)
        dropout1_out = self.dropout1(maxpool1_out)

        # block 2: (batch_size, 1024, L/2) -> pooled to (batch_size, 1024, L/4)
        conv2_out = self.conv2(dropout1_out)
        conv2_out = self.relu2(conv2_out)
        batchnorm2_out = self.batchnorm2(conv2_out)
        maxpool2_out = self.maxpool2(batchnorm2_out)
        dropout2_out = self.dropout2(maxpool2_out)

        # block 3: (batch_size, 1024, L/4) -> pooled to (batch_size, 1024, L/8)
        conv3_out = self.conv3(dropout2_out)
        conv3_out = self.relu3(conv3_out)
        batchnorm3_out = self.batchnorm3(conv3_out)
        maxpool3_out = self.maxpool3(batchnorm3_out)
        dropout3_out = self.dropout3(maxpool3_out)

        # block 4: (batch_size, 1024, L/8) -> pooled to (batch_size, 1024, L/16)
        conv4_out = self.conv4(dropout3_out)
        conv4_out = self.relu4(conv4_out)
        batchnorm4_out = self.batchnorm4(conv4_out)
        maxpool4_out = self.maxpool4(batchnorm4_out)
        dropout4_out = self.dropout4(maxpool4_out)

        # block 5: stays (batch_size, 1024, L/16)
        conv5_out = self.conv5(dropout4_out)
        conv5_out = self.relu5(conv5_out)
        batchnorm5_out = self.batchnorm5(conv5_out)

        # Global max pooling: for each channel take the maximum over the
        # remaining sequence positions -> (batch_size, 1024)
        gmaxpool_out = torch.max(batchnorm5_out, dim=2)
        gmaxpool_out = gmaxpool_out.values
        gmaxpool_out = self.dropout5(gmaxpool_out)

        # dense layer (batch_size, 1024)
        linear_out = self.Linear1(gmaxpool_out)
        linear_out = self.LinearRelu1(linear_out)
        batchnorm6_out = self.batchnorm6(linear_out)
        dropout6_out = self.dropout6(batchnorm6_out)

        # class scores (batch_size, num_classes)
        final = self.fc(dropout6_out)

        return final

In [None]:
def evaluate_dict(gold_dict, pred_dict):
    """Compute the accuracy of predictions against gold labels.

    Args:
        gold_dict (dict): Maps example id -> true label. Must contain every
            key of `pred_dict`.
        pred_dict (dict): Maps example id -> predicted label.

    Returns:
        dict: ``{'acc': percentage}`` where the percentage (0-100) is taken
        over the entries of `pred_dict`. An empty `pred_dict` yields 0.0
        instead of dividing by zero.

    Raises:
        KeyError: If a predicted id is missing from `gold_dict`.
    """
    total = len(pred_dict)
    if total == 0:
        # Nothing was predicted, so there is nothing to score.
        return {'acc': 0.0}

    # count exact matches between prediction and ground truth
    sum_acc = sum(1 for key, value in pred_dict.items() if gold_dict[key] == value)

    return {'acc': 100. * sum_acc / total}

In [None]:
def evaluate(model, dev_loader, dev_path, device, idx2char):
    """Evaluates Model.

    Runs `model` over `dev_loader` without gradients, tracking the
    sample-weighted mean NLL and the arg-max prediction for every example id.
    The model is put into eval mode for the pass and back into train mode
    before returning.

    Args:
        model (torch.nn.Module): The model to evaluate; called as
            ``model(img, source)``.
        dev_loader (loader): data loader yielding (source, label, img, ids)
        dev_path (string): Path to eval npz file (keys ``label`` and ``ids``
            are read to build the gold labels).
        device: "cpu" or "cuda:0" for gpu
        idx2char (dict): dictionary mapping integers to characters. Currently
            unused; kept so existing callers keep working.

    Returns:
        results (OrderedDict): ``NLL`` (mean loss) and ``acc`` (percentage).
        pred_dict (dict): Maps example id -> predicted class index.
    """
    # `import tqdm` alone does not load the `tqdm.notebook` submodule, so
    # import the notebook progress bar explicitly.
    from tqdm.notebook import tqdm as notebook_tqdm

    nll_meter = AverageMeter()

    model.eval()
    pred_dict = {}

    # Gold labels keyed by example id. Ids and labels are paired by position
    # in the archive, matching how MemeDataset pairs them (identical to
    # indexing labels by id when ids are 0..N-1). The `with` block closes the
    # archive once both arrays have been read.
    with np.load(dev_path) as test_dataset:
        true_labels = torch.from_numpy(test_dataset['label']).long().tolist()
        uniq_ids = torch.from_numpy(test_dataset['ids']).long().tolist()
    gold_dict = dict(zip(uniq_ids, true_labels))

    with torch.no_grad(), notebook_tqdm(total=len(dev_loader.dataset), position=1, leave=True) as progress_bar:
        for source, label, img, ids in dev_loader:
            batch_size = source.size(0)

            # Setup for forward
            source = source.to(device)  # (batchsize, seqlen)
            label = label.to(device)  # (batchsize)
            img = img.to(device)  # (batchsize)

            # Forward: log-probabilities over classes, computed once and
            # reused for both the loss and the predictions.
            logits = model(img, source)
            log_probs = F.log_softmax(logits, dim=1)
            loss = F.nll_loss(log_probs, label)
            nll_meter.update(loss.item(), batch_size)

            # Most likely class for every example in the batch
            predictions = torch.max(log_probs, dim=1).indices

            # ids never need to be on `device`; convert whole tensors at once
            # instead of calling .item() per element.
            pred_dict.update(zip(ids.tolist(), predictions.tolist()))

            # Log info
            progress_bar.update(batch_size)
            progress_bar.set_postfix(NLL=nll_meter.avg)

    model.train()

    # return results
    results = evaluate_dict(gold_dict, pred_dict)
    results_list = [('NLL', nll_meter.avg),
                    ('acc', results['acc'])
                   ]

    results = OrderedDict(results_list)
    return results, pred_dict

In [None]:
def visualize(tbx, pred_dict, eval_path, step, split, num_visuals):
    """Visualize text examples to TensorBoard.

    Picks up to `num_visuals` example ids at random (without replacement),
    decodes their source text, label and prediction through the module-level
    `idx2char` / `idx2img` lookups and writes one text entry per example.

    Args:
        tbx (tensorboardX.SummaryWriter): Summary writer.
        pred_dict (dict): dict of predictions of the form id -> pred.
        eval_path (string): Path to eval npz file; rows are looked up by id.
        step (int): Number of examples seen so far during training.
        split (str): Name of data split being visualized.
        num_visuals (int): Number of visuals to select at random from preds.
    """
    # Never ask for more examples than there are predictions.
    num_visuals = min(num_visuals, len(pred_dict))
    if num_visuals <= 0:
        return

    # pick randomly from indexes
    visual_ids = np.random.choice(list(pred_dict), size=num_visuals, replace=False)

    # Read the arrays we need and close the archive right away.
    # NOTE(review): allow_pickle=True unpickles arbitrary objects; only use
    # with archives you produced yourself.
    with np.load(eval_path, allow_pickle=True) as test_dataset:
        true_labels = test_dataset['label']
        test_source = test_dataset['source']
        test_img = test_dataset['img']

    # iterate through index and append to tensorboard
    for i, id_ in enumerate(visual_ids):
        # convert index encodings back to text
        source = ''.join(idx2char[elem] for elem in test_source[id_])
        label = idx2char[true_labels[id_]]
        pred = idx2char[pred_dict[id_]]
        img_name = idx2img[test_img[id_]]

        tbl_fmt = (f'- **Source:** {source}\n'
                   + f'- **Label:** {label}\n'
                   + f'- **Prediction:** {pred}\n'
                   + f'- **Img Name:** {img_name}')
        tbx.add_text(tag=f'{split}/{i+1}_of_{num_visuals}',
                     text_string=tbl_fmt,
                     global_step=step)

In [None]:
# Directory that receives the log file, TensorBoard events and checkpoints.
save_dir = 'gdrive/My Drive/MemeGenerator/save/train/baseline-02'
# exist_ok avoids the check-then-create race and makes the cell re-runnable.
os.makedirs(save_dir, exist_ok=True)

log = get_logger(save_dir, 'baseline')
tbx = SummaryWriter(save_dir)
log.info('Using random seed 224 ...')

# create checkpoint saver; accuracy is better when higher, so maximise it.
# (`maximize_metric` is a boolean flag; the string 'acc' used before only
# worked because non-empty strings are truthy.)
saver = CheckpointSaver(save_dir,
                        max_checkpoints=5,
                        metric_name='acc',
                        maximize_metric=True,
                        log=log)

# create model and put it in training mode on the GPU
model = MemeGenerator()
model = model.to('cuda:0')
model.train()

# optimizer and smoothing
optimizer = optim.Adam(model.parameters(), lr = 0.001)
ema = EMA(model, 0.999) # Exponentially Smooth parameters

[09.15.20 20:22:50] Using random seed 224 ...
[09.15.20 20:22:50] Saver will maximize acc...


In [None]:
# Training configuration
NUM_EPOCHS = 10  # number of passes over the training set
EVAL_STEPS = 2000000  # evaluate after (at least) this many training examples
DEV_PATH = 'gdrive/My Drive/MemeGenerator/dataset_final/test.npz'

epoch = 0
step = 0  # total number of training examples seen so far
steps_till_eval = EVAL_STEPS
device = 'cuda:0'

for epoch in range(1, NUM_EPOCHS + 1):
    log.info(f'Starting epoch {epoch}...')
    with torch.enable_grad(), tqdm.tqdm(total=len(train_loader.dataset), position=0, leave=True) as progress_bar:
        for source, label, img, ids in train_loader:
            batch_size = source.size(0)

            # Setup for forward
            source = source.to(device)  # (batchsize, seqlen)
            label = label.to(device)  # (batchsize)
            img = img.to(device)  # (batchsize)
            optimizer.zero_grad()

            # Forward: NLL of the true class under the softmax over classes.
            # `dim=1` is explicit; the implicit-dimension form is deprecated.
            logits = model(img, source)
            loss = F.nll_loss(F.log_softmax(logits, dim=1), label)
            loss_val = loss.item()

            # Backward
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1)  # clip gradient norm to 1
            optimizer.step()
            # `step` counts examples, so dividing by the batch size gives the
            # approximate number of optimizer updates done so far.
            ema(model, step // batch_size)

            # Log info
            step += batch_size
            progress_bar.update(batch_size)
            progress_bar.set_postfix(epoch=epoch, NLL=loss_val)

            tbx.add_scalar('train/NLL', loss_val, step)
            tbx.add_scalar('train/LR',
                           optimizer.param_groups[0]['lr'],
                           step)

            steps_till_eval -= batch_size
            if steps_till_eval <= 0:
                steps_till_eval = EVAL_STEPS

                # Evaluate and save checkpoint using the EMA-smoothed weights,
                # then restore the raw training weights.
                log.info(f'Evaluating at step {step}...')
                ema.assign(model)
                results, pred_dict = evaluate(model, dev_loader, DEV_PATH, device, idx2char)

                saver.save(step, model, results['acc'], device)
                ema.resume(model)

                # Log to console
                results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items())
                log.info(f'Dev {results_str}')

                # Log to TensorBoard
                log.info('Visualizing in TensorBoard...')
                for k, v in results.items():
                    tbx.add_scalar(f'dev/{k}', v, step)

                visualize(tbx, pred_dict, eval_path=DEV_PATH, step=step, split='dev', num_visuals=30)

  0%|          | 0/5020919 [00:00<?, ?it/s]

[09.15.20 20:22:54] Starting epoch 1...


 40%|███▉      | 2000128/5020919 [22:00<33:13, 1515.09it/s, NLL=1.74, epoch=1]

[09.15.20 20:44:55] Evaluating at step 2000128...


HBox(children=(FloatProgress(value=0.0, max=264259.0), HTML(value='')))

 40%|███▉      | 2000128/5020919 [22:20<33:13, 1515.09it/s, NLL=1.74, epoch=1]




 40%|███▉      | 2000128/5020919 [23:09<33:13, 1515.09it/s, NLL=1.74, epoch=1]

[09.15.20 20:46:04] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_2000128.pth.tar


 40%|███▉      | 2000128/5020919 [23:10<33:13, 1515.09it/s, NLL=1.74, epoch=1]

[09.15.20 20:46:05] New best checkpoint at step 2000128...
[09.15.20 20:46:05] Dev NLL: 27883.34, acc: 50.65
[09.15.20 20:46:05] Visualizing in TensorBoard...


 80%|███████▉  | 4000256/5020919 [45:04<11:06, 1531.04it/s, NLL=1.44, epoch=1]

[09.15.20 21:07:58] Evaluating at step 4000256...


HBox(children=(FloatProgress(value=0.0, max=264259.0), HTML(value='')))

 80%|███████▉  | 4000256/5020919 [45:20<11:06, 1531.04it/s, NLL=1.44, epoch=1]




 80%|███████▉  | 4000256/5020919 [46:13<11:06, 1531.04it/s, NLL=1.44, epoch=1]

[09.15.20 21:09:08] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_4000256.pth.tar


 80%|███████▉  | 4000256/5020919 [46:14<11:06, 1531.04it/s, NLL=1.44, epoch=1]

[09.15.20 21:09:08] New best checkpoint at step 4000256...
[09.15.20 21:09:08] Dev NLL: 01.46, acc: 56.27
[09.15.20 21:09:08] Visualizing in TensorBoard...


100%|██████████| 5020919/5020919 [57:25<00:00, 1457.42it/s, NLL=1.54, epoch=1]
  0%|          | 0/5020919 [00:00<?, ?it/s]

[09.15.20 21:20:19] Starting epoch 2...


 20%|█▉        | 979456/5020919 [10:44<43:57, 1532.56it/s, NLL=1.35, epoch=2]

[09.15.20 21:31:04] Evaluating at step 6000375...


HBox(children=(FloatProgress(value=0.0, max=264259.0), HTML(value='')))

 20%|█▉        | 979456/5020919 [11:00<43:57, 1532.56it/s, NLL=1.35, epoch=2]




 20%|█▉        | 979456/5020919 [11:55<43:57, 1532.56it/s, NLL=1.35, epoch=2]

[09.15.20 21:32:15] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_6000375.pth.tar


 20%|█▉        | 979456/5020919 [11:55<43:57, 1532.56it/s, NLL=1.35, epoch=2]

[09.15.20 21:32:15] New best checkpoint at step 6000375...
[09.15.20 21:32:15] Dev NLL: 01.38, acc: 58.34
[09.15.20 21:32:15] Visualizing in TensorBoard...


 59%|█████▉    | 2979584/5020919 [33:45<22:16, 1527.85it/s, NLL=1.35, epoch=2]

[09.15.20 21:54:05] Evaluating at step 8000503...


HBox(children=(FloatProgress(value=0.0, max=264259.0), HTML(value='')))

 59%|█████▉    | 2979584/5020919 [34:00<22:16, 1527.85it/s, NLL=1.35, epoch=2]




 59%|█████▉    | 2979584/5020919 [34:55<22:16, 1527.85it/s, NLL=1.35, epoch=2]

[09.15.20 21:55:15] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_8000503.pth.tar


 59%|█████▉    | 2979584/5020919 [34:56<22:16, 1527.85it/s, NLL=1.35, epoch=2]

[09.15.20 21:55:15] New best checkpoint at step 8000503...
[09.15.20 21:55:15] Dev NLL: 01.35, acc: 59.38
[09.15.20 21:55:15] Visualizing in TensorBoard...


 99%|█████████▉| 4979712/5020919 [56:46<00:26, 1530.16it/s, NLL=1.3, epoch=2]

[09.15.20 22:17:05] Evaluating at step 10000631...


HBox(children=(FloatProgress(value=0.0, max=264259.0), HTML(value='')))

 99%|█████████▉| 4979712/5020919 [57:00<00:26, 1530.16it/s, NLL=1.3, epoch=2]




 99%|█████████▉| 4979712/5020919 [57:56<00:26, 1530.16it/s, NLL=1.3, epoch=2]

[09.15.20 22:18:15] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_10000631.pth.tar


 99%|█████████▉| 4979712/5020919 [57:56<00:26, 1530.16it/s, NLL=1.3, epoch=2]

[09.15.20 22:18:16] New best checkpoint at step 10000631...
[09.15.20 22:18:16] Dev NLL: 01.32, acc: 60.16
[09.15.20 22:18:16] Visualizing in TensorBoard...


100%|██████████| 5020919/5020919 [58:25<00:00, 1432.44it/s, NLL=1.45, epoch=2]
  0%|          | 0/5020919 [00:00<?, ?it/s]

[09.15.20 22:18:44] Starting epoch 3...


 39%|███▉      | 1958912/5020919 [21:25<33:11, 1537.91it/s, NLL=1.37, epoch=3]

[09.15.20 22:40:10] Evaluating at step 12000750...


HBox(children=(FloatProgress(value=0.0, max=264259.0), HTML(value='')))

 39%|███▉      | 1958912/5020919 [21:40<33:11, 1537.91it/s, NLL=1.37, epoch=3]




 39%|███▉      | 1958912/5020919 [22:36<33:11, 1537.91it/s, NLL=1.37, epoch=3]

[09.15.20 22:41:20] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_12000750.pth.tar


 39%|███▉      | 1958912/5020919 [22:36<33:11, 1537.91it/s, NLL=1.37, epoch=3]

[09.15.20 22:41:21] New best checkpoint at step 12000750...
[09.15.20 22:41:21] Removed checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_2000128.pth.tar
[09.15.20 22:41:21] Dev NLL: 01.29, acc: 60.71
[09.15.20 22:41:21] Visualizing in TensorBoard...


 79%|███████▉  | 3959040/5020919 [44:27<11:30, 1538.65it/s, NLL=1.43, epoch=3]

[09.15.20 23:03:11] Evaluating at step 14000878...


HBox(children=(FloatProgress(value=0.0, max=264259.0), HTML(value='')))

 79%|███████▉  | 3959040/5020919 [44:40<11:30, 1538.65it/s, NLL=1.43, epoch=3]




 79%|███████▉  | 3959040/5020919 [45:37<11:30, 1538.65it/s, NLL=1.43, epoch=3]

[09.15.20 23:04:22] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_14000878.pth.tar


 79%|███████▉  | 3959040/5020919 [45:37<11:30, 1538.65it/s, NLL=1.43, epoch=3]

[09.15.20 23:04:22] New best checkpoint at step 14000878...
[09.15.20 23:04:22] Removed checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_4000256.pth.tar
[09.15.20 23:04:22] Dev NLL: 01.46, acc: 60.89
[09.15.20 23:04:22] Visualizing in TensorBoard...


100%|██████████| 5020919/5020919 [57:14<00:00, 1461.97it/s, NLL=1.2, epoch=3]
  0%|          | 0/5020919 [00:00<?, ?it/s]

[09.15.20 23:15:59] Starting epoch 4...


 19%|█▊        | 938240/5020919 [10:15<44:33, 1527.19it/s, NLL=1.41, epoch=4]

[09.15.20 23:26:14] Evaluating at step 16000997...


HBox(children=(FloatProgress(value=0.0, max=264259.0), HTML(value='')))

 19%|█▊        | 938240/5020919 [10:30<44:33, 1527.19it/s, NLL=1.41, epoch=4]




 19%|█▊        | 938240/5020919 [11:26<44:33, 1527.19it/s, NLL=1.41, epoch=4]

[09.15.20 23:27:25] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_16000997.pth.tar


 19%|█▊        | 938240/5020919 [11:26<44:33, 1527.19it/s, NLL=1.41, epoch=4]

[09.15.20 23:27:25] New best checkpoint at step 16000997...
[09.15.20 23:27:25] Removed checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_6000375.pth.tar
[09.15.20 23:27:25] Dev NLL: 01.26, acc: 61.53
[09.15.20 23:27:25] Visualizing in TensorBoard...


 59%|█████▊    | 2938368/5020919 [33:17<22:40, 1530.97it/s, NLL=1.46, epoch=4]

[09.15.20 23:49:16] Evaluating at step 18001125...


HBox(children=(FloatProgress(value=0.0, max=264259.0), HTML(value='')))

 59%|█████▊    | 2938368/5020919 [33:30<22:40, 1530.97it/s, NLL=1.46, epoch=4]




 59%|█████▊    | 2938368/5020919 [34:27<22:40, 1530.97it/s, NLL=1.46, epoch=4]

[09.15.20 23:50:26] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_18001125.pth.tar


 59%|█████▊    | 2938368/5020919 [34:28<22:40, 1530.97it/s, NLL=1.46, epoch=4]

[09.15.20 23:50:27] New best checkpoint at step 18001125...


 59%|█████▊    | 2938368/5020919 [34:28<22:40, 1530.97it/s, NLL=1.46, epoch=4]

[09.15.20 23:50:27] Removed checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_8000503.pth.tar
[09.15.20 23:50:27] Dev NLL: 01.35, acc: 61.75
[09.15.20 23:50:27] Visualizing in TensorBoard...


 98%|█████████▊| 4938496/5020919 [56:20<00:54, 1523.04it/s, NLL=1.27, epoch=4]

[09.16.20 00:12:19] Evaluating at step 20001253...


HBox(children=(FloatProgress(value=0.0, max=264259.0), HTML(value='')))

 98%|█████████▊| 4938496/5020919 [56:40<00:54, 1523.04it/s, NLL=1.27, epoch=4]




 98%|█████████▊| 4938496/5020919 [57:30<00:54, 1523.04it/s, NLL=1.27, epoch=4]

[09.16.20 00:13:29] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_20001253.pth.tar


 98%|█████████▊| 4938496/5020919 [57:31<00:54, 1523.04it/s, NLL=1.27, epoch=4]

[09.16.20 00:13:30] New best checkpoint at step 20001253...
[09.16.20 00:13:30] Removed checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_10000631.pth.tar
[09.16.20 00:13:30] Dev NLL: 01.24, acc: 62.02
[09.16.20 00:13:30] Visualizing in TensorBoard...


100%|██████████| 5020919/5020919 [58:27<00:00, 1431.57it/s, NLL=1.22, epoch=4]
  0%|          | 0/5020919 [00:00<?, ?it/s]

[09.16.20 00:14:26] Starting epoch 5...


 38%|███▊      | 1917696/5020919 [20:59<33:50, 1528.40it/s, NLL=1.37, epoch=5]

[09.16.20 00:35:26] Evaluating at step 22001372...


HBox(children=(FloatProgress(value=0.0, max=264259.0), HTML(value='')))

 38%|███▊      | 1917696/5020919 [21:10<33:50, 1528.40it/s, NLL=1.37, epoch=5]




 38%|███▊      | 1917696/5020919 [22:10<33:50, 1528.40it/s, NLL=1.37, epoch=5]

[09.16.20 00:36:36] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_22001372.pth.tar


 38%|███▊      | 1917696/5020919 [22:10<33:50, 1528.40it/s, NLL=1.37, epoch=5]

[09.16.20 00:36:37] New best checkpoint at step 22001372...
[09.16.20 00:36:37] Removed checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_12000750.pth.tar
[09.16.20 00:36:37] Dev NLL: 01.24, acc: 62.26
[09.16.20 00:36:37] Visualizing in TensorBoard...


 78%|███████▊  | 3917824/5020919 [44:05<12:10, 1510.65it/s, NLL=1.17, epoch=5]

[09.16.20 00:58:31] Evaluating at step 24001500...


HBox(children=(FloatProgress(value=0.0, max=264259.0), HTML(value='')))

 78%|███████▊  | 3917824/5020919 [44:20<12:10, 1510.65it/s, NLL=1.17, epoch=5]




 78%|███████▊  | 3917824/5020919 [45:14<12:10, 1510.65it/s, NLL=1.17, epoch=5]

[09.16.20 00:59:41] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_24001500.pth.tar


 78%|███████▊  | 3917824/5020919 [45:15<12:10, 1510.65it/s, NLL=1.17, epoch=5]

[09.16.20 00:59:41] New best checkpoint at step 24001500...
[09.16.20 00:59:41] Removed checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_14000878.pth.tar
[09.16.20 00:59:41] Dev NLL: 01.23, acc: 62.48
[09.16.20 00:59:41] Visualizing in TensorBoard...


100%|██████████| 5020919/5020919 [57:20<00:00, 1459.44it/s, NLL=1.19, epoch=5]
  0%|          | 0/5020919 [00:00<?, ?it/s]

[09.16.20 01:11:46] Starting epoch 6...


 18%|█▊        | 897024/5020919 [09:50<45:27, 1511.98it/s, NLL=1.22, epoch=6]

[09.16.20 01:21:37] Evaluating at step 26001619...


HBox(children=(FloatProgress(value=0.0, max=264259.0), HTML(value='')))

 18%|█▊        | 897024/5020919 [10:10<45:27, 1511.98it/s, NLL=1.22, epoch=6]




 18%|█▊        | 897024/5020919 [11:01<45:27, 1511.98it/s, NLL=1.22, epoch=6]

[09.16.20 01:22:48] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_26001619.pth.tar


 18%|█▊        | 897024/5020919 [11:02<45:27, 1511.98it/s, NLL=1.22, epoch=6]

[09.16.20 01:22:48] New best checkpoint at step 26001619...
[09.16.20 01:22:48] Removed checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_16000997.pth.tar
[09.16.20 01:22:48] Dev NLL: 01.25, acc: 62.66
[09.16.20 01:22:48] Visualizing in TensorBoard...


 58%|█████▊    | 2897152/5020919 [32:58<23:36, 1499.04it/s, NLL=1.31, epoch=6]

[09.16.20 01:44:45] Evaluating at step 28001747...


HBox(children=(FloatProgress(value=0.0, max=264259.0), HTML(value='')))

 58%|█████▊    | 2897152/5020919 [33:10<23:36, 1499.04it/s, NLL=1.31, epoch=6]




 58%|█████▊    | 2897152/5020919 [34:08<23:36, 1499.04it/s, NLL=1.31, epoch=6]

[09.16.20 01:45:55] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_28001747.pth.tar


 58%|█████▊    | 2897152/5020919 [34:09<23:36, 1499.04it/s, NLL=1.31, epoch=6]

[09.16.20 01:45:55] New best checkpoint at step 28001747...
[09.16.20 01:45:55] Removed checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_18001125.pth.tar
[09.16.20 01:45:55] Dev NLL: 01.22, acc: 62.83
[09.16.20 01:45:55] Visualizing in TensorBoard...


 98%|█████████▊| 4897280/5020919 [56:03<01:20, 1534.83it/s, NLL=1.35, epoch=6]

[09.16.20 02:07:50] Evaluating at step 30001875...


HBox(children=(FloatProgress(value=0.0, max=264259.0), HTML(value='')))

 98%|█████████▊| 4897280/5020919 [56:20<01:20, 1534.83it/s, NLL=1.35, epoch=6]




 98%|█████████▊| 4897280/5020919 [57:14<01:20, 1534.83it/s, NLL=1.35, epoch=6]

[09.16.20 02:09:01] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_30001875.pth.tar


 98%|█████████▊| 4897280/5020919 [57:14<01:20, 1534.83it/s, NLL=1.35, epoch=6]

[09.16.20 02:09:01] New best checkpoint at step 30001875...
[09.16.20 02:09:01] Removed checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_20001253.pth.tar
[09.16.20 02:09:01] Dev NLL: 01.21, acc: 63.03
[09.16.20 02:09:01] Visualizing in TensorBoard...


100%|██████████| 5020919/5020919 [58:37<00:00, 1427.29it/s, NLL=1.25, epoch=6]
  0%|          | 0/5020919 [00:00<?, ?it/s]

[09.16.20 02:10:24] Starting epoch 7...


 37%|███▋      | 1876480/5020919 [20:34<34:30, 1518.69it/s, NLL=1.1, epoch=7]

[09.16.20 02:30:58] Evaluating at step 32001994...


HBox(children=(FloatProgress(value=0.0, max=264259.0), HTML(value='')))

 37%|███▋      | 1876480/5020919 [20:50<34:30, 1518.69it/s, NLL=1.1, epoch=7]




 37%|███▋      | 1876480/5020919 [21:44<34:30, 1518.69it/s, NLL=1.1, epoch=7]

[09.16.20 02:32:08] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_32001994.pth.tar


 37%|███▋      | 1876480/5020919 [21:44<34:30, 1518.69it/s, NLL=1.1, epoch=7]

[09.16.20 02:32:09] New best checkpoint at step 32001994...
[09.16.20 02:32:09] Removed checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_22001372.pth.tar
[09.16.20 02:32:09] Dev NLL: 01.21, acc: 63.12
[09.16.20 02:32:09] Visualizing in TensorBoard...


 77%|███████▋  | 3876608/5020919 [43:38<12:32, 1521.19it/s, NLL=1.12, epoch=7]

[09.16.20 02:54:02] Evaluating at step 34002122...


HBox(children=(FloatProgress(value=0.0, max=264259.0), HTML(value='')))

 77%|███████▋  | 3876608/5020919 [43:50<12:32, 1521.19it/s, NLL=1.12, epoch=7]




 77%|███████▋  | 3876608/5020919 [44:48<12:32, 1521.19it/s, NLL=1.12, epoch=7]

[09.16.20 02:55:13] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_34002122.pth.tar


 77%|███████▋  | 3876608/5020919 [44:48<12:32, 1521.19it/s, NLL=1.12, epoch=7]

[09.16.20 02:55:13] New best checkpoint at step 34002122...
[09.16.20 02:55:13] Removed checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_24001500.pth.tar
[09.16.20 02:55:13] Dev NLL: 01.20, acc: 63.26
[09.16.20 02:55:13] Visualizing in TensorBoard...


100%|██████████| 5020919/5020919 [57:21<00:00, 1458.90it/s, NLL=1.21, epoch=7]
  0%|          | 0/5020919 [00:00<?, ?it/s]

[09.16.20 03:07:46] Starting epoch 8...


 17%|█▋        | 855808/5020919 [09:21<45:13, 1535.09it/s, NLL=1.27, epoch=8]

[09.16.20 03:17:08] Evaluating at step 36002241...


HBox(children=(FloatProgress(value=0.0, max=264259.0), HTML(value='')))

 17%|█▋        | 855808/5020919 [09:40<45:13, 1535.09it/s, NLL=1.27, epoch=8]




 17%|█▋        | 855808/5020919 [10:32<45:13, 1535.09it/s, NLL=1.27, epoch=8]

[09.16.20 03:18:18] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_36002241.pth.tar


 17%|█▋        | 855808/5020919 [10:32<45:13, 1535.09it/s, NLL=1.27, epoch=8]

[09.16.20 03:18:18] New best checkpoint at step 36002241...
[09.16.20 03:18:18] Removed checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_26001619.pth.tar
[09.16.20 03:18:18] Dev NLL: 01.20, acc: 63.31
[09.16.20 03:18:18] Visualizing in TensorBoard...


 57%|█████▋    | 2855936/5020919 [32:28<23:30, 1534.50it/s, NLL=1.25, epoch=8]

[09.16.20 03:40:14] Evaluating at step 38002369...


HBox(children=(FloatProgress(value=0.0, max=264259.0), HTML(value='')))

 57%|█████▋    | 2855936/5020919 [32:40<23:30, 1534.50it/s, NLL=1.25, epoch=8]




 57%|█████▋    | 2855936/5020919 [33:37<23:30, 1534.50it/s, NLL=1.25, epoch=8]

[09.16.20 03:41:24] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_38002369.pth.tar


 57%|█████▋    | 2855936/5020919 [33:38<23:30, 1534.50it/s, NLL=1.25, epoch=8]

[09.16.20 03:41:24] New best checkpoint at step 38002369...
[09.16.20 03:41:24] Removed checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_28001747.pth.tar
[09.16.20 03:41:24] Dev NLL: 01.20, acc: 63.42
[09.16.20 03:41:24] Visualizing in TensorBoard...


 97%|█████████▋| 4856064/5020919 [55:31<01:47, 1527.45it/s, NLL=1.15, epoch=8]

[09.16.20 04:03:17] Evaluating at step 40002497...


HBox(children=(FloatProgress(value=0.0, max=264259.0), HTML(value='')))

 97%|█████████▋| 4856064/5020919 [55:50<01:47, 1527.45it/s, NLL=1.15, epoch=8]




 97%|█████████▋| 4856064/5020919 [56:41<01:47, 1527.45it/s, NLL=1.15, epoch=8]

[09.16.20 04:04:28] Saved checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_40002497.pth.tar


 97%|█████████▋| 4856064/5020919 [56:42<01:47, 1527.45it/s, NLL=1.15, epoch=8]

[09.16.20 04:04:28] New best checkpoint at step 40002497...
[09.16.20 04:04:28] Removed checkpoint: gdrive/My Drive/MemeGenerator/save/train/baseline-02/step_30001875.pth.tar
[09.16.20 04:04:28] Dev NLL: 01.19, acc: 63.61
[09.16.20 04:04:28] Visualizing in TensorBoard...


100%|██████████| 5020919/5020919 [58:32<00:00, 1429.55it/s, NLL=1.18, epoch=8]
  0%|          | 0/5020919 [00:00<?, ?it/s]

[09.16.20 04:06:18] Starting epoch 9...


  1%|          | 54272/5020919 [00:36<56:16, 1471.11it/s, NLL=1.23, epoch=9]


KeyboardInterrupt: ignored