In [1]:
import pickle
from matplotlib import pyplot as plt
from torch.nn.utils.rnn import pack_padded_sequence
from torch.utils.data import DataLoader
from torchvision import transforms

from datasets.flickr8k import Flickr8kDataset
from glove import embedding_matrix_creator
from metrics import *
from utils_torch import *

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [2]:
DATASET_BASE_PATH = 'data/flickr8k/'

train_set = Flickr8kDataset(
    dataset_base_path=DATASET_BASE_PATH, dist='train',
    device=device, return_type='tensor', load_img_to_memory=False)
vocab, word2idx, idx2word, max_len = vocab_set = train_set.get_vocab()

val_set = Flickr8kDataset(
    dataset_base_path=DATASET_BASE_PATH, dist='val', vocab_set=vocab_set,
    device=device, return_type='corpus', load_img_to_memory=False)

test_set = Flickr8kDataset(
    dataset_base_path=DATASET_BASE_PATH, dist='test', vocab_set=vocab_set,
    device=device, return_type='corpus', load_img_to_memory=False)

train_eval_set = Flickr8kDataset(
    dataset_base_path=DATASET_BASE_PATH, dist='train', vocab_set=vocab_set,
    device=device, return_type='corpus', load_img_to_memory=False)

with open('vocab_set.pkl', 'wb') as f:
    pickle.dump(train_set.get_vocab(), f)

print(len(train_set), len(val_set), len(test_set))

vocab_size = len(vocab)
print(vocab_size, max_len)

30000 1000 1000
7708 40


In [3]:
MODEL = "dendenet161_gru"#"resnet50_monolstm"
EMBEDDING_DIM = 50
EMBEDDING = f"GLV{EMBEDDING_DIM}"
HIDDEN_SIZE = 256
BATCH_SIZE = 16
LR = 1e-2
MODEL_NAME = f'saved_models/{MODEL}_b{BATCH_SIZE}_emd{EMBEDDING}'
NUM_EPOCHS = 100
SAVE_FREQ = 10
LOG_INTERVAL = 25

embedding_matrix = embedding_matrix_creator(embedding_dim=EMBEDDING_DIM, word2idx=word2idx)
embedding_matrix.shape

100%|██████████| 7708/7708 [00:00<00:00, 313463.60it/s]


(7708, 50)

In [4]:
def train_model(train_loader, model, loss_fn, optimizer, vocab_size, acc_fn, desc=''):
    running_acc = 0.0
    running_loss = 0.0
    model.train()
    t = tqdm(iter(train_loader), desc=f'{desc}')
    for batch_idx, batch in enumerate(t):
        images, captions, lengths = batch
        sort_ind = torch.argsort(lengths, descending=True)
        images = images[sort_ind]
        captions = captions[sort_ind]
        lengths = lengths[sort_ind]

        optimizer.zero_grad()
        # [sum_len, vocab_size]
        outputs = model(images, captions, lengths)
        # [b, max_len] -> [sum_len]
        targets = pack_padded_sequence(captions, lengths=lengths, batch_first=True, enforce_sorted=True)[0]

        loss = loss_fn(outputs, targets)
        loss.backward()
        optimizer.step()

        running_acc += (torch.argmax(outputs, dim=1) == targets).sum().float().item() / targets.size(0)
        running_loss += loss.item()
        t.set_postfix({'loss': running_loss / (batch_idx + 1),
                       'acc': running_acc / (batch_idx + 1),
                       }, refresh=True)
        if (batch_idx + 1) % LOG_INTERVAL == 0:
            print(f'{desc} {batch_idx + 1}/{len(train_loader)} '
                  f'train_loss: {running_loss / (batch_idx + 1):.4f} '
                  f'train_acc: {running_acc / (batch_idx + 1):.4f}')

    return running_loss / len(train_loader)


def evaluate_model(data_loader, model, loss_fn, vocab_size, bleu_score_fn, tensor_to_word_fn, desc=''):
    running_bleu = [0.0] * 5
    model.eval()
    t = tqdm(iter(data_loader), desc=f'{desc}')
    for batch_idx, batch in enumerate(t):
        images, captions, lengths = batch
        outputs = tensor_to_word_fn(model.sample(images).cpu().numpy())

        for i in (1, 2, 3, 4):
            running_bleu[i] += bleu_score_fn(reference_corpus=captions, candidate_corpus=outputs, n=i)
        t.set_postfix({
            'bleu1': running_bleu[1] / (batch_idx + 1),
            'bleu4': running_bleu[4] / (batch_idx + 1),
        }, refresh=True)
    for i in (1, 2, 3, 4):
        running_bleu[i] /= len(data_loader)
    return running_bleu

In [5]:
#from models.torch.resnet50_monolstm import Captioner
from models.torch.densenet161_gru import Captioner

final_model = Captioner(EMBEDDING_DIM, HIDDEN_SIZE, vocab_size, num_layers=2,
                        embedding_matrix=embedding_matrix, train_embd=False).to(device)

loss_fn = torch.nn.CrossEntropyLoss(ignore_index=train_set.pad_value).to(device)
acc_fn = accuracy_fn(ignore_value=train_set.pad_value)
sentence_bleu_score_fn = bleu_score_fn(4, 'sentence')
corpus_bleu_score_fn = bleu_score_fn(4, 'corpus')
tensor_to_word_fn = words_from_tensors_fn(idx2word=idx2word)

params = list(final_model.decoder.parameters()) + list(final_model.encoder.embed.parameters()) + list(
    final_model.encoder.bn.parameters())

optimizer = torch.optim.Adam(params=params, lr=LR)

Downloading: "https://download.pytorch.org/models/densenet161-8d451a50.pth" to /home/albert/.cache/torch/hub/checkpoints/densenet161-8d451a50.pth
100%|██████████| 110M/110M [00:02<00:00, 49.2MB/s]


In [6]:
train_transformations = transforms.Compose([
    transforms.Resize(256),  # smaller edge of image resized to 256
    transforms.RandomCrop(224),  # get 224x224 crop from random location
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),  # convert the PIL Image to a tensor
    transforms.Normalize((0.485, 0.456, 0.406),  # normalize image for pre-trained model
                         (0.229, 0.224, 0.225))
])
eval_transformations = transforms.Compose([
    transforms.Resize(256),  # smaller edge of image resized to 256
    transforms.CenterCrop(224),  # get 224x224 crop from random location
    transforms.ToTensor(),  # convert the PIL Image to a tensor
    transforms.Normalize((0.485, 0.456, 0.406),  # normalize image for pre-trained model
                         (0.229, 0.224, 0.225))
])

train_set.transformations = train_transformations
val_set.transformations = eval_transformations
test_set.transformations = eval_transformations
train_eval_set.transformations = eval_transformations


eval_collate_fn = lambda batch: (torch.stack([x[0] for x in batch]), [x[1] for x in batch], [x[2] for x in batch])
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, sampler=None, pin_memory=False)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False, sampler=None, pin_memory=False,
                        collate_fn=eval_collate_fn)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False, sampler=None, pin_memory=False,
                         collate_fn=eval_collate_fn)
train_eval_loader = DataLoader(train_eval_set, batch_size=BATCH_SIZE, shuffle=False, sampler=None, pin_memory=False,
                               collate_fn=eval_collate_fn)

In [7]:
train_loss_min = 100
val_bleu4_max = 0.0
for epoch in range(NUM_EPOCHS):
    train_loss = train_model(desc=f'Epoch {epoch + 1}/{NUM_EPOCHS}', model=final_model,
                             optimizer=optimizer, loss_fn=loss_fn, acc_fn=acc_fn,
                             train_loader=train_loader, vocab_size=vocab_size)
    with torch.no_grad():
        train_bleu = evaluate_model(desc=f'\tTrain Bleu Score: ', model=final_model,
                                    loss_fn=loss_fn, bleu_score_fn=corpus_bleu_score_fn,
                                    tensor_to_word_fn=tensor_to_word_fn,
                                    data_loader=train_eval_loader, vocab_size=vocab_size)
        val_bleu = evaluate_model(desc=f'\tValidation Bleu Score: ', model=final_model,
                                  loss_fn=loss_fn, bleu_score_fn=corpus_bleu_score_fn,
                                  tensor_to_word_fn=tensor_to_word_fn,
                                  data_loader=val_loader, vocab_size=vocab_size)
        print(f'Epoch {epoch + 1}/{NUM_EPOCHS}',
              ''.join([f'train_bleu{i}: {train_bleu[i]:.4f} ' for i in (1, 4)]),
              ''.join([f'val_bleu{i}: {val_bleu[i]:.4f} ' for i in (1, 4)]),
              )
        state = {
            'epoch': epoch + 1,
            'state_dict': final_model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'train_loss_latest': train_loss,
            'val_bleu4_latest': val_bleu[4],
            'train_loss_min': min(train_loss, train_loss_min),
            'val_bleu4_max': max(val_bleu[4], val_bleu4_max),
            'train_bleus': train_bleu,
            'val_bleus': val_bleu,
        }
        torch.save(state, f'{MODEL_NAME}_latest.pt')
        if train_loss < train_loss_min:
            train_loss_min = train_loss
            torch.save(state, f'{MODEL_NAME}''_best_train.pt')
        if val_bleu[4] > val_bleu4_max:
            val_bleu4_max = val_bleu[4]
            torch.save(state, f'{MODEL_NAME}''_best_val.pt')

torch.save(state, f'{MODEL_NAME}_ep{NUM_EPOCHS:02d}_weights.pt')
final_model.eval()

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
Epoch 1/100:   1%|▏         | 26/1875 [00:04<05:29,  5.61it/s, loss=6.29, acc=0.15]

Epoch 1/100 25/1875 train_loss: 6.3171 train_acc: 0.1496


Epoch 1/100:   3%|▎         | 51/1875 [00:09<05:32,  5.49it/s, loss=5.85, acc=0.175]

Epoch 1/100 50/1875 train_loss: 5.8569 train_acc: 0.1742


Epoch 1/100:   4%|▍         | 76/1875 [00:14<05:29,  5.46it/s, loss=5.64, acc=0.188]

Epoch 1/100 75/1875 train_loss: 5.6433 train_acc: 0.1873


Epoch 1/100:   5%|▌         | 101/1875 [00:18<05:19,  5.55it/s, loss=5.51, acc=0.197]

Epoch 1/100 100/1875 train_loss: 5.5146 train_acc: 0.1969


Epoch 1/100:   7%|▋         | 126/1875 [00:23<05:20,  5.46it/s, loss=5.39, acc=0.203]

Epoch 1/100 125/1875 train_loss: 5.3960 train_acc: 0.2031


Epoch 1/100:   8%|▊         | 151/1875 [00:28<05:25,  5.30it/s, loss=5.32, acc=0.211]

Epoch 1/100 150/1875 train_loss: 5.3170 train_acc: 0.2110


Epoch 1/100:   9%|▉         | 175/1875 [00:33<05:52,  4.82it/s, loss=5.24, acc=0.22]

Epoch 1/100 175/1875 train_loss: 5.2433 train_acc: 0.2200


Epoch 1/100:  11%|█         | 201/1875 [00:37<04:53,  5.70it/s, loss=5.18, acc=0.229]

Epoch 1/100 200/1875 train_loss: 5.1827 train_acc: 0.2284


Epoch 1/100:  12%|█▏        | 226/1875 [00:42<05:10,  5.31it/s, loss=5.12, acc=0.238]

Epoch 1/100 225/1875 train_loss: 5.1251 train_acc: 0.2375


Epoch 1/100:  13%|█▎        | 251/1875 [00:47<04:55,  5.50it/s, loss=5.07, acc=0.245]

Epoch 1/100 250/1875 train_loss: 5.0692 train_acc: 0.2448


Epoch 1/100:  15%|█▍        | 276/1875 [00:51<04:45,  5.60it/s, loss=5.01, acc=0.252]

Epoch 1/100 275/1875 train_loss: 5.0152 train_acc: 0.2518


Epoch 1/100:  16%|█▌        | 301/1875 [00:56<04:38,  5.64it/s, loss=4.97, acc=0.257]

Epoch 1/100 300/1875 train_loss: 4.9754 train_acc: 0.2564


Epoch 1/100:  17%|█▋        | 326/1875 [01:00<04:37,  5.58it/s, loss=4.93, acc=0.262]

Epoch 1/100 325/1875 train_loss: 4.9323 train_acc: 0.2616


Epoch 1/100:  19%|█▊        | 351/1875 [01:05<04:29,  5.65it/s, loss=4.89, acc=0.266]

Epoch 1/100 350/1875 train_loss: 4.8918 train_acc: 0.2662


Epoch 1/100:  20%|██        | 376/1875 [01:09<04:25,  5.64it/s, loss=4.85, acc=0.271]

Epoch 1/100 375/1875 train_loss: 4.8549 train_acc: 0.2709


Epoch 1/100:  21%|██▏       | 401/1875 [01:14<04:21,  5.63it/s, loss=4.82, acc=0.275]

Epoch 1/100 400/1875 train_loss: 4.8246 train_acc: 0.2745


Epoch 1/100:  23%|██▎       | 426/1875 [01:19<04:22,  5.53it/s, loss=4.8, acc=0.278]

Epoch 1/100 425/1875 train_loss: 4.8009 train_acc: 0.2779


Epoch 1/100:  24%|██▍       | 451/1875 [01:23<04:13,  5.61it/s, loss=4.77, acc=0.281]

Epoch 1/100 450/1875 train_loss: 4.7738 train_acc: 0.2810


Epoch 1/100:  25%|██▌       | 476/1875 [01:28<04:14,  5.50it/s, loss=4.74, acc=0.285]

Epoch 1/100 475/1875 train_loss: 4.7413 train_acc: 0.2844


Epoch 1/100:  27%|██▋       | 501/1875 [01:32<04:04,  5.62it/s, loss=4.72, acc=0.287]

Epoch 1/100 500/1875 train_loss: 4.7225 train_acc: 0.2870


Epoch 1/100:  28%|██▊       | 526/1875 [01:37<04:01,  5.58it/s, loss=4.71, acc=0.288]

Epoch 1/100 525/1875 train_loss: 4.7077 train_acc: 0.2882


Epoch 1/100:  29%|██▉       | 551/1875 [01:41<03:57,  5.58it/s, loss=4.69, acc=0.29]

Epoch 1/100 550/1875 train_loss: 4.6914 train_acc: 0.2901


Epoch 1/100:  31%|███       | 576/1875 [01:46<03:52,  5.58it/s, loss=4.67, acc=0.292]

Epoch 1/100 575/1875 train_loss: 4.6737 train_acc: 0.2921


Epoch 1/100:  32%|███▏      | 601/1875 [01:51<03:48,  5.59it/s, loss=4.66, acc=0.294]

Epoch 1/100 600/1875 train_loss: 4.6552 train_acc: 0.2937


Epoch 1/100:  33%|███▎      | 626/1875 [01:55<03:41,  5.63it/s, loss=4.64, acc=0.295]

Epoch 1/100 625/1875 train_loss: 4.6409 train_acc: 0.2950


Epoch 1/100:  35%|███▍      | 651/1875 [02:00<03:42,  5.49it/s, loss=4.62, acc=0.297]

Epoch 1/100 650/1875 train_loss: 4.6238 train_acc: 0.2966


Epoch 1/100:  36%|███▌      | 676/1875 [02:04<03:35,  5.56it/s, loss=4.61, acc=0.298]

Epoch 1/100 675/1875 train_loss: 4.6111 train_acc: 0.2981


Epoch 1/100:  37%|███▋      | 701/1875 [02:09<03:32,  5.51it/s, loss=4.59, acc=0.3]

Epoch 1/100 700/1875 train_loss: 4.5953 train_acc: 0.3000


Epoch 1/100:  39%|███▊      | 726/1875 [02:14<03:28,  5.52it/s, loss=4.58, acc=0.302]

Epoch 1/100 725/1875 train_loss: 4.5828 train_acc: 0.3016


Epoch 1/100:  40%|████      | 751/1875 [02:18<03:20,  5.59it/s, loss=4.57, acc=0.303]

Epoch 1/100 750/1875 train_loss: 4.5714 train_acc: 0.3024


Epoch 1/100:  41%|████▏     | 776/1875 [02:23<03:15,  5.62it/s, loss=4.56, acc=0.304]

Epoch 1/100 775/1875 train_loss: 4.5593 train_acc: 0.3037


Epoch 1/100:  43%|████▎     | 801/1875 [02:27<03:10,  5.63it/s, loss=4.55, acc=0.305]

Epoch 1/100 800/1875 train_loss: 4.5492 train_acc: 0.3048


Epoch 1/100:  44%|████▍     | 826/1875 [02:32<03:07,  5.58it/s, loss=4.54, acc=0.306]

Epoch 1/100 825/1875 train_loss: 4.5401 train_acc: 0.3057


Epoch 1/100:  45%|████▌     | 851/1875 [02:36<02:59,  5.69it/s, loss=4.53, acc=0.307]

Epoch 1/100 850/1875 train_loss: 4.5304 train_acc: 0.3067


Epoch 1/100:  47%|████▋     | 876/1875 [02:41<03:01,  5.51it/s, loss=4.52, acc=0.308]

Epoch 1/100 875/1875 train_loss: 4.5198 train_acc: 0.3077


Epoch 1/100:  48%|████▊     | 901/1875 [02:45<02:52,  5.63it/s, loss=4.51, acc=0.309]

Epoch 1/100 900/1875 train_loss: 4.5101 train_acc: 0.3089


Epoch 1/100:  49%|████▉     | 926/1875 [02:50<02:48,  5.63it/s, loss=4.5, acc=0.31]

Epoch 1/100 925/1875 train_loss: 4.5017 train_acc: 0.3098


Epoch 1/100:  51%|█████     | 951/1875 [02:55<02:47,  5.53it/s, loss=4.49, acc=0.311]

Epoch 1/100 950/1875 train_loss: 4.4912 train_acc: 0.3108


Epoch 1/100:  52%|█████▏    | 976/1875 [02:59<02:42,  5.55it/s, loss=4.48, acc=0.312]

Epoch 1/100 975/1875 train_loss: 4.4820 train_acc: 0.3116


Epoch 1/100:  53%|█████▎    | 1001/1875 [03:04<02:36,  5.60it/s, loss=4.47, acc=0.312]

Epoch 1/100 1000/1875 train_loss: 4.4742 train_acc: 0.3124


Epoch 1/100:  55%|█████▍    | 1026/1875 [03:08<02:31,  5.60it/s, loss=4.46, acc=0.313]

Epoch 1/100 1025/1875 train_loss: 4.4636 train_acc: 0.3133


Epoch 1/100:  56%|█████▌    | 1051/1875 [03:13<02:29,  5.52it/s, loss=4.45, acc=0.314]

Epoch 1/100 1050/1875 train_loss: 4.4547 train_acc: 0.3141


Epoch 1/100:  57%|█████▋    | 1076/1875 [03:18<02:24,  5.54it/s, loss=4.44, acc=0.315]

Epoch 1/100 1075/1875 train_loss: 4.4454 train_acc: 0.3150


Epoch 1/100:  59%|█████▊    | 1101/1875 [03:22<02:19,  5.56it/s, loss=4.44, acc=0.316]

Epoch 1/100 1100/1875 train_loss: 4.4360 train_acc: 0.3157


Epoch 1/100:  60%|██████    | 1126/1875 [03:27<02:12,  5.65it/s, loss=4.43, acc=0.317]

Epoch 1/100 1125/1875 train_loss: 4.4260 train_acc: 0.3167


Epoch 1/100:  61%|██████▏   | 1151/1875 [03:31<02:09,  5.60it/s, loss=4.42, acc=0.318]

Epoch 1/100 1150/1875 train_loss: 4.4163 train_acc: 0.3176


Epoch 1/100:  63%|██████▎   | 1176/1875 [03:36<02:04,  5.61it/s, loss=4.41, acc=0.318]

Epoch 1/100 1175/1875 train_loss: 4.4102 train_acc: 0.3184


Epoch 1/100:  64%|██████▍   | 1201/1875 [03:40<02:07,  5.30it/s, loss=4.4, acc=0.319]

Epoch 1/100 1200/1875 train_loss: 4.4008 train_acc: 0.3192


Epoch 1/100:  65%|██████▌   | 1226/1875 [03:45<01:55,  5.61it/s, loss=4.39, acc=0.32]

Epoch 1/100 1225/1875 train_loss: 4.3909 train_acc: 0.3200


Epoch 1/100:  67%|██████▋   | 1251/1875 [03:50<01:52,  5.55it/s, loss=4.38, acc=0.32]

Epoch 1/100 1250/1875 train_loss: 4.3839 train_acc: 0.3205


Epoch 1/100:  68%|██████▊   | 1276/1875 [03:54<01:48,  5.52it/s, loss=4.38, acc=0.321]

Epoch 1/100 1275/1875 train_loss: 4.3769 train_acc: 0.3210


Epoch 1/100:  69%|██████▉   | 1301/1875 [03:59<01:42,  5.58it/s, loss=4.37, acc=0.321]

Epoch 1/100 1300/1875 train_loss: 4.3713 train_acc: 0.3214


Epoch 1/100:  71%|███████   | 1326/1875 [04:03<01:38,  5.57it/s, loss=4.36, acc=0.322]

Epoch 1/100 1325/1875 train_loss: 4.3635 train_acc: 0.3221


Epoch 1/100:  72%|███████▏  | 1351/1875 [04:08<01:36,  5.45it/s, loss=4.36, acc=0.323]

Epoch 1/100 1350/1875 train_loss: 4.3564 train_acc: 0.3227


Epoch 1/100:  73%|███████▎  | 1376/1875 [04:12<01:29,  5.58it/s, loss=4.35, acc=0.323]

Epoch 1/100 1375/1875 train_loss: 4.3500 train_acc: 0.3234


Epoch 1/100:  75%|███████▍  | 1401/1875 [04:17<01:24,  5.58it/s, loss=4.34, acc=0.324]

Epoch 1/100 1400/1875 train_loss: 4.3442 train_acc: 0.3237


Epoch 1/100:  76%|███████▌  | 1426/1875 [04:21<01:21,  5.49it/s, loss=4.34, acc=0.324]

Epoch 1/100 1425/1875 train_loss: 4.3363 train_acc: 0.3243


Epoch 1/100:  77%|███████▋  | 1451/1875 [04:26<01:16,  5.56it/s, loss=4.33, acc=0.325]

Epoch 1/100 1450/1875 train_loss: 4.3290 train_acc: 0.3250


Epoch 1/100:  79%|███████▊  | 1476/1875 [04:30<01:10,  5.63it/s, loss=4.32, acc=0.325]

Epoch 1/100 1475/1875 train_loss: 4.3243 train_acc: 0.3254


Epoch 1/100:  80%|████████  | 1501/1875 [04:35<01:08,  5.49it/s, loss=4.32, acc=0.326]

Epoch 1/100 1500/1875 train_loss: 4.3184 train_acc: 0.3259


Epoch 1/100:  81%|████████▏ | 1526/1875 [04:40<01:08,  5.13it/s, loss=4.31, acc=0.326]

Epoch 1/100 1525/1875 train_loss: 4.3127 train_acc: 0.3263


Epoch 1/100:  83%|████████▎ | 1551/1875 [04:44<00:58,  5.52it/s, loss=4.31, acc=0.327]

Epoch 1/100 1550/1875 train_loss: 4.3063 train_acc: 0.3268


Epoch 1/100:  84%|████████▍ | 1576/1875 [04:49<00:53,  5.57it/s, loss=4.3, acc=0.327]

Epoch 1/100 1575/1875 train_loss: 4.2989 train_acc: 0.3273


Epoch 1/100:  85%|████████▌ | 1601/1875 [04:53<00:50,  5.47it/s, loss=4.29, acc=0.328]

Epoch 1/100 1600/1875 train_loss: 4.2928 train_acc: 0.3278


Epoch 1/100:  87%|████████▋ | 1626/1875 [04:58<00:44,  5.57it/s, loss=4.29, acc=0.328]

Epoch 1/100 1625/1875 train_loss: 4.2883 train_acc: 0.3282


Epoch 1/100:  88%|████████▊ | 1651/1875 [05:03<00:39,  5.66it/s, loss=4.28, acc=0.329]

Epoch 1/100 1650/1875 train_loss: 4.2832 train_acc: 0.3287


Epoch 1/100:  89%|████████▉ | 1676/1875 [05:07<00:36,  5.49it/s, loss=4.28, acc=0.329]

Epoch 1/100 1675/1875 train_loss: 4.2781 train_acc: 0.3292


Epoch 1/100:  91%|█████████ | 1701/1875 [05:12<00:31,  5.59it/s, loss=4.27, acc=0.33]

Epoch 1/100 1700/1875 train_loss: 4.2714 train_acc: 0.3298


Epoch 1/100:  92%|█████████▏| 1726/1875 [05:16<00:26,  5.52it/s, loss=4.27, acc=0.33]

Epoch 1/100 1725/1875 train_loss: 4.2669 train_acc: 0.3301


Epoch 1/100:  93%|█████████▎| 1751/1875 [05:21<00:22,  5.50it/s, loss=4.26, acc=0.33]

Epoch 1/100 1750/1875 train_loss: 4.2637 train_acc: 0.3303


Epoch 1/100:  95%|█████████▍| 1776/1875 [05:25<00:17,  5.55it/s, loss=4.26, acc=0.331]

Epoch 1/100 1775/1875 train_loss: 4.2588 train_acc: 0.3306


Epoch 1/100:  96%|█████████▌| 1801/1875 [05:30<00:13,  5.50it/s, loss=4.25, acc=0.331]

Epoch 1/100 1800/1875 train_loss: 4.2541 train_acc: 0.3310


Epoch 1/100:  97%|█████████▋| 1826/1875 [05:34<00:08,  5.54it/s, loss=4.25, acc=0.331]

Epoch 1/100 1825/1875 train_loss: 4.2484 train_acc: 0.3314


Epoch 1/100:  99%|█████████▊| 1850/1875 [05:39<00:04,  5.67it/s, loss=4.24, acc=0.332]

Epoch 1/100 1850/1875 train_loss: 4.2439 train_acc: 0.3318


Epoch 1/100: 100%|██████████| 1875/1875 [05:43<00:00,  5.46it/s, loss=4.24, acc=0.332]


Epoch 1/100 1875/1875 train_loss: 4.2406 train_acc: 0.3320


	Train Bleu Score: 100%|██████████| 375/375 [01:08<00:00,  5.51it/s, bleu1=0.174, bleu4=0.0281]
	Validation Bleu Score: 100%|██████████| 63/63 [00:11<00:00,  5.53it/s, bleu1=0.173, bleu4=0.029]


Epoch 1/100 train_bleu1: 0.1742 train_bleu4: 0.0281  val_bleu1: 0.1734 val_bleu4: 0.0290 


Epoch 2/100:   1%|▏         | 26/1875 [00:04<05:34,  5.54it/s, loss=3.78, acc=0.367]

Epoch 2/100 25/1875 train_loss: 3.7761 train_acc: 0.3669


Epoch 2/100:   3%|▎         | 51/1875 [00:09<05:32,  5.49it/s, loss=3.8, acc=0.361]

Epoch 2/100 50/1875 train_loss: 3.8019 train_acc: 0.3611


Epoch 2/100:   4%|▍         | 76/1875 [00:13<05:21,  5.59it/s, loss=3.78, acc=0.362]

Epoch 2/100 75/1875 train_loss: 3.7880 train_acc: 0.3610


Epoch 2/100:   5%|▌         | 101/1875 [00:18<05:18,  5.56it/s, loss=3.78, acc=0.364]

Epoch 2/100 100/1875 train_loss: 3.7776 train_acc: 0.3637


Epoch 2/100:   7%|▋         | 126/1875 [00:22<05:14,  5.56it/s, loss=3.78, acc=0.363]

Epoch 2/100 125/1875 train_loss: 3.7807 train_acc: 0.3630


Epoch 2/100:   8%|▊         | 151/1875 [00:27<05:10,  5.56it/s, loss=3.77, acc=0.364]

Epoch 2/100 150/1875 train_loss: 3.7749 train_acc: 0.3637


Epoch 2/100:   9%|▉         | 176/1875 [00:31<05:07,  5.52it/s, loss=3.77, acc=0.363]

Epoch 2/100 175/1875 train_loss: 3.7711 train_acc: 0.3632


Epoch 2/100:  11%|█         | 201/1875 [00:36<05:02,  5.53it/s, loss=3.79, acc=0.362]

Epoch 2/100 200/1875 train_loss: 3.7868 train_acc: 0.3619


Epoch 2/100:  12%|█▏        | 226/1875 [00:40<04:57,  5.54it/s, loss=3.78, acc=0.361]

Epoch 2/100 225/1875 train_loss: 3.7837 train_acc: 0.3611


Epoch 2/100:  13%|█▎        | 251/1875 [00:45<05:05,  5.32it/s, loss=3.78, acc=0.36]

Epoch 2/100 250/1875 train_loss: 3.7857 train_acc: 0.3604


Epoch 2/100:  15%|█▍        | 276/1875 [00:49<04:45,  5.61it/s, loss=3.78, acc=0.361]

Epoch 2/100 275/1875 train_loss: 3.7803 train_acc: 0.3610


Epoch 2/100:  16%|█▌        | 301/1875 [00:54<04:46,  5.49it/s, loss=3.78, acc=0.362]

Epoch 2/100 300/1875 train_loss: 3.7777 train_acc: 0.3618


Epoch 2/100:  17%|█▋        | 326/1875 [00:59<04:41,  5.51it/s, loss=3.78, acc=0.362]

Epoch 2/100 325/1875 train_loss: 3.7785 train_acc: 0.3614


Epoch 2/100:  19%|█▊        | 351/1875 [01:03<04:32,  5.60it/s, loss=3.77, acc=0.361]

Epoch 2/100 350/1875 train_loss: 3.7712 train_acc: 0.3615


Epoch 2/100:  20%|██        | 376/1875 [01:08<04:29,  5.56it/s, loss=3.77, acc=0.362]

Epoch 2/100 375/1875 train_loss: 3.7698 train_acc: 0.3616


Epoch 2/100:  21%|██▏       | 401/1875 [01:12<04:24,  5.58it/s, loss=3.77, acc=0.361]

Epoch 2/100 400/1875 train_loss: 3.7701 train_acc: 0.3610


Epoch 2/100:  23%|██▎       | 426/1875 [01:17<04:22,  5.53it/s, loss=3.77, acc=0.361]

Epoch 2/100 425/1875 train_loss: 3.7700 train_acc: 0.3612


Epoch 2/100:  24%|██▍       | 451/1875 [01:21<04:13,  5.62it/s, loss=3.77, acc=0.361]

Epoch 2/100 450/1875 train_loss: 3.7714 train_acc: 0.3608


Epoch 2/100:  25%|██▌       | 476/1875 [01:26<04:11,  5.57it/s, loss=3.77, acc=0.361]

Epoch 2/100 475/1875 train_loss: 3.7732 train_acc: 0.3606


Epoch 2/100:  27%|██▋       | 501/1875 [01:30<04:10,  5.48it/s, loss=3.78, acc=0.361]

Epoch 2/100 500/1875 train_loss: 3.7756 train_acc: 0.3607


Epoch 2/100:  28%|██▊       | 526/1875 [01:35<04:02,  5.56it/s, loss=3.77, acc=0.361]

Epoch 2/100 525/1875 train_loss: 3.7716 train_acc: 0.3611


Epoch 2/100:  29%|██▉       | 551/1875 [01:39<04:12,  5.24it/s, loss=3.78, acc=0.361]

Epoch 2/100 550/1875 train_loss: 3.7746 train_acc: 0.3607


Epoch 2/100:  31%|███       | 576/1875 [01:44<04:06,  5.26it/s, loss=3.77, acc=0.361]

Epoch 2/100 575/1875 train_loss: 3.7678 train_acc: 0.3612


Epoch 2/100:  32%|███▏      | 601/1875 [01:49<03:50,  5.53it/s, loss=3.77, acc=0.362]

Epoch 2/100 600/1875 train_loss: 3.7648 train_acc: 0.3616


Epoch 2/100:  33%|███▎      | 626/1875 [01:53<03:43,  5.58it/s, loss=3.76, acc=0.362]

Epoch 2/100 625/1875 train_loss: 3.7608 train_acc: 0.3617


Epoch 2/100:  35%|███▍      | 651/1875 [01:58<03:40,  5.55it/s, loss=3.76, acc=0.362]

Epoch 2/100 650/1875 train_loss: 3.7597 train_acc: 0.3617


Epoch 2/100:  36%|███▌      | 676/1875 [02:03<03:48,  5.24it/s, loss=3.76, acc=0.362]

Epoch 2/100 675/1875 train_loss: 3.7575 train_acc: 0.3619


Epoch 2/100:  37%|███▋      | 701/1875 [02:07<03:32,  5.53it/s, loss=3.76, acc=0.362]

Epoch 2/100 700/1875 train_loss: 3.7598 train_acc: 0.3616


Epoch 2/100:  39%|███▊      | 725/1875 [02:12<03:39,  5.24it/s, loss=3.76, acc=0.362]

Epoch 2/100 725/1875 train_loss: 3.7589 train_acc: 0.3619


Epoch 2/100:  40%|████      | 751/1875 [02:17<03:37,  5.16it/s, loss=3.76, acc=0.362]

Epoch 2/100 750/1875 train_loss: 3.7581 train_acc: 0.3620


Epoch 2/100:  41%|████▏     | 776/1875 [02:21<03:22,  5.43it/s, loss=3.76, acc=0.362]

Epoch 2/100 775/1875 train_loss: 3.7562 train_acc: 0.3621


Epoch 2/100:  43%|████▎     | 800/1875 [02:26<03:40,  4.87it/s, loss=3.75, acc=0.362]

Epoch 2/100 800/1875 train_loss: 3.7511 train_acc: 0.3624


Epoch 2/100:  44%|████▍     | 826/1875 [02:31<03:21,  5.22it/s, loss=3.75, acc=0.362]

Epoch 2/100 825/1875 train_loss: 3.7534 train_acc: 0.3621


Epoch 2/100:  45%|████▌     | 851/1875 [02:36<03:12,  5.32it/s, loss=3.75, acc=0.362]

Epoch 2/100 850/1875 train_loss: 3.7532 train_acc: 0.3618


Epoch 2/100:  47%|████▋     | 876/1875 [02:41<03:18,  5.03it/s, loss=3.75, acc=0.362]

Epoch 2/100 875/1875 train_loss: 3.7483 train_acc: 0.3623


Epoch 2/100:  48%|████▊     | 901/1875 [02:46<03:02,  5.34it/s, loss=3.75, acc=0.362]

Epoch 2/100 900/1875 train_loss: 3.7490 train_acc: 0.3624


Epoch 2/100:  49%|████▉     | 925/1875 [02:50<03:05,  5.13it/s, loss=3.75, acc=0.363]

Epoch 2/100 925/1875 train_loss: 3.7470 train_acc: 0.3626


Epoch 2/100:  51%|█████     | 951/1875 [02:55<02:51,  5.38it/s, loss=3.75, acc=0.363]

Epoch 2/100 950/1875 train_loss: 3.7489 train_acc: 0.3626


Epoch 2/100:  52%|█████▏    | 976/1875 [03:00<02:50,  5.27it/s, loss=3.75, acc=0.363]

Epoch 2/100 975/1875 train_loss: 3.7489 train_acc: 0.3626


Epoch 2/100:  53%|█████▎    | 1000/1875 [03:05<02:54,  5.02it/s, loss=3.75, acc=0.363]

Epoch 2/100 1000/1875 train_loss: 3.7480 train_acc: 0.3628


Epoch 2/100:  55%|█████▍    | 1026/1875 [03:10<02:46,  5.11it/s, loss=3.75, acc=0.363]

Epoch 2/100 1025/1875 train_loss: 3.7461 train_acc: 0.3632


Epoch 2/100:  56%|█████▌    | 1051/1875 [03:15<02:30,  5.48it/s, loss=3.75, acc=0.363]

Epoch 2/100 1050/1875 train_loss: 3.7456 train_acc: 0.3633


Epoch 2/100:  57%|█████▋    | 1076/1875 [03:19<02:23,  5.55it/s, loss=3.75, acc=0.363]

Epoch 2/100 1075/1875 train_loss: 3.7459 train_acc: 0.3631


Epoch 2/100:  59%|█████▊    | 1101/1875 [03:24<02:20,  5.53it/s, loss=3.75, acc=0.363]

Epoch 2/100 1100/1875 train_loss: 3.7454 train_acc: 0.3632


Epoch 2/100:  60%|██████    | 1126/1875 [03:29<02:16,  5.49it/s, loss=3.74, acc=0.364]

Epoch 2/100 1125/1875 train_loss: 3.7444 train_acc: 0.3635


Epoch 2/100:  61%|██████▏   | 1151/1875 [03:33<02:12,  5.48it/s, loss=3.74, acc=0.364]

Epoch 2/100 1150/1875 train_loss: 3.7431 train_acc: 0.3638


Epoch 2/100:  63%|██████▎   | 1176/1875 [03:38<02:09,  5.38it/s, loss=3.74, acc=0.364]

Epoch 2/100 1175/1875 train_loss: 3.7395 train_acc: 0.3641


Epoch 2/100:  64%|██████▍   | 1201/1875 [03:42<02:01,  5.54it/s, loss=3.74, acc=0.364]

Epoch 2/100 1200/1875 train_loss: 3.7386 train_acc: 0.3641


Epoch 2/100:  65%|██████▌   | 1226/1875 [03:47<01:58,  5.48it/s, loss=3.74, acc=0.364]

Epoch 2/100 1225/1875 train_loss: 3.7375 train_acc: 0.3641


Epoch 2/100:  67%|██████▋   | 1251/1875 [03:51<01:52,  5.56it/s, loss=3.74, acc=0.364]

Epoch 2/100 1250/1875 train_loss: 3.7361 train_acc: 0.3642


Epoch 2/100:  68%|██████▊   | 1276/1875 [03:56<01:48,  5.55it/s, loss=3.73, acc=0.364]

Epoch 2/100 1275/1875 train_loss: 3.7339 train_acc: 0.3641


Epoch 2/100:  69%|██████▉   | 1301/1875 [04:00<01:44,  5.50it/s, loss=3.73, acc=0.364]

Epoch 2/100 1300/1875 train_loss: 3.7334 train_acc: 0.3643


Epoch 2/100:  71%|███████   | 1326/1875 [04:05<01:42,  5.36it/s, loss=3.74, acc=0.364]

Epoch 2/100 1325/1875 train_loss: 3.7356 train_acc: 0.3642


Epoch 2/100:  72%|███████▏  | 1351/1875 [04:09<01:34,  5.56it/s, loss=3.73, acc=0.364]

Epoch 2/100 1350/1875 train_loss: 3.7335 train_acc: 0.3643


Epoch 2/100:  73%|███████▎  | 1376/1875 [04:14<01:29,  5.59it/s, loss=3.73, acc=0.364]

Epoch 2/100 1375/1875 train_loss: 3.7323 train_acc: 0.3644


Epoch 2/100:  75%|███████▍  | 1401/1875 [04:18<01:25,  5.53it/s, loss=3.73, acc=0.364]

Epoch 2/100 1400/1875 train_loss: 3.7330 train_acc: 0.3643


Epoch 2/100:  76%|███████▌  | 1426/1875 [04:23<01:20,  5.55it/s, loss=3.73, acc=0.364]

Epoch 2/100 1425/1875 train_loss: 3.7330 train_acc: 0.3643


Epoch 2/100:  77%|███████▋  | 1451/1875 [04:27<01:15,  5.59it/s, loss=3.73, acc=0.364]

Epoch 2/100 1450/1875 train_loss: 3.7319 train_acc: 0.3645


Epoch 2/100:  79%|███████▊  | 1476/1875 [04:32<01:11,  5.60it/s, loss=3.73, acc=0.365]

Epoch 2/100 1475/1875 train_loss: 3.7304 train_acc: 0.3648


Epoch 2/100:  80%|████████  | 1501/1875 [04:36<01:07,  5.51it/s, loss=3.73, acc=0.365]

Epoch 2/100 1500/1875 train_loss: 3.7293 train_acc: 0.3646


Epoch 2/100:  81%|████████▏ | 1526/1875 [04:41<01:02,  5.58it/s, loss=3.73, acc=0.365]

Epoch 2/100 1525/1875 train_loss: 3.7281 train_acc: 0.3647


Epoch 2/100:  83%|████████▎ | 1551/1875 [04:46<00:58,  5.54it/s, loss=3.73, acc=0.365]

Epoch 2/100 1550/1875 train_loss: 3.7274 train_acc: 0.3647


Epoch 2/100:  84%|████████▍ | 1576/1875 [04:50<00:55,  5.36it/s, loss=3.73, acc=0.365]

Epoch 2/100 1575/1875 train_loss: 3.7261 train_acc: 0.3648


Epoch 2/100:  85%|████████▌ | 1601/1875 [04:55<00:49,  5.56it/s, loss=3.72, acc=0.365]

Epoch 2/100 1600/1875 train_loss: 3.7227 train_acc: 0.3649


Epoch 2/100:  87%|████████▋ | 1626/1875 [04:59<00:44,  5.54it/s, loss=3.72, acc=0.365]

Epoch 2/100 1625/1875 train_loss: 3.7206 train_acc: 0.3651


Epoch 2/100:  88%|████████▊ | 1651/1875 [05:04<00:40,  5.48it/s, loss=3.72, acc=0.365]

Epoch 2/100 1650/1875 train_loss: 3.7197 train_acc: 0.3651


Epoch 2/100:  89%|████████▉ | 1676/1875 [05:08<00:35,  5.58it/s, loss=3.72, acc=0.365]

Epoch 2/100 1675/1875 train_loss: 3.7173 train_acc: 0.3651


Epoch 2/100:  91%|█████████ | 1701/1875 [05:13<00:31,  5.52it/s, loss=3.72, acc=0.365]

Epoch 2/100 1700/1875 train_loss: 3.7164 train_acc: 0.3652


Epoch 2/100:  92%|█████████▏| 1726/1875 [05:17<00:26,  5.58it/s, loss=3.72, acc=0.365]

Epoch 2/100 1725/1875 train_loss: 3.7160 train_acc: 0.3653


Epoch 2/100:  93%|█████████▎| 1751/1875 [05:22<00:22,  5.52it/s, loss=3.71, acc=0.366]

Epoch 2/100 1750/1875 train_loss: 3.7140 train_acc: 0.3655


Epoch 2/100:  95%|█████████▍| 1776/1875 [05:26<00:17,  5.56it/s, loss=3.71, acc=0.365]

Epoch 2/100 1775/1875 train_loss: 3.7134 train_acc: 0.3655


Epoch 2/100:  96%|█████████▌| 1801/1875 [05:31<00:13,  5.60it/s, loss=3.71, acc=0.366]

Epoch 2/100 1800/1875 train_loss: 3.7108 train_acc: 0.3657


Epoch 2/100:  97%|█████████▋| 1826/1875 [05:35<00:08,  5.57it/s, loss=3.71, acc=0.366]

Epoch 2/100 1825/1875 train_loss: 3.7086 train_acc: 0.3659


Epoch 2/100:  99%|█████████▊| 1851/1875 [05:40<00:04,  5.21it/s, loss=3.71, acc=0.366]

Epoch 2/100 1850/1875 train_loss: 3.7067 train_acc: 0.3660


Epoch 2/100: 100%|██████████| 1875/1875 [05:45<00:00,  5.43it/s, loss=3.71, acc=0.366]


Epoch 2/100 1875/1875 train_loss: 3.7061 train_acc: 0.3659


	Train Bleu Score: 100%|██████████| 375/375 [01:08<00:00,  5.50it/s, bleu1=0.406, bleu4=0.0531]
	Validation Bleu Score: 100%|██████████| 63/63 [00:12<00:00,  5.15it/s, bleu1=0.411, bleu4=0.0546]


Epoch 2/100 train_bleu1: 0.4055 train_bleu4: 0.0531  val_bleu1: 0.4111 val_bleu4: 0.0546 


Epoch 3/100:   1%|▏         | 25/1875 [00:05<06:17,  4.90it/s, loss=3.46, acc=0.379]

Epoch 3/100 25/1875 train_loss: 3.4607 train_acc: 0.3785


Epoch 3/100:   3%|▎         | 51/1875 [00:10<05:59,  5.07it/s, loss=3.46, acc=0.374]

Epoch 3/100 50/1875 train_loss: 3.4656 train_acc: 0.3737


Epoch 3/100:   4%|▍         | 76/1875 [00:15<05:51,  5.11it/s, loss=3.47, acc=0.372]

Epoch 3/100 75/1875 train_loss: 3.4753 train_acc: 0.3716


Epoch 3/100:   5%|▌         | 101/1875 [00:20<05:48,  5.09it/s, loss=3.47, acc=0.373]

Epoch 3/100 100/1875 train_loss: 3.4691 train_acc: 0.3737


Epoch 3/100:   7%|▋         | 126/1875 [00:25<05:45,  5.07it/s, loss=3.47, acc=0.373]

Epoch 3/100 125/1875 train_loss: 3.4697 train_acc: 0.3733


Epoch 3/100:   8%|▊         | 151/1875 [00:30<05:38,  5.09it/s, loss=3.48, acc=0.373]

Epoch 3/100 150/1875 train_loss: 3.4780 train_acc: 0.3731


Epoch 3/100:   9%|▉         | 176/1875 [00:34<05:30,  5.14it/s, loss=3.49, acc=0.373]

Epoch 3/100 175/1875 train_loss: 3.4901 train_acc: 0.3730


Epoch 3/100:  11%|█         | 200/1875 [00:39<05:25,  5.15it/s, loss=3.5, acc=0.374]

Epoch 3/100 200/1875 train_loss: 3.5046 train_acc: 0.3737


Epoch 3/100:  12%|█▏        | 226/1875 [00:44<05:12,  5.28it/s, loss=3.5, acc=0.374]

Epoch 3/100 225/1875 train_loss: 3.5000 train_acc: 0.3735


Epoch 3/100:  13%|█▎        | 251/1875 [00:49<05:19,  5.09it/s, loss=3.5, acc=0.374]

Epoch 3/100 250/1875 train_loss: 3.4982 train_acc: 0.3743


Epoch 3/100:  15%|█▍        | 276/1875 [00:54<05:10,  5.15it/s, loss=3.51, acc=0.374]

Epoch 3/100 275/1875 train_loss: 3.5079 train_acc: 0.3737


Epoch 3/100:  16%|█▌        | 301/1875 [00:59<05:15,  4.99it/s, loss=3.51, acc=0.373]

Epoch 3/100 300/1875 train_loss: 3.5137 train_acc: 0.3732


Epoch 3/100:  17%|█▋        | 325/1875 [01:04<05:11,  4.97it/s, loss=3.51, acc=0.373]

Epoch 3/100 325/1875 train_loss: 3.5150 train_acc: 0.3733


Epoch 3/100:  19%|█▊        | 350/1875 [01:09<04:56,  5.14it/s, loss=3.52, acc=0.372]

Epoch 3/100 350/1875 train_loss: 3.5225 train_acc: 0.3724


Epoch 3/100:  20%|██        | 375/1875 [01:14<04:55,  5.08it/s, loss=3.52, acc=0.373]

Epoch 3/100 375/1875 train_loss: 3.5214 train_acc: 0.3729


Epoch 3/100:  21%|██▏       | 401/1875 [01:19<04:40,  5.25it/s, loss=3.52, acc=0.373]

Epoch 3/100 400/1875 train_loss: 3.5215 train_acc: 0.3727


Epoch 3/100:  23%|██▎       | 425/1875 [01:23<04:39,  5.19it/s, loss=3.52, acc=0.372]

Epoch 3/100 425/1875 train_loss: 3.5244 train_acc: 0.3723


Epoch 3/100:  24%|██▍       | 451/1875 [01:28<04:40,  5.08it/s, loss=3.53, acc=0.372]

Epoch 3/100 450/1875 train_loss: 3.5252 train_acc: 0.3722


Epoch 3/100:  25%|██▌       | 476/1875 [01:34<04:45,  4.90it/s, loss=3.53, acc=0.372]

Epoch 3/100 475/1875 train_loss: 3.5278 train_acc: 0.3724


Epoch 3/100:  27%|██▋       | 501/1875 [01:38<04:31,  5.06it/s, loss=3.53, acc=0.372]

Epoch 3/100 500/1875 train_loss: 3.5305 train_acc: 0.3720


Epoch 3/100:  28%|██▊       | 525/1875 [01:43<04:26,  5.07it/s, loss=3.53, acc=0.372]

Epoch 3/100 525/1875 train_loss: 3.5329 train_acc: 0.3720


Epoch 3/100:  29%|██▉       | 551/1875 [01:48<04:05,  5.39it/s, loss=3.53, acc=0.372]

Epoch 3/100 550/1875 train_loss: 3.5328 train_acc: 0.3720


Epoch 3/100:  31%|███       | 576/1875 [01:53<03:53,  5.55it/s, loss=3.53, acc=0.372]

Epoch 3/100 575/1875 train_loss: 3.5312 train_acc: 0.3723


Epoch 3/100:  32%|███▏      | 601/1875 [01:57<03:53,  5.46it/s, loss=3.53, acc=0.373]

Epoch 3/100 600/1875 train_loss: 3.5328 train_acc: 0.3725


Epoch 3/100:  33%|███▎      | 626/1875 [02:02<03:46,  5.51it/s, loss=3.54, acc=0.372]

Epoch 3/100 625/1875 train_loss: 3.5354 train_acc: 0.3722


Epoch 3/100:  35%|███▍      | 651/1875 [02:07<03:48,  5.36it/s, loss=3.54, acc=0.372]

Epoch 3/100 650/1875 train_loss: 3.5352 train_acc: 0.3724


Epoch 3/100:  36%|███▌      | 675/1875 [02:11<03:53,  5.14it/s, loss=3.54, acc=0.372]

Epoch 3/100 675/1875 train_loss: 3.5381 train_acc: 0.3723


Epoch 3/100:  37%|███▋      | 701/1875 [02:16<03:30,  5.58it/s, loss=3.54, acc=0.373]

Epoch 3/100 700/1875 train_loss: 3.5372 train_acc: 0.3726


Epoch 3/100:  39%|███▊      | 726/1875 [02:21<03:28,  5.50it/s, loss=3.54, acc=0.373]

Epoch 3/100 725/1875 train_loss: 3.5377 train_acc: 0.3729


Epoch 3/100:  40%|████      | 751/1875 [02:25<03:21,  5.57it/s, loss=3.54, acc=0.373]

Epoch 3/100 750/1875 train_loss: 3.5411 train_acc: 0.3728


Epoch 3/100:  41%|████▏     | 776/1875 [02:30<03:17,  5.55it/s, loss=3.55, acc=0.373]

Epoch 3/100 775/1875 train_loss: 3.5453 train_acc: 0.3726


Epoch 3/100:  43%|████▎     | 801/1875 [02:34<03:14,  5.54it/s, loss=3.54, acc=0.373]

Epoch 3/100 800/1875 train_loss: 3.5434 train_acc: 0.3727


Epoch 3/100:  44%|████▍     | 826/1875 [02:39<03:12,  5.44it/s, loss=3.54, acc=0.373]

Epoch 3/100 825/1875 train_loss: 3.5416 train_acc: 0.3729


Epoch 3/100:  45%|████▌     | 851/1875 [02:44<03:04,  5.55it/s, loss=3.54, acc=0.373]

Epoch 3/100 850/1875 train_loss: 3.5415 train_acc: 0.3729


Epoch 3/100:  47%|████▋     | 876/1875 [02:48<03:01,  5.50it/s, loss=3.55, acc=0.372]

Epoch 3/100 875/1875 train_loss: 3.5464 train_acc: 0.3725


Epoch 3/100:  48%|████▊     | 901/1875 [02:53<02:58,  5.46it/s, loss=3.55, acc=0.372]

Epoch 3/100 900/1875 train_loss: 3.5476 train_acc: 0.3724


Epoch 3/100:  49%|████▉     | 926/1875 [02:58<02:50,  5.56it/s, loss=3.55, acc=0.373]

Epoch 3/100 925/1875 train_loss: 3.5495 train_acc: 0.3726


Epoch 3/100:  51%|█████     | 951/1875 [03:02<02:47,  5.53it/s, loss=3.55, acc=0.373]

Epoch 3/100 950/1875 train_loss: 3.5506 train_acc: 0.3727


Epoch 3/100:  52%|█████▏    | 976/1875 [03:07<02:41,  5.57it/s, loss=3.55, acc=0.373]

Epoch 3/100 975/1875 train_loss: 3.5521 train_acc: 0.3725


Epoch 3/100:  53%|█████▎    | 1001/1875 [03:12<02:46,  5.26it/s, loss=3.56, acc=0.372]

Epoch 3/100 1000/1875 train_loss: 3.5558 train_acc: 0.3722


Epoch 3/100:  55%|█████▍    | 1026/1875 [03:16<02:35,  5.47it/s, loss=3.56, acc=0.372]

Epoch 3/100 1025/1875 train_loss: 3.5566 train_acc: 0.3722


Epoch 3/100:  56%|█████▌    | 1051/1875 [03:21<02:25,  5.66it/s, loss=3.56, acc=0.372]

Epoch 3/100 1050/1875 train_loss: 3.5574 train_acc: 0.3721


Epoch 3/100:  57%|█████▋    | 1076/1875 [03:25<02:21,  5.66it/s, loss=3.56, acc=0.372]

Epoch 3/100 1075/1875 train_loss: 3.5596 train_acc: 0.3719


Epoch 3/100:  59%|█████▊    | 1101/1875 [03:30<02:16,  5.66it/s, loss=3.56, acc=0.372]

Epoch 3/100 1100/1875 train_loss: 3.5605 train_acc: 0.3720


Epoch 3/100:  60%|██████    | 1126/1875 [03:34<02:12,  5.63it/s, loss=3.56, acc=0.372]

Epoch 3/100 1125/1875 train_loss: 3.5614 train_acc: 0.3720


Epoch 3/100:  61%|██████▏   | 1151/1875 [03:38<02:08,  5.62it/s, loss=3.56, acc=0.372]

Epoch 3/100 1150/1875 train_loss: 3.5605 train_acc: 0.3722


Epoch 3/100:  63%|██████▎   | 1176/1875 [03:43<02:03,  5.66it/s, loss=3.56, acc=0.372]

Epoch 3/100 1175/1875 train_loss: 3.5636 train_acc: 0.3720


Epoch 3/100:  64%|██████▍   | 1201/1875 [03:47<01:59,  5.62it/s, loss=3.56, acc=0.372]

Epoch 3/100 1200/1875 train_loss: 3.5635 train_acc: 0.3721


Epoch 3/100:  65%|██████▌   | 1226/1875 [03:52<01:55,  5.63it/s, loss=3.56, acc=0.372]

Epoch 3/100 1225/1875 train_loss: 3.5632 train_acc: 0.3721


Epoch 3/100:  67%|██████▋   | 1251/1875 [03:56<01:51,  5.61it/s, loss=3.57, acc=0.372]

Epoch 3/100 1250/1875 train_loss: 3.5656 train_acc: 0.3720


Epoch 3/100:  68%|██████▊   | 1276/1875 [04:01<01:48,  5.54it/s, loss=3.57, acc=0.372]

Epoch 3/100 1275/1875 train_loss: 3.5675 train_acc: 0.3719


Epoch 3/100:  69%|██████▉   | 1301/1875 [04:05<01:40,  5.69it/s, loss=3.57, acc=0.372]

Epoch 3/100 1300/1875 train_loss: 3.5655 train_acc: 0.3720


Epoch 3/100:  71%|███████   | 1326/1875 [04:10<01:36,  5.68it/s, loss=3.57, acc=0.372]

Epoch 3/100 1325/1875 train_loss: 3.5656 train_acc: 0.3721


Epoch 3/100:  72%|███████▏  | 1351/1875 [04:14<01:33,  5.59it/s, loss=3.57, acc=0.372]

Epoch 3/100 1350/1875 train_loss: 3.5655 train_acc: 0.3721


Epoch 3/100:  73%|███████▎  | 1376/1875 [04:19<01:28,  5.63it/s, loss=3.56, acc=0.372]

Epoch 3/100 1375/1875 train_loss: 3.5642 train_acc: 0.3724


Epoch 3/100:  75%|███████▍  | 1401/1875 [04:23<01:23,  5.68it/s, loss=3.56, acc=0.372]

Epoch 3/100 1400/1875 train_loss: 3.5631 train_acc: 0.3725


Epoch 3/100:  76%|███████▌  | 1426/1875 [04:27<01:19,  5.62it/s, loss=3.56, acc=0.373]

Epoch 3/100 1425/1875 train_loss: 3.5614 train_acc: 0.3726


Epoch 3/100:  77%|███████▋  | 1451/1875 [04:32<01:15,  5.65it/s, loss=3.56, acc=0.373]

Epoch 3/100 1450/1875 train_loss: 3.5608 train_acc: 0.3727


Epoch 3/100:  79%|███████▊  | 1476/1875 [04:36<01:15,  5.29it/s, loss=3.56, acc=0.373]

Epoch 3/100 1475/1875 train_loss: 3.5606 train_acc: 0.3726


Epoch 3/100:  80%|████████  | 1501/1875 [04:41<01:10,  5.29it/s, loss=3.56, acc=0.373]

Epoch 3/100 1500/1875 train_loss: 3.5600 train_acc: 0.3726


Epoch 3/100:  81%|████████▏ | 1526/1875 [04:46<01:02,  5.62it/s, loss=3.56, acc=0.373]

Epoch 3/100 1525/1875 train_loss: 3.5576 train_acc: 0.3729


Epoch 3/100:  83%|████████▎ | 1551/1875 [04:50<00:59,  5.46it/s, loss=3.56, acc=0.373]

Epoch 3/100 1550/1875 train_loss: 3.5588 train_acc: 0.3729


Epoch 3/100:  84%|████████▍ | 1576/1875 [04:55<00:54,  5.50it/s, loss=3.56, acc=0.373]

Epoch 3/100 1575/1875 train_loss: 3.5584 train_acc: 0.3729


Epoch 3/100:  85%|████████▌ | 1601/1875 [04:59<00:49,  5.59it/s, loss=3.56, acc=0.373]

Epoch 3/100 1600/1875 train_loss: 3.5579 train_acc: 0.3729


Epoch 3/100:  87%|████████▋ | 1626/1875 [05:04<00:45,  5.48it/s, loss=3.56, acc=0.373]

Epoch 3/100 1625/1875 train_loss: 3.5584 train_acc: 0.3728


Epoch 3/100:  88%|████████▊ | 1651/1875 [05:09<00:40,  5.57it/s, loss=3.56, acc=0.373]

Epoch 3/100 1650/1875 train_loss: 3.5583 train_acc: 0.3728


Epoch 3/100:  89%|████████▉ | 1676/1875 [05:13<00:36,  5.51it/s, loss=3.56, acc=0.373]

Epoch 3/100 1675/1875 train_loss: 3.5576 train_acc: 0.3728


Epoch 3/100:  91%|█████████ | 1701/1875 [05:18<00:31,  5.44it/s, loss=3.56, acc=0.373]

Epoch 3/100 1700/1875 train_loss: 3.5577 train_acc: 0.3728


Epoch 3/100:  92%|█████████▏| 1726/1875 [05:22<00:27,  5.41it/s, loss=3.56, acc=0.373]

Epoch 3/100 1725/1875 train_loss: 3.5572 train_acc: 0.3729


Epoch 3/100:  93%|█████████▎| 1751/1875 [05:27<00:23,  5.36it/s, loss=3.56, acc=0.373]

Epoch 3/100 1750/1875 train_loss: 3.5560 train_acc: 0.3730


Epoch 3/100:  95%|█████████▍| 1776/1875 [05:31<00:17,  5.54it/s, loss=3.56, acc=0.373]

Epoch 3/100 1775/1875 train_loss: 3.5560 train_acc: 0.3730


Epoch 3/100:  96%|█████████▌| 1801/1875 [05:36<00:13,  5.43it/s, loss=3.56, acc=0.373]

Epoch 3/100 1800/1875 train_loss: 3.5556 train_acc: 0.3729


Epoch 3/100:  97%|█████████▋| 1826/1875 [05:41<00:08,  5.57it/s, loss=3.56, acc=0.373]

Epoch 3/100 1825/1875 train_loss: 3.5560 train_acc: 0.3729


Epoch 3/100:  99%|█████████▊| 1851/1875 [05:45<00:04,  5.58it/s, loss=3.56, acc=0.373]

Epoch 3/100 1850/1875 train_loss: 3.5559 train_acc: 0.3731


Epoch 3/100: 100%|██████████| 1875/1875 [05:49<00:00,  5.36it/s, loss=3.56, acc=0.373]


Epoch 3/100 1875/1875 train_loss: 3.5560 train_acc: 0.3731


	Train Bleu Score: 100%|██████████| 375/375 [01:09<00:00,  5.40it/s, bleu1=0.495, bleu4=0.0856]
	Validation Bleu Score: 100%|██████████| 63/63 [00:11<00:00,  5.38it/s, bleu1=0.501, bleu4=0.0893]


Epoch 3/100 train_bleu1: 0.4947 train_bleu4: 0.0856  val_bleu1: 0.5010 val_bleu4: 0.0893 


Epoch 4/100:   1%|▏         | 26/1875 [00:04<05:33,  5.54it/s, loss=3.52, acc=0.363]

Epoch 4/100 25/1875 train_loss: 3.5433 train_acc: 0.3617


Epoch 4/100:   3%|▎         | 51/1875 [00:09<05:29,  5.53it/s, loss=3.48, acc=0.371]

Epoch 4/100 50/1875 train_loss: 3.4859 train_acc: 0.3707


Epoch 4/100:   4%|▍         | 76/1875 [00:13<05:26,  5.52it/s, loss=3.46, acc=0.372]

Epoch 4/100 75/1875 train_loss: 3.4621 train_acc: 0.3714


Epoch 4/100:   5%|▌         | 101/1875 [00:18<05:16,  5.61it/s, loss=3.44, acc=0.373]

Epoch 4/100 100/1875 train_loss: 3.4444 train_acc: 0.3730


Epoch 4/100:   7%|▋         | 126/1875 [00:22<05:21,  5.45it/s, loss=3.45, acc=0.375]

Epoch 4/100 125/1875 train_loss: 3.4473 train_acc: 0.3747


Epoch 4/100:   8%|▊         | 151/1875 [00:27<05:12,  5.51it/s, loss=3.45, acc=0.374]

Epoch 4/100 150/1875 train_loss: 3.4535 train_acc: 0.3735


Epoch 4/100:   9%|▉         | 176/1875 [00:31<05:05,  5.57it/s, loss=3.45, acc=0.374]

Epoch 4/100 175/1875 train_loss: 3.4503 train_acc: 0.3743


Epoch 4/100:  11%|█         | 201/1875 [00:36<05:08,  5.43it/s, loss=3.44, acc=0.375]

Epoch 4/100 200/1875 train_loss: 3.4455 train_acc: 0.3753


Epoch 4/100:  12%|█▏        | 226/1875 [00:41<05:04,  5.41it/s, loss=3.45, acc=0.375]

Epoch 4/100 225/1875 train_loss: 3.4494 train_acc: 0.3749


Epoch 4/100:  13%|█▎        | 251/1875 [00:45<04:53,  5.54it/s, loss=3.45, acc=0.375]

Epoch 4/100 250/1875 train_loss: 3.4465 train_acc: 0.3751


Epoch 4/100:  15%|█▍        | 276/1875 [00:50<04:47,  5.56it/s, loss=3.45, acc=0.374]

Epoch 4/100 275/1875 train_loss: 3.4473 train_acc: 0.3744


Epoch 4/100:  16%|█▌        | 301/1875 [00:54<04:43,  5.56it/s, loss=3.45, acc=0.374]

Epoch 4/100 300/1875 train_loss: 3.4508 train_acc: 0.3744


Epoch 4/100:  17%|█▋        | 326/1875 [00:59<04:37,  5.58it/s, loss=3.45, acc=0.375]

Epoch 4/100 325/1875 train_loss: 3.4508 train_acc: 0.3745


Epoch 4/100:  19%|█▊        | 351/1875 [01:03<04:31,  5.62it/s, loss=3.45, acc=0.374]

Epoch 4/100 350/1875 train_loss: 3.4503 train_acc: 0.3744


Epoch 4/100:  20%|██        | 376/1875 [01:08<04:35,  5.43it/s, loss=3.45, acc=0.375]

Epoch 4/100 375/1875 train_loss: 3.4505 train_acc: 0.3747


Epoch 4/100:  21%|██▏       | 401/1875 [01:12<04:28,  5.50it/s, loss=3.44, acc=0.375]

Epoch 4/100 400/1875 train_loss: 3.4437 train_acc: 0.3752


Epoch 4/100:  23%|██▎       | 426/1875 [01:17<04:17,  5.62it/s, loss=3.44, acc=0.376]

Epoch 4/100 425/1875 train_loss: 3.4389 train_acc: 0.3757


Epoch 4/100:  24%|██▍       | 451/1875 [01:22<04:18,  5.52it/s, loss=3.44, acc=0.375]

Epoch 4/100 450/1875 train_loss: 3.4400 train_acc: 0.3755


Epoch 4/100:  25%|██▌       | 476/1875 [01:26<04:13,  5.52it/s, loss=3.44, acc=0.375]

Epoch 4/100 475/1875 train_loss: 3.4427 train_acc: 0.3755


Epoch 4/100:  27%|██▋       | 501/1875 [01:31<04:07,  5.55it/s, loss=3.45, acc=0.376]

Epoch 4/100 500/1875 train_loss: 3.4455 train_acc: 0.3758


Epoch 4/100:  28%|██▊       | 526/1875 [01:35<04:05,  5.50it/s, loss=3.45, acc=0.376]

Epoch 4/100 525/1875 train_loss: 3.4487 train_acc: 0.3756


Epoch 4/100:  29%|██▉       | 551/1875 [01:40<03:56,  5.60it/s, loss=3.45, acc=0.376]

Epoch 4/100 550/1875 train_loss: 3.4462 train_acc: 0.3758


Epoch 4/100:  31%|███       | 576/1875 [01:44<03:51,  5.61it/s, loss=3.45, acc=0.376]

Epoch 4/100 575/1875 train_loss: 3.4487 train_acc: 0.3756


Epoch 4/100:  32%|███▏      | 601/1875 [01:49<03:45,  5.65it/s, loss=3.45, acc=0.376]

Epoch 4/100 600/1875 train_loss: 3.4516 train_acc: 0.3757


Epoch 4/100:  33%|███▎      | 626/1875 [01:53<03:44,  5.57it/s, loss=3.45, acc=0.376]

Epoch 4/100 625/1875 train_loss: 3.4483 train_acc: 0.3760


Epoch 4/100:  35%|███▍      | 651/1875 [01:58<03:40,  5.56it/s, loss=3.45, acc=0.376]

Epoch 4/100 650/1875 train_loss: 3.4482 train_acc: 0.3765


Epoch 4/100:  36%|███▌      | 676/1875 [02:02<03:31,  5.66it/s, loss=3.45, acc=0.376]

Epoch 4/100 675/1875 train_loss: 3.4488 train_acc: 0.3764


Epoch 4/100:  37%|███▋      | 701/1875 [02:07<03:29,  5.59it/s, loss=3.45, acc=0.376]

Epoch 4/100 700/1875 train_loss: 3.4497 train_acc: 0.3762


Epoch 4/100:  39%|███▊      | 726/1875 [02:11<03:28,  5.50it/s, loss=3.45, acc=0.376]

Epoch 4/100 725/1875 train_loss: 3.4510 train_acc: 0.3761


Epoch 4/100:  40%|████      | 751/1875 [02:16<03:23,  5.52it/s, loss=3.45, acc=0.376]

Epoch 4/100 750/1875 train_loss: 3.4527 train_acc: 0.3759


Epoch 4/100:  41%|████▏     | 776/1875 [02:20<03:14,  5.64it/s, loss=3.45, acc=0.376]

Epoch 4/100 775/1875 train_loss: 3.4539 train_acc: 0.3757


Epoch 4/100:  43%|████▎     | 801/1875 [02:25<03:12,  5.57it/s, loss=3.45, acc=0.376]

Epoch 4/100 800/1875 train_loss: 3.4524 train_acc: 0.3760


Epoch 4/100:  44%|████▍     | 826/1875 [02:29<03:09,  5.55it/s, loss=3.45, acc=0.376]

Epoch 4/100 825/1875 train_loss: 3.4510 train_acc: 0.3763


Epoch 4/100:  45%|████▌     | 851/1875 [02:34<03:02,  5.60it/s, loss=3.45, acc=0.376]

Epoch 4/100 850/1875 train_loss: 3.4509 train_acc: 0.3764


Epoch 4/100:  47%|████▋     | 876/1875 [02:38<02:56,  5.65it/s, loss=3.45, acc=0.376]

Epoch 4/100 875/1875 train_loss: 3.4529 train_acc: 0.3764


Epoch 4/100:  48%|████▊     | 901/1875 [02:43<02:52,  5.66it/s, loss=3.45, acc=0.376]

Epoch 4/100 900/1875 train_loss: 3.4534 train_acc: 0.3764


Epoch 4/100:  49%|████▉     | 926/1875 [02:47<02:47,  5.65it/s, loss=3.45, acc=0.376]

Epoch 4/100 925/1875 train_loss: 3.4539 train_acc: 0.3763


Epoch 4/100:  51%|█████     | 951/1875 [02:52<02:45,  5.58it/s, loss=3.45, acc=0.376]

Epoch 4/100 950/1875 train_loss: 3.4547 train_acc: 0.3762


Epoch 4/100:  52%|█████▏    | 976/1875 [02:56<02:42,  5.54it/s, loss=3.45, acc=0.376]

Epoch 4/100 975/1875 train_loss: 3.4543 train_acc: 0.3761


Epoch 4/100:  53%|█████▎    | 1001/1875 [03:00<02:35,  5.62it/s, loss=3.46, acc=0.376]

Epoch 4/100 1000/1875 train_loss: 3.4553 train_acc: 0.3758


Epoch 4/100:  55%|█████▍    | 1026/1875 [03:05<02:32,  5.56it/s, loss=3.45, acc=0.376]

Epoch 4/100 1025/1875 train_loss: 3.4543 train_acc: 0.3762


Epoch 4/100:  56%|█████▌    | 1051/1875 [03:09<02:28,  5.57it/s, loss=3.46, acc=0.376]

Epoch 4/100 1050/1875 train_loss: 3.4558 train_acc: 0.3762


Epoch 4/100:  57%|█████▋    | 1076/1875 [03:14<02:21,  5.66it/s, loss=3.46, acc=0.376]

Epoch 4/100 1075/1875 train_loss: 3.4584 train_acc: 0.3759


Epoch 4/100:  59%|█████▊    | 1101/1875 [03:18<02:16,  5.67it/s, loss=3.46, acc=0.376]

Epoch 4/100 1100/1875 train_loss: 3.4591 train_acc: 0.3758


Epoch 4/100:  60%|██████    | 1126/1875 [03:23<02:13,  5.61it/s, loss=3.46, acc=0.376]

Epoch 4/100 1125/1875 train_loss: 3.4619 train_acc: 0.3757


Epoch 4/100:  61%|██████▏   | 1151/1875 [03:27<02:10,  5.55it/s, loss=3.46, acc=0.376]

Epoch 4/100 1150/1875 train_loss: 3.4606 train_acc: 0.3759


Epoch 4/100:  63%|██████▎   | 1176/1875 [03:32<02:04,  5.60it/s, loss=3.46, acc=0.376]

Epoch 4/100 1175/1875 train_loss: 3.4582 train_acc: 0.3760


Epoch 4/100:  64%|██████▍   | 1201/1875 [03:36<02:02,  5.49it/s, loss=3.46, acc=0.376]

Epoch 4/100 1200/1875 train_loss: 3.4589 train_acc: 0.3758


Epoch 4/100:  65%|██████▌   | 1226/1875 [03:41<01:56,  5.59it/s, loss=3.46, acc=0.376]

Epoch 4/100 1225/1875 train_loss: 3.4599 train_acc: 0.3758


Epoch 4/100:  67%|██████▋   | 1251/1875 [03:45<01:52,  5.55it/s, loss=3.46, acc=0.376]

Epoch 4/100 1250/1875 train_loss: 3.4599 train_acc: 0.3759


Epoch 4/100:  68%|██████▊   | 1276/1875 [03:50<01:45,  5.67it/s, loss=3.46, acc=0.376]

Epoch 4/100 1275/1875 train_loss: 3.4603 train_acc: 0.3760


Epoch 4/100:  69%|██████▉   | 1301/1875 [03:54<01:43,  5.57it/s, loss=3.46, acc=0.376]

Epoch 4/100 1300/1875 train_loss: 3.4602 train_acc: 0.3760


Epoch 4/100:  71%|███████   | 1326/1875 [03:59<01:38,  5.58it/s, loss=3.46, acc=0.376]

Epoch 4/100 1325/1875 train_loss: 3.4599 train_acc: 0.3759


Epoch 4/100:  72%|███████▏  | 1351/1875 [04:03<01:35,  5.46it/s, loss=3.46, acc=0.376]

Epoch 4/100 1350/1875 train_loss: 3.4602 train_acc: 0.3762


Epoch 4/100:  73%|███████▎  | 1376/1875 [04:08<01:30,  5.52it/s, loss=3.46, acc=0.376]

Epoch 4/100 1375/1875 train_loss: 3.4593 train_acc: 0.3762


Epoch 4/100:  75%|███████▍  | 1401/1875 [04:12<01:25,  5.54it/s, loss=3.46, acc=0.376]

Epoch 4/100 1400/1875 train_loss: 3.4606 train_acc: 0.3761


Epoch 4/100:  76%|███████▌  | 1426/1875 [04:17<01:21,  5.50it/s, loss=3.46, acc=0.376]

Epoch 4/100 1425/1875 train_loss: 3.4607 train_acc: 0.3762


Epoch 4/100:  77%|███████▋  | 1451/1875 [04:21<01:15,  5.58it/s, loss=3.46, acc=0.376]

Epoch 4/100 1450/1875 train_loss: 3.4605 train_acc: 0.3762


Epoch 4/100:  79%|███████▊  | 1476/1875 [04:26<01:10,  5.65it/s, loss=3.46, acc=0.376]

Epoch 4/100 1475/1875 train_loss: 3.4612 train_acc: 0.3762


Epoch 4/100:  80%|████████  | 1501/1875 [04:30<01:05,  5.70it/s, loss=3.46, acc=0.376]

Epoch 4/100 1500/1875 train_loss: 3.4612 train_acc: 0.3763


Epoch 4/100:  81%|████████▏ | 1526/1875 [04:34<01:02,  5.57it/s, loss=3.46, acc=0.376]

Epoch 4/100 1525/1875 train_loss: 3.4611 train_acc: 0.3763


Epoch 4/100:  83%|████████▎ | 1551/1875 [04:39<00:57,  5.65it/s, loss=3.46, acc=0.376]

Epoch 4/100 1550/1875 train_loss: 3.4616 train_acc: 0.3763


Epoch 4/100:  84%|████████▍ | 1576/1875 [04:43<00:53,  5.60it/s, loss=3.46, acc=0.376]

Epoch 4/100 1575/1875 train_loss: 3.4622 train_acc: 0.3764


Epoch 4/100:  85%|████████▌ | 1601/1875 [04:48<00:48,  5.60it/s, loss=3.46, acc=0.376]

Epoch 4/100 1600/1875 train_loss: 3.4635 train_acc: 0.3765


Epoch 4/100:  87%|████████▋ | 1626/1875 [04:52<00:44,  5.59it/s, loss=3.46, acc=0.377]

Epoch 4/100 1625/1875 train_loss: 3.4637 train_acc: 0.3765


Epoch 4/100:  88%|████████▊ | 1651/1875 [04:57<00:39,  5.64it/s, loss=3.47, acc=0.376]

Epoch 4/100 1650/1875 train_loss: 3.4650 train_acc: 0.3764


Epoch 4/100:  89%|████████▉ | 1676/1875 [05:01<00:35,  5.55it/s, loss=3.47, acc=0.376]

Epoch 4/100 1675/1875 train_loss: 3.4650 train_acc: 0.3764


Epoch 4/100:  91%|█████████ | 1700/1875 [05:06<00:35,  4.91it/s, loss=3.46, acc=0.376]

Epoch 4/100 1700/1875 train_loss: 3.4649 train_acc: 0.3765


Epoch 4/100:  92%|█████████▏| 1726/1875 [05:11<00:27,  5.38it/s, loss=3.47, acc=0.376]

Epoch 4/100 1725/1875 train_loss: 3.4661 train_acc: 0.3765


Epoch 4/100:  93%|█████████▎| 1751/1875 [05:16<00:22,  5.45it/s, loss=3.47, acc=0.377]

Epoch 4/100 1750/1875 train_loss: 3.4669 train_acc: 0.3766


Epoch 4/100:  95%|█████████▍| 1776/1875 [05:20<00:18,  5.43it/s, loss=3.47, acc=0.376]

Epoch 4/100 1775/1875 train_loss: 3.4685 train_acc: 0.3764


Epoch 4/100:  96%|█████████▌| 1801/1875 [05:25<00:13,  5.51it/s, loss=3.47, acc=0.376]

Epoch 4/100 1800/1875 train_loss: 3.4682 train_acc: 0.3765


Epoch 4/100:  97%|█████████▋| 1826/1875 [05:30<00:08,  5.51it/s, loss=3.47, acc=0.376]

Epoch 4/100 1825/1875 train_loss: 3.4692 train_acc: 0.3764


Epoch 4/100:  99%|█████████▊| 1851/1875 [05:34<00:04,  5.55it/s, loss=3.47, acc=0.377]

Epoch 4/100 1850/1875 train_loss: 3.4689 train_acc: 0.3765


Epoch 4/100: 100%|██████████| 1875/1875 [05:38<00:00,  5.53it/s, loss=3.47, acc=0.376]


Epoch 4/100 1875/1875 train_loss: 3.4696 train_acc: 0.3765


	Train Bleu Score: 100%|██████████| 375/375 [01:07<00:00,  5.54it/s, bleu1=0.439, bleu4=0.0583]
	Validation Bleu Score: 100%|██████████| 63/63 [00:11<00:00,  5.58it/s, bleu1=0.436, bleu4=0.0595]


Epoch 4/100 train_bleu1: 0.4387 train_bleu4: 0.0583  val_bleu1: 0.4363 val_bleu4: 0.0595 


Epoch 5/100:   1%|▏         | 26/1875 [00:04<05:35,  5.51it/s, loss=3.45, acc=0.374]

Epoch 5/100 25/1875 train_loss: 3.4442 train_acc: 0.3734


Epoch 5/100:   3%|▎         | 51/1875 [00:09<05:22,  5.66it/s, loss=3.42, acc=0.373]

Epoch 5/100 50/1875 train_loss: 3.4263 train_acc: 0.3731


Epoch 5/100:   4%|▍         | 76/1875 [00:13<05:21,  5.60it/s, loss=3.41, acc=0.374]

Epoch 5/100 75/1875 train_loss: 3.4096 train_acc: 0.3740


Epoch 5/100:   5%|▌         | 101/1875 [00:18<05:18,  5.57it/s, loss=3.41, acc=0.375]

Epoch 5/100 100/1875 train_loss: 3.4080 train_acc: 0.3742


Epoch 5/100:   7%|▋         | 126/1875 [00:22<05:16,  5.53it/s, loss=3.41, acc=0.374]

Epoch 5/100 125/1875 train_loss: 3.4061 train_acc: 0.3738


Epoch 5/100:   8%|▊         | 151/1875 [00:27<05:04,  5.67it/s, loss=3.42, acc=0.373]

Epoch 5/100 150/1875 train_loss: 3.4169 train_acc: 0.3736


Epoch 5/100:   9%|▉         | 176/1875 [00:31<05:02,  5.62it/s, loss=3.42, acc=0.374]

Epoch 5/100 175/1875 train_loss: 3.4163 train_acc: 0.3743


Epoch 5/100:  11%|█         | 201/1875 [00:36<04:57,  5.62it/s, loss=3.41, acc=0.375]

Epoch 5/100 200/1875 train_loss: 3.4132 train_acc: 0.3746


Epoch 5/100:  12%|█▏        | 226/1875 [00:40<04:49,  5.69it/s, loss=3.4, acc=0.376]

Epoch 5/100 225/1875 train_loss: 3.3971 train_acc: 0.3763


Epoch 5/100:  13%|█▎        | 251/1875 [00:45<04:52,  5.55it/s, loss=3.4, acc=0.377]

Epoch 5/100 250/1875 train_loss: 3.4013 train_acc: 0.3769


Epoch 5/100:  15%|█▍        | 276/1875 [00:49<04:42,  5.66it/s, loss=3.4, acc=0.377]

Epoch 5/100 275/1875 train_loss: 3.4011 train_acc: 0.3770


Epoch 5/100:  16%|█▌        | 301/1875 [00:54<04:46,  5.50it/s, loss=3.39, acc=0.378]

Epoch 5/100 300/1875 train_loss: 3.3924 train_acc: 0.3781


Epoch 5/100:  17%|█▋        | 326/1875 [00:58<04:40,  5.52it/s, loss=3.4, acc=0.377]

Epoch 5/100 325/1875 train_loss: 3.4003 train_acc: 0.3771


Epoch 5/100:  19%|█▊        | 351/1875 [01:02<04:27,  5.70it/s, loss=3.39, acc=0.377]

Epoch 5/100 350/1875 train_loss: 3.3941 train_acc: 0.3776


Epoch 5/100:  20%|██        | 376/1875 [01:07<04:35,  5.44it/s, loss=3.4, acc=0.378]

Epoch 5/100 375/1875 train_loss: 3.3971 train_acc: 0.3777


Epoch 5/100:  21%|██▏       | 401/1875 [01:11<04:22,  5.62it/s, loss=3.4, acc=0.378]

Epoch 5/100 400/1875 train_loss: 3.4012 train_acc: 0.3775


Epoch 5/100:  23%|██▎       | 426/1875 [01:16<04:17,  5.62it/s, loss=3.4, acc=0.377]

Epoch 5/100 425/1875 train_loss: 3.4046 train_acc: 0.3771


Epoch 5/100:  24%|██▍       | 451/1875 [01:20<04:11,  5.67it/s, loss=3.4, acc=0.378]

Epoch 5/100 450/1875 train_loss: 3.4003 train_acc: 0.3777


Epoch 5/100:  25%|██▌       | 476/1875 [01:25<04:08,  5.63it/s, loss=3.4, acc=0.378]

Epoch 5/100 475/1875 train_loss: 3.4011 train_acc: 0.3777


Epoch 5/100:  27%|██▋       | 501/1875 [01:29<04:04,  5.63it/s, loss=3.41, acc=0.378]

Epoch 5/100 500/1875 train_loss: 3.4068 train_acc: 0.3777


Epoch 5/100:  28%|██▊       | 526/1875 [01:34<04:00,  5.60it/s, loss=3.4, acc=0.378]

Epoch 5/100 525/1875 train_loss: 3.4047 train_acc: 0.3778


Epoch 5/100:  29%|██▉       | 551/1875 [01:38<04:01,  5.48it/s, loss=3.41, acc=0.378]

Epoch 5/100 550/1875 train_loss: 3.4047 train_acc: 0.3782


Epoch 5/100:  31%|███       | 576/1875 [01:43<03:51,  5.61it/s, loss=3.41, acc=0.378]

Epoch 5/100 575/1875 train_loss: 3.4057 train_acc: 0.3783


Epoch 5/100:  32%|███▏      | 601/1875 [01:47<03:46,  5.63it/s, loss=3.41, acc=0.378]

Epoch 5/100 600/1875 train_loss: 3.4058 train_acc: 0.3783


Epoch 5/100:  33%|███▎      | 626/1875 [01:52<03:43,  5.60it/s, loss=3.41, acc=0.378]

Epoch 5/100 625/1875 train_loss: 3.4073 train_acc: 0.3778


Epoch 5/100:  35%|███▍      | 651/1875 [01:56<03:38,  5.59it/s, loss=3.41, acc=0.378]

Epoch 5/100 650/1875 train_loss: 3.4093 train_acc: 0.3777


Epoch 5/100:  36%|███▌      | 676/1875 [02:01<03:34,  5.59it/s, loss=3.41, acc=0.378]

Epoch 5/100 675/1875 train_loss: 3.4086 train_acc: 0.3777


Epoch 5/100:  37%|███▋      | 701/1875 [02:05<03:28,  5.62it/s, loss=3.41, acc=0.378]

Epoch 5/100 700/1875 train_loss: 3.4062 train_acc: 0.3778


Epoch 5/100:  39%|███▊      | 726/1875 [02:10<03:24,  5.61it/s, loss=3.41, acc=0.378]

Epoch 5/100 725/1875 train_loss: 3.4054 train_acc: 0.3777


Epoch 5/100:  40%|████      | 751/1875 [02:14<03:19,  5.64it/s, loss=3.41, acc=0.378]

Epoch 5/100 750/1875 train_loss: 3.4060 train_acc: 0.3776


Epoch 5/100:  41%|████▏     | 776/1875 [02:19<03:15,  5.62it/s, loss=3.41, acc=0.377]

Epoch 5/100 775/1875 train_loss: 3.4088 train_acc: 0.3775


Epoch 5/100:  43%|████▎     | 801/1875 [02:23<03:12,  5.58it/s, loss=3.41, acc=0.377]

Epoch 5/100 800/1875 train_loss: 3.4098 train_acc: 0.3773


Epoch 5/100:  44%|████▍     | 826/1875 [02:28<03:09,  5.54it/s, loss=3.41, acc=0.377]

Epoch 5/100 825/1875 train_loss: 3.4084 train_acc: 0.3775


Epoch 5/100:  45%|████▌     | 851/1875 [02:32<03:03,  5.59it/s, loss=3.41, acc=0.378]

Epoch 5/100 850/1875 train_loss: 3.4097 train_acc: 0.3778


Epoch 5/100:  47%|████▋     | 876/1875 [02:36<03:00,  5.52it/s, loss=3.41, acc=0.378]

Epoch 5/100 875/1875 train_loss: 3.4103 train_acc: 0.3777


Epoch 5/100:  48%|████▊     | 901/1875 [02:41<02:54,  5.58it/s, loss=3.41, acc=0.378]

Epoch 5/100 900/1875 train_loss: 3.4114 train_acc: 0.3780


Epoch 5/100:  49%|████▉     | 926/1875 [02:45<02:47,  5.67it/s, loss=3.41, acc=0.378]

Epoch 5/100 925/1875 train_loss: 3.4111 train_acc: 0.3782


Epoch 5/100:  51%|█████     | 951/1875 [02:50<02:43,  5.65it/s, loss=3.41, acc=0.378]

Epoch 5/100 950/1875 train_loss: 3.4137 train_acc: 0.3780


Epoch 5/100:  52%|█████▏    | 976/1875 [02:54<02:38,  5.69it/s, loss=3.42, acc=0.378]

Epoch 5/100 975/1875 train_loss: 3.4163 train_acc: 0.3777


Epoch 5/100:  53%|█████▎    | 1001/1875 [02:59<02:35,  5.63it/s, loss=3.42, acc=0.378]

Epoch 5/100 1000/1875 train_loss: 3.4173 train_acc: 0.3776


Epoch 5/100:  55%|█████▍    | 1026/1875 [03:03<02:32,  5.57it/s, loss=3.41, acc=0.378]

Epoch 5/100 1025/1875 train_loss: 3.4139 train_acc: 0.3779


Epoch 5/100:  56%|█████▌    | 1051/1875 [03:08<02:26,  5.62it/s, loss=3.42, acc=0.378]

Epoch 5/100 1050/1875 train_loss: 3.4153 train_acc: 0.3777


Epoch 5/100:  57%|█████▋    | 1076/1875 [03:12<02:22,  5.62it/s, loss=3.41, acc=0.378]

Epoch 5/100 1075/1875 train_loss: 3.4144 train_acc: 0.3779


Epoch 5/100:  59%|█████▊    | 1101/1875 [03:17<02:17,  5.64it/s, loss=3.42, acc=0.378]

Epoch 5/100 1100/1875 train_loss: 3.4154 train_acc: 0.3780


Epoch 5/100:  60%|██████    | 1126/1875 [03:21<02:14,  5.58it/s, loss=3.41, acc=0.378]

Epoch 5/100 1125/1875 train_loss: 3.4150 train_acc: 0.3780


Epoch 5/100:  61%|██████▏   | 1151/1875 [03:26<02:10,  5.55it/s, loss=3.42, acc=0.378]

Epoch 5/100 1150/1875 train_loss: 3.4165 train_acc: 0.3780


Epoch 5/100:  63%|██████▎   | 1176/1875 [03:30<02:03,  5.64it/s, loss=3.42, acc=0.378]

Epoch 5/100 1175/1875 train_loss: 3.4188 train_acc: 0.3778


Epoch 5/100:  64%|██████▍   | 1201/1875 [03:35<01:59,  5.66it/s, loss=3.42, acc=0.378]

Epoch 5/100 1200/1875 train_loss: 3.4197 train_acc: 0.3776


Epoch 5/100:  65%|██████▌   | 1226/1875 [03:39<01:55,  5.61it/s, loss=3.42, acc=0.378]

Epoch 5/100 1225/1875 train_loss: 3.4179 train_acc: 0.3777


Epoch 5/100:  67%|██████▋   | 1251/1875 [03:44<01:50,  5.63it/s, loss=3.42, acc=0.378]

Epoch 5/100 1250/1875 train_loss: 3.4185 train_acc: 0.3777


Epoch 5/100:  68%|██████▊   | 1276/1875 [03:48<01:47,  5.59it/s, loss=3.42, acc=0.377]

Epoch 5/100 1275/1875 train_loss: 3.4215 train_acc: 0.3775


Epoch 5/100:  69%|██████▉   | 1301/1875 [03:53<01:42,  5.60it/s, loss=3.42, acc=0.377]

Epoch 5/100 1300/1875 train_loss: 3.4210 train_acc: 0.3774


Epoch 5/100:  71%|███████   | 1326/1875 [03:57<01:37,  5.66it/s, loss=3.42, acc=0.377]

Epoch 5/100 1325/1875 train_loss: 3.4204 train_acc: 0.3775


Epoch 5/100:  72%|███████▏  | 1351/1875 [04:02<01:34,  5.54it/s, loss=3.42, acc=0.377]

Epoch 5/100 1350/1875 train_loss: 3.4212 train_acc: 0.3773


Epoch 5/100:  73%|███████▎  | 1376/1875 [04:06<01:29,  5.59it/s, loss=3.42, acc=0.377]

Epoch 5/100 1375/1875 train_loss: 3.4228 train_acc: 0.3772


Epoch 5/100:  75%|███████▍  | 1401/1875 [04:11<01:25,  5.51it/s, loss=3.42, acc=0.377]

Epoch 5/100 1400/1875 train_loss: 3.4229 train_acc: 0.3773


Epoch 5/100:  76%|███████▌  | 1426/1875 [04:15<01:19,  5.66it/s, loss=3.42, acc=0.377]

Epoch 5/100 1425/1875 train_loss: 3.4233 train_acc: 0.3774


Epoch 5/100:  77%|███████▋  | 1451/1875 [04:19<01:15,  5.58it/s, loss=3.42, acc=0.377]

Epoch 5/100 1450/1875 train_loss: 3.4241 train_acc: 0.3773


Epoch 5/100:  79%|███████▊  | 1476/1875 [04:24<01:11,  5.57it/s, loss=3.42, acc=0.377]

Epoch 5/100 1475/1875 train_loss: 3.4249 train_acc: 0.3774


Epoch 5/100:  80%|████████  | 1501/1875 [04:28<01:07,  5.55it/s, loss=3.42, acc=0.377]

Epoch 5/100 1500/1875 train_loss: 3.4251 train_acc: 0.3774


Epoch 5/100:  81%|████████▏ | 1526/1875 [04:33<01:02,  5.57it/s, loss=3.43, acc=0.377]

Epoch 5/100 1525/1875 train_loss: 3.4254 train_acc: 0.3774


Epoch 5/100:  83%|████████▎ | 1551/1875 [04:37<00:58,  5.52it/s, loss=3.43, acc=0.378]

Epoch 5/100 1550/1875 train_loss: 3.4249 train_acc: 0.3776


Epoch 5/100:  84%|████████▍ | 1576/1875 [04:42<00:53,  5.60it/s, loss=3.43, acc=0.378]

Epoch 5/100 1575/1875 train_loss: 3.4255 train_acc: 0.3776


Epoch 5/100:  85%|████████▌ | 1601/1875 [04:46<00:48,  5.64it/s, loss=3.43, acc=0.378]

Epoch 5/100 1600/1875 train_loss: 3.4272 train_acc: 0.3775


Epoch 5/100:  87%|████████▋ | 1626/1875 [04:51<00:44,  5.58it/s, loss=3.43, acc=0.377]

Epoch 5/100 1625/1875 train_loss: 3.4277 train_acc: 0.3773


Epoch 5/100:  88%|████████▊ | 1651/1875 [04:55<00:39,  5.65it/s, loss=3.43, acc=0.377]

Epoch 5/100 1650/1875 train_loss: 3.4278 train_acc: 0.3774


Epoch 5/100:  89%|████████▉ | 1676/1875 [05:00<00:35,  5.60it/s, loss=3.43, acc=0.377]

Epoch 5/100 1675/1875 train_loss: 3.4296 train_acc: 0.3773


Epoch 5/100:  91%|█████████ | 1701/1875 [05:04<00:31,  5.55it/s, loss=3.43, acc=0.377]

Epoch 5/100 1700/1875 train_loss: 3.4299 train_acc: 0.3774


Epoch 5/100:  92%|█████████▏| 1726/1875 [05:09<00:26,  5.66it/s, loss=3.43, acc=0.377]

Epoch 5/100 1725/1875 train_loss: 3.4295 train_acc: 0.3774


Epoch 5/100:  93%|█████████▎| 1751/1875 [05:13<00:22,  5.61it/s, loss=3.43, acc=0.377]

Epoch 5/100 1750/1875 train_loss: 3.4295 train_acc: 0.3775


Epoch 5/100:  95%|█████████▍| 1776/1875 [05:18<00:17,  5.58it/s, loss=3.43, acc=0.377]

Epoch 5/100 1775/1875 train_loss: 3.4301 train_acc: 0.3774


Epoch 5/100:  96%|█████████▌| 1801/1875 [05:22<00:13,  5.57it/s, loss=3.43, acc=0.377]

Epoch 5/100 1800/1875 train_loss: 3.4300 train_acc: 0.3774


Epoch 5/100:  97%|█████████▋| 1826/1875 [05:27<00:08,  5.59it/s, loss=3.43, acc=0.377]

Epoch 5/100 1825/1875 train_loss: 3.4308 train_acc: 0.3774


Epoch 5/100:  99%|█████████▊| 1851/1875 [05:31<00:04,  5.71it/s, loss=3.43, acc=0.377]

Epoch 5/100 1850/1875 train_loss: 3.4318 train_acc: 0.3775


Epoch 5/100: 100%|██████████| 1875/1875 [05:35<00:00,  5.58it/s, loss=3.43, acc=0.377]


Epoch 5/100 1875/1875 train_loss: 3.4321 train_acc: 0.3775


	Train Bleu Score: 100%|██████████| 375/375 [01:07<00:00,  5.56it/s, bleu1=0.529, bleu4=0.0972]
	Validation Bleu Score: 100%|██████████| 63/63 [00:11<00:00,  5.62it/s, bleu1=0.528, bleu4=0.095]


Epoch 5/100 train_bleu1: 0.5286 train_bleu4: 0.0972  val_bleu1: 0.5282 val_bleu4: 0.0950 


Epoch 6/100:   1%|▏         | 26/1875 [00:04<05:29,  5.60it/s, loss=3.33, acc=0.382]

Epoch 6/100 25/1875 train_loss: 3.3201 train_acc: 0.3846


Epoch 6/100:   3%|▎         | 51/1875 [00:09<05:42,  5.33it/s, loss=3.34, acc=0.381]

Epoch 6/100 50/1875 train_loss: 3.3483 train_acc: 0.3804


Epoch 6/100:   4%|▍         | 76/1875 [00:13<05:22,  5.58it/s, loss=3.36, acc=0.38]

Epoch 6/100 75/1875 train_loss: 3.3630 train_acc: 0.3797


Epoch 6/100:   5%|▌         | 101/1875 [00:18<05:13,  5.65it/s, loss=3.37, acc=0.38]

Epoch 6/100 100/1875 train_loss: 3.3652 train_acc: 0.3805


Epoch 6/100:   7%|▋         | 126/1875 [00:22<05:11,  5.62it/s, loss=3.37, acc=0.381]

Epoch 6/100 125/1875 train_loss: 3.3643 train_acc: 0.3814


Epoch 6/100:   8%|▊         | 151/1875 [00:27<05:11,  5.54it/s, loss=3.37, acc=0.381]

Epoch 6/100 150/1875 train_loss: 3.3652 train_acc: 0.3807


Epoch 6/100:   9%|▉         | 176/1875 [00:31<05:01,  5.63it/s, loss=3.37, acc=0.381]

Epoch 6/100 175/1875 train_loss: 3.3670 train_acc: 0.3806


Epoch 6/100:  11%|█         | 201/1875 [00:36<04:59,  5.58it/s, loss=3.37, acc=0.38]

Epoch 6/100 200/1875 train_loss: 3.3733 train_acc: 0.3799


Epoch 6/100:  12%|█▏        | 226/1875 [00:40<05:21,  5.12it/s, loss=3.39, acc=0.379]

Epoch 6/100 225/1875 train_loss: 3.3893 train_acc: 0.3791


Epoch 6/100:  12%|█▏        | 234/1875 [00:42<04:58,  5.49it/s, loss=3.39, acc=0.379]


KeyboardInterrupt: 

In [9]:
model = final_model

t_i = 1003
dset = train_set
im, cp, _ = dset[t_i]
print(''.join([idx2word[idx.item()] + ' ' for idx in model.sample(im.unsqueeze(0))[0]]))
print(dset.get_image_captions(t_i)[1])

plt.imshow(dset[t_i][0].detach().cpu().permute(1, 2, 0), interpolation="bicubic")

t_i = 500
dset = val_set
im, cp, _ = dset[t_i]
print(''.join([idx2word[idx.item()] + ' ' for idx in model.sample(im.unsqueeze(0))[0]]))
print(cp)

plt.imshow(dset[t_i][0].detach().cpu().permute(1, 2, 0), interpolation="bicubic")

t_i = 500
dset = test_set
im, cp, _ = dset[t_i]
print(''.join([idx2word[idx.item()] + ' ' for idx in model.sample(im.unsqueeze(0))[0]]))
print(cp)

plt.imshow(dset[t_i][0].detach().cpu().permute(1, 2, 0), interpolation="bicubic")

ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 50])

In [10]:
with torch.no_grad():
    model.eval()
    train_bleu = evaluate_model(desc=f'Train: ', model=final_model,
                                loss_fn=loss_fn, bleu_score_fn=corpus_bleu_score_fn,
                                tensor_to_word_fn=tensor_to_word_fn,
                                data_loader=train_eval_loader, vocab_size=vocab_size)
    val_bleu = evaluate_model(desc=f'Val: ', model=final_model,
                              loss_fn=loss_fn, bleu_score_fn=corpus_bleu_score_fn,
                              tensor_to_word_fn=tensor_to_word_fn,
                              data_loader=val_loader, vocab_size=vocab_size)
    test_bleu = evaluate_model(desc=f'Test: ', model=final_model,
                               loss_fn=loss_fn, bleu_score_fn=corpus_bleu_score_fn,
                               tensor_to_word_fn=tensor_to_word_fn,
                               data_loader=test_loader, vocab_size=vocab_size)
    for setname, result in zip(('train', 'val', 'test'), (train_bleu, val_bleu, test_bleu)):
        print(setname, end=' ')
        for ngram in (1, 2, 3, 4):
            print(f'Bleu-{ngram}: {result[ngram]}', end=' ')
        print()

Train: 100%|██████████| 375/375 [00:50<00:00,  7.46it/s, bleu1=0.378, bleu4=0.0556]
Val: 100%|██████████| 63/63 [00:08<00:00,  7.50it/s, bleu1=0.383, bleu4=0.0536]
Test: 100%|██████████| 63/63 [00:08<00:00,  7.23it/s, bleu1=0.371, bleu4=0.0505]

train Bleu-1: 0.37759354959342434 




AttributeError: 'NoneType' object has no attribute 'summary'