In [1]:
import warnings 
warnings.simplefilter('ignore')

import os
import gc
import csv
import json
import copy
import random

import numpy as np
import pandas as pd
from tqdm import tqdm
import torch.nn.functional as F

%matplotlib inline

from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader

from transformers import T5ForConditionalGeneration, BartForConditionalGeneration,AutoModelForSeq2SeqLM

os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

from transformers import AutoTokenizer, AutoModel, get_linear_schedule_with_warmup, AutoConfig
import transformers

from evaluate import CiderD

In [2]:
def split_data(data_path='./data/train.csv'):
    if not os.path.exists('./data/test.csv'):
        df = pd.read_csv(data_path)
        train_df = df[:18000].reset_index(drop=True)
        test_df = df[18001:].reset_index(drop=True)
        
        train_df.to_csv('./data/train_df.csv', header=None, index=False)
        test_df.to_csv('./data/test_df.csv', header=None, index=False)
split_data()

In [3]:
class Config:
    def __init__(self):
        super(Config, self).__init__()
        
        self.SEED = 42
        self.tokenizer_path = 'bert_model/nezha-cn-base'
        self.MODEL_PATH = './pretrain/pretrained_0.15_dual/checkpoint-20000'
        
        # data
        self.max_length = 256 #句子最大长度
        self.batch_size = 32
        
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.full_finetuning = True
        self.lr = 3e-5
        self.optimizer = 'AdamW'
        self.n_warmup = 0
        self.save_best_only = True
        
        self.multi_gpu = False
        self.attack = 'pgd'

        #self.ema = True
        self.flooding = False
        self.loss_func = 'ce' # ce dice fl
        self.epochs = 20
config = Config()

In [4]:
def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True


np.random.seed(config.SEED)
seed_everything(seed=config.SEED)

In [5]:
class GAIIC_Dataset(Dataset):
    def __init__(self, data_file, input_len, output_len, sos_id=1, eos_id=2, pad_id=0):
        super(GAIIC_Dataset, self).__init__()
        
        with open(data_file, 'r') as fp:
            reader = csv.reader(fp)
            self.samples = [row for row in reader]#[:100]
            
        self.input_len   = input_len
        self.output_len  = output_len
        self.sos_id      = sos_id
        self.eos_id      = eos_id
        self.pad_id      = pad_id
    
    def __len__(self):
        return len(self.samples)
    
    def __getitem__(self, index):
        source = [int(x) for x in self.samples[index][1].split()]
        
        if len(source) < self.input_len:
            source.extend([self.pad_id] * (self.input_len - len(source)))
        
        # for test
        if len(self.samples[index]) < 3:
            return np.array(source)[:self.input_len]
        
        target = [self.sos_id] + [int(x) for x in self.samples[index][2].split()] + [self.eos_id]
        if len(target) < self.output_len:
            target.extend([self.pad_id] * (self.output_len - len(target)))
            
        return  np.array(source)[:self.input_len], np.array(target)[:self.output_len]

In [6]:
class T5_Model(nn.Module):
    def __init__(self):
        super(T5_Model, self).__init__()

        self.model = AutoModelForSeq2SeqLM.from_pretrained('facebook/bart-base')
        
        self.PAD_ID = 0
        self.SOS_ID = 1
        self.EOS_ID = 2
        
        self.register_buffer('true_rep', torch.tensor(1.0))
        self.register_buffer('false_rep', torch.tensor(0.0))

    def forward(self, source_tokens, target_tokens=None, max_length=150, generate=False):
        
        if target_tokens is not None:
            feed_tensor = target_tokens[:, :-1]
            target_tensor = target_tokens.detach().clone()[:, 1:]
            input_pad_mask = torch.where(source_tokens!=self.PAD_ID, self.true_rep, self.false_rep)
            output_pad_mask = torch.where(target_tensor!=self.PAD_ID, self.true_rep, self.false_rep)
            target_tensor[target_tensor==self.PAD_ID]=-100
        
        if not generate:
            outputs = self.model(
                            input_ids = source_tokens,
                            attention_mask=input_pad_mask,
                            decoder_input_ids=feed_tensor,
                            use_cache=None,
                            return_dict=False
                )
            return outputs[0], target_tensor
        else:
            outputs = self.model.generate(
                            input_ids = source_tokens,
                            max_length=max_length,
                )
            
            return outputs

In [7]:
def array2str(arr):
    out = ''
    for i in range(len(arr)):
        if arr[i]==0 or arr[i]==2:
            break
        if arr[i]==1:
            continue
        out += str(int(arr[i])) + ' '
    if len(out.strip())==0:
        out = '0'
        
    return out.strip()

In [8]:
def val_fn(model, valid_dataloader):
    val_loss = 0

    model.eval()
    res, gts = [], {}
    tot = 0
    device = 'cuda'
    for batch_idx, (input_ids, target_ids) in enumerate(tqdm(valid_dataloader)):
        
        input_ids  = input_ids.to(device)
        target_ids = target_ids.cpu().numpy()
        
        pred = model(source_tokens=input_ids, 
                     max_length=150,
                     generate=True).cpu().numpy()
        
        for i in range(pred.shape[0]):
            res.append({'image_id':tot, 'caption': [array2str(pred[i][2:])]})
            gts[tot] = [array2str(target_ids[i][1:])]
            tot += 1
#             if i == 1:
#                 print('pred', pred[i])
#                 print('array2str(pred[i])', array2str(pred[i][2:]))
#                 print('array2str(target_ids[i])', array2str(target_ids[i]))
            
    CiderD_scorer = CiderD(df='corpus', sigma=15)
    cider_score,cider_scores = CiderD_scorer.compute_score(gts, res)
    print("cider_score:", cider_score, cider_scores)
    
    model.train()

    return cider_score, cider_scores

In [9]:
def test(model, test_dataloader):
    val_loss = 0

    model.eval()
    res = []
    tot = 0
    device = 'cuda'
    for batch_idx, input_ids in enumerate(tqdm(test_dataloader)):
        
        input_ids  = input_ids.to(device)
        
        pred = model(source_tokens=input_ids, 
                     max_length=150,
                     generate=True).cpu().numpy()
        
        for i in range(pred.shape[0]):
            res.append({'image_id':tot, 'caption': [array2str(pred[i][2:])]})
            tot += 1

#             if i == 1:
#                 print('pred', pred[i])
#                 print('array2str(pred[i])', array2str(pred[i][2:]))
#                 print('array2str(target_ids[i])', array2str(target_ids[i]))
    return res

In [10]:
def train_fn(model, train_dataloader, criterion, optimizer, scheduler=None, epoch=0):
    
    train_loss = 0
    model.train()
    device = 'cuda'

    for batch_idx, (input_ids, target_ids) in enumerate(tqdm(train_dataloader, desc='Epoch ' + str(epoch))):
        
        input_ids = input_ids.to(device)
        target_ids = target_ids.to(device)
        
        optimizer.zero_grad()
        
        logits, targets = model(source_tokens=input_ids, target_tokens=target_ids)
        
        loss = criterion(logits.view(-1, logits.shape[-1]), 
                        targets.contiguous().view(-1))
        
        train_loss += loss.item()
        
        loss.backward()
        
        optimizer.step()
        
        if batch_idx % 100 == 0:
            print(f"Epoch {epoch}, batch {batch_idx}, loss {loss.item()}")
        if scheduler:
            scheduler.step()
            
    avg_train_loss = train_loss / len(train_dataloader)
    
    print(f"Epoch {epoch}: average loss = {avg_train_loss}")

In [11]:
def main():

    for lr in [1e-4, 5e-4, 7e-4, 1e-5, 3e-5, 5e-5, 7e-5, 9e-5]:
        print("Start Training")
    
        train_dataset = GAIIC_Dataset('./data/train_df.csv', 150, 150)
        val_dataset = GAIIC_Dataset('./data/test_df.csv', 150, 150)
        test_dataset = GAIIC_Dataset('./data/preliminary_a_test.csv', 150, 150)

        train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
        val_dataloader  = DataLoader(val_dataset, batch_size=32, shuffle=False)
        test_dataloader  = DataLoader(test_dataset, batch_size=32, shuffle=False)
        model = T5_Model().to('cuda')

        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]

        optimizer_parameters = [
            {
                "params": [
                    p for n, p in param_optimizer if not any(nd in n for nd in no_decay)
                ],
                "weight_decay": 0.01,
            },
            {
                "params": [
                    p for n, p in param_optimizer if any(nd in n for nd in no_decay)
                ],
                "weight_decay": 0.0,
            },
        ]
        optimizer = optim.AdamW(optimizer_parameters, lr=lr,  weight_decay=0.01, eps=1e-8)

        criterion = torch.nn.CrossEntropyLoss(ignore_index=-100)

        epochs = 7
        bst_val_cider = 0
        for epoch in range(epochs):
            train_fn(model, train_dataloader, criterion, optimizer, scheduler=None, epoch=epoch)

            cider_score, cider_scores = val_fn(model, val_dataloader)

            if cider_score >= bst_val_cider:
                best_model_wts = copy.deepcopy(model.state_dict())
                bst_val_cider = cider_score

        print(f"Best val cider: {bst_val_cider}")
        model.load_state_dict(best_model_wts)

        test_outputs = test(model, test_dataloader)

        df = pd.DataFrame(test_outputs)
        df['caption'] = df['caption'].apply(lambda x: x[0])
        df.to_csv(f'./data_sub_lr_{lr}_cider_{bst_val_cider}.csv', header=None, index=False)
        
        torch.cuda.empty_cache()

In [12]:
test_outputs = main()

Start Training


Epoch 0:   0%|          | 1/282 [00:01<05:22,  1.15s/it]

Epoch 0, batch 0, loss 7.701681613922119


Epoch 0:  36%|███▌      | 101/282 [01:27<02:37,  1.15it/s]

Epoch 0, batch 100, loss 2.6557552814483643


Epoch 0:  71%|███████▏  | 201/282 [02:54<01:10,  1.14it/s]

Epoch 0, batch 200, loss 2.2125892639160156


Epoch 0: 100%|██████████| 282/282 [04:04<00:00,  1.16it/s]


Epoch 0: average loss = 2.6452819645827543


100%|██████████| 63/63 [00:41<00:00,  1.53it/s]


cider_score: 0.4583427086018448 [1.85051344e-07 2.18354302e-04 0.00000000e+00 ... 6.12299171e-01
 4.68069768e-01 4.30412948e+00]


Epoch 1:   0%|          | 1/282 [00:00<04:10,  1.12it/s]

Epoch 1, batch 0, loss 1.9810715913772583


Epoch 1:  36%|███▌      | 101/282 [01:27<02:37,  1.15it/s]

Epoch 1, batch 100, loss 2.0826311111450195


Epoch 1:  71%|███████▏  | 201/282 [02:54<01:10,  1.16it/s]

Epoch 1, batch 200, loss 1.9050500392913818


Epoch 1: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 1: average loss = 1.934614250845943


100%|██████████| 63/63 [00:42<00:00,  1.48it/s]


cider_score: 0.565890179524463 [1.85051344e-07 5.29553752e-01 0.00000000e+00 ... 6.12299171e-01
 4.68069768e-01 4.30412948e+00]


Epoch 2:   0%|          | 1/282 [00:00<04:01,  1.17it/s]

Epoch 2, batch 0, loss 1.7227957248687744


Epoch 2:  36%|███▌      | 101/282 [01:27<02:32,  1.18it/s]

Epoch 2, batch 100, loss 1.756306529045105


Epoch 2:  71%|███████▏  | 201/282 [02:53<01:09,  1.16it/s]

Epoch 2, batch 200, loss 1.6658737659454346


Epoch 2: 100%|██████████| 282/282 [04:03<00:00,  1.16it/s]


Epoch 2: average loss = 1.7509510081710544


100%|██████████| 63/63 [00:52<00:00,  1.20it/s]


cider_score: 0.7484364900161158 [0.01005726 1.12196377 0.         ... 0.55756225 0.46806977 4.30412948]


Epoch 3:   0%|          | 1/282 [00:00<03:59,  1.17it/s]

Epoch 3, batch 0, loss 1.5798636674880981


Epoch 3:  36%|███▌      | 101/282 [01:27<02:37,  1.15it/s]

Epoch 3, batch 100, loss 1.6122801303863525


Epoch 3:  71%|███████▏  | 201/282 [02:54<01:08,  1.18it/s]

Epoch 3, batch 200, loss 1.7135075330734253


Epoch 3: 100%|██████████| 282/282 [04:03<00:00,  1.16it/s]


Epoch 3: average loss = 1.6307597545021815


100%|██████████| 63/63 [00:44<00:00,  1.40it/s]


cider_score: 0.6714946668552136 [1.24486624e-03 5.90099029e-01 0.00000000e+00 ... 3.10639851e-01
 4.68069768e-01 4.30412948e+00]


Epoch 4:   0%|          | 1/282 [00:00<03:58,  1.18it/s]

Epoch 4, batch 0, loss 1.5367680788040161


Epoch 4:  36%|███▌      | 101/282 [01:27<02:37,  1.15it/s]

Epoch 4, batch 100, loss 1.5720775127410889


Epoch 4:  71%|███████▏  | 201/282 [02:53<01:10,  1.16it/s]

Epoch 4, batch 200, loss 1.4337551593780518


Epoch 4: 100%|██████████| 282/282 [04:02<00:00,  1.16it/s]


Epoch 4: average loss = 1.5382297690033067


100%|██████████| 63/63 [00:43<00:00,  1.44it/s]


cider_score: 0.7973753615876554 [1.24486624e-03 9.14164802e-01 0.00000000e+00 ... 6.12299171e-01
 1.68884453e-01 4.30412948e+00]


Epoch 5:   0%|          | 1/282 [00:00<04:02,  1.16it/s]

Epoch 5, batch 0, loss 1.4421048164367676


Epoch 5:  36%|███▌      | 101/282 [01:27<02:35,  1.17it/s]

Epoch 5, batch 100, loss 1.4888606071472168


Epoch 5:  71%|███████▏  | 201/282 [02:54<01:09,  1.16it/s]

Epoch 5, batch 200, loss 1.5742197036743164


Epoch 5: 100%|██████████| 282/282 [04:03<00:00,  1.16it/s]


Epoch 5: average loss = 1.4697573455512947


100%|██████████| 63/63 [00:50<00:00,  1.24it/s]


cider_score: 0.7994228678856999 [1.24486624e-03 2.53841176e-01 4.86108283e-02 ... 6.12299171e-01
 1.46989827e-01 4.30412948e+00]


Epoch 6:   0%|          | 1/282 [00:00<04:00,  1.17it/s]

Epoch 6, batch 0, loss 1.522592306137085


Epoch 6:  36%|███▌      | 101/282 [01:27<02:37,  1.15it/s]

Epoch 6, batch 100, loss 1.4512226581573486


Epoch 6:  71%|███████▏  | 201/282 [02:54<01:10,  1.15it/s]

Epoch 6, batch 200, loss 1.480448603630066


Epoch 6: 100%|██████████| 282/282 [04:03<00:00,  1.16it/s]


Epoch 6: average loss = 1.4008268392677847


100%|██████████| 63/63 [00:50<00:00,  1.26it/s]


cider_score: 0.9436559328449288 [2.51449928e-03 9.14164802e-01 4.86108283e-02 ... 6.12299171e-01
 1.46243283e-01 4.30412948e+00]
Best val cider: 0.9436559328449288


100%|██████████| 94/94 [01:12<00:00,  1.29it/s]


Start Training


Epoch 0:   0%|          | 1/282 [00:00<04:18,  1.09it/s]

Epoch 0, batch 0, loss 7.784750938415527


Epoch 0:  36%|███▌      | 101/282 [01:27<02:36,  1.16it/s]

Epoch 0, batch 100, loss 4.734499454498291


Epoch 0:  71%|███████▏  | 201/282 [02:53<01:09,  1.16it/s]

Epoch 0, batch 200, loss 2.837475538253784


Epoch 0: 100%|██████████| 282/282 [04:03<00:00,  1.16it/s]


Epoch 0: average loss = 3.783471079582864


100%|██████████| 63/63 [00:32<00:00,  1.92it/s]


cider_score: 0.26993157845615334 [9.06345602e-05 4.98820140e-02 4.20440077e-02 ... 6.10203582e-01
 9.56526974e-01 3.28165173e-04]


Epoch 1:   0%|          | 1/282 [00:00<04:05,  1.14it/s]

Epoch 1, batch 0, loss 2.52470064163208


Epoch 1:  36%|███▌      | 101/282 [01:27<02:38,  1.14it/s]

Epoch 1, batch 100, loss 2.245187282562256


Epoch 1:  71%|███████▏  | 201/282 [02:54<01:09,  1.16it/s]

Epoch 1, batch 200, loss 2.1553049087524414


Epoch 1: 100%|██████████| 282/282 [04:03<00:00,  1.16it/s]


Epoch 1: average loss = 2.2961069417338


100%|██████████| 63/63 [00:41<00:00,  1.52it/s]


cider_score: 0.5152838748217393 [1.07119220e-05 2.16105966e+00 4.65272025e-01 ... 1.26644388e-01
 1.03623797e-04 2.84120858e-04]


Epoch 2:   0%|          | 1/282 [00:00<04:01,  1.16it/s]

Epoch 2, batch 0, loss 2.0172858238220215


Epoch 2:  36%|███▌      | 101/282 [01:27<02:38,  1.14it/s]

Epoch 2, batch 100, loss 1.9633071422576904


Epoch 2:  71%|███████▏  | 201/282 [02:54<01:10,  1.15it/s]

Epoch 2, batch 200, loss 2.013953924179077


Epoch 2: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 2: average loss = 2.0536033299797816


100%|██████████| 63/63 [00:51<00:00,  1.22it/s]


cider_score: 0.7000999937910773 [2.61487975e-03 4.58307170e-01 8.39524439e-01 ... 5.98855076e-01
 1.49174178e-01 1.47290160e-04]


Epoch 3:   0%|          | 1/282 [00:00<04:01,  1.17it/s]

Epoch 3, batch 0, loss 1.9618083238601685


Epoch 3:  36%|███▌      | 101/282 [01:27<02:36,  1.16it/s]

Epoch 3, batch 100, loss 1.984132170677185


Epoch 3:  71%|███████▏  | 201/282 [02:53<01:09,  1.16it/s]

Epoch 3, batch 200, loss 2.0130410194396973


Epoch 3: 100%|██████████| 282/282 [04:03<00:00,  1.16it/s]


Epoch 3: average loss = 1.9329271253119125


100%|██████████| 63/63 [00:51<00:00,  1.23it/s]


cider_score: 0.5892872294334661 [2.26364809e-03 8.42374122e-02 5.22487960e-02 ... 7.25457916e-01
 2.13536595e+00 1.24688697e-04]


Epoch 4:   0%|          | 1/282 [00:00<04:00,  1.17it/s]

Epoch 4, batch 0, loss 1.9404430389404297


Epoch 4:  36%|███▌      | 101/282 [01:27<02:36,  1.16it/s]

Epoch 4, batch 100, loss 1.5702086687088013


Epoch 4:  71%|███████▏  | 201/282 [02:54<01:10,  1.15it/s]

Epoch 4, batch 200, loss 1.8844029903411865


Epoch 4: 100%|██████████| 282/282 [04:03<00:00,  1.16it/s]


Epoch 4: average loss = 1.8476974604823064


100%|██████████| 63/63 [01:04<00:00,  1.03s/it]


cider_score: 0.9322156014804525 [0.16600788 0.52955375 0.00571339 ... 0.31821115 2.1553633  3.36026135]


Epoch 5:   0%|          | 1/282 [00:00<03:54,  1.20it/s]

Epoch 5, batch 0, loss 1.7848509550094604


Epoch 5:  36%|███▌      | 101/282 [01:27<02:36,  1.16it/s]

Epoch 5, batch 100, loss 1.6923493146896362


Epoch 5:  71%|███████▏  | 201/282 [02:54<01:10,  1.15it/s]

Epoch 5, batch 200, loss 1.7710092067718506


Epoch 5: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 5: average loss = 1.7877674690375092


100%|██████████| 63/63 [00:49<00:00,  1.28it/s]


cider_score: 0.45071675443365977 [6.00706007e-04 1.63975698e-01 4.49808031e-02 ... 6.90335457e-01
 1.89471395e+00 2.47865388e-04]


Epoch 6:   0%|          | 1/282 [00:00<04:01,  1.16it/s]

Epoch 6, batch 0, loss 1.7448713779449463


Epoch 6:  36%|███▌      | 101/282 [01:27<02:35,  1.16it/s]

Epoch 6, batch 100, loss 1.7131677865982056


Epoch 6:  71%|███████▏  | 201/282 [02:54<01:10,  1.15it/s]

Epoch 6, batch 200, loss 1.8466103076934814


Epoch 6: 100%|██████████| 282/282 [04:04<00:00,  1.16it/s]


Epoch 6: average loss = 1.6893355795677671


100%|██████████| 63/63 [01:02<00:00,  1.01it/s]


cider_score: 0.6878218274732542 [7.87131508e-04 4.83192174e-04 4.38013053e-02 ... 6.74696644e-01
 2.15536330e+00 3.09801147e+00]
Best val cider: 0.9322156014804525


100%|██████████| 94/94 [01:34<00:00,  1.01s/it]


Start Training


Epoch 0:   0%|          | 1/282 [00:00<04:08,  1.13it/s]

Epoch 0, batch 0, loss 7.831020832061768


Epoch 0:  36%|███▌      | 101/282 [01:27<02:38,  1.14it/s]

Epoch 0, batch 100, loss 5.253675937652588


Epoch 0:  71%|███████▏  | 201/282 [02:54<01:09,  1.16it/s]

Epoch 0, batch 200, loss 5.2960076332092285


Epoch 0: 100%|██████████| 282/282 [04:03<00:00,  1.16it/s]


Epoch 0: average loss = 5.354172740422242


100%|██████████| 63/63 [06:22<00:00,  6.07s/it]


cider_score: 3.5996037329142277e-09 [2.66019655e-08 1.19206879e-16 6.98745139e-14 ... 1.37917849e-15
 2.12237314e-16 3.47409922e-19]


Epoch 1:   0%|          | 1/282 [00:00<03:55,  1.19it/s]

Epoch 1, batch 0, loss 5.170682907104492


Epoch 1:  36%|███▌      | 101/282 [01:27<02:37,  1.15it/s]

Epoch 1, batch 100, loss 5.148082733154297


Epoch 1:  71%|███████▏  | 201/282 [02:54<01:10,  1.15it/s]

Epoch 1, batch 200, loss 5.203727722167969


Epoch 1: 100%|██████████| 282/282 [04:04<00:00,  1.16it/s]


Epoch 1: average loss = 5.130926218438656


100%|██████████| 63/63 [06:32<00:00,  6.23s/it]


cider_score: 3.847994381366675e-09 [6.92820988e-08 2.64854862e-16 1.71360239e-13 ... 2.84946164e-15
 4.13309651e-16 3.26340638e-19]


Epoch 2:   0%|          | 1/282 [00:00<04:05,  1.15it/s]

Epoch 2, batch 0, loss 5.066763877868652


Epoch 2:  36%|███▌      | 101/282 [01:27<02:36,  1.16it/s]

Epoch 2, batch 100, loss 5.084764003753662


Epoch 2:  71%|███████▏  | 201/282 [02:54<01:10,  1.16it/s]

Epoch 2, batch 200, loss 5.077171802520752


Epoch 2: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 2: average loss = 5.088567091218123


100%|██████████| 63/63 [06:23<00:00,  6.09s/it]


cider_score: 4.044579221361872e-09 [9.19502683e-08 2.46085543e-16 1.87775183e-13 ... 2.74881143e-15
 3.64492558e-16 3.27204723e-19]


Epoch 3:   0%|          | 1/282 [00:00<03:53,  1.20it/s]

Epoch 3, batch 0, loss 5.119492053985596


Epoch 3:  36%|███▌      | 101/282 [01:27<02:38,  1.14it/s]

Epoch 3, batch 100, loss 5.006854057312012


Epoch 3:  71%|███████▏  | 201/282 [02:54<01:09,  1.17it/s]

Epoch 3, batch 200, loss 5.039710521697998


Epoch 3: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 3: average loss = 5.05070927295279


100%|██████████| 63/63 [06:17<00:00,  5.99s/it]


cider_score: 3.805181988825939e-09 [8.06684965e-08 1.91355674e-16 9.66601505e-14 ... 2.92123048e-15
 4.28945122e-16 2.62430705e-19]


Epoch 4:   0%|          | 1/282 [00:00<04:03,  1.15it/s]

Epoch 4, batch 0, loss 4.9177093505859375


Epoch 4:  36%|███▌      | 101/282 [01:27<02:40,  1.13it/s]

Epoch 4, batch 100, loss 5.049308776855469


Epoch 4:  71%|███████▏  | 201/282 [02:54<01:11,  1.14it/s]

Epoch 4, batch 200, loss 5.104754447937012


Epoch 4: 100%|██████████| 282/282 [04:04<00:00,  1.16it/s]


Epoch 4: average loss = 5.017844490971125


100%|██████████| 63/63 [06:21<00:00,  6.06s/it]


cider_score: 3.623362183362227e-09 [8.22289022e-08 2.07351555e-16 9.87278758e-14 ... 3.04769219e-15
 4.16600086e-16 3.28974598e-19]


Epoch 5:   0%|          | 1/282 [00:00<03:54,  1.20it/s]

Epoch 5, batch 0, loss 4.927923679351807


Epoch 5:  36%|███▌      | 101/282 [01:26<02:38,  1.14it/s]

Epoch 5, batch 100, loss 5.303340911865234


Epoch 5:  71%|███████▏  | 201/282 [02:53<01:09,  1.17it/s]

Epoch 5, batch 200, loss 5.256870269775391


Epoch 5: 100%|██████████| 282/282 [04:03<00:00,  1.16it/s]


Epoch 5: average loss = 5.423399340176413


100%|██████████| 63/63 [00:13<00:00,  4.59it/s]


cider_score: 0.0 [0. 0. 0. ... 0. 0. 0.]


Epoch 6:   0%|          | 1/282 [00:00<03:55,  1.19it/s]

Epoch 6, batch 0, loss 5.2919182777404785


Epoch 6:  36%|███▌      | 101/282 [01:27<02:37,  1.15it/s]

Epoch 6, batch 100, loss 5.296175479888916


Epoch 6:  71%|███████▏  | 201/282 [02:53<01:10,  1.15it/s]

Epoch 6, batch 200, loss 5.133472919464111


Epoch 6: 100%|██████████| 282/282 [04:03<00:00,  1.16it/s]


Epoch 6: average loss = 5.248465629334145


100%|██████████| 63/63 [00:14<00:00,  4.49it/s]


cider_score: 0.0 [0. 0. 0. ... 0. 0. 0.]
Best val cider: 4.044579221361872e-09


100%|██████████| 94/94 [09:38<00:00,  6.16s/it]


Start Training


Epoch 0:   0%|          | 1/282 [00:00<04:23,  1.06it/s]

Epoch 0, batch 0, loss 7.766271591186523


Epoch 0:  36%|███▌      | 101/282 [01:27<02:36,  1.15it/s]

Epoch 0, batch 100, loss 4.040194511413574


Epoch 0:  71%|███████▏  | 201/282 [02:53<01:09,  1.16it/s]

Epoch 0, batch 200, loss 3.2745540142059326


Epoch 0: 100%|██████████| 282/282 [04:03<00:00,  1.16it/s]


Epoch 0: average loss = 3.9653872927875384


100%|██████████| 63/63 [00:43<00:00,  1.43it/s]


cider_score: 0.3872557171948647 [4.70015821e-04 4.77805165e-01 1.64331157e-02 ... 6.12299171e-01
 8.25931092e-05 1.28793155e-04]


Epoch 1:   0%|          | 1/282 [00:00<04:02,  1.16it/s]

Epoch 1, batch 0, loss 2.8975603580474854


Epoch 1:  36%|███▌      | 101/282 [01:27<02:36,  1.16it/s]

Epoch 1, batch 100, loss 2.652040719985962


Epoch 1:  71%|███████▏  | 201/282 [02:54<01:09,  1.17it/s]

Epoch 1, batch 200, loss 2.7002909183502197


Epoch 1: 100%|██████████| 282/282 [04:04<00:00,  1.16it/s]


Epoch 1: average loss = 2.7560471287855863


100%|██████████| 63/63 [00:42<00:00,  1.47it/s]


cider_score: 0.36865064644265727 [1.85051344e-07 2.18354302e-04 0.00000000e+00 ... 6.12299171e-01
 8.23277303e-05 4.30412948e+00]


Epoch 2:   0%|          | 1/282 [00:00<03:56,  1.19it/s]

Epoch 2, batch 0, loss 2.5763731002807617


Epoch 2:  36%|███▌      | 101/282 [01:27<02:37,  1.15it/s]

Epoch 2, batch 100, loss 2.501646041870117


Epoch 2:  71%|███████▏  | 201/282 [02:54<01:10,  1.15it/s]

Epoch 2, batch 200, loss 2.4166617393493652


Epoch 2: 100%|██████████| 282/282 [04:04<00:00,  1.16it/s]


Epoch 2: average loss = 2.485718850548386


100%|██████████| 63/63 [00:41<00:00,  1.51it/s]


cider_score: 0.41090785634368476 [1.85051344e-07 2.18354302e-04 0.00000000e+00 ... 6.12299171e-01
 8.23277303e-05 4.30412948e+00]


Epoch 3:   0%|          | 1/282 [00:00<03:56,  1.19it/s]

Epoch 3, batch 0, loss 2.4719631671905518


Epoch 3:  36%|███▌      | 101/282 [01:27<02:35,  1.17it/s]

Epoch 3, batch 100, loss 2.1610629558563232


Epoch 3:  71%|███████▏  | 201/282 [02:53<01:09,  1.17it/s]

Epoch 3, batch 200, loss 2.5773556232452393


Epoch 3: 100%|██████████| 282/282 [04:03<00:00,  1.16it/s]


Epoch 3: average loss = 2.327675366655309


100%|██████████| 63/63 [00:40<00:00,  1.55it/s]


cider_score: 0.42215681302400654 [1.85051344e-07 2.18354302e-04 0.00000000e+00 ... 6.12299171e-01
 4.68069768e-01 4.30412948e+00]


Epoch 4:   0%|          | 1/282 [00:00<04:05,  1.15it/s]

Epoch 4, batch 0, loss 2.3964874744415283


Epoch 4:  36%|███▌      | 101/282 [01:27<02:37,  1.15it/s]

Epoch 4, batch 100, loss 2.307138681411743


Epoch 4:  71%|███████▏  | 201/282 [02:54<01:10,  1.15it/s]

Epoch 4, batch 200, loss 2.114846706390381


Epoch 4: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 4: average loss = 2.218279536311508


100%|██████████| 63/63 [00:41<00:00,  1.52it/s]


cider_score: 0.45676063637749464 [1.85051344e-07 2.18354302e-04 0.00000000e+00 ... 6.12299171e-01
 4.68069768e-01 4.30412948e+00]


Epoch 5:   0%|          | 1/282 [00:00<04:00,  1.17it/s]

Epoch 5, batch 0, loss 2.266842842102051


Epoch 5:  36%|███▌      | 101/282 [01:27<02:34,  1.17it/s]

Epoch 5, batch 100, loss 2.0975100994110107


Epoch 5:  71%|███████▏  | 201/282 [02:54<01:09,  1.17it/s]

Epoch 5, batch 200, loss 1.9816292524337769


Epoch 5: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 5: average loss = 2.138115558641177


100%|██████████| 63/63 [00:40<00:00,  1.56it/s]


cider_score: 0.4730086627082204 [1.85051344e-07 2.18354302e-04 0.00000000e+00 ... 6.12299171e-01
 4.68069768e-01 4.30412948e+00]


Epoch 6:   0%|          | 1/282 [00:00<03:55,  1.19it/s]

Epoch 6, batch 0, loss 2.161433458328247


Epoch 6:  36%|███▌      | 101/282 [01:27<02:37,  1.15it/s]

Epoch 6, batch 100, loss 2.1068403720855713


Epoch 6:  71%|███████▏  | 201/282 [02:54<01:11,  1.13it/s]

Epoch 6, batch 200, loss 1.9233067035675049


Epoch 6: 100%|██████████| 282/282 [04:04<00:00,  1.16it/s]


Epoch 6: average loss = 2.077764169544193


100%|██████████| 63/63 [00:40<00:00,  1.55it/s]


cider_score: 0.47872263104974816 [1.85051344e-07 2.18354302e-04 0.00000000e+00 ... 6.12299171e-01
 4.13905402e-02 4.30412948e+00]
Best val cider: 0.47872263104974816


100%|██████████| 94/94 [01:00<00:00,  1.55it/s]


Start Training


Epoch 0:   0%|          | 1/282 [00:01<04:49,  1.03s/it]

Epoch 0, batch 0, loss 7.896911144256592


Epoch 0:  36%|███▌      | 101/282 [01:27<02:36,  1.16it/s]

Epoch 0, batch 100, loss 2.838135004043579


Epoch 0:  71%|███████▏  | 201/282 [02:53<01:09,  1.17it/s]

Epoch 0, batch 200, loss 2.6694507598876953


Epoch 0: 100%|██████████| 282/282 [04:03<00:00,  1.16it/s]


Epoch 0: average loss = 3.1611169990918313


100%|██████████| 63/63 [00:42<00:00,  1.48it/s]


cider_score: 0.4157784296039144 [1.85051344e-07 2.18354302e-04 0.00000000e+00 ... 6.12299171e-01
 8.23277303e-05 4.30412948e+00]


Epoch 1:   0%|          | 1/282 [00:00<04:05,  1.15it/s]

Epoch 1, batch 0, loss 2.2535760402679443


Epoch 1:  36%|███▌      | 101/282 [01:27<02:35,  1.16it/s]

Epoch 1, batch 100, loss 2.380115270614624


Epoch 1:  71%|███████▏  | 201/282 [02:54<01:10,  1.15it/s]

Epoch 1, batch 200, loss 2.4243969917297363


Epoch 1: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 1: average loss = 2.2885761451213917


100%|██████████| 63/63 [00:42<00:00,  1.47it/s]


cider_score: 0.48929116517633253 [1.85051344e-07 2.18354302e-04 0.00000000e+00 ... 6.12299171e-01
 4.68069768e-01 4.30412948e+00]


Epoch 2:   0%|          | 1/282 [00:00<04:00,  1.17it/s]

Epoch 2, batch 0, loss 2.241257667541504


Epoch 2:  36%|███▌      | 101/282 [01:27<02:37,  1.15it/s]

Epoch 2, batch 100, loss 2.109250068664551


Epoch 2:  71%|███████▏  | 201/282 [02:54<01:10,  1.15it/s]

Epoch 2, batch 200, loss 2.0641307830810547


Epoch 2: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 2: average loss = 2.0756247449428478


100%|██████████| 63/63 [00:40<00:00,  1.55it/s]


cider_score: 0.5538796123065297 [1.24486624e-03 5.09574427e-01 0.00000000e+00 ... 6.12299171e-01
 4.68069768e-01 4.30412948e+00]


Epoch 3:   0%|          | 1/282 [00:00<03:54,  1.20it/s]

Epoch 3, batch 0, loss 1.9259092807769775


Epoch 3:  36%|███▌      | 101/282 [01:27<02:37,  1.15it/s]

Epoch 3, batch 100, loss 1.9403098821640015


Epoch 3:  71%|███████▏  | 201/282 [02:54<01:10,  1.15it/s]

Epoch 3, batch 200, loss 1.8173209428787231


Epoch 3: 100%|██████████| 282/282 [04:04<00:00,  1.16it/s]


Epoch 3: average loss = 1.9454241094859779


100%|██████████| 63/63 [00:43<00:00,  1.46it/s]


cider_score: 0.506160042221869 [1.24486624e-03 1.12196377e+00 0.00000000e+00 ... 6.12299171e-01
 4.68069768e-01 4.30412948e+00]


Epoch 4:   0%|          | 1/282 [00:00<03:58,  1.18it/s]

Epoch 4, batch 0, loss 1.8974850177764893


Epoch 4:  36%|███▌      | 101/282 [01:27<02:37,  1.15it/s]

Epoch 4, batch 100, loss 1.6965349912643433


Epoch 4:  71%|███████▏  | 201/282 [02:54<01:10,  1.15it/s]

Epoch 4, batch 200, loss 1.8097723722457886


Epoch 4: 100%|██████████| 282/282 [04:03<00:00,  1.16it/s]


Epoch 4: average loss = 1.856395272075707


100%|██████████| 63/63 [00:43<00:00,  1.45it/s]


cider_score: 0.5965459679217651 [1.24486624e-03 5.29553752e-01 4.86108283e-02 ... 3.10639851e-01
 4.68069768e-01 2.25463708e-04]


Epoch 5:   0%|          | 1/282 [00:00<03:58,  1.18it/s]

Epoch 5, batch 0, loss 1.7338478565216064


Epoch 5:  36%|███▌      | 101/282 [01:28<02:38,  1.14it/s]

Epoch 5, batch 100, loss 1.917253017425537


Epoch 5:  71%|███████▏  | 201/282 [02:55<01:11,  1.13it/s]

Epoch 5, batch 200, loss 1.6412014961242676


Epoch 5: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 5: average loss = 1.7834699263809421


100%|██████████| 63/63 [00:41<00:00,  1.51it/s]


cider_score: 0.5329764583648104 [1.24486624e-03 5.29553752e-01 0.00000000e+00 ... 6.12299171e-01
 4.68069768e-01 4.30412948e+00]


Epoch 6:   0%|          | 1/282 [00:00<03:55,  1.19it/s]

Epoch 6, batch 0, loss 1.685945987701416


Epoch 6:  36%|███▌      | 101/282 [01:28<02:36,  1.15it/s]

Epoch 6, batch 100, loss 1.658734679222107


Epoch 6:  71%|███████▏  | 201/282 [02:55<01:09,  1.16it/s]

Epoch 6, batch 200, loss 1.7349289655685425


Epoch 6: 100%|██████████| 282/282 [04:05<00:00,  1.15it/s]


Epoch 6: average loss = 1.719718744568791


100%|██████████| 63/63 [00:41<00:00,  1.51it/s]


cider_score: 0.5902290326426451 [1.24486624e-03 5.77393559e-01 4.86108283e-02 ... 6.12299171e-01
 4.68069768e-01 2.97369481e+00]
Best val cider: 0.5965459679217651


100%|██████████| 94/94 [01:01<00:00,  1.53it/s]


Start Training


Epoch 0:   0%|          | 1/282 [00:01<04:48,  1.03s/it]

Epoch 0, batch 0, loss 7.437476634979248


Epoch 0:  36%|███▌      | 101/282 [01:27<02:37,  1.15it/s]

Epoch 0, batch 100, loss 2.8805558681488037


Epoch 0:  71%|███████▏  | 201/282 [02:54<01:09,  1.16it/s]

Epoch 0, batch 200, loss 2.539367437362671


Epoch 0: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 0: average loss = 2.9025648185547364


100%|██████████| 63/63 [00:40<00:00,  1.55it/s]


cider_score: 0.44080257087171093 [1.85051344e-07 2.18354302e-04 0.00000000e+00 ... 6.12299171e-01
 8.23277303e-05 4.30412948e+00]


Epoch 1:   0%|          | 1/282 [00:00<03:56,  1.19it/s]

Epoch 1, batch 0, loss 2.2266576290130615


Epoch 1:  36%|███▌      | 101/282 [01:27<02:37,  1.15it/s]

Epoch 1, batch 100, loss 2.0297863483428955


Epoch 1:  71%|███████▏  | 201/282 [02:54<01:11,  1.13it/s]

Epoch 1, batch 200, loss 2.113947868347168


Epoch 1: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 1: average loss = 2.122054342259752


100%|██████████| 63/63 [00:42<00:00,  1.48it/s]


cider_score: 0.5089190451588692 [1.24486624e-03 2.18354302e-04 0.00000000e+00 ... 6.12299171e-01
 4.68069768e-01 4.30412948e+00]


Epoch 2:   0%|          | 1/282 [00:00<04:01,  1.16it/s]

Epoch 2, batch 0, loss 2.114386796951294


Epoch 2:  36%|███▌      | 101/282 [01:27<02:37,  1.15it/s]

Epoch 2, batch 100, loss 1.9452495574951172


Epoch 2:  71%|███████▏  | 201/282 [02:54<01:11,  1.13it/s]

Epoch 2, batch 200, loss 2.062147378921509


Epoch 2: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 2: average loss = 1.9275906166286334


100%|██████████| 63/63 [00:41<00:00,  1.52it/s]


cider_score: 0.7103642672477752 [1.24486624e-03 1.12196377e+00 2.29687410e-02 ... 6.12299171e-01
 4.68069768e-01 1.33486728e+00]


Epoch 3:   0%|          | 1/282 [00:00<03:59,  1.17it/s]

Epoch 3, batch 0, loss 1.7394592761993408


Epoch 3:  36%|███▌      | 101/282 [01:27<02:39,  1.14it/s]

Epoch 3, batch 100, loss 1.8009711503982544


Epoch 3:  71%|███████▏  | 201/282 [02:54<01:11,  1.13it/s]

Epoch 3, batch 200, loss 1.8129178285598755


Epoch 3: 100%|██████████| 282/282 [04:05<00:00,  1.15it/s]


Epoch 3: average loss = 1.803941638757151


100%|██████████| 63/63 [00:41<00:00,  1.53it/s]


cider_score: 0.8025332059811857 [1.24486624e-03 5.29553752e-01 4.86108283e-02 ... 6.12299171e-01
 4.68069768e-01 4.30412948e+00]


Epoch 4:   0%|          | 1/282 [00:00<04:01,  1.17it/s]

Epoch 4, batch 0, loss 1.4863499402999878


Epoch 4:  36%|███▌      | 101/282 [01:27<02:40,  1.13it/s]

Epoch 4, batch 100, loss 1.783321499824524


Epoch 4:  71%|███████▏  | 201/282 [02:54<01:10,  1.14it/s]

Epoch 4, batch 200, loss 1.7275774478912354


Epoch 4: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 4: average loss = 1.7101537728985996


100%|██████████| 63/63 [00:38<00:00,  1.64it/s]


cider_score: 0.6991156904836298 [1.24486624e-03 1.12196377e+00 4.86108283e-02 ... 2.96970742e-01
 4.68069768e-01 4.30412948e+00]


Epoch 5:   0%|          | 1/282 [00:00<04:05,  1.14it/s]

Epoch 5, batch 0, loss 1.6996400356292725


Epoch 5:  36%|███▌      | 101/282 [01:27<02:34,  1.17it/s]

Epoch 5, batch 100, loss 1.6482959985733032


Epoch 5:  71%|███████▏  | 201/282 [02:54<01:11,  1.14it/s]

Epoch 5, batch 200, loss 1.563628911972046


Epoch 5: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 5: average loss = 1.6364379334957042


100%|██████████| 63/63 [00:40<00:00,  1.55it/s]


cider_score: 0.8193732019185392 [1.24486624e-03 5.29553752e-01 4.86108283e-02 ... 5.57562250e-01
 4.68069768e-01 4.30412948e+00]


Epoch 6:   0%|          | 1/282 [00:00<03:59,  1.18it/s]

Epoch 6, batch 0, loss 1.3401316404342651


Epoch 6:  36%|███▌      | 101/282 [01:28<02:38,  1.14it/s]

Epoch 6, batch 100, loss 1.5418198108673096


Epoch 6:  71%|███████▏  | 201/282 [02:54<01:11,  1.14it/s]

Epoch 6, batch 200, loss 1.4507759809494019


Epoch 6: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 6: average loss = 1.5753938762854176


100%|██████████| 63/63 [00:41<00:00,  1.50it/s]


cider_score: 0.828999795632129 [1.24486624e-03 8.85567017e-01 4.86108283e-02 ... 6.12299171e-01
 4.68069768e-01 4.30412948e+00]
Best val cider: 0.828999795632129


100%|██████████| 94/94 [00:58<00:00,  1.61it/s]


Start Training


Epoch 0:   0%|          | 1/282 [00:00<04:33,  1.03it/s]

Epoch 0, batch 0, loss 7.614601135253906


Epoch 0:  36%|███▌      | 101/282 [01:28<02:37,  1.15it/s]

Epoch 0, batch 100, loss 2.5594234466552734


Epoch 0:  71%|███████▏  | 201/282 [02:55<01:09,  1.16it/s]

Epoch 0, batch 200, loss 2.1716995239257812


Epoch 0: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 0: average loss = 2.728222966194153


100%|██████████| 63/63 [00:46<00:00,  1.35it/s]


cider_score: 0.5091556152859794 [1.97242094e-05 4.20108423e-02 7.28769945e-03 ... 6.12299171e-01
 4.68069768e-01 2.53497046e+00]


Epoch 1:   0%|          | 1/282 [00:00<04:04,  1.15it/s]

Epoch 1, batch 0, loss 2.2235918045043945


Epoch 1:  36%|███▌      | 101/282 [01:27<02:39,  1.14it/s]

Epoch 1, batch 100, loss 2.065016508102417


Epoch 1:  71%|███████▏  | 201/282 [02:54<01:10,  1.16it/s]

Epoch 1, batch 200, loss 2.197023630142212


Epoch 1: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 1: average loss = 2.0078064346989843


100%|██████████| 63/63 [00:38<00:00,  1.64it/s]


cider_score: 0.7466887734101961 [2.54790884e-05 9.14164802e-01 3.89667235e-03 ... 6.12299171e-01
 4.68069768e-01 1.51239492e+00]


Epoch 2:   0%|          | 1/282 [00:00<04:05,  1.15it/s]

Epoch 2, batch 0, loss 1.6653501987457275


Epoch 2:  36%|███▌      | 101/282 [01:27<02:37,  1.15it/s]

Epoch 2, batch 100, loss 1.8572973012924194


Epoch 2:  71%|███████▏  | 201/282 [02:54<01:11,  1.14it/s]

Epoch 2, batch 200, loss 1.8065179586410522


Epoch 2: 100%|██████████| 282/282 [04:04<00:00,  1.16it/s]


Epoch 2: average loss = 1.8171251369706283


100%|██████████| 63/63 [00:41<00:00,  1.53it/s]


cider_score: 0.8412571894917185 [0.00446472 0.5755013  0.02364989 ... 0.61229917 0.46806977 1.51239492]


Epoch 3:   0%|          | 1/282 [00:00<04:05,  1.14it/s]

Epoch 3, batch 0, loss 1.7159595489501953


Epoch 3:  36%|███▌      | 101/282 [01:27<02:37,  1.15it/s]

Epoch 3, batch 100, loss 1.665363073348999


Epoch 3:  71%|███████▏  | 201/282 [02:54<01:10,  1.16it/s]

Epoch 3, batch 200, loss 1.646863579750061


Epoch 3: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 3: average loss = 1.7132057856160698


100%|██████████| 63/63 [00:44<00:00,  1.42it/s]


cider_score: 0.7573777730492557 [1.24486624e-03 9.14164802e-01 2.24347963e-01 ... 9.47945888e-01
 4.68069768e-01 1.51239492e+00]


Epoch 4:   0%|          | 1/282 [00:00<04:00,  1.17it/s]

Epoch 4, batch 0, loss 1.5795938968658447


Epoch 4:  36%|███▌      | 101/282 [01:27<02:36,  1.16it/s]

Epoch 4, batch 100, loss 1.8607826232910156


Epoch 4:  71%|███████▏  | 201/282 [02:55<01:10,  1.15it/s]

Epoch 4, batch 200, loss 1.6930853128433228


Epoch 4: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 4: average loss = 1.6123492510606212


100%|██████████| 63/63 [00:42<00:00,  1.49it/s]


cider_score: 0.6672063388740586 [1.24486624e-03 1.12196377e+00 4.86108283e-02 ... 6.12299171e-01
 4.68069768e-01 4.30412948e+00]


Epoch 5:   0%|          | 1/282 [00:00<04:01,  1.16it/s]

Epoch 5, batch 0, loss 1.602615475654602


Epoch 5:  36%|███▌      | 101/282 [01:27<02:38,  1.14it/s]

Epoch 5, batch 100, loss 1.5753254890441895


Epoch 5:  71%|███████▏  | 201/282 [02:54<01:11,  1.13it/s]

Epoch 5, batch 200, loss 1.588120937347412


Epoch 5: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 5: average loss = 1.5399917867166777


100%|██████████| 63/63 [00:42<00:00,  1.49it/s]


cider_score: 0.731603567492537 [2.51449928e-03 1.12196377e+00 0.00000000e+00 ... 5.92353505e-01
 4.68069768e-01 4.30412948e+00]


Epoch 6:   0%|          | 1/282 [00:00<04:06,  1.14it/s]

Epoch 6, batch 0, loss 1.567973256111145


Epoch 6:  36%|███▌      | 101/282 [01:27<02:36,  1.15it/s]

Epoch 6, batch 100, loss 1.5695298910140991


Epoch 6:  71%|███████▏  | 201/282 [02:54<01:09,  1.16it/s]

Epoch 6, batch 200, loss 1.479878306388855


Epoch 6: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 6: average loss = 1.4798477600652276


100%|██████████| 63/63 [00:44<00:00,  1.41it/s]


cider_score: 0.8433494976474637 [1.24486624e-03 5.29553752e-01 4.86108283e-02 ... 4.77548971e-01
 1.35021017e-01 4.30412948e+00]
Best val cider: 0.8433494976474637


100%|██████████| 94/94 [01:03<00:00,  1.47it/s]


Start Training


Epoch 0:   0%|          | 1/282 [00:00<04:25,  1.06it/s]

Epoch 0, batch 0, loss 7.839865207672119


Epoch 0:  36%|███▌      | 101/282 [01:27<02:36,  1.16it/s]

Epoch 0, batch 100, loss 2.4388928413391113


Epoch 0:  71%|███████▏  | 201/282 [02:54<01:09,  1.16it/s]

Epoch 0, batch 200, loss 2.218437433242798


Epoch 0: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 0: average loss = 2.661367431177315


100%|██████████| 63/63 [00:46<00:00,  1.36it/s]


cider_score: 0.4813147350998661 [6.72794717e-04 5.09574427e-01 0.00000000e+00 ... 6.12299171e-01
 4.68069768e-01 4.30412948e+00]


Epoch 1:   0%|          | 1/282 [00:00<03:55,  1.19it/s]

Epoch 1, batch 0, loss 2.0197789669036865


Epoch 1:  36%|███▌      | 101/282 [01:27<02:32,  1.19it/s]

Epoch 1, batch 100, loss 1.8937129974365234


Epoch 1:  71%|███████▏  | 201/282 [02:54<01:10,  1.15it/s]

Epoch 1, batch 200, loss 1.9893995523452759


Epoch 1: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 1: average loss = 1.952819861418812


100%|██████████| 63/63 [00:39<00:00,  1.59it/s]


cider_score: 0.6190671376169227 [6.72794717e-04 9.14164802e-01 0.00000000e+00 ... 3.10639851e-01
 4.68069768e-01 4.30412948e+00]


Epoch 2:   0%|          | 1/282 [00:00<03:56,  1.19it/s]

Epoch 2, batch 0, loss 1.815085530281067


Epoch 2:  36%|███▌      | 101/282 [01:27<02:36,  1.16it/s]

Epoch 2, batch 100, loss 1.6220976114273071


Epoch 2:  71%|███████▏  | 201/282 [02:54<01:09,  1.16it/s]

Epoch 2, batch 200, loss 1.7594002485275269


Epoch 2: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 2: average loss = 1.760380597824746


100%|██████████| 63/63 [00:44<00:00,  1.43it/s]


cider_score: 0.6593342149886902 [2.51449928e-03 3.53669805e-01 4.86108283e-02 ... 2.96970742e-01
 4.68069768e-01 2.97369481e+00]


Epoch 3:   0%|          | 1/282 [00:00<03:55,  1.19it/s]

Epoch 3, batch 0, loss 1.4945505857467651


Epoch 3:  36%|███▌      | 101/282 [01:27<02:35,  1.16it/s]

Epoch 3, batch 100, loss 1.6364885568618774


Epoch 3:  71%|███████▏  | 201/282 [02:54<01:10,  1.16it/s]

Epoch 3, batch 200, loss 1.524720311164856


Epoch 3: 100%|██████████| 282/282 [04:04<00:00,  1.16it/s]


Epoch 3: average loss = 1.6412484442934077


100%|██████████| 63/63 [00:44<00:00,  1.43it/s]


cider_score: 0.7212542219216637 [2.51449928e-03 1.12196377e+00 4.86108283e-02 ... 6.12299171e-01
 4.68069768e-01 4.30412948e+00]


Epoch 4:   0%|          | 1/282 [00:00<03:59,  1.17it/s]

Epoch 4, batch 0, loss 1.6160720586776733


Epoch 4:  36%|███▌      | 101/282 [01:27<02:34,  1.17it/s]

Epoch 4, batch 100, loss 1.492222785949707


Epoch 4:  71%|███████▏  | 201/282 [02:54<01:10,  1.14it/s]

Epoch 4, batch 200, loss 1.6168767213821411


Epoch 4: 100%|██████████| 282/282 [04:03<00:00,  1.16it/s]


Epoch 4: average loss = 1.5558170633958586


100%|██████████| 63/63 [00:45<00:00,  1.37it/s]


cider_score: 0.9132030191528517 [2.51449928e-03 9.14164802e-01 4.86108283e-02 ... 6.12299171e-01
 1.14824174e+00 2.97369481e+00]


Epoch 5:   0%|          | 1/282 [00:00<04:06,  1.14it/s]

Epoch 5, batch 0, loss 1.4580496549606323


Epoch 5:  36%|███▌      | 101/282 [01:27<02:36,  1.16it/s]

Epoch 5, batch 100, loss 1.404758095741272


Epoch 5:  71%|███████▏  | 201/282 [02:54<01:10,  1.14it/s]

Epoch 5, batch 200, loss 1.4612725973129272


Epoch 5: 100%|██████████| 282/282 [04:04<00:00,  1.15it/s]


Epoch 5: average loss = 1.4800555164932359


100%|██████████| 63/63 [00:42<00:00,  1.48it/s]


cider_score: 0.5885478989278491 [1.24486624e-03 5.29553752e-01 4.86108283e-02 ... 2.96970742e-01
 4.68069768e-01 2.25463708e-04]


Epoch 6:   0%|          | 1/282 [00:00<03:55,  1.19it/s]

Epoch 6, batch 0, loss 1.350040078163147


Epoch 6:  36%|███▌      | 101/282 [01:27<02:37,  1.15it/s]

Epoch 6, batch 100, loss 1.3906853199005127


Epoch 6:  71%|███████▏  | 201/282 [02:54<01:10,  1.15it/s]

Epoch 6, batch 200, loss 1.2965525388717651


Epoch 6: 100%|██████████| 282/282 [04:03<00:00,  1.16it/s]


Epoch 6: average loss = 1.412235528019303


100%|██████████| 63/63 [00:45<00:00,  1.38it/s]


cider_score: 0.6723483030455616 [1.24486624e-03 5.29553752e-01 4.86108283e-02 ... 6.12299171e-01
 4.68069768e-01 2.63103707e-04]
Best val cider: 0.9132030191528517


100%|██████████| 94/94 [01:05<00:00,  1.44it/s]
