### 임포트

In [1]:
import torch
import os
import sys
import numpy as np
import random

# custom
from util import *
from transformers import GPT2Tokenizer
from AAC_Prefix.AAC_Prefix import * # network
from Train import *

### 기타 값들 설정

In [2]:
# ---------------------------------------------------------------------------
# Prefix vector sizes
# ---------------------------------------------------------------------------
temporal_prefix_size = 15  # 0 or 15
global_prefix_size = 11  # 0 or 11

# Per-branch prefix sizes, keyed by name; handed to the model builder later.
prefix_size_dict = dict(
    temporal_prefix_size=temporal_prefix_size,
    global_prefix_size=global_prefix_size,
)

# Total prefix length is the sum of both branches.
prefix_size = sum(prefix_size_dict.values())

# Spec of the transformers used by the mapping networks.
transformer_num_layers = dict(temporal_num_layers=4, global_num_layers=4)

# ---------------------------------------------------------------------------
# Paths and run name
# ---------------------------------------------------------------------------
MODEL_NAME = 'add_exp_train_audiocaps_test_audiocaps'
data_dir = './AudioCaps'

# ---------------------------------------------------------------------------
# Optimisation hyperparameters
# ---------------------------------------------------------------------------
LR = 5e-5
epochs = 50

TRAIN_BATCH_SIZE = 75
TEST_BATCH_SIZE = 5

# ---------------------------------------------------------------------------
# Reproducibility: seed every RNG in use (Python, NumPy, PyTorch CPU + CUDA)
# ---------------------------------------------------------------------------
random_seed = 2766
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed)

# Turn off cuDNN auto-tuning and ask for deterministic kernels.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# ---------------------------------------------------------------------------
# Device selection: use the GPU when one is available, otherwise the CPU
# ---------------------------------------------------------------------------
USE_CUDA = torch.cuda.is_available()
if USE_CUDA:
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

### Tokenizer, Dataloader 불러오기

In [3]:
# Build the project's custom-vocabulary tokenizer for the AudioCaps dataset
# and record how many entries its vocabulary holds.
tokenizer = tokenizer_forCustomVocab(Dataset='AudioCaps')
vocab_size = len(tokenizer.vocab)

# Tag forwarded to the dataloader factory so it knows which tokenizer is in use.
tokenizer_type = 'Custom'

In [4]:
# Test split loader (built first, same order as before).
test_dataloader = CreateDataloader(
    tokenizer,
    data_dir,
    TEST_BATCH_SIZE,
    'test',
    prefix_size,
    is_TrainDataset=False,
    tokenizer_type=tokenizer_type,
)

# Train split loader.
train_dataloader = CreateDataloader(
    tokenizer,
    data_dir,
    TRAIN_BATCH_SIZE,
    'train',
    prefix_size,
    is_TrainDataset=True,
    tokenizer_type=tokenizer_type,
)

get dataset...: 100%|███████████████████████| 960/960 [00:00<00:00, 1400.21it/s]
get dataset...: 100%|████████████████████| 49276/49276 [03:00<00:00, 272.83it/s]


### 학습결과 정리하는 폴더 생성하기

In [5]:
# Folder that collects this run's training results, named after MODEL_NAME.
directory = "./Train_record/params_" + MODEL_NAME
try:
    # exist_ok=True makes the call idempotent, so the separate
    # os.path.exists() pre-check (a check-then-act race) is not needed.
    # It still raises if the path exists but is not a directory, so that
    # case is now reported instead of being silently treated as "already there".
    os.makedirs(directory, exist_ok=True)
except OSError:
    # Best-effort: report the failure and let the notebook continue.
    print("Error: Failed to create the directory.")


### 모델 초기화

In [6]:
# Instantiate the captioning model through the project's factory.
# Both freeze flags are set and the AudioCaps-pretrained option is off.
model = get_AAC_Prefix(
    tokenizer,
    vocab_size=vocab_size,
    Dataset='AudioCaps',
    prefix_size_dict=prefix_size_dict,
    transformer_num_layers=transformer_num_layers,
    encoder_freeze=True,
    decoder_freeze=True,
    pretrain_fromAudioCaps=False,
    device=device,
)

  fft_window = librosa.util.pad_center(fft_window, n_fft)
  return f(*args, **kwargs)


use Custom Tokenizer
temporal feature's mapping network : num_head = 8 num_layers = 4
global feature ver's mapping network : num_head = 8 num_layers = 4
use custom header!
Encoder freezing
GPT2 freezing


### 학습 & 평가

In [7]:
# Launch training and evaluation with beam search enabled and no
# additional dataset supplied for cross-dataset testing.
Train(
    model,
    LR,
    train_dataloader,
    test_dataloader,
    epochs,
    model_name=MODEL_NAME,
    beam_search=True,
    device=device,
    Dataset='AudioCaps',
    test_dataloader_other_dataset=None,
)

Training Epoch 0, Loss = 12.14145: 100%|██████| 656/656 [05:04<00:00,  2.16it/s]
Training Epoch 1, Loss = 8.33692: 100%|███████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 2, Loss = 6.52476: 100%|███████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 3, Loss = 5.60416: 100%|███████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 4, Loss = 5.45392: 100%|███████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 5, Loss = 5.32693: 100%|███████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 6, Loss = 5.1957: 100%|████████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 7, Loss = 5.06584: 100%|███████| 656/656 [05:04<00:00,  2.16it/s]
Training Epoch 8, Loss = 4.9322: 100%|████████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 9, Loss = 4.79338: 100%|███████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 10, Loss = 4.67208: 100%|██████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 11, Loss = 4.55623: 100%|██████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 12, Loss = 4.

loading annotations into memory...
0:00:00.006509
creating index...
index created!
Loading and preparing results...     
DONE (t=0.00s)
creating index...
index created!
tokenization...


PTBTokenizer tokenized 59528 tokens at 423690.93 tokens per second.
PTBTokenizer tokenized 8691 tokens at 104160.26 tokens per second.


setting up scorers...
computing Bleu score...
{'testlen': 6778, 'reflen': 7215, 'guess': [6778, 5821, 4868, 3999], 'correct': [3863, 1710, 636, 117]}
ratio: 0.9394317394316092
Bleu_1: 0.534
Bleu_2: 0.384
Bleu_3: 0.262
Bleu_4: 0.149
computing METEOR score...
METEOR: 0.170
computing Rouge score...
ROUGE_L: 0.428
computing CIDEr score...
CIDEr: 0.425
computing SPICE score...


Parsing reference captions
Parsing test captions
Initiating Stanford parsing pipeline
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator tokenize
[main] INFO edu.stanford.nlp.pipeline.TokenizerAnnotator - TokenizerAnnotator: No tokenizer type provided. Defaulting to PTBTokenizer.
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator ssplit
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator parse
[main] INFO edu.stanford.nlp.parser.common.ParserGrammar - Loading parser from serialized file edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz ... 
done [0.4 sec].
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator lemma
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator ner
Loading classifier from edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz ... done [1.4 sec].
Loading classifier from edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf.ser.gz ... done [0.6

SPICE evaluation took: 13.88 s
SPICE: 0.122
computing SPIDEr score...
SPIDEr: 0.274


Training Epoch 15, Loss = 4.24011: 100%|██████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 16, Loss = 4.18198: 100%|██████| 656/656 [05:04<00:00,  2.15it/s]
Training Epoch 17, Loss = 4.13129: 100%|██████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 18, Loss = 4.08131: 100%|██████| 656/656 [05:04<00:00,  2.15it/s]
Training Epoch 19, Loss = 4.03766: 100%|██████| 656/656 [05:04<00:00,  2.15it/s]
Eval using dataset...: 100%|██████████████████| 957/957 [03:29<00:00,  4.56it/s]


loading annotations into memory...
0:00:00.006214
creating index...
index created!
Loading and preparing results...     
DONE (t=0.00s)
creating index...
index created!
tokenization...


PTBTokenizer tokenized 59528 tokens at 438979.19 tokens per second.
PTBTokenizer tokenized 8734 tokens at 106200.34 tokens per second.


setting up scorers...
computing Bleu score...
{'testlen': 6821, 'reflen': 7232, 'guess': [6821, 5864, 4912, 4013], 'correct': [4294, 2020, 828, 221]}
ratio: 0.9431692477874801
Bleu_1: 0.593
Bleu_2: 0.438
Bleu_3: 0.312
Bleu_4: 0.199
computing METEOR score...
METEOR: 0.188
computing Rouge score...
ROUGE_L: 0.454
computing CIDEr score...
CIDEr: 0.494
computing SPICE score...


Parsing reference captions
Parsing test captions
Initiating Stanford parsing pipeline
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator tokenize
[main] INFO edu.stanford.nlp.pipeline.TokenizerAnnotator - TokenizerAnnotator: No tokenizer type provided. Defaulting to PTBTokenizer.
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator ssplit
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator parse
[main] INFO edu.stanford.nlp.parser.common.ParserGrammar - Loading parser from serialized file edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz ... 
done [0.4 sec].
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator lemma
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator ner
Loading classifier from edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz ... done [1.4 sec].
Loading classifier from edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf.ser.gz ... done [0.6

SPICE evaluation took: 12.55 s
SPICE: 0.133
computing SPIDEr score...
SPIDEr: 0.314


Training Epoch 20, Loss = 3.99489: 100%|██████| 656/656 [05:04<00:00,  2.16it/s]
Training Epoch 21, Loss = 3.95396: 100%|██████| 656/656 [05:04<00:00,  2.16it/s]
Training Epoch 22, Loss = 3.9203: 100%|███████| 656/656 [05:04<00:00,  2.16it/s]
Training Epoch 23, Loss = 3.88401: 100%|██████| 656/656 [05:04<00:00,  2.16it/s]
Training Epoch 24, Loss = 3.85286: 100%|██████| 656/656 [05:04<00:00,  2.16it/s]
Eval using dataset...: 100%|██████████████████| 957/957 [03:09<00:00,  5.04it/s]


loading annotations into memory...
0:00:00.006333
creating index...
index created!
Loading and preparing results...     
DONE (t=0.00s)
creating index...
index created!
tokenization...


PTBTokenizer tokenized 59528 tokens at 435440.43 tokens per second.
PTBTokenizer tokenized 8967 tokens at 106913.66 tokens per second.


setting up scorers...
computing Bleu score...
{'testlen': 7048, 'reflen': 7292, 'guess': [7048, 6091, 5136, 4199], 'correct': [4562, 2264, 1002, 302]}
ratio: 0.9665386725176951
Bleu_1: 0.625
Bleu_2: 0.474
Bleu_3: 0.348
Bleu_4: 0.233
computing METEOR score...
METEOR: 0.206
computing Rouge score...
ROUGE_L: 0.475
computing CIDEr score...
CIDEr: 0.562
computing SPICE score...


Parsing reference captions
Parsing test captions
Initiating Stanford parsing pipeline
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator tokenize
[main] INFO edu.stanford.nlp.pipeline.TokenizerAnnotator - TokenizerAnnotator: No tokenizer type provided. Defaulting to PTBTokenizer.
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator ssplit
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator parse
[main] INFO edu.stanford.nlp.parser.common.ParserGrammar - Loading parser from serialized file edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz ... 
done [0.4 sec].
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator lemma
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator ner
Loading classifier from edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz ... done [1.4 sec].
Loading classifier from edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf.ser.gz ... done [0.6

SPICE evaluation took: 11.86 s
SPICE: 0.152
computing SPIDEr score...
SPIDEr: 0.357


Training Epoch 25, Loss = 3.82215: 100%|██████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 26, Loss = 3.79466: 100%|██████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 27, Loss = 3.76821: 100%|██████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 28, Loss = 3.74987: 100%|██████| 656/656 [05:04<00:00,  2.16it/s]
Training Epoch 29, Loss = 3.72865: 100%|██████| 656/656 [05:04<00:00,  2.16it/s]
Eval using dataset...: 100%|██████████████████| 957/957 [03:05<00:00,  5.17it/s]


loading annotations into memory...
0:00:00.006336
creating index...
index created!
Loading and preparing results...     
DONE (t=0.00s)
creating index...
index created!
tokenization...


PTBTokenizer tokenized 59528 tokens at 425442.81 tokens per second.
PTBTokenizer tokenized 8667 tokens at 99123.78 tokens per second.


setting up scorers...
computing Bleu score...
{'testlen': 6738, 'reflen': 7005, 'guess': [6738, 5781, 4824, 3880], 'correct': [4458, 2199, 974, 310]}
ratio: 0.9618843683082138
Bleu_1: 0.636
Bleu_2: 0.482
Bleu_3: 0.356
Bleu_4: 0.243
computing METEOR score...
METEOR: 0.207
computing Rouge score...
ROUGE_L: 0.475
computing CIDEr score...
CIDEr: 0.555
computing SPICE score...


Parsing reference captions
Parsing test captions
Initiating Stanford parsing pipeline
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator tokenize
[main] INFO edu.stanford.nlp.pipeline.TokenizerAnnotator - TokenizerAnnotator: No tokenizer type provided. Defaulting to PTBTokenizer.
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator ssplit
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator parse
[main] INFO edu.stanford.nlp.parser.common.ParserGrammar - Loading parser from serialized file edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz ... 
done [0.4 sec].
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator lemma
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator ner
Loading classifier from edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz ... done [1.4 sec].
Loading classifier from edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf.ser.gz ... done [0.6

SPICE evaluation took: 11.67 s
SPICE: 0.152
computing SPIDEr score...
SPIDEr: 0.354


Training Epoch 30, Loss = 3.71035: 100%|██████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 31, Loss = 3.68996: 100%|██████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 32, Loss = 3.67706: 100%|██████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 33, Loss = 3.66128: 100%|██████| 656/656 [05:05<00:00,  2.15it/s]
Training Epoch 34, Loss = 3.64922: 100%|██████| 656/656 [05:04<00:00,  2.16it/s]
Eval using dataset...: 100%|██████████████████| 957/957 [03:11<00:00,  4.99it/s]


loading annotations into memory...
0:00:00.006646
creating index...
index created!
Loading and preparing results...     
DONE (t=0.00s)
creating index...
index created!
tokenization...


PTBTokenizer tokenized 59528 tokens at 438137.04 tokens per second.
PTBTokenizer tokenized 9017 tokens at 101026.78 tokens per second.


setting up scorers...
computing Bleu score...
{'testlen': 7090, 'reflen': 7214, 'guess': [7090, 6133, 5176, 4227], 'correct': [4647, 2343, 1070, 354]}
ratio: 0.9828112004434456
Bleu_1: 0.644
Bleu_2: 0.492
Bleu_3: 0.366
Bleu_4: 0.252
computing METEOR score...
METEOR: 0.210
computing Rouge score...
ROUGE_L: 0.483
computing CIDEr score...
CIDEr: 0.584
computing SPICE score...


Parsing reference captions
Parsing test captions
Initiating Stanford parsing pipeline
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator tokenize
[main] INFO edu.stanford.nlp.pipeline.TokenizerAnnotator - TokenizerAnnotator: No tokenizer type provided. Defaulting to PTBTokenizer.
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator ssplit
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator parse
[main] INFO edu.stanford.nlp.parser.common.ParserGrammar - Loading parser from serialized file edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz ... 
done [0.5 sec].
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator lemma
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator ner
Loading classifier from edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz ... done [1.5 sec].
Loading classifier from edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf.ser.gz ... done [0.6

SPICE evaluation took: 10.89 s
SPICE: 0.158
computing SPIDEr score...
SPIDEr: 0.371


Training Epoch 35, Loss = 3.63699: 100%|██████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 36, Loss = 3.6286: 100%|███████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 37, Loss = 3.61825: 100%|██████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 38, Loss = 3.60905: 100%|██████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 39, Loss = 3.60317: 100%|██████| 656/656 [05:03<00:00,  2.16it/s]
Eval using dataset...: 100%|██████████████████| 957/957 [03:09<00:00,  5.06it/s]


loading annotations into memory...
0:00:00.006525
creating index...
index created!
Loading and preparing results...     
DONE (t=0.00s)
creating index...
index created!
tokenization...


PTBTokenizer tokenized 59528 tokens at 413823.60 tokens per second.
PTBTokenizer tokenized 8831 tokens at 104260.42 tokens per second.


setting up scorers...
computing Bleu score...
{'testlen': 6890, 'reflen': 7015, 'guess': [6890, 5933, 4976, 4027], 'correct': [4614, 2336, 1082, 367]}
ratio: 0.9821810406270873
Bleu_1: 0.658
Bleu_2: 0.504
Bleu_3: 0.379
Bleu_4: 0.264
computing METEOR score...
METEOR: 0.214
computing Rouge score...
ROUGE_L: 0.485
computing CIDEr score...
CIDEr: 0.602
computing SPICE score...


Parsing reference captions
Parsing test captions
Initiating Stanford parsing pipeline
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator tokenize
[main] INFO edu.stanford.nlp.pipeline.TokenizerAnnotator - TokenizerAnnotator: No tokenizer type provided. Defaulting to PTBTokenizer.
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator ssplit
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator parse
[main] INFO edu.stanford.nlp.parser.common.ParserGrammar - Loading parser from serialized file edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz ... 
done [0.4 sec].
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator lemma
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator ner
Loading classifier from edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz ... done [1.4 sec].
Loading classifier from edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf.ser.gz ... done [0.6

SPICE evaluation took: 10.45 s
SPICE: 0.163
computing SPIDEr score...
SPIDEr: 0.382


Training Epoch 40, Loss = 3.59616: 100%|██████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 41, Loss = 3.59048: 100%|██████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 42, Loss = 3.58667: 100%|██████| 656/656 [05:04<00:00,  2.16it/s]
Training Epoch 43, Loss = 3.58307: 100%|██████| 656/656 [05:04<00:00,  2.15it/s]
Training Epoch 44, Loss = 3.58048: 100%|██████| 656/656 [05:04<00:00,  2.15it/s]
Eval using dataset...: 100%|██████████████████| 957/957 [03:11<00:00,  5.00it/s]


loading annotations into memory...
0:00:00.006330
creating index...
index created!
Loading and preparing results...     
DONE (t=0.00s)
creating index...
index created!
tokenization...


PTBTokenizer tokenized 59528 tokens at 428421.15 tokens per second.
PTBTokenizer tokenized 9045 tokens at 106241.87 tokens per second.


setting up scorers...
computing Bleu score...
{'testlen': 7098, 'reflen': 7201, 'guess': [7098, 6141, 5184, 4234], 'correct': [4720, 2387, 1120, 372]}
ratio: 0.9856964310511059
Bleu_1: 0.655
Bleu_2: 0.501
Bleu_3: 0.377
Bleu_4: 0.261
computing METEOR score...
METEOR: 0.215
computing Rouge score...
ROUGE_L: 0.483
computing CIDEr score...
CIDEr: 0.598
computing SPICE score...


Parsing reference captions
Parsing test captions
Initiating Stanford parsing pipeline
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator tokenize
[main] INFO edu.stanford.nlp.pipeline.TokenizerAnnotator - TokenizerAnnotator: No tokenizer type provided. Defaulting to PTBTokenizer.
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator ssplit
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator parse
[main] INFO edu.stanford.nlp.parser.common.ParserGrammar - Loading parser from serialized file edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz ... 
done [0.4 sec].
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator lemma
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator ner
Loading classifier from edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz ... done [1.4 sec].
Loading classifier from edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf.ser.gz ... done [0.6

SPICE evaluation took: 11.47 s
SPICE: 0.166
computing SPIDEr score...
SPIDEr: 0.382


Training Epoch 45, Loss = 3.58041: 100%|██████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 46, Loss = 3.57602: 100%|██████| 656/656 [05:04<00:00,  2.15it/s]
Training Epoch 47, Loss = 3.57468: 100%|██████| 656/656 [05:04<00:00,  2.16it/s]
Training Epoch 48, Loss = 3.57527: 100%|██████| 656/656 [05:03<00:00,  2.16it/s]
Training Epoch 49, Loss = 3.57621: 100%|██████| 656/656 [05:03<00:00,  2.16it/s]
Eval using dataset...: 100%|██████████████████| 957/957 [03:10<00:00,  5.03it/s]


loading annotations into memory...
0:00:00.006377
creating index...
index created!
Loading and preparing results...     
DONE (t=0.00s)
creating index...
index created!
tokenization...


PTBTokenizer tokenized 59528 tokens at 431194.68 tokens per second.
PTBTokenizer tokenized 8892 tokens at 94572.46 tokens per second.


setting up scorers...
computing Bleu score...
{'testlen': 6954, 'reflen': 7116, 'guess': [6954, 5997, 5040, 4090], 'correct': [4643, 2359, 1118, 382]}
ratio: 0.9772344013489351
Bleu_1: 0.652
Bleu_2: 0.501
Bleu_3: 0.379
Bleu_4: 0.265
computing METEOR score...
METEOR: 0.214
computing Rouge score...
ROUGE_L: 0.484
computing CIDEr score...
CIDEr: 0.601
computing SPICE score...


Parsing reference captions
Parsing test captions
Initiating Stanford parsing pipeline
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator tokenize
[main] INFO edu.stanford.nlp.pipeline.TokenizerAnnotator - TokenizerAnnotator: No tokenizer type provided. Defaulting to PTBTokenizer.
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator ssplit
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator parse
[main] INFO edu.stanford.nlp.parser.common.ParserGrammar - Loading parser from serialized file edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz ... 
done [0.4 sec].
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator lemma
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator ner
Loading classifier from edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz ... done [1.4 sec].
Loading classifier from edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf.ser.gz ... done [0.6

SPICE evaluation took: 9.894 s
SPICE: 0.162
computing SPIDEr score...
SPIDEr: 0.381

Training time : 4:13:18
