In [1]:
# import time 
# import torch.backends.cudnn as cudnn
# import torch.optim
# import torch.utils.data

# from torch import nn
# from torch.nn.utils.rnn import pack_padded_sequence
# from models import Encoder, DecoderWithAttention
# from dataset import *
# from utils import *
# from train import *
# from torch.utils.tensorboard import SummaryWriter
# from os import path as osp

In [2]:
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from caption import CaptionDataset, caption_image
from utils import load_checkpoint, print_scores
from dataset import build_vocab
import pandas as pd
from tqdm import tqdm

In [3]:
model_path = 'models/BEST_checkpoint_flickr8k_ar_arabert_pretrained_finetune.pth.tar'
IMGS_PATH = 'flickr/Images/'
DATA_JSON_PATH = 'data/ar_data.json'

In [4]:
vocab = build_vocab(DATA_JSON_PATH)

transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


bs = 1

loader = DataLoader(
            dataset=CaptionDataset(IMGS_PATH, DATA_JSON_PATH,
                                    transforms=transform, vocab=vocab, split='test'),
            batch_size=bs,
            num_workers=7,
            shuffle=True,
            pin_memory=True
        )

100%|██████████| 24000/24000 [00:00<00:00, 284849.63it/s]


Dataset split: test
Unique images: 1000
Total size: 3000


In [5]:
checkpoint = load_checkpoint(model_path)

Loaded Checkpoint!!
Last Epoch: 9
Best Bleu-4: 24.949378413361714


In [6]:
references, hypothesis, img_ids = caption_image(loader, vocab, checkpoint['encoder'],
                                               checkpoint['decoder'], 5)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /pytorch/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)
EVALUATING AT BEAM SIZE 5: 100%|██████████| 3000/3000 [03:15<00:00, 15.31it/s]


In [7]:
_ = print_scores(references, hypothesis)

----- Bleu-n Scores -----
1: 58.678484239386094
2: 46.85683508774053
3: 36.14555791431082
4: 27.864202291806382
-------------------------


In [8]:
df = pd.DataFrame.from_dict({"file_name":img_ids, "references":references, "hypothesis": hypothesis})
df.head()

Unnamed: 0,file_name,references,hypothesis
0,1056338697_4f7d7ce270.jpg,"[[112, 4, 5, 151, 16, 122, 109, 664, 689, 4, 5...","[20, 4, 5, 7, 4, 5, 16, 10, 4, 71]"
1,353180303_6a24179c50.jpg,"[[112, 4, 5, 215, 10, 4, 236, 1008, 962, 253, ...","[112, 4, 5, 7, 4, 5, 16, 10, 4, 50]"
2,1415591512_a84644750c.jpg,"[[20, 4, 5, 91, 16, 10, 4, 793, 21, 4, 10, 4, ...","[20, 4, 5, 7, 4, 5, 7, 4, 5, 16, 10, 4, 71]"
3,3256043809_47258e0b3e.jpg,"[[12, 65, 15, 4, 31, 1232, 10, 4, 209, 10, 4, ...","[90, 45, 10, 4, 89, 38, 4, 121, 16, 10, 4, 55]"
4,3499720588_c32590108e.jpg,"[[28, 389, 1743, 4, 5, 976, 4, 5, 38, 10, 4, 8...","[28, 15, 4, 112, 4, 5, 16, 10, 4, 50]"


In [9]:
df.sort_values("file_name").head()

Unnamed: 0,file_name,references,hypothesis
0,1056338697_4f7d7ce270.jpg,"[[112, 4, 5, 151, 16, 122, 109, 664, 689, 4, 5...","[20, 4, 5, 7, 4, 5, 16, 10, 4, 71]"
255,1056338697_4f7d7ce270.jpg,"[[112, 4, 5, 151, 16, 122, 109, 664, 689, 4, 5...","[20, 4, 5, 7, 4, 5, 16, 10, 4, 71]"
2625,1056338697_4f7d7ce270.jpg,"[[112, 4, 5, 151, 16, 122, 109, 664, 689, 4, 5...","[20, 4, 5, 7, 4, 5, 16, 10, 4, 71]"
716,106490881_5a2dd9b7bd.jpg,"[[49, 16, 236, 10, 4, 237, 4, 5, 10, 4, 93, 18...","[49, 7, 51, 16, 10, 4, 66]"
1394,106490881_5a2dd9b7bd.jpg,"[[49, 16, 236, 10, 4, 237, 4, 5, 10, 4, 93, 18...","[49, 7, 51, 16, 10, 4, 66]"


In [11]:
df.to_json('arabert_beam5.json')

In [3]:
df = pd.read_json('arabert_beam5.json')
df.head()

Unnamed: 0,file_name,references,hypothesis
0,1056338697_4f7d7ce270.jpg,"[[112, 4, 5, 151, 16, 122, 109, 664, 689, 4, 5...","[20, 4, 5, 7, 4, 5, 16, 10, 4, 71]"
1,353180303_6a24179c50.jpg,"[[112, 4, 5, 215, 10, 4, 236, 1008, 962, 253, ...","[112, 4, 5, 7, 4, 5, 16, 10, 4, 50]"
2,1415591512_a84644750c.jpg,"[[20, 4, 5, 91, 16, 10, 4, 793, 21, 4, 10, 4, ...","[20, 4, 5, 7, 4, 5, 7, 4, 5, 16, 10, 4, 71]"
3,3256043809_47258e0b3e.jpg,"[[12, 65, 15, 4, 31, 1232, 10, 4, 209, 10, 4, ...","[90, 45, 10, 4, 89, 38, 4, 121, 16, 10, 4, 55]"
4,3499720588_c32590108e.jpg,"[[28, 389, 1743, 4, 5, 976, 4, 5, 38, 10, 4, 8...","[28, 15, 4, 112, 4, 5, 16, 10, 4, 50]"


In [4]:
# getting unique file_names
files = []
refes = []
hypos = []
for fname in tqdm(df.file_name.unique()):
    refes.append(df[df.file_name==fname].references.to_list()[0])
    hypos.append(df[df.file_name==fname].hypothesis.to_list()[0])
    files.append(fname)

100%|██████████| 984/984 [00:01<00:00, 963.65it/s]


In [5]:
len(refes), len(hypos), len(files)

(984, 984, 984)

In [6]:
_ = print_scores(refes, hypos)

----- Bleu-n Scores -----
1: 58.678484239386094
2: 46.85683508774053
3: 36.14555791431082
4: 27.864202291806382
-------------------------


In [7]:
b1, b2, b3, b4 = [], [], [], []

for ref, hypo in zip(refes, hypos):
    scores = print_scores([ref], [hypo], prnt=False)
    b1.append(scores[0])
    b2.append(scores[1])
    b3.append(scores[2])
    b4.append(scores[3])

The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


In [31]:
df.head()

Unnamed: 0,file_name,references,hypothesis
0,1056338697_4f7d7ce270.jpg,"[[112, 4, 5, 151, 16, 122, 109, 664, 689, 4, 5...","[20, 4, 5, 7, 4, 5, 16, 10, 4, 71]"
1,353180303_6a24179c50.jpg,"[[112, 4, 5, 215, 10, 4, 236, 1008, 962, 253, ...","[112, 4, 5, 7, 4, 5, 16, 10, 4, 50]"
2,1415591512_a84644750c.jpg,"[[20, 4, 5, 91, 16, 10, 4, 793, 21, 4, 10, 4, ...","[20, 4, 5, 7, 4, 5, 7, 4, 5, 16, 10, 4, 71]"
3,3256043809_47258e0b3e.jpg,"[[12, 65, 15, 4, 31, 1232, 10, 4, 209, 10, 4, ...","[90, 45, 10, 4, 89, 38, 4, 121, 16, 10, 4, 55]"
4,3499720588_c32590108e.jpg,"[[28, 389, 1743, 4, 5, 976, 4, 5, 38, 10, 4, 8...","[28, 15, 4, 112, 4, 5, 16, 10, 4, 50]"


In [32]:
df = pd.DataFrame({'file_name':fname, 'references': refes, 'hypothesis': hypos,
                  'b1':b1, 'b2':b2, 'b3':b3, 'b4':b4})

In [33]:
df.head()

Unnamed: 0,file_name,references,hypothesis,b1,b2,b3,b4
0,2239938351_43c73c887c.jpg,"[[112, 4, 5, 151, 16, 122, 109, 664, 689, 4, 5...","[20, 4, 5, 7, 4, 5, 16, 10, 4, 71]",63.338619,56.426527,47.627321,40.88065
1,2239938351_43c73c887c.jpg,"[[112, 4, 5, 215, 10, 4, 236, 1008, 962, 253, ...","[112, 4, 5, 7, 4, 5, 16, 10, 4, 50]",59.265458,44.173864,26.241228,4.154341e-76
2,2239938351_43c73c887c.jpg,"[[20, 4, 5, 91, 16, 10, 4, 793, 21, 4, 10, 4, ...","[20, 4, 5, 7, 4, 5, 7, 4, 5, 16, 10, 4, 71]",72.549562,64.396832,54.470822,45.15435
3,2239938351_43c73c887c.jpg,"[[12, 65, 15, 4, 31, 1232, 10, 4, 209, 10, 4, ...","[90, 45, 10, 4, 89, 38, 4, 121, 16, 10, 4, 55]",66.666667,55.048188,44.964431,31.70233
4,2239938351_43c73c887c.jpg,"[[28, 389, 1743, 4, 5, 976, 4, 5, 38, 10, 4, 8...","[28, 15, 4, 112, 4, 5, 16, 10, 4, 50]",90.0,70.710678,50.0,36.55552


In [37]:
df.to_json('arabert_5beam_results.json')