In [1]:
# (Re)load the autoreload extension; mode 2 re-imports edited modules before each cell is executed.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
import time 
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
from torch import nn
from torch.nn.utils.rnn import pack_padded_sequence
from models import Encoder, DecoderWithAttention
from dataset import *
from utils import *
from train import *
from torch.utils.tensorboard import SummaryWriter
from os import path as osp

In [3]:
# --- Model hyper-parameters ---
encoder_dim = 2048  # encoder feature size (resnet101)
emb_dim = 512  # size of the word embeddings
attention_dim = 512  # width of the attention linear layers
decoder_dim = 512  # size of the decoder RNN
dropout = 0.5
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# cuDNN autotuning: worthwhile only when model inputs have a fixed size; otherwise it adds overhead.
cudnn.benchmark = True

# --- Training hyper-parameters ---
epochs = 30  # upper bound on training epochs (early stopping may end sooner)
batch_size = 256
workers = 2
encoder_lr = 1e-4  # encoder learning rate, relevant when fine-tuning the encoder
decoder_lr = 4e-4  # decoder learning rate
fine_tune_encoder = False  # whether to fine-tune the encoder
pretrained_embeddings = False
fine_tune_embeddings = False
checkpoint = None  # checkpoint path, or None when there is none

In [14]:
DATA_JSON_PATH = 'data/ar_data.json'  # JSON file passed to build_vocab to construct the vocabulary
IMGS_PATH = 'images_200/'  # path passed to test_score as its images argument
DATA_NAME = 'TESTING'  # not referenced in the visible cells; presumably a run/dataset label — TODO confirm

In [15]:
# Build the vocabulary from the data JSON, forwarding max_seq to build_vocab.
max_seq = 65
vocab = build_vocab(DATA_JSON_PATH, max_seq=max_seq)

# Keep the vocabulary size around and show it as the cell output.
vocab_len = len(vocab)
vocab_len

100%|██████████| 24000/24000 [00:00<00:00, 302988.23it/s]


3309

In [16]:
# Preview the first ten entries of the index-to-token mapping (keys, then values).
sample_ids = list(vocab.itos.keys())[:10]
sample_tokens = list(vocab.itos.values())[:10]
sample_ids, sample_tokens

([0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
 ['<pad>', '<sos>', '<eos>', '<unk>', '+', 'ة', 'طفل', 'صغير', 'تتسلق', 'إلى'])

In [22]:
# Checkpoint file to evaluate.
CHECKPOINT_PATH = "ar_models/BEST_checkpoint_flickr8k_ar_arabert_finetune.pth.tar"

# load_checkpoint is not defined in this notebook; it arrives through one of the wildcard imports.
m = load_checkpoint(CHECKPOINT_PATH)

Loaded Checkpoint!!
Last Epoch: 17
Best Bleu-4: 25.49970489970383


In [24]:
# Pull the 'encoder' and 'decoder' entries out of the loaded checkpoint object.
encoder, decoder = m['encoder'], m['decoder']

In [28]:
from eval import test_score

# Run test_score once per beam size (1 through 5) and gather the returned
# scores into a single dict.
test_dict = {}

for beam_size in range(1, 6):
    # test_score returns four values; the printed output labels them Bleu-1..4.
    bleu1, bleu2, bleu3, bleu4 = test_score(
        beam_size, encoder, decoder, IMGS_PATH, 'sm_ar_data.json', vocab
    )

    # The first three scores are recorded only for the beam-size-3 run.
    if beam_size == 3:
        test_dict.update(b1=bleu1, b2=bleu2, b3=bleu3)

    # The fourth score is kept for every beam size, keyed 'b4-b<beam size>'.
    test_dict[f'b4-b{beam_size}'] = bleu4

Dataset split: test
Unique images: 201
Total size: 1005


EVALUATING AT BEAM SIZE 1: 100%|██████████| 1005/1005 [00:41<00:00, 24.50it/s]


----- Bleu-n Scores -----
1: 57.26460717331159
2: 44.11587054804982
3: 31.57337234488357
4: 22.521816405336576
-------------------------
Dataset split: test
Unique images: 201
Total size: 1005


EVALUATING AT BEAM SIZE 2: 100%|██████████| 1005/1005 [00:51<00:00, 19.70it/s]


----- Bleu-n Scores -----
1: 57.481629162500404
2: 45.297626072950294
3: 33.06811991316908
4: 24.007402361039233
-------------------------
Dataset split: test
Unique images: 201
Total size: 1005


EVALUATING AT BEAM SIZE 3: 100%|██████████| 1005/1005 [00:58<00:00, 17.07it/s]


----- Bleu-n Scores -----
1: 59.57903879112231
2: 47.81904879994653
3: 35.57908772333811
4: 26.300767047425133
-------------------------
Dataset split: test
Unique images: 201
Total size: 1005


EVALUATING AT BEAM SIZE 4: 100%|██████████| 1005/1005 [01:09<00:00, 14.38it/s]


----- Bleu-n Scores -----
1: 60.2981773061791
2: 48.312757135913806
3: 35.48037749954152
4: 26.245859606761872
-------------------------
Dataset split: test
Unique images: 201
Total size: 1005


EVALUATING AT BEAM SIZE 5: 100%|██████████| 1005/1005 [01:13<00:00, 13.76it/s]


----- Bleu-n Scores -----
1: 59.49016765408188
2: 47.841419654270304
3: 35.4434402518133
4: 26.157154595107894
-------------------------
