In [1]:
import os, sys, json
# Absolute path of the parent of the current working directory; it is added to
# the import search path once (re-running the cell does not add it again).
src_path = os.path.abspath(os.pardir)
if src_path not in sys.path:
    sys.path += [src_path]

import numpy as np
import matplotlib.pyplot as plt
from src.image_processor import default_image_transform
from src.config import CaptionConfig
from src.data_loader import CaptionDataManager
from src.image_processor import train_transform
from src.base_experiment import plot_eval
import math

import torch
import torch.optim as optim

# IPython magics (only valid inside an IPython/Jupyter kernel):
#   - render matplotlib figures inline in the notebook output
#   - load the autoreload extension and, with mode 2, reload imported modules
#     before each cell runs, so edits to the project modules are picked up
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
from collections import defaultdict

# Three synthetic 20-point series over x = 0..19.
Y_val = list(range(20))                       # identity: 0, 1, ..., 19
Y_test = [point * point for point in Y_val]   # squares
Y_train = list(map(math.sqrt, Y_val))         # square roots

# Two-level mapping: metric name -> split name -> series.
# Looking up a metric name that is not present yet creates an empty dict.
data = defaultdict(dict)
data['loss'].update(train=Y_train, test=Y_test, val=Y_val)



In [3]:
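# Disabled: calls to plot_eval on the synthetic `data` dict built above,
# one per split of the 'loss' entry.
# plot_eval(data, 'train', name='loss')
# plot_eval(data, 'test', name='loss')
# plot_eval(data, 'val', name='loss')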

In [4]:
# Resolve <src_path>/src/config.yml, echo it, and stop here if it is missing.
config_path = os.path.join(src_path, "src", "config.yml")
print(config_path)
assert os.path.exists(config_path)

# Build the data manager from that config file.
# NOTE(review): n_sample=500 presumably caps how many samples are loaded --
# confirm against src.data_loader.
manager = CaptionDataManager(
    config_path,
    n_sample=500,
)

# Prints the manager object itself.
print(manager)

/Users/nsimsiri/Documents/code/ml/nlp/cap/src/config.yml
loading annotations into memory...


  0%|          | 0/500 [00:00<?, ?it/s]

Done (t=0.42s)
creating index...
index created!
loaded - val captions








100%|██████████| 500/500 [00:00<00:00, 5008.06it/s]

<src.data_loader.CaptionDataManager object at 0x1a21bd9160>





#  Experiments

In [18]:
from src.models.base_model import EncoderDecoder

# Model sizes. The vocabulary size is taken from the data manager.
vocab_size = len(manager.vocab())
embed_size, hidden_size, num_layers = 64, 128, 1

# Optimisation settings (num_epochs is read by the training loop).
num_epochs, learning_rate = 10, 0.001

print(
    "HYPER-PARAM[vocab={} embed={} hidden={} num_layers={} ]".format(
        vocab_size, embed_size, hidden_size, num_layers
    )
)

net = EncoderDecoder(
    vocab_size,
    embed_size=embed_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)

# Element counts over every parameter tensor: the whole network, then only
# the `decoder` sub-module.
total_param_count = sum(weights.numel() for weights in net.parameters())
decoder_param_count = sum(weights.numel() for weights in net.decoder.parameters())
print("total param #: {}".format(total_param_count))
print("decoder param #: {}".format(decoder_param_count))

# Loss and optimiser; Adam is given all of the network's parameters.
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)


HYPER-PARAM[vocab=1089 embed=64 hidden=128 num_layers=1 ]
total param #: 440769
decoder param #: 309505


In [19]:
from src.image_processor import train_transform, default_image_transform
from src.base_experiment import plot_eval, run_model

# Training batches. `default_image_transform` is the transform in use;
# `train_transform` (imported above) is the alternative that was previously
# left commented out inside this call.
train_loader = manager.build_dataloader('train',
                                        batch_size=25,
                                        shuffle=True,
                                        image_transform=default_image_transform)
# val_loader   = manager.build_dataloader('val',
#                                         batch_size=5)
# test_loader  = manager.build_dataloader('test',
#                                         batch_size=5)

# One entry per epoch. Previously `losses` was simply rebound on every
# iteration, so only the final epoch's value was still available after the
# loop and no training curve could be inspected or plotted.
train_loss_history = []
for epoch in range(num_epochs):
    # run_model returns a pair; only its first element is kept.
    # NOTE(review): presumably the losses recorded during this epoch as plain
    # numbers -- confirm in src.base_experiment (if they are tensors that still
    # carry a graph, detach them before storing).
    losses, _ = run_model(net,
                          train_loader,
                          criterion,
                          optimizer,
                          train=True,
                          epoch=epoch)
    train_loss_history.append(losses)
    
    


ep=0 step=2 loss=6.99378228187561
ep=0 step=4 loss=6.985867142677307
ep=0 step=6 loss=6.976934274037679
ep=0 step=8 loss=6.96873927116394
ep=0 step=10 loss=6.9576562404632565
ep=0 step=12 loss=6.94555926322937
ep=0 step=14 loss=6.930100577218192
ep=0 step=16 loss=6.9083079397678375
ep=1 step=2 loss=6.622510671615601
ep=1 step=4 loss=6.49799656867981
ep=1 step=6 loss=6.378824234008789
ep=1 step=8 loss=6.2600350975990295
ep=1 step=10 loss=6.168941450119019
ep=1 step=12 loss=6.0946158568064375
ep=1 step=14 loss=6.040581635066441
ep=1 step=16 loss=5.979053795337677
ep=2 step=2 loss=5.347299098968506
ep=2 step=4 loss=5.45111358165741
ep=2 step=6 loss=5.403749863306682
ep=2 step=8 loss=5.451522409915924
ep=2 step=10 loss=5.457181978225708
ep=2 step=12 loss=5.436914324760437
ep=2 step=14 loss=5.423209837504795
ep=2 step=16 loss=5.415204435586929
ep=3 step=2 loss=5.207361221313477
ep=3 step=4 loss=5.237094879150391
ep=3 step=6 loss=5.229259570439656
ep=3 step=8 loss=5.248616576194763
ep=3 step

In [32]:
# Run the project's evaluation helper on `net`.
# Note: it is handed `train_loader`, i.e. the same loader the training loop
# iterated over, not a held-out split.
from src.base_experiment import evaluate_model
evaluate_model(net, train_loader, criterion, manager)

image torch.Size([1, 1, 64])
torch.Size([1, 1, 128]) torch.Size([1, 128])
torch.Size([1, 1089])
torch.Size([1])
predicted workd:  tensor([[-2.0547, -1.2790,  8.2758,  ..., -1.7904, -1.4369, -2.0027]],
       grad_fn=<AddmmBackward>) tensor([2]) <start>
torch.Size([1, 1, 128]) torch.Size([1, 128])
torch.Size([1, 1089])
torch.Size([1])
predicted workd:  tensor([[-4.7995, -3.5948,  4.4539,  ..., -4.0931, -4.4234, -4.3693]],
       grad_fn=<AddmmBackward>) tensor([12]) a
torch.Size([1, 1, 128]) torch.Size([1, 128])
torch.Size([1, 1089])
torch.Size([1])
predicted workd:  tensor([[-4.2462, -3.1898,  0.6864,  ..., -3.6880, -3.6396, -3.8783]],
       grad_fn=<AddmmBackward>) tensor([12]) a
torch.Size([1, 1, 128]) torch.Size([1, 128])
torch.Size([1, 1089])
torch.Size([1])
predicted workd:  tensor([[-4.3487, -3.2769,  0.7099,  ..., -3.8351, -3.6822, -4.0930]],
       grad_fn=<AddmmBackward>) tensor([12]) a
torch.Size([1, 1, 128]) torch.Size([1, 128])
torch.Size([1, 1089])
torch.Size([1])
predict

In [None]:
T% 