In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
# Restrict this process to GPU 0. Set here, before `import torch` in the next cell.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Ask CUDA to run kernel launches synchronously, so an error is reported at the
# call that caused it (debugging aid; expect slower execution).
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [3]:
import torch
from torch.autograd import Variable

from dataset import Dictionary, HMQAFeatureDataset
from model import SoftCount
from config import *
from datetime import datetime, timedelta

import h5py
import numpy as np
import cPickle as pkl
import json
import torch.nn.functional as F

  from ._conv import register_converters as _register_converters


In [4]:
# Load the Dictionary object from data/dictionary.pkl; the same instance is
# passed to every HMQAFeatureDataset constructed in this notebook.
dictionary = Dictionary.load_from_file('data/dictionary.pkl')

loading dictionary from data/dictionary.pkl


In [5]:
%%time
print('loading features from train hdf5 file')
train_h5_loc = './data/train36.hdf5'
with h5py.File(train_h5_loc, 'r') as hf:
    # Index the file directly instead of using hf.get(...): a missing dataset
    # then raises KeyError right here, rather than hf.get returning None and
    # np.array(None) silently producing a useless 0-d object array.
    # `[...]` reads the complete dataset into an in-memory numpy array, so the
    # result stays usable after the file is closed by the `with` block.
    train_image_features = hf['image_features'][...]
    train_spatials_features = hf['spatial_features'][...]

loading features from train hdf5 file
CPU times: user 3.92 s, sys: 1min 32s, total: 1min 36s
Wall time: 3min 1s


In [6]:
from dataset import HMQAFeatureDataset

# Build the "train" dataset from the features loaded above plus three metadata
# files. The files are opened in a single `with` statement so that their
# handles are closed as soon as the dataset has been constructed (the previous
# bare open(...) calls were never closed explicitly).
# NOTE(review): pickle.load can execute arbitrary code -- only point this at
# .pkl files you produced yourself or otherwise trust.
with open("./data/train36_imgid2idx.pkl", "rb") as imgid2idx_file, \
        open("./data/how_many_qa/qid2count.json", "rb") as qid2count_file, \
        open("./data/how_many_qa/qid2count2score.json", "rb") as qid2count2score_file:
    hmqa_train_dset = HMQAFeatureDataset(
        img_id2hqma_idx=pkl.load(imgid2idx_file),
        image_features=train_image_features,
        spatial_features=train_spatials_features,
        qid2count=json.load(qid2count_file),
        qid2count2score=json.load(qid2count2score_file),
        name="train",
        dictionary=dictionary,
    )

# The class name is removed from the notebook namespace again, exactly as
# before; the cell that builds the dev/test datasets re-imports it.
del HMQAFeatureDataset

In [7]:
# Size of the dataset constructed with name="train".
len(hmqa_train_dset)

83642

In [9]:
# Number of distinct image ids referenced by the train entries. A set
# comprehension avoids building a throwaway list first and, under Python 2,
# does not leak the loop variable into the notebook namespace.
len({entry["image_id"] for entry in hmqa_train_dset.entries})

45546

In [5]:
%%time
print('loading features from val hdf5 file')
val_h5_loc = './data/val36.hdf5'
with h5py.File(val_h5_loc, 'r') as hf:
    # Index the file directly instead of using hf.get(...): a missing dataset
    # then raises KeyError right here, rather than hf.get returning None and
    # np.array(None) silently producing a useless 0-d object array.
    # `[...]` reads the complete dataset into an in-memory numpy array, so the
    # result stays usable after the file is closed by the `with` block.
    val_image_features = hf['image_features'][...]
    val_spatials_features = hf['spatial_features'][...]

loading features from val hdf5 file
CPU times: user 1.63 s, sys: 26.6 s, total: 28.2 s
Wall time: 28.2 s


In [6]:
# len(train_image_features)

from dataset import HMQAFeatureDataset

# Both the "dev" and the "test" dataset are built from the same val features
# and the same metadata files; only `name` differs. Each construction opens
# its files in a `with` statement so the handles are closed right afterwards
# (the previous bare open(...) calls were never closed explicitly). The files
# are still read afresh for each dataset, so the two datasets do not share the
# loaded metadata objects.
# NOTE(review): pickle.load can execute arbitrary code -- only point this at
# .pkl files you produced yourself or otherwise trust.
with open("./data/val36_imgid2idx.pkl", "rb") as imgid2idx_file, \
        open("./data/how_many_qa/qid2count.json", "rb") as qid2count_file, \
        open("./data/how_many_qa/qid2count2score.json", "rb") as qid2count2score_file:
    hmqa_dev_dset = HMQAFeatureDataset(
        img_id2hqma_idx=pkl.load(imgid2idx_file),
        image_features=val_image_features,
        spatial_features=val_spatials_features,
        qid2count=json.load(qid2count_file),
        qid2count2score=json.load(qid2count2score_file),
        name="dev",
        dictionary=dictionary,
    )

with open("./data/val36_imgid2idx.pkl", "rb") as imgid2idx_file, \
        open("./data/how_many_qa/qid2count.json", "rb") as qid2count_file, \
        open("./data/how_many_qa/qid2count2score.json", "rb") as qid2count2score_file:
    hmqa_test_dset = HMQAFeatureDataset(
        img_id2hqma_idx=pkl.load(imgid2idx_file),
        image_features=val_image_features,
        spatial_features=val_spatials_features,
        qid2count=json.load(qid2count_file),
        qid2count2score=json.load(qid2count2score_file),
        name="test",
        dictionary=dictionary,
    )

# The class name is removed from the notebook namespace again, exactly as before.
del HMQAFeatureDataset

In [7]:
# Sizes of the datasets constructed with name="dev" and name="test".
len(hmqa_dev_dset), len(hmqa_test_dset)

(17714, 5000)

In [8]:
from torch.utils.data import DataLoader

# Batched, shuffled iterators over the dev and test datasets (64 examples per
# batch, one worker process each). The train loader remains disabled.
# hmqa_train_loader = DataLoader(hmqa_train_dset, 64, shuffle=True, num_workers=1)
hmqa_dev_loader = DataLoader(
    dataset=hmqa_dev_dset,
    batch_size=64,
    shuffle=True,
    num_workers=1,
)
hmqa_test_loader = DataLoader(
    dataset=hmqa_test_dset,
    batch_size=64,
    shuffle=True,
    num_workers=1,
)

In [13]:
def evaluate(model, hmqa_loader):
    """Compute accuracy and RMSE of `model` over every batch of `hmqa_loader`.

    The model is put into eval mode for the duration of the call and its
    previous train/eval mode is restored afterwards (also on error), so the
    metrics do not depend on whether the caller remembered ``model.eval()``.

    Args:
        model: module exposing ``compute_vars(v_emb, b, q)`` and
            ``take_mc_samples(kappa_0, rho, batch_eps, n)`` in addition to the
            usual ``training`` / ``eval()`` / ``train(mode)`` members of
            ``torch.nn.Module``.
        hmqa_loader: iterable of ``(v_emb, b, q, c, c2s)`` batches. For each
            example, ``c2s`` is indexed by the predicted count to obtain the
            score of that prediction.

    Returns:
        Tuple ``(acc, rmse)``: the mean score of the rounded greedy
        predictions, and the square root of the mean squared difference
        between ``c`` and the rounded greedy predictions.

    Raises:
        ValueError: if `hmqa_loader` yields no examples.
    """
    was_training = model.training
    model.eval()
    try:
        all_acc = []
        all_se = []
        for v_emb, b, q, c, c2s in hmqa_loader:
            v_emb = Variable(v_emb)
            b = Variable(b)
            q = Variable(q)
            c = Variable(c).float()

            if USE_CUDA:
                v_emb = v_emb.cuda()
                b = b.cuda()
                q = q.cuda()
                c = c.cuda()

            kappa_0, rho, batch_eps = model.compute_vars(v_emb, b, q)
            count, greedy_count, P, A, rho = model.take_mc_samples(kappa_0, rho, batch_eps, 1)

            # Adding 0.5 before the integer cast rounds non-negative values to
            # the nearest integer; the result is limited to the range [0, 20]
            # (presumably the range of counts covered by c2s -- confirm).
            nearest_pred = (greedy_count + 0.5).long().clamp(0, 20)
            for one_c, one_c2s, one_pred in zip(c, c2s, nearest_pred):
                one_c = one_c.cpu().data
                one_pred = one_pred.cpu().data

                all_se.append((one_c - one_pred.float()) ** 2)
                all_acc.append(one_c2s[one_pred])

        if not all_acc:
            raise ValueError("hmqa_loader yielded no examples to evaluate")

        acc = torch.stack(all_acc).mean()
        rmse = torch.stack(all_se).mean() ** 0.5

        return acc, rmse
    finally:
        # Hand the model back in whatever mode the caller had it in.
        model.train(was_training)

In [14]:
def isnan(x):
    """Return True if `x` contains at least one NaN element, else False.

    NaN is the only value that compares unequal to itself, so ``x != x``
    marks exactly the NaN positions.

    Args:
        x: a tensor (or legacy ``Variable``) supporting ``!=``, ``.float()``
            and ``.sum()``.

    Returns:
        bool: whether any element of `x` is NaN. An empty `x` gives False.
    """
    nan_count = (x != x).float().sum()
    # Newer PyTorch releases return a 0-dim tensor from sum(), on which the
    # former `.data[0]` indexing raises IndexError; `.item()` is the supported
    # way to read it. Older releases lack `.item()`, so keep the original
    # indexing as the fallback there.
    if hasattr(nan_count, "item"):
        nan_count = nan_count.item()
    else:
        nan_count = nan_count.data[0]
    return nan_count > 0

In [30]:
from model import IRLC
# Instantiate the IRLC model with its default constructor arguments.
model = IRLC()
# The class name is removed from the notebook namespace right after use;
# `model` keeps the instance alive.
del IRLC

initialising with glove embeddings
done.


In [31]:
# USE_CUDA = False

In [32]:
# USE_CUDA is not assigned anywhere in this notebook; presumably it arrives via
# `from config import *` -- confirm. When set, move the model to the GPU.
if USE_CUDA:
    model.cuda()
# model

In [33]:
# Baseline: metrics of the freshly constructed model on the test loader,
# measured before the optimiser below has taken any step.
test_acc, test_rmse = evaluate(model, hmqa_test_loader)
test_acc, test_rmse

(0.14578000067472457, 6.577051010901466)

In [34]:
# Adam over all model parameters with a learning rate of 3e-6.
opt = torch.optim.Adam(model.parameters(), lr=3e-6)

In [35]:
# Optimise the model for two passes over hmqa_dev_loader and evaluate on
# hmqa_test_loader after each pass.
# NOTE(review): the batches come from the *dev* loader -- the train loader is
# commented out where the loaders are created. Confirm that fitting on the dev
# split is intended.
for epoch in range(2):
    for i, (v_emb, b, q, c, _) in enumerate(hmqa_dev_loader):
        v_emb = Variable(v_emb)
        b = Variable(b)
        q = Variable(q)
        # Flatten the counts to 1-D and cast them to float.
        c = Variable(c).view(-1).float()
        
        if USE_CUDA:
            v_emb = v_emb.cuda()
            q = q.cuda()
            c = c.cuda()
            b = b.cuda()

#         count, greedy_count, P, A, rho = model(v_emb, b, q)
        
        kappa_0, rho, batch_eps = model.compute_vars(v_emb, b, q)
              
        # Stop immediately if any intermediate returned by compute_vars holds a NaN.
        if isnan(kappa_0) or isnan(rho) or isnan(batch_eps):
            raise Exception("there are nans here")
        # The trailing 1 is presumably the number of Monte Carlo samples -- confirm
        # against model.take_mc_samples.
        count, greedy_count, P, A, rho = model.take_mc_samples(kappa_0, rho, batch_eps, 1)
        
        loss = model.get_loss(c, count, greedy_count, P, A, rho)
        
        # Report the loss every 100 batches.
        if i % 100 == 0:
            print("epoch = {}, i = {}, loss = {}".format(epoch, i, loss.data[0]))
        
        opt.zero_grad()
        loss.backward()
        # Clip the gradient norm at 0.25 before the optimiser step.
        torch.nn.utils.clip_grad_norm(model.parameters(), 0.25)
        opt.step()
    
    
    # NOTE(review): the message mentions train, dev and test, but only the test
    # evaluation below is active; the train and dev evaluations are commented out.
    print("evaluating model on train, dev and test...")
    
#     train_acc, train_rmse = evaluate(model, hmqa_train_loader)
#     print("train_acc: {}, train_rmse: {}".format(train_acc, train_rmse))
    
    # Evaluate in eval mode, then switch back to train mode for the next epoch.
    model.eval()
#     dev_acc, dev_rmse = evaluate(model, hmqa_dev_loader)
#     print("dev_acc: {}, dev_rmse: {}".format(dev_acc, dev_rmse))
    test_acc, test_rmse = evaluate(model, hmqa_test_loader)
    print("test_acc: {}, test_rmse: {}".format(test_acc, test_rmse))
    model.train()
        

epoch = 0, i = 0, loss = -3087.06494141
epoch = 0, i = 100, loss = -70.2504196167
epoch = 0, i = 200, loss = -98.9064941406
evaluating model on train, dev and test...
test_acc: 0.082160000515, test_rmse: 12.9495019209
epoch = 1, i = 0, loss = -48.8456344604
epoch = 1, i = 100, loss = -624.548095703
epoch = 1, i = 200, loss = -576.245300293
evaluating model on train, dev and test...
test_acc: 0.077160000515, test_rmse: 13.2448329548


In [36]:
# Inspect the variables left over from the last batch of the training loop,
# stacked as three rows: `count` and `greedy_count` (both returned by
# take_mc_samples) and the target counts `c`.
torch.stack((count.float(), greedy_count.float(), c))

Variable containing:

Columns 0 to 12 
   36    36    36    36    35    36    36    36    36     4    36    36    36
   36    36    36    36    36     0    36    36     0    36     0     0     0
    1     1     0     5     1     6     1     0     5     2     4     2     1

Columns 13 to 25 
   36    36    36    36    36    36    36    36     4    36     1    36    36
   36     0    36    36     0    36    36    36    36    36     0    36    36
    3     1     2     3     2     4     3     2     3     3     1     2     3

Columns 26 to 38 
   36    36    36    36    36    36    36    36    36     4    36    36    36
   36    36     0     0    36     0    36    36     0    36    36     0    36
    2     3     0     2     1     6     3     1     1     3     3     1     3

Columns 39 to 49 
   36    36    10    36    36    36    36     2     1    34    32
   36    36    36    36    36    36    36    36    36    36    36
    2     3     1     3     3     6    18     2     6     3     1
[tor