In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [3]:
import torch
from torch.autograd import Variable

from dataset import Dictionary, HMQAFeatureDataset
from model import SoftCount
from config import *
from datetime import datetime, timedelta

import h5py
import numpy as np
import cPickle as pkl
import json
import torch.nn.functional as F

  from ._conv import register_converters as _register_converters


In [4]:
dictionary = Dictionary.load_from_file('data/dictionary.pkl')

loading dictionary from data/dictionary.pkl


In [5]:
%%time
print('loading features from train hdf5 file')
train_h5_loc = './data/train36.hdf5'
with h5py.File(train_h5_loc, 'r') as hf:
    train_image_features = np.array(hf.get('image_features'))
    train_spatials_features = np.array(hf.get('spatial_features'))

loading features from train hdf5 file
CPU times: user 3.92 s, sys: 1min 32s, total: 1min 36s
Wall time: 3min 1s


In [6]:
from dataset import HMQAFeatureDataset

hmqa_train_dset = HMQAFeatureDataset(
    img_id2hqma_idx = pkl.load(open("./data/train36_imgid2idx.pkl", "rb")),
    image_features = train_image_features, 
    spatial_features = train_spatials_features, 
    qid2count = json.load(open("./data/how_many_qa/qid2count.json", "rb")), 
    qid2count2score = json.load(open("./data/how_many_qa/qid2count2score.json", "rb")), 
    name="train", 
    dictionary=dictionary
)
del HMQAFeatureDataset

In [7]:
len(hmqa_train_dset)z

83642

In [9]:
len(set([x["image_id"] for x in hmqa_train_dset.entries]))

45546

In [5]:
%%time
print('loading features from val hdf5 file')
val_h5_loc = './data/val36.hdf5'
with h5py.File(val_h5_loc, 'r') as hf:
    val_image_features = np.array(hf.get('image_features'))
    val_spatials_features = np.array(hf.get('spatial_features'))

loading features from val hdf5 file
CPU times: user 1.78 s, sys: 19.9 s, total: 21.7 s
Wall time: 21.7 s


In [6]:
# len(train_image_features)

from dataset import HMQAFeatureDataset

hmqa_dev_dset = HMQAFeatureDataset(
    img_id2hqma_idx = pkl.load(open("./data/val36_imgid2idx.pkl", "rb")),
    image_features = val_image_features, 
    spatial_features = val_spatials_features, 
    qid2count = json.load(open("./data/how_many_qa/qid2count.json", "rb")), 
    qid2count2score = json.load(open("./data/how_many_qa/qid2count2score.json", "rb")), 
    name="dev", 
    dictionary=dictionary
)

hmqa_test_dset = HMQAFeatureDataset(
    img_id2hqma_idx = pkl.load(open("./data/val36_imgid2idx.pkl", "rb")),
    image_features = val_image_features, 
    spatial_features = val_spatials_features, 
    qid2count = json.load(open("./data/how_many_qa/qid2count.json", "rb")), 
    qid2count2score = json.load(open("./data/how_many_qa/qid2count2score.json", "rb")), 
    name="test", 
    dictionary=dictionary
)
del HMQAFeatureDataset

In [7]:
len(hmqa_dev_dset), len(hmqa_test_dset)

(17714, 5000)

In [8]:
from torch.utils.data import DataLoader

# hmqa_train_loader = DataLoader(hmqa_train_dset, 64, shuffle=True, num_workers=1)
hmqa_dev_loader = DataLoader(hmqa_dev_dset, 64, shuffle=True, num_workers=1)
hmqa_test_loader = DataLoader(hmqa_test_dset, 64, shuffle=True, num_workers=1)

In [208]:
def evaluate(model, hmqa_loader, coeff=1):
    
    all_acc = []
    all_se = []
    for i, (v_emb, b, q, c, c2s) in enumerate(hmqa_loader):
        v_emb = Variable(v_emb)
        b = Variable(b)
        q = Variable(q)
        c = Variable(c).view(-1).float()
        
        if USE_CUDA:
            v_emb = v_emb.cuda()
            b = b.cuda()
            q = q.cuda()
            c = c.cuda()
        
        kappa_0, rho = model.compute_vars(v_emb *coeff, b * coeff, q * coeff)
        count, greedy_count, P, A, rho = model.take_mc_samples(kappa_0, rho, 1)
        
#         pred = model(v_emb, q)
        
#         ret = torch.stack((count.float(), greedy_count.float(), c.float()))
#         break
    
        nearest_pred = (greedy_count + 0.5).long().clamp(0, 20)
        for one_c, one_c2s, one_pred in zip(c, c2s, nearest_pred):
            one_c = one_c.cpu().data
            one_pred = one_pred.cpu().data
            
#             print("one_c = ", one_c, " and nearest pred = ", one_pred)
            
            all_se.append((one_c - one_pred.float()) ** 2)
            all_acc.append(one_c2s[one_pred])
    
    acc = torch.stack(all_acc).mean()
    rmse = torch.stack(all_se).mean() ** 0.5
    
    return acc, rmse  #, ret

In [209]:
def isnan(x):
    check = (x != x)
    check = check.float().sum().data[0]
    return check > 0

In [311]:
from model import IRLC
model = IRLC()
del IRLC

initialising with glove embeddings
done.


In [312]:
# model.ques_parser.load_state_dict(torch.load("soft_count_ques_parser.pth"))
# model.f_s.load_state_dict(torch.load("soft_count_f.pth"))

In [313]:
# USE_CUDA = False

In [314]:
if USE_CUDA:
    model.cuda()
model

IRLC(
  (ques_parser): QuestionParser(
    (embd): Embedding(20159, 300, padding_idx=20158)
    (rnn): GRU(300, 1024)
    (dropout): Dropout(p=0.1)
  )
  (f_s): ScoringFunction(
    (v_drop): Dropout(p=0.1)
    (q_drop): Dropout(p=0.1)
    (v_proj): FCNet(
      (main): Sequential(
        (0): Linear(in_features=2048, out_features=2048, bias=True)
        (1): ReLU()
      )
    )
    (q_proj): FCNet(
      (main): Sequential(
        (0): Linear(in_features=1024, out_features=2048, bias=True)
        (1): ReLU()
      )
    )
    (dropout): Dropout(p=0.1)
  )
  (W): Linear(in_features=2048, out_features=1, bias=True)
  (f_rho): RhoScorer(
    (W): Linear(in_features=1024, out_features=1, bias=True)
    (f_rho): FCNet(
      (main): Sequential(
        (0): Linear(in_features=17, out_features=100, bias=True)
        (1): ReLU()
        (2): Linear(in_features=100, out_features=1, bias=True)
        (3): ReLU()
      )
    )
  )
  (extra_params): ParameterList(
  )
)

In [315]:
test_acc, test_rmse = evaluate(model, hmqa_test_loader)
test_acc, test_rmse

(0.012000000262260438, 17.323631259063443)

In [316]:
opt = torch.optim.Adam(model.parameters(), lr=5e-4)

In [None]:
for epoch in range(10):
    for i, (v_emb, b, q, c, _) in enumerate(hmqa_dev_loader):
        v_emb = Variable(v_emb)
        b = Variable(b)
        q = Variable(q)
        c = Variable(c).view(-1).float()
        
        if USE_CUDA:
            v_emb = v_emb.cuda()
            q = q.cuda()
            c = c.cuda()
            b = b.cuda()

#         count, greedy_count, P, A, rho = model(v_emb, b, q)
        
        B, k, _ = v_emb.size()
        
        kappa_0, rho = model.compute_vars(v_emb, b, q)
        if isnan(kappa_0) or isnan(rho) or isnan(model.eps):
            raise Exception("there are nans here")
            
        num_samples = 10
        count, greedy_count, P, A, rho = model.take_mc_samples(kappa_0, rho, num_samples)
        c_gt = torch.cat([c] * num_samples)
        loss = model.get_loss(c_gt, count, greedy_count, P, A, rho)
        
    
        if i % 100 == 0:
            print("epoch = {}, i = {}, loss = {},".format(epoch, i, loss.data[0],))
        
        opt.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm(model.parameters(), 0.25)
        opt.step()
    
    
    print("evaluating model on train, dev and test...")
    
#     train_acc, train_rmse = evaluate(model, hmqa_dev_loader)
#     print("train_acc: {}, train_rmse: {}".format(train_acc, train_rmse))
    
#     model.eval()
#     dev_acc, dev_rmse = evaluate(model, hmqa_dev_loader)
#     print("dev_acc: {}, dev_rmse: {}".format(dev_acc, dev_rmse))
    test_acc, test_rmse = evaluate(model, hmqa_test_loader)
    print("test_acc: {}, test_rmse: {}".format(test_acc, test_rmse))
#     model.train()
        

epoch = 0, i = 0, loss = 49.4169654846,
epoch = 0, i = 100, loss = 0.959388554096,
epoch = 0, i = 200, loss = -0.0486254766583,
evaluating model on train, dev and test...
test_acc: 0.21298000102, test_rmse: 3.88664379639
epoch = 1, i = 0, loss = -0.0153276687488,
epoch = 1, i = 100, loss = 0.0428304076195,
epoch = 1, i = 200, loss = -0.00206291372888,
evaluating model on train, dev and test...
test_acc: 0.25372000128, test_rmse: 3.55955053342
epoch = 2, i = 0, loss = 0.00610870262608,
epoch = 2, i = 100, loss = -0.0264539178461,
epoch = 2, i = 200, loss = -0.0316820368171,
evaluating model on train, dev and test...
test_acc: 0.236320001256, test_rmse: 3.52913587157
epoch = 3, i = 0, loss = -0.0172131024301,
epoch = 3, i = 100, loss = -0.0302072763443,
epoch = 3, i = 200, loss = 0.0261585265398,
evaluating model on train, dev and test...
test_acc: 0.240760001314, test_rmse: 3.48129286329
epoch = 4, i = 0, loss = -0.719782114029,
epoch = 4, i = 100, loss = -0.0167286451906,


In [None]:
# TODO: renormalize the probabilities

In [298]:
torch.stack((count.float(), greedy_count.float(), c_gt.float()))[:, :100]

Variable containing:

Columns 0 to 12 
   36    11     5    32    31    36    12    16    35    36    21    34    29
    0     0     0     0     0     0     0     0     0     0     0     0     0
    4     3     2     2     4     1     2     1     0     6     5     1     2

Columns 13 to 25 
   31     1    33    36    36    18    33     0    35     3    34    34    36
    0     0     0     0     0     0     0     0     0     0     0     0     0
    0     5     2     3     1     1     1     1     5     1     1     2     2

Columns 26 to 38 
   36     2    26    33    36    36    17    36    15    22     9    36     2
    0     0     0     0     0     0     0     0     0     0     0     0     0
    2     4     1     2     4     1     0     5     1     1     2     1     2

Columns 39 to 51 
   31     8     8    11    33    36    23    36     5     8    25     9     8
    0     0     0     0     0     0     0     0     0     0     0     0     0
    8     1     2     0    10    14     9     

In [302]:
P[:, 0, 1], A

(Variable containing:
 1.00000e-02 *
   2.6912
   2.7500
   2.7771
   2.8506
   2.9212
   3.0551
   3.1778
   3.2342
   3.3306
   3.5390
   3.6401
   3.7144
   0.0000
   0.0000
   0.0000
   0.0000
   0.0000
   0.0000
   0.0000
   0.0000
   0.0000
   0.0000
   0.0000
   0.0000
   0.0000
   0.0000
   0.0000
   0.0000
   0.0000
   0.0000
   0.0000
   0.0000
   0.0000
   0.0000
   0.0000
   0.0000
   0.0000
 [torch.cuda.FloatTensor of size 37 (GPU 0)], Variable containing:
    10    18    32  ...     29    22    19
     5     7     7  ...     21    25    33
    16    16     5  ...     14    24     1
        ...          ⋱          ...       
     4    15    14  ...      7    35    32
    23     6    34  ...      9    18    21
    36    34    12  ...     23    30    13
 [torch.cuda.LongTensor of size 37x3200 (GPU 0)])

In [282]:
z = torch.zeros(1).float()

In [281]:
z + 1e-40


1.00000e-40 *
  1.0000
[torch.DoubleTensor of size 1]

In [None]:
        
        count_gt = c.view(-1)
        indicator = Variable(torch.zeros(B, k))
        if USE_CUDA:
            indicator = indicator.cuda()
        indicator = indicator.scatter(1, count_gt[:, None].long(), 1)  # (B, k)
        not_select = indicator.cumsum(dim=1)  # (B, k)
        select = 1 - not_select  # (B, k)
        

In [None]:
        #########  HACK ############
        count_gt = count_gt.view(-1)
        indicator = Variable(torch.zeros(B, k))
        if USE_CUDA:
            indicator = indicator.cuda()
        indicator = indicator.scatter(1, count_gt[:, None].long(), 1)  # (B, k)
        not_select = indicator.cumsum(dim=1)  # (B, k)
        select = 1 - not_select  # (B, k)

        v_emb[:, :, 0] = select.float()

        # print("count_gt = ", count_gt)
        # print("indicator = ", indicator)
        # print("select = ", select)
        # raise Exception("hoopla")
        ###########################

In [24]:
kappa_0, select

(Variable containing:
  9.9070e-01  1.0466e+00  3.0018e-01  ...   1.0486e-03 -1.0895e-02 -2.6854e-03
  1.0868e+00  1.0649e+00  1.1275e+00  ...  -1.2944e-03 -6.8956e-03 -1.3778e-03
  1.0535e+00  1.1431e+00  8.1334e-01  ...  -1.3563e-02 -5.1921e-03  2.6299e-02
                 ...                   ⋱                   ...                
  1.0174e+00  9.0691e-01  9.7639e-01  ...  -1.0598e-03  1.4735e-03  1.5625e-02
  1.1298e+00  8.8190e-01  1.0423e+00  ...  -5.7358e-03  1.9252e-02  3.8303e-03
  8.6857e-01  4.6882e-02  2.1066e-03  ...   1.5766e-02 -1.9883e-03 -5.3849e-03
 [torch.cuda.FloatTensor of size 50x36 (GPU 0)], Variable containing:
     1     1     0  ...      0     0     0
     1     1     1  ...      0     0     0
     1     1     1  ...      0     0     0
        ...          ⋱          ...       
     1     1     1  ...      0     0     0
     1     1     1  ...      0     0     0
     1     0     0  ...      0     0     0
 [torch.cuda.FloatTensor of size 50x36 (GPU 0)])

In [None]:
        #########  HACK ############
        count_gt = count_gt.view(-1)
        indicator = Variable(torch.zeros(B, k))
        if USE_CUDA:
            indicator = indicator.cuda()
        indicator = indicator.scatter(1, count_gt[:, None].long(), 1)  # (B, k)
        not_select = indicator.cumsum(dim=1)  # (B, k)
        select = 1 - not_select  # (B, k)

        v_emb[:, :, 0] = select.float()

        # print("count_gt = ", count_gt)
        # print("indicator = ", indicator)
        # print("select = ", select)
        # raise Exception("hoopla")
        ###########################