In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [3]:
import torch
from torch.autograd import Variable

from dataset import Dictionary, HMQAFeatureDataset
from model import SoftCount
from config import *
from datetime import datetime, timedelta

import h5py
import numpy as np
import cPickle as pkl
import json
import torch.nn.functional as F

  from ._conv import register_converters as _register_converters


In [4]:
dictionary = Dictionary.load_from_file('data/dictionary.pkl')

loading dictionary from data/dictionary.pkl


In [5]:
%%time
print('loading features from train hdf5 file')
train_h5_loc = './data/train36.hdf5'
with h5py.File(train_h5_loc, 'r') as hf:
    train_image_features = np.array(hf.get('image_features'))
    train_spatials_features = np.array(hf.get('spatial_features'))

loading features from train hdf5 file
CPU times: user 3.92 s, sys: 1min 32s, total: 1min 36s
Wall time: 3min 1s


In [6]:
from dataset import HMQAFeatureDataset

hmqa_train_dset = HMQAFeatureDataset(
    img_id2hqma_idx = pkl.load(open("./data/train36_imgid2idx.pkl", "rb")),
    image_features = train_image_features, 
    spatial_features = train_spatials_features, 
    qid2count = json.load(open("./data/how_many_qa/qid2count.json", "rb")), 
    qid2count2score = json.load(open("./data/how_many_qa/qid2count2score.json", "rb")), 
    name="train", 
    dictionary=dictionary
)
del HMQAFeatureDataset

In [7]:
len(hmqa_train_dset)

83642

In [9]:
len(set([x["image_id"] for x in hmqa_train_dset.entries]))

45546

In [8]:
%%time
print('loading features from val hdf5 file')
val_h5_loc = './data/val36.hdf5'
with h5py.File(val_h5_loc, 'r') as hf:
    val_image_features = np.array(hf.get('image_features'))
    val_spatials_features = np.array(hf.get('spatial_features'))

loading features from val hdf5 file
CPU times: user 2.41 s, sys: 57.7 s, total: 1min
Wall time: 1min 51s


In [8]:
# len(train_image_features)

In [10]:
from dataset import HMQAFeatureDataset

hmqa_dev_dset = HMQAFeatureDataset(
    img_id2hqma_idx = pkl.load(open("./data/val36_imgid2idx.pkl", "rb")),
    image_features = val_image_features, 
    spatial_features = val_spatials_features, 
    qid2count = json.load(open("./data/how_many_qa/qid2count.json", "rb")), 
    qid2count2score = json.load(open("./data/how_many_qa/qid2count2score.json", "rb")), 
    name="dev", 
    dictionary=dictionary
)

hmqa_test_dset = HMQAFeatureDataset(
    img_id2hqma_idx = pkl.load(open("./data/val36_imgid2idx.pkl", "rb")),
    image_features = val_image_features, 
    spatial_features = val_spatials_features, 
    qid2count = json.load(open("./data/how_many_qa/qid2count.json", "rb")), 
    qid2count2score = json.load(open("./data/how_many_qa/qid2count2score.json", "rb")), 
    name="test", 
    dictionary=dictionary
)
del HMQAFeatureDataset

In [12]:
len(hmqa_dev_dset), len(hmqa_test_dset)

(17714, 5000)

In [11]:
from torch.utils.data import DataLoader

hmqa_train_loader = DataLoader(hmqa_train_dset, 64, shuffle=True, num_workers=1)
hmqa_dev_loader = DataLoader(hmqa_dev_dset, 64, shuffle=True, num_workers=1)
hmqa_test_loader = DataLoader(hmqa_test_dset, 64, shuffle=True, num_workers=1)

In [13]:
def evaluate(model, hmqa_loader):
    
    all_acc = []
    all_se = []
    for i, (v_emb, b, q, c, c2s) in enumerate(hmqa_loader):
        v_emb = Variable(v_emb)
        q = Variable(q)
        c = Variable(c).float()
        
        if USE_CUDA:
            v_emb = v_emb.cuda()
            q = q.cuda()
            c = c.cuda()

        pred = model(v_emb, q)
        
        nearest_pred = (pred + 0.5).long().clamp(0, 20)
        for one_c, one_c2s, one_pred in zip(c, c2s, nearest_pred):
            one_c = one_c.cpu().data
            one_pred = one_pred.cpu().data
            
            all_se.append((one_c - one_pred.float()) ** 2)
            all_acc.append(one_c2s[one_pred])
    
    acc = torch.stack(all_acc).mean()
    rmse = torch.stack(all_se).mean() ** 0.5
    
    return acc, rmse

In [112]:
from model import SoftCount
model = SoftCount()
del SoftCount

initialising with glove embeddings
done.


In [113]:
# USE_CUDA = False

In [114]:
if USE_CUDA:
    model.cuda()
model

SoftCount(
  (ques_parser): QuestionParser(
    (embd): Embedding(20159, 300, padding_idx=20158)
    (rnn): GRU(300, 1024)
    (dropout): Dropout(p=0.1)
  )
  (f): ScoringFunction(
    (v_drop): Dropout(p=0.1)
    (q_drop): Dropout(p=0.1)
    (v_proj): FCNet(
      (main): Sequential(
        (0): Linear(in_features=2048, out_features=512, bias=True)
        (1): ReLU()
      )
    )
    (q_proj): FCNet(
      (main): Sequential(
        (0): Linear(in_features=1024, out_features=512, bias=True)
        (1): ReLU()
      )
    )
    (dropout): Dropout(p=0.1)
  )
  (W): Linear(in_features=512, out_features=1, bias=True)
)

In [115]:
test_acc, test_rmse = evaluate(model, hmqa_test_loader)
test_acc, test_rmse

(0.00298000009059906, 15.40445390138839)

In [116]:
opt = torch.optim.Adam(model.parameters(), lr=3e-4)

In [117]:
for epoch in range(2):
    for i, (v_emb, b, q, c, _) in enumerate(hmqa_dev_loader):
        v_emb = Variable(v_emb)
        q = Variable(q)
        c = Variable(c).float()
        
        if USE_CUDA:
            v_emb = v_emb.cuda()
            q = q.cuda()
            c = c.cuda()

        pred = model(v_emb, q)
        huber_loss = F.smooth_l1_loss(pred, c)
        mse_loss =  F.mse_loss(pred, c)
        loss =  huber_loss # + 0.1 * mse_loss
        
        if i % 100 == 0:
            print("epoch = {}, i = {}, loss = {}, huber = {}, mse = {}".format(epoch, i, loss.data[0], 
                                                                              huber_loss.data[0], mse_loss.data[0]))
        
        opt.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm(model.parameters(), 0.25)
        opt.step()
    
    
    print("evaluating model on train, dev and test...")
    
#     train_acc, train_rmse = evaluate(model, hmqa_train_loader)
#     print("train_acc: {}, train_rmse: {}".format(train_acc, train_rmse))
    
    model.eval()
#     dev_acc, dev_rmse = evaluate(model, hmqa_dev_loader)
#     print("dev_acc: {}, dev_rmse: {}".format(dev_acc, dev_rmse))
    test_acc, test_rmse = evaluate(model, hmqa_test_loader)
    print("test_acc: {}, test_rmse: {}".format(test_acc, test_rmse))
    model.train()
        

epoch = 0, i = 0, loss = 14.602848053, huber = 14.602848053, mse = 238.237670898
epoch = 0, i = 100, loss = 1.5097117424, huber = 1.5097117424, mse = 7.86944198608
epoch = 0, i = 200, loss = 1.23882031441, huber = 1.23882031441, mse = 8.64920711517
evaluating model on train, dev and test...
test_acc: 0.377700001514, test_rmse: 3.05054093564
epoch = 1, i = 0, loss = 0.956357479095, huber = 0.956357479095, mse = 6.0752196312
epoch = 1, i = 100, loss = 1.71862578392, huber = 1.71862578392, mse = 11.6909618378
epoch = 1, i = 200, loss = 1.71545946598, huber = 1.71545946598, mse = 10.1975259781
evaluating model on train, dev and test...
test_acc: 0.342000001669, test_rmse: 2.90926107457


In [108]:
# torch.save(model.ques_parser.state_dict(), "soft_count_ques_parser.pth")

In [109]:
# torch.save(model.f.state_dict(), "soft_count_f.pth")

In [111]:
# torch.save(model.state_dict(), "soft_count.pth")

In [45]:
from model import RhoScorer

In [46]:
rs = RhoScorer()

In [102]:
b = torch.from_numpy(np.array([
    [ #start 1st batch
        [0,0, 2, 2],
        [1,1, 3,3],
        [2,2,3,3],
    ] # 1st batch
]
))


In [103]:
b


(0 ,.,.) = 
  0  0  2  2
  1  1  3  3
  2  2  3  3
[torch.LongTensor of size 1x3x4]

In [104]:
b_ij, b_ji, iou, o_ij, o_ji = rs.get_spatials(b)

In [105]:
iou[0, :, :, 0], o_ij[0, :, :, 0], o_ji[0, :, :, 0]

(
  1.0000  0.1429  0.0000
  0.1429  1.0000  0.2500
  0.0000  0.2500  1.0000
 [torch.FloatTensor of size 3x3], 
  1.0000  0.2500  0.0000
  0.2500  1.0000  1.0000
  0.0000  0.2500  1.0000
 [torch.FloatTensor of size 3x3], 
  1.0000  0.2500  0.0000
  0.2500  1.0000  0.2500
  0.0000  1.0000  1.0000
 [torch.FloatTensor of size 3x3])