In [1]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
# Restrict this process to GPU 0. This cell runs before the cell that imports
# torch, so the variable is already set when torch is first loaded.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [3]:
import torch
from torch.autograd import Variable

from dataset import Dictionary, HMQAFeatureDataset
from model import SoftCount
from config import *
from datetime import datetime, timedelta

import h5py
import numpy as np
import cPickle as pkl
import json
import torch.nn.functional as F

  from ._conv import register_converters as _register_converters


In [4]:
# Load the saved project `Dictionary`; it is passed to every
# HMQAFeatureDataset constructed further down.
dictionary = Dictionary.load_from_file('data/dictionary.pkl')

loading dictionary from data/dictionary.pkl


In [5]:
%%time
print('loading features from train hdf5 file')
train_h5_loc = './data/train36.hdf5'
with h5py.File(train_h5_loc, 'r') as hf:
    train_image_features = np.array(hf.get('image_features'))
    train_spatials_features = np.array(hf.get('spatial_features'))

loading features from train hdf5 file
CPU times: user 3.92 s, sys: 1min 32s, total: 1min 36s
Wall time: 3min 1s


In [6]:
from dataset import HMQAFeatureDataset

# Read each metadata file inside a `with` block so its handle is closed as
# soon as the content is loaded (bare open(...) calls are never closed).
# The files are read in the same order as the constructor arguments below.
with open("./data/train36_imgid2idx.pkl", "rb") as idx_file:
    # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
    train_img_id2idx = pkl.load(idx_file)
with open("./data/how_many_qa/qid2count.json", "rb") as count_file:
    train_qid2count = json.load(count_file)
with open("./data/how_many_qa/qid2count2score.json", "rb") as score_file:
    train_qid2count2score = json.load(score_file)

# Training split, backed by the train36 feature arrays loaded earlier.
hmqa_train_dset = HMQAFeatureDataset(
    img_id2hqma_idx=train_img_id2idx,
    image_features=train_image_features,
    spatial_features=train_spatials_features,
    qid2count=train_qid2count,
    qid2count2score=train_qid2count2score,
    name="train",
    dictionary=dictionary,
)
# Remove the class name from the notebook namespace again; later cells
# re-import it before use.
del HMQAFeatureDataset

In [7]:
# Number of items in the training dataset.
len(hmqa_train_dset)

83642

In [9]:
# Number of distinct "image_id" values among the training entries
# (several entries can share one image).
len(set([x["image_id"] for x in hmqa_train_dset.entries]))

45546

In [8]:
%%time
print('loading features from val hdf5 file')
val_h5_loc = './data/val36.hdf5'
with h5py.File(val_h5_loc, 'r') as hf:
    val_image_features = np.array(hf.get('image_features'))
    val_spatials_features = np.array(hf.get('spatial_features'))

loading features from val hdf5 file
CPU times: user 2.41 s, sys: 57.7 s, total: 1min
Wall time: 1min 51s


In [8]:
# len(train_image_features)

In [10]:
from dataset import HMQAFeatureDataset


def _build_val_backed_dataset(split_name):
    """Build the HMQAFeatureDataset for one split that uses the val36 features.

    Every call re-reads the index pickle and both JSON files, so each dataset
    receives its own freshly loaded objects, just as when the constructor call
    is written out once per split. The files are opened in `with` blocks so
    their handles are closed right after reading.

    Args:
        split_name: value forwarded as the dataset's `name` argument
            ("dev" or "test" in this notebook).

    Returns:
        The constructed HMQAFeatureDataset.
    """
    with open("./data/val36_imgid2idx.pkl", "rb") as idx_file:
        # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
        img_id2idx = pkl.load(idx_file)
    with open("./data/how_many_qa/qid2count.json", "rb") as count_file:
        qid2count = json.load(count_file)
    with open("./data/how_many_qa/qid2count2score.json", "rb") as score_file:
        qid2count2score = json.load(score_file)

    return HMQAFeatureDataset(
        img_id2hqma_idx=img_id2idx,
        image_features=val_image_features,
        spatial_features=val_spatials_features,
        qid2count=qid2count,
        qid2count2score=qid2count2score,
        name=split_name,
        dictionary=dictionary,
    )


hmqa_dev_dset = _build_val_backed_dataset("dev")
hmqa_test_dset = _build_val_backed_dataset("test")

# The helper resolves HMQAFeatureDataset when it is called, so it must not be
# called again after the class name is removed here.
del HMQAFeatureDataset

In [12]:
# Sizes of the dev and test datasets, shown as a tuple.
len(hmqa_dev_dset), len(hmqa_test_dset)

(17714, 5000)

In [11]:
from torch.utils.data import DataLoader

# Training batches are drawn in a freshly shuffled order each epoch.
hmqa_train_loader = DataLoader(hmqa_train_dset, batch_size=64, shuffle=True, num_workers=1)

# Evaluation loaders keep the dataset order: the reported metrics are averages
# over all examples, so shuffling adds nothing and only makes evaluation runs
# harder to compare batch by batch.
hmqa_dev_loader = DataLoader(hmqa_dev_dset, batch_size=64, shuffle=False, num_workers=1)
hmqa_test_loader = DataLoader(hmqa_test_dset, batch_size=64, shuffle=False, num_workers=1)

In [13]:
def evaluate(model, hmqa_loader):
    """Score `model` on every batch produced by `hmqa_loader`.

    Each prediction is rounded to the nearest integer (add 0.5, then truncate
    with `.long()`) and clamped to the range [0, 20] before it is scored.

    The model is put into eval mode for the duration of the call, so layers
    such as dropout do not perturb the predictions, and its previous
    train/eval mode is restored before returning (also when an error occurs).

    Args:
        model: a torch.nn.Module that is called as `model(v_emb, q)`.
        hmqa_loader: iterable yielding `(v_emb, b, q, c, c2s)` batches. `b` is
            not used here; `c` holds the target counts and each row of `c2s`
            is indexed by the rounded prediction to obtain that example's score.

    Returns:
        `(acc, rmse)`: the mean of the looked-up scores, and the square root
        of the mean squared difference between target and rounded prediction.
    """
    was_training = model.training
    model.eval()

    scores = []
    squared_errors = []
    try:
        for v_emb, _, q, c, c2s in hmqa_loader:
            v_emb = Variable(v_emb)
            q = Variable(q)
            c = Variable(c).float()

            if USE_CUDA:
                v_emb = v_emb.cuda()
                q = q.cuda()
                c = c.cuda()

            pred = model(v_emb, q)

            # Nearest integer count, limited to the supported range 0..20.
            nearest_pred = (pred + 0.5).long().clamp(0, 20)
            for one_c, one_c2s, one_pred in zip(c, c2s, nearest_pred):
                one_c = one_c.cpu().data
                one_pred = one_pred.cpu().data

                squared_errors.append((one_c - one_pred.float()) ** 2)
                scores.append(one_c2s[one_pred])
    finally:
        # Hand the model back in whatever mode the caller had it in.
        model.train(was_training)

    acc = torch.stack(scores).mean()
    rmse = torch.stack(squared_errors).mean() ** 0.5

    return acc, rmse

In [24]:
from model import SoftCount
# Instantiate the counting model with its default constructor arguments.
model = SoftCount()
# Remove the class name from the notebook namespace; only the instance is kept.
del SoftCount

initialising with glove embeddings
done.


In [26]:
# USE_CUDA = False

In [27]:
# Move the model to the GPU when USE_CUDA is truthy
# (USE_CUDA is presumably provided by `from config import *` — confirm).
if USE_CUDA:
    model.cuda()
# Bare expression: display the module structure as the cell output.
model

SoftCount(
  (ques_parser): QuestionParser(
    (embd): Embedding(20159, 300, padding_idx=20158)
    (rnn): GRU(300, 1024)
    (dropout): Dropout(p=0.1)
  )
  (f): ScoringFunction(
    (v_drop): Dropout(p=0.1)
    (q_drop): Dropout(p=0.1)
    (v_proj): FCNet(
      (main): Sequential(
        (0): Linear(in_features=2048, out_features=512, bias=True)
        (1): ReLU()
      )
    )
    (q_proj): FCNet(
      (main): Sequential(
        (0): Linear(in_features=1024, out_features=512, bias=True)
        (1): ReLU()
      )
    )
    (dropout): Dropout(p=0.1)
  )
  (W): Linear(in_features=512, out_features=1, bias=True)
)

In [29]:
# Baseline: score the model on the test split before the training loop runs.
# Evaluate in eval mode, as the training loop does, so the model's Dropout
# layers do not perturb the predictions; then return to train mode.
model.eval()
test_acc, test_rmse = evaluate(model, hmqa_test_loader)
model.train()
test_acc, test_rmse

(0.00298000009059906, 15.404155283559044)

In [30]:
# Adam optimizer over all model parameters, learning rate 3e-4.
opt = torch.optim.Adam(model.parameters(), lr=3e-4)

In [36]:
# Train for two epochs on the Huber (smooth L1) loss, logging every 100 steps
# and scoring the test split after each epoch.
for epoch in range(2):
    for i, (v_emb, _, q, c, _) in enumerate(hmqa_train_loader):
        v_emb = Variable(v_emb)
        q = Variable(q)
        c = Variable(c).float()

        if USE_CUDA:
            v_emb = v_emb.cuda()
            q = q.cuda()
            c = c.cuda()

        pred = model(v_emb, q)
        huber_loss = F.smooth_l1_loss(pred, c)
        # Only the Huber term is optimised. To add an MSE term to the
        # objective, compute F.mse_loss(pred, c) here on every step.
        loss = huber_loss

        if i % 100 == 0:
            # MSE is reported but never optimised, so it is computed only on
            # the steps that are actually logged.
            mse_loss = F.mse_loss(pred, c)
            print("epoch = {}, i = {}, loss = {}, huber = {}, mse = {}".format(epoch, i, loss.data[0],
                                                                              huber_loss.data[0], mse_loss.data[0]))

        opt.zero_grad()
        loss.backward()
        # Clip the total gradient norm to 0.25 before the optimizer step.
        torch.nn.utils.clip_grad_norm(model.parameters(), 0.25)
        opt.step()

    # Only the test split is evaluated below; the train and dev evaluations
    # are kept commented out.
    print("evaluating model on train, dev and test...")

#     train_acc, train_rmse = evaluate(model, hmqa_train_loader)
#     print("train_acc: {}, train_rmse: {}".format(train_acc, train_rmse))

    # Evaluate in eval mode, then switch back to train mode for the next epoch.
    model.eval()
#     dev_acc, dev_rmse = evaluate(model, hmqa_dev_loader)
#     print("dev_acc: {}, dev_rmse: {}".format(dev_acc, dev_rmse))
    test_acc, test_rmse = evaluate(model, hmqa_test_loader)
    print("test_acc: {}, test_rmse: {}".format(test_acc, test_rmse))
    model.train()
        

epoch = 0, i = 0, loss = 0.550097882748, huber = 0.550097882748, mse = 3.87832069397
epoch = 0, i = 100, loss = 0.324541091919, huber = 0.324541091919, mse = 0.77025103569
epoch = 0, i = 200, loss = 0.413172006607, huber = 0.413172006607, mse = 1.46421694756
epoch = 0, i = 300, loss = 0.538510084152, huber = 0.538510084152, mse = 2.73510503769
epoch = 0, i = 400, loss = 0.418540328741, huber = 0.418540328741, mse = 1.4878578186
epoch = 0, i = 500, loss = 0.552978396416, huber = 0.552978396416, mse = 2.26761984825
epoch = 0, i = 600, loss = 0.399310112, huber = 0.399310112, mse = 2.21824645996
epoch = 0, i = 700, loss = 0.459556460381, huber = 0.459556460381, mse = 1.49132847786
epoch = 0, i = 800, loss = 0.519453048706, huber = 0.519453048706, mse = 1.84664309025
epoch = 0, i = 900, loss = 0.465825378895, huber = 0.465825378895, mse = 1.76220428944
epoch = 0, i = 1000, loss = 0.606564640999, huber = 0.606564640999, mse = 4.46711158752
epoch = 0, i = 1100, loss = 0.403664261103, huber =