In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
# Restrict CUDA to device index 1. This runs before the cell that imports
# torch, so the restriction is already in place when CUDA is first used.
os.environ.update(CUDA_VISIBLE_DEVICES="1")
# Debug aid, left disabled:
# os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [3]:
import torch
from torch.autograd import Variable

from dataset import Dictionary, HMQAFeatureDataset
from model import SoftCount
from config import *
from datetime import datetime, timedelta

import h5py
import numpy as np
import _pickle as pkl
import json
import torch.nn.functional as F

In [4]:
# Load the pickled Dictionary from disk; the same object is handed to every
# HMQAFeatureDataset built further down.
dictionary = Dictionary.load_from_file('data/dictionary.pkl')

loading dictionary from data/dictionary.pkl


In [5]:
%%time
print('loading features from train hdf5 file')
train_h5_loc = './data/train36.hdf5'
with h5py.File(train_h5_loc, 'r') as hf:
    train_image_features = np.array(hf.get('image_features'))
    train_spatials_features = np.array(hf.get('spatial_features'))
# np.save( open("/tmp/vqa/train_image_features", "wb"), train_image_features)
# np.save( open("/tmp/vqa/train_spatials_features", "wb"), train_spatials_features)

In [6]:
# %%time
# train_image_features = np.load(open("/tmp/vqa/train_image_features", "rb"))
# train_spatials_features = np.load(open("/tmp/vqa/train_spatials_features", "rb"))

CPU times: user 92 ms, sys: 42.6 s, total: 42.7 s
Wall time: 7min 15s


In [7]:
from dataset import HMQAFeatureDataset

# Build the training split on top of the in-memory train36 features.
# The lookup files are opened in a `with` block so their handles are closed
# once parsed; the original bare open(...) calls never closed them.
with open("./data/train36_imgid2idx.pkl", "rb") as imgid_fh, \
        open("./data/how_many_qa/qid2count.json", "rb") as count_fh, \
        open("./data/how_many_qa/qid2count2score.json", "rb") as score_fh:
    hmqa_train_dset = HMQAFeatureDataset(
        img_id2hqma_idx=pkl.load(imgid_fh),
        image_features=train_image_features,
        spatial_features=train_spatials_features,
        qid2count=json.load(count_fh),
        qid2count2score=json.load(score_fh),
        name="train",
        dictionary=dictionary,
    )
# The class name is dropped again, as before; later cells re-import it.
del HMQAFeatureDataset

In [8]:
# Size of the training split as reported by the dataset's __len__.
len(hmqa_train_dset)

83642

In [9]:
# Count the distinct image ids referenced by the training entries.
len({entry["image_id"] for entry in hmqa_train_dset.entries})

45546

In [10]:
%%time
print('loading features from val hdf5 file')
val_h5_loc = './data/val36.hdf5'
with h5py.File(val_h5_loc, 'r') as hf:
    val_image_features = np.array(hf.get('image_features'))
    val_spatials_features = np.array(hf.get('spatial_features'))
# np.save( open("/tmp/vqa/val_image_features", "wb"), val_image_features)
# np.save( open("/tmp/vqa/val_spatials_features", "wb"), val_spatials_features)

In [11]:
# %%time
# val_image_features = np.load(open("/tmp/vqa/val_image_features", "rb"))
# val_spatials_features = np.load(open("/tmp/vqa/val_spatials_features", "rb"))

CPU times: user 24 ms, sys: 24.1 s, total: 24.1 s
Wall time: 1min 44s


In [12]:
# len(train_image_features)

from dataset import HMQAFeatureDataset


def _build_val_split(split_name):
    """Build the HMQA dataset named `split_name` on top of the val36 features.

    The three lookup files are opened in a `with` block so their handles are
    closed as soon as they have been parsed (the original bare `open(...)`
    calls leaked them). Each call parses its own copy of the lookup tables,
    exactly as the two hand-written constructor calls did.

    Args:
        split_name: value forwarded as the dataset's `name` argument
            ("dev" or "test" in this notebook).

    Returns:
        The constructed HMQAFeatureDataset.
    """
    with open("./data/val36_imgid2idx.pkl", "rb") as imgid_fh, \
            open("./data/how_many_qa/qid2count.json", "rb") as count_fh, \
            open("./data/how_many_qa/qid2count2score.json", "rb") as score_fh:
        return HMQAFeatureDataset(
            img_id2hqma_idx=pkl.load(imgid_fh),
            image_features=val_image_features,
            spatial_features=val_spatials_features,
            qid2count=json.load(count_fh),
            qid2count2score=json.load(score_fh),
            name=split_name,
            dictionary=dictionary,
        )


# Dev and test share the val36 feature arrays and differ only in `name`.
hmqa_dev_dset = _build_val_split("dev")
hmqa_test_dset = _build_val_split("test")
# The helper depends on the class name that is deleted here, so drop both.
del HMQAFeatureDataset, _build_val_split

In [13]:
# Sizes of the dev and test splits, displayed as a (dev, test) tuple.
tuple(map(len, (hmqa_dev_dset, hmqa_test_dset)))

(17714, 5000)

In [14]:
from torch.utils.data import DataLoader

# Batches of 64, loaded in the main process (num_workers=0).
# Only the training loader shuffles. The metrics computed on dev and test are
# plain averages over all examples, so shuffling them adds nothing and only
# makes evaluation order (and RNG consumption) differ between runs.
hmqa_train_loader = DataLoader(hmqa_train_dset, batch_size=64, shuffle=True, num_workers=0)
hmqa_dev_loader = DataLoader(hmqa_dev_dset, batch_size=64, shuffle=False, num_workers=0)
hmqa_test_loader = DataLoader(hmqa_test_dset, batch_size=64, shuffle=False, num_workers=0)

In [42]:
def evaluate(model, hmqa_loader):
    """Compute accuracy and RMSE of the model's greedy counts over a loader.

    The model is put into eval mode and autograd is disabled for the duration
    of the call, so dropout is inactive and no graph is built. The train/eval
    mode the model had on entry is restored before returning, which makes the
    function safe to call at any point of a training run.

    Args:
        model: torch.nn.Module exposing `compute_vars(v_emb, b, q)` and
            `take_mc_samples(kappa_0, rho, num_samples)`.
        hmqa_loader: iterable yielding `(v_emb, b, q, c, c2s)` batches. `c`
            holds the ground-truth counts; row `i` of `c2s` is indexed with the
            predicted count of sample `i` to obtain that sample's score.

    Returns:
        Tuple `(acc, rmse)` of 0-dim tensors: the mean looked-up score and the
        root of the mean squared difference between the ground-truth count and
        the rounded prediction, both taken over every sample in the loader.
    """
    was_training = model.training
    model.eval()

    all_acc = []
    all_se = []
    try:
        with torch.no_grad():
            for v_emb, b, q, c, c2s in hmqa_loader:
                # Targets are only compared on the CPU, so they never need to
                # travel to the GPU and back.
                c = c.view(-1).float()

                if USE_CUDA:
                    v_emb = v_emb.cuda()
                    b = b.cuda()
                    q = q.cuda()

                kappa_0, rho = model.compute_vars(v_emb, b, q)
                # One sample is requested; only the greedy count is scored.
                _, greedy_count, _, _, _, _, _ = model.take_mc_samples(kappa_0, rho, 1)

                # Round to the nearest integer and clip to 0..20 (presumably
                # the range of counts covered by c2s; confirm against the
                # dataset). A single transfer per batch replaces the former
                # per-element .cpu() calls.
                nearest_pred = (greedy_count + 0.5).long().clamp(0, 20).view(-1).cpu()

                all_se.append((c - nearest_pred.float()) ** 2)
                for one_c2s, one_pred in zip(c2s, nearest_pred):
                    all_acc.append(one_c2s[one_pred])
    finally:
        # Hand the model back in whatever mode the caller had it in.
        model.train(was_training)

    acc = torch.stack(all_acc).mean()
    rmse = torch.cat(all_se).mean() ** 0.5

    return acc, rmse

In [43]:
def isnan(x):
    """Return True when tensor `x` contains at least one NaN.

    Uses the fact that NaN is the only value that compares unequal to itself.
    The result is a plain Python bool. The previous `.data[0]` indexing of the
    0-dim sum is deprecated in PyTorch 0.4 and raises in later releases.

    Args:
        x: tensor (or Parameter) of any shape, including 0-dim and empty.

    Returns:
        bool: True if any element of `x` is NaN, otherwise False.
    """
    return bool((x != x).any())

In [47]:
# Instantiate the IRLC model with dropout 0.5. The class is imported right
# before use and its name deleted afterwards, the same import/del pattern the
# dataset cells use (presumably so a re-run always picks up the freshly
# reloaded class; confirm).
from model import IRLC
model = IRLC(dropout=0.5)
del IRLC

question parser has zero dropout
initialising with glove embeddings
done.


In [49]:
# Move the parameters to the GPU when enabled (Module.cuda() works in place
# and returns the module itself), then display the architecture.
model = model.cuda() if USE_CUDA else model
model

IRLC(
  (ques_parser): QuestionParser(
    (embd): Embedding(20159, 300, padding_idx=20158)
    (rnn): GRU(300, 1024)
    (drop): Dropout(p=0)
  )
  (f_s): ScoringFunction(
    (v_drop): Dropout(p=0.5)
    (q_drop): Dropout(p=0.5)
    (v_proj): FCNet(
      (main): Sequential(
        (0): Linear(in_features=2048, out_features=2048, bias=True)
        (1): LeakyReLU(negative_slope=0.01)
      )
    )
    (q_proj): FCNet(
      (main): Sequential(
        (0): Linear(in_features=1024, out_features=2048, bias=True)
        (1): LeakyReLU(negative_slope=0.01)
      )
    )
    (s_drop): Dropout(p=0.5)
  )
  (W): Linear(in_features=2048, out_features=1, bias=True)
  (f_rho): RhoScorer(
    (W): Linear(in_features=1024, out_features=1, bias=True)
    (f_rho): FCNet(
      (main): Sequential(
        (0): Linear(in_features=17, out_features=100, bias=True)
        (1): LeakyReLU(negative_slope=0.01)
      )
    )
    (dense): Linear(in_features=100, out_features=1, bias=True)
  )
  (extra_pa

In [50]:
# Baseline: score the freshly initialised model on the test loader before any
# optimisation step. `test_acc` is also read by the training loop when it
# names the checkpoint written at epoch 0.
test_acc, test_rmse = evaluate(model, hmqa_test_loader)
test_acc, test_rmse

(tensor(1.00000e-02 *
        1.2440), tensor(17.4335))

In [51]:
# Adam over all model parameters, starting at lr = 5e-4.
opt = torch.optim.Adam(model.parameters(), lr=5e-4)
# Exponential decay: each sched.step() multiplies the learning rate by 0.99.
sched = torch.optim.lr_scheduler.ExponentialLR(opt, gamma=.99)

In [52]:
# Per-epoch metric history; the training loop appends one value to each list
# after every epoch.
test_accs, test_rmses = [], []
dev_accs, dev_rmses = [], []

In [53]:
# Run tag used as the prefix of every checkpoint file name. The tokens
# presumably encode the 2048-wide projections, dropout 0.5 and the zero
# question-parser dropout shown in the model printout (confirm).
exp_name = "hope-2048-0.5-ques0"

In [54]:
# Number of Monte-Carlo samples drawn per question at every update
# (loop-invariant, so it is set once instead of once per batch).
num_samples = 32

# torch.save does not create missing directories.
os.makedirs("./saved_models", exist_ok=True)

for epoch in range(0, 100):
    # adjust learning rate
    # NOTE(review): the scheduler is stepped at the top of the epoch, which is
    # what yields the logged schedule (0.0005 for epoch 0). PyTorch >= 1.1
    # expects this call after the epoch's optimizer steps; revisit the
    # placement when upgrading.
    sched.step()
    print("learning rate is {}".format(opt.param_groups[0]["lr"]))

    # save model every 10 epochs
    # `test_acc` is the most recent test accuracy: the baseline evaluation for
    # epoch 0, afterwards the value measured at the end of the previous epoch,
    # i.e. the accuracy of exactly the weights being written.
    if epoch % 10 == 0:
        print("saving model..")
        torch.save(model.state_dict(), "./saved_models/{}-epoch-{}-acc-{}".format(exp_name, epoch, test_acc))
        print("Done.")

    for i, (v_emb, b, q, c, _) in enumerate(hmqa_train_loader):
        c = c.view(-1).float()

        if USE_CUDA:
            v_emb = v_emb.cuda()
            q = q.cuda()
            c = c.cuda()
            b = b.cuda()

        kappa_0, rho = model.compute_vars(v_emb, b, q)
        # Fail fast: once NaNs appear every later update is meaningless.
        if isnan(kappa_0) or isnan(rho) or isnan(model.eps):
            raise Exception("there are nans here")

        count, greedy_count, logPA, entP, A, rho, P = model.take_mc_samples(kappa_0, rho, num_samples)
        # Tile the targets so there is one copy per Monte-Carlo sample.
        c_gt = torch.cat([c] * num_samples)

        loss = model.get_loss(c_gt, count, greedy_count, logPA, entP, A, rho)

        if i % 100 == 0:
            # .item() replaces the deprecated `loss.data[0]` indexing of a
            # 0-dim tensor.
            print("epoch = {}, i = {}, loss = {},".format(epoch, i, loss.item(),))

        opt.zero_grad()
        loss.backward()
        # In-place variant; the un-suffixed clip_grad_norm is deprecated.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)
        opt.step()


    print("evaluating model on dev and test...")

    # Disable dropout while scoring, then switch back for the next epoch.
    model.eval()
    dev_acc, dev_rmse = evaluate(model, hmqa_dev_loader)
    print("dev_acc: {}, dev_rmse: {}".format(dev_acc, dev_rmse))
    test_acc, test_rmse = evaluate(model, hmqa_test_loader)
    print("test_acc: {}, test_rmse: {}".format(test_acc, test_rmse))
    model.train()

    test_accs.append(test_acc)
    test_rmses.append(test_rmse)
    dev_accs.append(dev_acc)
    dev_rmses.append(dev_rmse)

learning rate is 0.0005
saving model..
Done.


  This is separate from the ipykernel package so we can avoid doing imports until


epoch = 0, i = 0, loss = 30.2487850189209,
epoch = 0, i = 100, loss = -0.024106265977025032,
epoch = 0, i = 200, loss = 0.017748812213540077,
epoch = 0, i = 300, loss = 0.062310248613357544,
epoch = 0, i = 400, loss = -0.05417848378419876,
epoch = 0, i = 500, loss = 0.0579473152756691,
epoch = 0, i = 600, loss = 0.04424819350242615,
epoch = 0, i = 700, loss = -0.005923263728618622,
epoch = 0, i = 800, loss = -0.13275228440761566,
epoch = 0, i = 900, loss = -0.0817226842045784,
epoch = 0, i = 1000, loss = -0.09168244898319244,
epoch = 0, i = 1100, loss = 0.008359390310943127,
epoch = 0, i = 1200, loss = -0.026331951841711998,
epoch = 0, i = 1300, loss = -0.06636011600494385,
evaluating model on dev and test...
dev_acc: 0.38167551159858704, dev_rmse: 3.376408576965332
test_acc: 0.3896600008010864, test_rmse: 3.150238037109375
learning rate is 0.000495
epoch = 1, i = 0, loss = -0.05858936905860901,
epoch = 1, i = 100, loss = -0.07893369346857071,
epoch = 1, i = 200, loss = -0.117283083498

epoch = 9, i = 600, loss = 0.0030097588896751404,
epoch = 9, i = 700, loss = -0.009353281930088997,
epoch = 9, i = 800, loss = -0.06197524070739746,
epoch = 9, i = 900, loss = -0.09363559633493423,
epoch = 9, i = 1000, loss = -0.051551178097724915,
epoch = 9, i = 1100, loss = 0.0009079091250896454,
epoch = 9, i = 1200, loss = -0.008880775421857834,
epoch = 9, i = 1300, loss = -0.0353664755821228,
evaluating model on dev and test...
dev_acc: 0.4946313798427582, dev_rmse: 2.7893266677856445
test_acc: 0.4986400008201599, test_rmse: 2.530019760131836
learning rate is 0.0004521910375044022
saving model..
Done.
epoch = 10, i = 0, loss = -0.04381684958934784,
epoch = 10, i = 100, loss = -0.013822140172123909,
epoch = 10, i = 200, loss = -0.04546855390071869,
epoch = 10, i = 300, loss = -0.02390332706272602,
epoch = 10, i = 400, loss = -0.058667588979005814,
epoch = 10, i = 500, loss = -0.04164177179336548,
epoch = 10, i = 600, loss = -0.10120292752981186,
epoch = 10, i = 700, loss = -0.034194

epoch = 18, i = 800, loss = 0.00030883029103279114,
epoch = 18, i = 900, loss = -0.034065745770931244,
epoch = 18, i = 1000, loss = -0.04183529317378998,
epoch = 18, i = 1100, loss = -0.03222022205591202,
epoch = 18, i = 1200, loss = 0.009185872972011566,
epoch = 18, i = 1300, loss = 0.006621114909648895,
evaluating model on dev and test...
dev_acc: 0.5143897533416748, dev_rmse: 2.7052109241485596
test_acc: 0.5254600048065186, test_rmse: 2.477942705154419
learning rate is 0.00041308431191779333
epoch = 19, i = 0, loss = -0.04666287451982498,
epoch = 19, i = 100, loss = -0.002405330538749695,
epoch = 19, i = 200, loss = 0.00466554332524538,
epoch = 19, i = 300, loss = -0.029950443655252457,
epoch = 19, i = 400, loss = -0.01406443677842617,
epoch = 19, i = 500, loss = -0.027637267485260963,
epoch = 19, i = 600, loss = 0.00186951644718647,
epoch = 19, i = 700, loss = -0.08153592050075531,
epoch = 19, i = 800, loss = -0.005767049267888069,
epoch = 19, i = 900, loss = -0.06049109250307083,


epoch = 27, i = 1000, loss = 0.04141880199313164,
epoch = 27, i = 1100, loss = -0.018373902887105942,
epoch = 27, i = 1200, loss = -0.04682707041501999,
epoch = 27, i = 1300, loss = -0.08595170080661774,
evaluating model on dev and test...
dev_acc: 0.522693932056427, dev_rmse: 2.705148458480835
test_acc: 0.5418599843978882, test_rmse: 2.4744696617126465
learning rate is 0.0003773596436018163
epoch = 28, i = 0, loss = 0.009713897481560707,
epoch = 28, i = 100, loss = -0.021301165223121643,
epoch = 28, i = 200, loss = 0.017959140241146088,
epoch = 28, i = 300, loss = -0.005610305815935135,
epoch = 28, i = 400, loss = -0.07971067726612091,
epoch = 28, i = 500, loss = -0.013644076883792877,
epoch = 28, i = 600, loss = 0.0033105751499533653,
epoch = 28, i = 700, loss = -0.013679862022399902,
epoch = 28, i = 800, loss = -0.08829423040151596,
epoch = 28, i = 900, loss = -0.019435370340943336,
epoch = 28, i = 1000, loss = -0.019611265510320663,
epoch = 28, i = 1100, loss = -0.00965434033423662

epoch = 36, i = 1200, loss = -0.01729520782828331,
epoch = 36, i = 1300, loss = -0.013344522565603256,
evaluating model on dev and test...
dev_acc: 0.5291577577590942, dev_rmse: 2.7035515308380127
test_acc: 0.5488799810409546, test_rmse: 2.429485559463501
learning rate is 0.0003447245429345389
epoch = 37, i = 0, loss = -0.04100264608860016,
epoch = 37, i = 100, loss = -0.03313612565398216,
epoch = 37, i = 200, loss = -0.006439638324081898,
epoch = 37, i = 300, loss = -0.044139184057712555,
epoch = 37, i = 400, loss = -0.027420181781053543,
epoch = 37, i = 500, loss = -0.03493750840425491,
epoch = 37, i = 600, loss = 0.006597659084945917,
epoch = 37, i = 700, loss = -0.004947527311742306,
epoch = 37, i = 800, loss = 0.01571037247776985,
epoch = 37, i = 900, loss = -0.02694648876786232,
epoch = 37, i = 1000, loss = -0.04767255485057831,
epoch = 37, i = 1100, loss = -0.00882795825600624,
epoch = 37, i = 1200, loss = -0.010662954300642014,
epoch = 37, i = 1300, loss = -0.056903935968875885

evaluating model on dev and test...
dev_acc: 0.5279327034950256, dev_rmse: 2.6872990131378174
test_acc: 0.5420600175857544, test_rmse: 2.4211153984069824
learning rate is 0.00031491181560161616
epoch = 46, i = 0, loss = -0.025710690766572952,
epoch = 46, i = 100, loss = -0.015931660309433937,
epoch = 46, i = 200, loss = -0.021366320550441742,
epoch = 46, i = 300, loss = -0.00349352415651083,
epoch = 46, i = 400, loss = -0.02429555356502533,
epoch = 46, i = 500, loss = -0.032621659338474274,
epoch = 46, i = 600, loss = -0.021857228130102158,
epoch = 46, i = 700, loss = -0.08524785190820694,
epoch = 46, i = 800, loss = -0.013290014117956161,
epoch = 46, i = 900, loss = -0.034107040613889694,
epoch = 46, i = 1000, loss = 0.02143169194459915,
epoch = 46, i = 1100, loss = -0.07772921025753021,
epoch = 46, i = 1200, loss = -0.020162265747785568,
epoch = 46, i = 1300, loss = -0.00687435083091259,
evaluating model on dev and test...
dev_acc: 0.5300666093826294, dev_rmse: 2.676964521408081
test

test_acc: 0.5460200309753418, test_rmse: 2.4256956577301025
learning rate is 0.00028767737498846423
epoch = 55, i = 0, loss = 0.018114324659109116,
epoch = 55, i = 100, loss = -0.013547111302614212,
epoch = 55, i = 200, loss = -0.031239503994584084,
epoch = 55, i = 300, loss = -0.045351170003414154,
epoch = 55, i = 400, loss = 0.011006874963641167,
epoch = 55, i = 500, loss = -0.013206337578594685,
epoch = 55, i = 600, loss = -0.10692227631807327,
epoch = 55, i = 700, loss = -0.017103448510169983,
epoch = 55, i = 800, loss = 0.00289778970181942,
epoch = 55, i = 900, loss = -0.018710000440478325,
epoch = 55, i = 1000, loss = -0.0069792307913303375,
epoch = 55, i = 1100, loss = 0.013171534053981304,
epoch = 55, i = 1200, loss = -0.24587292969226837,
epoch = 55, i = 1300, loss = -0.05601205304265022,
evaluating model on dev and test...
dev_acc: 0.5329739451408386, dev_rmse: 2.645207166671753
test_acc: 0.5514400005340576, test_rmse: 2.4300618171691895
learning rate is 0.0002848006012385796

epoch = 64, i = 100, loss = -0.02249692566692829,
epoch = 64, i = 200, loss = -0.03022979013621807,
epoch = 64, i = 300, loss = -0.03779994696378708,
epoch = 64, i = 400, loss = -0.018118757754564285,
epoch = 64, i = 500, loss = 0.0054378872737288475,
epoch = 64, i = 600, loss = -0.004908433184027672,
epoch = 64, i = 700, loss = 0.004074041731655598,
epoch = 64, i = 800, loss = -0.024843929335474968,
epoch = 64, i = 900, loss = -0.03281255066394806,
epoch = 64, i = 1000, loss = -0.034224823117256165,
epoch = 64, i = 1100, loss = -0.051137737929821014,
epoch = 64, i = 1200, loss = -0.04739660397171974,
epoch = 64, i = 1300, loss = -0.011089876294136047,
evaluating model on dev and test...
dev_acc: 0.5329005122184753, dev_rmse: 2.6678812503814697
test_acc: 0.5543599724769592, test_rmse: 2.4200000762939453
learning rate is 0.0002601702613251532
epoch = 65, i = 0, loss = -0.012944592162966728,
epoch = 65, i = 100, loss = -0.03384072333574295,
epoch = 65, i = 200, loss = -0.0284746438264846

epoch = 73, i = 300, loss = -0.07029590755701065,
epoch = 73, i = 400, loss = -0.029555322602391243,
epoch = 73, i = 500, loss = -0.0038061346858739853,
epoch = 73, i = 600, loss = -0.024913037195801735,
epoch = 73, i = 700, loss = -0.043584201484918594,
epoch = 73, i = 800, loss = -0.024620238691568375,
epoch = 73, i = 900, loss = 0.0008479291573166847,
epoch = 73, i = 1000, loss = -0.011167029850184917,
epoch = 73, i = 1100, loss = -0.04160452261567116,
epoch = 73, i = 1200, loss = -0.036524929106235504,
epoch = 73, i = 1300, loss = -0.022668717429041862,
evaluating model on dev and test...
dev_acc: 0.5329626202583313, dev_rmse: 2.6623740196228027
test_acc: 0.5481799840927124, test_rmse: 2.4106016159057617
learning rate is 0.00023767002100285347
epoch = 74, i = 0, loss = -0.020837515592575073,
epoch = 74, i = 100, loss = -0.006533347070217133,
epoch = 74, i = 200, loss = -0.04083200544118881,
epoch = 74, i = 300, loss = -0.033668696880340576,
epoch = 74, i = 400, loss = -0.0146744195

epoch = 82, i = 400, loss = -0.014639953151345253,
epoch = 82, i = 500, loss = -0.00796202290803194,
epoch = 82, i = 600, loss = -0.023646743968129158,
epoch = 82, i = 700, loss = -0.025705022737383842,
epoch = 82, i = 800, loss = -0.005390476435422897,
epoch = 82, i = 900, loss = -0.023340506479144096,
epoch = 82, i = 1000, loss = -0.023358561098575592,
epoch = 82, i = 1100, loss = 0.00021937862038612366,
epoch = 82, i = 1200, loss = -0.07192349433898926,
epoch = 82, i = 1300, loss = -0.13106100261211395,
evaluating model on dev and test...
dev_acc: 0.534272313117981, dev_rmse: 2.6693832874298096
test_acc: 0.5488200187683105, test_rmse: 2.4290740489959717
learning rate is 0.00021711566339590582
epoch = 83, i = 0, loss = 0.0003622695803642273,
epoch = 83, i = 100, loss = -0.04625536873936653,
epoch = 83, i = 200, loss = -0.05889501795172691,
epoch = 83, i = 300, loss = -0.2590397000312805,
epoch = 83, i = 400, loss = -0.024432644248008728,
epoch = 83, i = 500, loss = -0.066413342952728

epoch = 91, i = 600, loss = -0.017051927745342255,
epoch = 91, i = 700, loss = -0.02621205896139145,
epoch = 91, i = 800, loss = -0.00974722858518362,
epoch = 91, i = 900, loss = -0.01396472193300724,
epoch = 91, i = 1000, loss = -0.02055794931948185,
epoch = 91, i = 1100, loss = -0.13720251619815826,
epoch = 91, i = 1200, loss = 0.005937190726399422,
epoch = 91, i = 1300, loss = 0.008594583719968796,
evaluating model on dev and test...
dev_acc: 0.5360731482505798, dev_rmse: 2.664874792098999
test_acc: 0.5521600246429443, test_rmse: 2.4010415077209473
learning rate is 0.00019833890321101254
epoch = 92, i = 0, loss = -0.01717349886894226,
epoch = 92, i = 100, loss = -0.013177014887332916,
epoch = 92, i = 200, loss = -0.018595857545733452,
epoch = 92, i = 300, loss = -0.05108370631933212,
epoch = 92, i = 400, loss = -0.027635453268885612,
epoch = 92, i = 500, loss = 0.0016302159056067467,
epoch = 92, i = 600, loss = 0.016829008236527443,
epoch = 92, i = 700, loss = -0.021041076630353928,

In [39]:
# Persist the weights after the last epoch.
# NOTE(review): the file name ends in "-acc" with no accuracy value
# substituted, unlike the per-epoch checkpoints; confirm this is intended.
torch.save(model.state_dict(), "./saved_models/{}-epoch-99-acc".format(exp_name))

In [40]:
# Sanity check: one recorded entry per epoch in both histories.
tuple(map(len, (test_accs, dev_accs)))

(100, 100)

In [None]:
# Model selection: rank epochs by dev accuracy and report the test metrics of
# the winner. Sorting on the dev value alone matters: a plain tuple sort breaks
# dev ties using test accuracy, which lets the test set influence the choice.
# The sort is stable, so tied epochs keep chronological order and the earliest
# one wins.
top_dev_accs = sorted(
    zip(dev_accs, test_accs, test_rmses),
    key=lambda row: float(row[0]),
    reverse=True,
)
best_dev_acc, corr_test_acc, corr_test_rmse = top_dev_accs[0]
print("The best dev accuracy is {}. The corresponding test accuracy and test RMSE are {} and {} respectively".format(
    best_dev_acc, corr_test_acc, corr_test_rmse
))