In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
# os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [3]:
import torch
from torch.autograd import Variable

from dataset import Dictionary, HMQAFeatureDataset
from model import SoftCount
from config import *
from datetime import datetime, timedelta

import h5py
import numpy as np
import _pickle as pkl
import json
import torch.nn.functional as F

In [4]:
dictionary = Dictionary.load_from_file('data/dictionary.pkl')

loading dictionary from data/dictionary.pkl


In [5]:
%%time
print('loading features from train hdf5 file')
train_h5_loc = './data/train36.hdf5'
with h5py.File(train_h5_loc, 'r') as hf:
    train_image_features = np.array(hf.get('image_features'))
    train_spatials_features = np.array(hf.get('spatial_features'))

# # can save time by writing the numpy array to disk
# np.save( open("/tmp/vqa/train_image_features", "wb"), train_image_features)
# np.save( open("/tmp/vqa/train_spatials_features", "wb"), train_spatials_features)

In [6]:
# %%time
# train_image_features = np.load(open("/tmp/vqa/train_image_features", "rb"))
# train_spatials_features = np.load(open("/tmp/vqa/train_spatials_features", "rb"))

CPU times: user 72 ms, sys: 15.9 s, total: 16 s
Wall time: 1min 26s


In [83]:
from dataset import HMQAFeatureDataset

hmqa_train_dset = HMQAFeatureDataset(
    img_id2hqma_idx = pkl.load(open("./data/train36_imgid2idx.pkl", "rb")),
    image_features = train_image_features, 
    spatial_features = train_spatials_features, 
    qid2count = json.load(open("./data/how_many_qa/qid2count.json", "rb")), 
    qid2count2score = json.load(open("./data/how_many_qa/qid2count2score.json", "rb")), 
    name="train", 
    dictionary=dictionary
)
del HMQAFeatureDataset

In [84]:
len(hmqa_train_dset)

83642

In [85]:
len(set([x["image_id"] for x in hmqa_train_dset.entries]))

45546

In [86]:
%%time
print('loading features from val hdf5 file')
val_h5_loc = './data/val36.hdf5'
with h5py.File(val_h5_loc, 'r') as hf:
    val_image_features = np.array(hf.get('image_features'))
    val_spatials_features = np.array(hf.get('spatial_features'))
    
# # can save time by writing the numpy array to disk
# np.save( open("/tmp/vqa/val_image_features", "wb"), val_image_features)
# np.save( open("/tmp/vqa/val_spatials_features", "wb"), val_spatials_features)

In [11]:
# %%time
# val_image_features = np.load(open("/tmp/vqa/val_image_features", "rb"))
# val_spatials_features = np.load(open("/tmp/vqa/val_spatials_features", "rb"))

CPU times: user 104 ms, sys: 20.3 s, total: 20.4 s
Wall time: 58.9 s


In [87]:
# len(train_image_features)

from dataset import HMQAFeatureDataset

hmqa_dev_dset = HMQAFeatureDataset(
    img_id2hqma_idx = pkl.load(open("./data/val36_imgid2idx.pkl", "rb")),
    image_features = val_image_features, 
    spatial_features = val_spatials_features, 
    qid2count = json.load(open("./data/how_many_qa/qid2count.json", "rb")), 
    qid2count2score = json.load(open("./data/how_many_qa/qid2count2score.json", "rb")), 
    name="dev", 
    dictionary=dictionary
)

hmqa_test_dset = HMQAFeatureDataset(
    img_id2hqma_idx = pkl.load(open("./data/val36_imgid2idx.pkl", "rb")),
    image_features = val_image_features, 
    spatial_features = val_spatials_features, 
    qid2count = json.load(open("./data/how_many_qa/qid2count.json", "rb")), 
    qid2count2score = json.load(open("./data/how_many_qa/qid2count2score.json", "rb")), 
    name="test", 
    dictionary=dictionary
)
del HMQAFeatureDataset

In [88]:
len(hmqa_dev_dset), len(hmqa_test_dset)

(17714, 5000)

In [89]:
from torch.utils.data import DataLoader

hmqa_train_loader = DataLoader(hmqa_train_dset, 64, shuffle=True, num_workers=0)
hmqa_dev_loader = DataLoader(hmqa_dev_dset, 64, shuffle=True, num_workers=0)
hmqa_test_loader = DataLoader(hmqa_test_dset, 64, shuffle=True, num_workers=0)

In [164]:
def evaluate(model, hmqa_loader):
    
    all_acc = []
    all_se = []
    for i, (v_emb, b, q, c, c2s) in enumerate(hmqa_loader):
        v_emb = Variable(v_emb)
        b = Variable(b)
        q = Variable(q)
        c = Variable(c).view(-1).float()
        
        if USE_CUDA:
            v_emb = v_emb.cuda()
            b = b.cuda()
            q = q.cuda()
            c = c.cuda()
        
        kappa_0, rho = model.compute_vars(v_emb, b, q)
        count, greedy_count, logPA, entP, A, rho, P = model.take_mc_samples(kappa_0, rho, 1)
    
        nearest_pred = (greedy_count + 0.5).long().clamp(0, 20)
        for one_c, one_c2s, one_pred in zip(c, c2s, nearest_pred):
            one_c = one_c.cpu().data
            one_pred = one_pred.cpu().data
            
            all_se.append((one_c - one_pred.float()) ** 2)
            all_acc.append(one_c2s[one_pred])
    
    acc = torch.stack(all_acc).mean()
    rmse = torch.stack(all_se).mean() ** 0.5
    
    return acc, rmse

In [165]:
test_acc, test_rmse = evaluate(model, hmqa_test_loader)
test_acc, test_rmse

(tensor(0.5123), tensor(2.4301))

In [91]:
def isnan(x):
    check = (x != x)
    check = check.float().sum().data[0]
    return check > 0

In [147]:
from model import IRLC
model = IRLC()
del IRLC

initialising with glove embeddings
done.


In [149]:
if USE_CUDA:
    model.cuda()
model

IRLC(
  (ques_parser): QuestionParser(
    (embd): Embedding(20159, 300, padding_idx=20158)
    (rnn): GRU(300, 1024)
    (dropout): Dropout(p=0.3)
  )
  (f_s): ScoringFunction(
    (v_drop): Dropout(p=0.3)
    (q_drop): Dropout(p=0.3)
    (v_proj): FCNet(
      (main): Sequential(
        (0): Linear(in_features=2048, out_features=1024, bias=True)
        (1): LeakyReLU(negative_slope=0.01)
      )
    )
    (q_proj): FCNet(
      (main): Sequential(
        (0): Linear(in_features=1024, out_features=1024, bias=True)
        (1): LeakyReLU(negative_slope=0.01)
      )
    )
    (dropout): Dropout(p=0.3)
  )
  (W): Linear(in_features=1024, out_features=1, bias=True)
  (f_rho): RhoScorer(
    (W): Linear(in_features=1024, out_features=1, bias=True)
    (f_rho): FCNet(
      (main): Sequential(
        (0): Linear(in_features=17, out_features=100, bias=True)
        (1): LeakyReLU(negative_slope=0.01)
      )
    )
    (dense): Linear(in_features=100, out_features=1, bias=True)
  )
  (ex

In [150]:
test_acc, test_rmse = evaluate(model, hmqa_test_loader)
test_acc, test_rmse

(tensor(1.00000e-02 *
        1.3900), tensor(17.3969))

In [151]:
opt = torch.optim.Adam(model.parameters(), lr=5e-4)
sched = torch.optim.lr_scheduler.ExponentialLR(opt, gamma=.99)

In [152]:
test_accs = []
test_rmses = []

dev_accs = []
dev_rmses = []

In [153]:
exp_name = "hope"

In [154]:
for epoch in range(0, 100):
    # adjust learning rate
    sched.step()
    print("learning rate is {}".format(opt.param_groups[0]["lr"]))
    
    # save model every 10 epochs
    if epoch % 10 == 0:
        print("saving model..")
        torch.save(model.state_dict(), "./saved_models/{}-epoch-{}-acc-{}".format(exp_name, epoch, test_acc))
        print("Done.")
    
    for i, (v_emb, b, q, c, _) in enumerate(hmqa_train_loader):
        v_emb = Variable(v_emb)
        b = Variable(b)
        q = Variable(q)
        c = Variable(c).view(-1).float()

        if USE_CUDA:
            v_emb = v_emb.cuda()
            q = q.cuda()
            c = c.cuda()
            b = b.cuda()

        B, k, _ = v_emb.size()

        kappa_0, rho = model.compute_vars(v_emb, b, q)
        if isnan(kappa_0) or isnan(rho) or isnan(model.eps):
            raise Exception("there are nans here")

        num_samples = 32
        count, greedy_count, logPA, entP, A, rho = model.take_mc_samples(kappa_0, rho, num_samples)
        c_gt = torch.cat([c] * num_samples)

        loss = model.get_loss(c_gt, count, greedy_count, logPA, entP, A, rho)

        if i % 100 == 0:
            print("epoch = {}, i = {}, loss = {},".format(epoch, i, loss.data[0],))

        opt.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm(model.parameters(), 0.25)
        opt.step()


    print("evaluating model on dev and test...")

    model.eval()
    dev_acc, dev_rmse = evaluate(model, hmqa_dev_loader)
    print("dev_acc: {}, dev_rmse: {}".format(dev_acc, dev_rmse))
    test_acc, test_rmse = evaluate(model, hmqa_test_loader)
    print("test_acc: {}, test_rmse: {}".format(test_acc, test_rmse))
    model.train()
    
    test_accs.append(test_acc)
    test_rmses.append(test_rmse)
    dev_accs.append(dev_acc)
    dev_rmses.append(dev_rmse)

learning rate is 0.0005
saving model..
Done.
epoch = 0, i = 0, loss = 25.920881271362305,


  This is separate from the ipykernel package so we can avoid doing imports until


epoch = 0, i = 100, loss = -0.145143523812294,
epoch = 0, i = 200, loss = -0.045947037637233734,
epoch = 0, i = 300, loss = -0.016431093215942383,
epoch = 0, i = 400, loss = -0.05430900678038597,
epoch = 0, i = 500, loss = -0.0850566029548645,
epoch = 0, i = 600, loss = 0.08252079784870148,
epoch = 0, i = 700, loss = -0.03528562933206558,
epoch = 0, i = 800, loss = 0.00215788371860981,
epoch = 0, i = 900, loss = -0.07773107290267944,
epoch = 0, i = 1000, loss = 0.02449709363281727,
epoch = 0, i = 1100, loss = 0.03509727120399475,
epoch = 0, i = 1200, loss = 0.0007887259125709534,
epoch = 0, i = 1300, loss = -0.06152818351984024,
evaluating model on dev and test...
dev_acc: 0.3994637131690979, dev_rmse: 3.288179874420166
test_acc: 0.3989199995994568, test_rmse: 3.076686382293701
learning rate is 0.000495
epoch = 1, i = 0, loss = -0.04450586065649986,
epoch = 1, i = 100, loss = -0.0965864360332489,
epoch = 1, i = 200, loss = -0.0027171634137630463,
epoch = 1, i = 300, loss = 0.0592605285

epoch = 9, i = 700, loss = -0.043922509998083115,
epoch = 9, i = 800, loss = -0.01657542586326599,
epoch = 9, i = 900, loss = 0.012701405212283134,
epoch = 9, i = 1000, loss = 0.008855561725795269,
epoch = 9, i = 1100, loss = -0.030450114980340004,
epoch = 9, i = 1200, loss = 0.015750844031572342,
epoch = 9, i = 1300, loss = -0.05025618150830269,
evaluating model on dev and test...
dev_acc: 0.4850739538669586, dev_rmse: 2.802299976348877
test_acc: 0.49248000979423523, test_rmse: 2.559257745742798
learning rate is 0.0004521910375044022
saving model..
Done.
epoch = 10, i = 0, loss = 0.01868540607392788,
epoch = 10, i = 100, loss = -0.0336083248257637,
epoch = 10, i = 200, loss = -0.034114524722099304,
epoch = 10, i = 300, loss = -0.017372826114296913,
epoch = 10, i = 400, loss = -0.07143858820199966,
epoch = 10, i = 500, loss = 0.0022318996489048004,
epoch = 10, i = 600, loss = 0.006267305463552475,
epoch = 10, i = 700, loss = -0.03275662660598755,
epoch = 10, i = 800, loss = -0.09717507

epoch = 18, i = 900, loss = -0.06500133126974106,
epoch = 18, i = 1000, loss = -0.0053459350019693375,
epoch = 18, i = 1100, loss = -0.014356705360114574,
epoch = 18, i = 1200, loss = -0.05406222492456436,
epoch = 18, i = 1300, loss = -0.04093381017446518,
evaluating model on dev and test...
dev_acc: 0.5031331181526184, dev_rmse: 2.7176926136016846
test_acc: 0.5157399773597717, test_rmse: 2.428662061691284
learning rate is 0.00041308431191779333
epoch = 19, i = 0, loss = -0.017954694107174873,
epoch = 19, i = 100, loss = -0.07849812507629395,
epoch = 19, i = 200, loss = -0.0751354843378067,
epoch = 19, i = 300, loss = -0.03401581197977066,
epoch = 19, i = 400, loss = -0.06277039647102356,
epoch = 19, i = 500, loss = -0.021672990173101425,
epoch = 19, i = 600, loss = -0.046307358890771866,
epoch = 19, i = 700, loss = -0.022844599559903145,
epoch = 19, i = 800, loss = 0.010144227184355259,
epoch = 19, i = 900, loss = 0.005716989748179913,
epoch = 19, i = 1000, loss = 0.03554863482713699,

epoch = 27, i = 1100, loss = -0.039398789405822754,
epoch = 27, i = 1200, loss = -0.013105392456054688,
epoch = 27, i = 1300, loss = -0.018434884026646614,
evaluating model on dev and test...
dev_acc: 0.5058541297912598, dev_rmse: 2.747904062271118
test_acc: 0.5290600061416626, test_rmse: 2.469534397125244
learning rate is 0.0003773596436018163
epoch = 28, i = 0, loss = -0.07565617561340332,
epoch = 28, i = 100, loss = -0.011613212525844574,
epoch = 28, i = 200, loss = -0.020312421023845673,
epoch = 28, i = 300, loss = 0.009708323515951633,
epoch = 28, i = 400, loss = -0.03567878156900406,
epoch = 28, i = 500, loss = -0.020723599940538406,
epoch = 28, i = 600, loss = -0.023331794887781143,
epoch = 28, i = 700, loss = -0.013431837782263756,
epoch = 28, i = 800, loss = -0.0037353895604610443,
epoch = 28, i = 900, loss = -0.005943372845649719,
epoch = 28, i = 1000, loss = -0.03963933885097504,
epoch = 28, i = 1100, loss = 0.014241203665733337,
epoch = 28, i = 1200, loss = 0.01425126660615

epoch = 36, i = 1300, loss = -0.008532263338565826,
evaluating model on dev and test...
dev_acc: 0.5132155418395996, dev_rmse: 2.695950508117676
test_acc: 0.5286399722099304, test_rmse: 2.417478084564209
learning rate is 0.0003447245429345389
epoch = 37, i = 0, loss = -0.021824155002832413,
epoch = 37, i = 100, loss = -0.2001992017030716,
epoch = 37, i = 200, loss = -0.03031623549759388,
epoch = 37, i = 300, loss = -0.02917531505227089,
epoch = 37, i = 400, loss = -0.012665405869483948,
epoch = 37, i = 500, loss = -0.018038950860500336,
epoch = 37, i = 600, loss = -0.02227955497801304,
epoch = 37, i = 700, loss = 0.009936317801475525,
epoch = 37, i = 800, loss = -0.008463053032755852,
epoch = 37, i = 900, loss = -0.06566209346055984,
epoch = 37, i = 1000, loss = -0.035163529217243195,
epoch = 37, i = 1100, loss = -0.03164765238761902,
epoch = 37, i = 1200, loss = -0.021939050406217575,
epoch = 37, i = 1300, loss = -0.03420758247375488,
evaluating model on dev and test...
dev_acc: 0.512

dev_acc: 0.5184712409973145, dev_rmse: 2.70088791847229
test_acc: 0.538919985294342, test_rmse: 2.393449306488037
learning rate is 0.00031491181560161616
epoch = 46, i = 0, loss = -0.028595328330993652,
epoch = 46, i = 100, loss = -0.01717653125524521,
epoch = 46, i = 200, loss = -0.027322886511683464,
epoch = 46, i = 300, loss = -0.032262589782476425,
epoch = 46, i = 400, loss = -0.08879825472831726,
epoch = 46, i = 500, loss = -0.0062776487320661545,
epoch = 46, i = 600, loss = -0.033306438475847244,
epoch = 46, i = 700, loss = -0.03380940482020378,
epoch = 46, i = 800, loss = 0.01452363096177578,
epoch = 46, i = 900, loss = 0.014688420109450817,
epoch = 46, i = 1000, loss = -0.009571176022291183,
epoch = 46, i = 1100, loss = -0.006789581850171089,
epoch = 46, i = 1200, loss = 0.008875001221895218,
epoch = 46, i = 1300, loss = -0.23495063185691833,
evaluating model on dev and test...
dev_acc: 0.5169244408607483, dev_rmse: 2.704772710800171
test_acc: 0.5410199761390686, test_rmse: 2.4

epoch = 55, i = 100, loss = 0.009099395014345646,
epoch = 55, i = 200, loss = -0.05992277339100838,
epoch = 55, i = 300, loss = -0.010174421593546867,
epoch = 55, i = 400, loss = -0.050700441002845764,
epoch = 55, i = 500, loss = 0.013704631477594376,
epoch = 55, i = 600, loss = -0.006942407228052616,
epoch = 55, i = 700, loss = -0.059548668563365936,
epoch = 55, i = 800, loss = -0.033603161573410034,
epoch = 55, i = 900, loss = -0.034967973828315735,
epoch = 55, i = 1000, loss = -0.03458702191710472,
epoch = 55, i = 1100, loss = -0.04478764906525612,
epoch = 55, i = 1200, loss = -0.011622450314462185,
epoch = 55, i = 1300, loss = 0.026484396308660507,
evaluating model on dev and test...
dev_acc: 0.5161454081535339, dev_rmse: 2.6753721237182617
test_acc: 0.5260800123214722, test_rmse: 2.4108920097351074
learning rate is 0.0002848006012385796
epoch = 56, i = 0, loss = -0.04375969246029854,
epoch = 56, i = 100, loss = 0.011039145290851593,
epoch = 56, i = 200, loss = -0.07799997180700302

epoch = 64, i = 300, loss = -0.027368921786546707,
epoch = 64, i = 400, loss = -0.01685374602675438,
epoch = 64, i = 500, loss = -0.028087999671697617,
epoch = 64, i = 600, loss = -0.027199700474739075,
epoch = 64, i = 700, loss = 0.011099103838205338,
epoch = 64, i = 800, loss = -0.026282398030161858,
epoch = 64, i = 900, loss = -0.4488343894481659,
epoch = 64, i = 1000, loss = -0.007777409628033638,
epoch = 64, i = 1100, loss = -0.008589147590100765,
epoch = 64, i = 1200, loss = -0.031420934945344925,
epoch = 64, i = 1300, loss = 0.002057483419775963,
evaluating model on dev and test...
dev_acc: 0.5200011134147644, dev_rmse: 2.675487995147705
test_acc: 0.5359200239181519, test_rmse: 2.394117832183838
learning rate is 0.0002601702613251532
epoch = 65, i = 0, loss = -0.014009395614266396,
epoch = 65, i = 100, loss = -0.24934141337871552,
epoch = 65, i = 200, loss = -0.006910418160259724,
epoch = 65, i = 300, loss = -0.007337145507335663,
epoch = 65, i = 400, loss = -0.03581885993480682

epoch = 73, i = 500, loss = -0.028814932331442833,
epoch = 73, i = 600, loss = -0.03189212828874588,
epoch = 73, i = 700, loss = 0.00367872416973114,
epoch = 73, i = 800, loss = -0.06306760013103485,
epoch = 73, i = 900, loss = -0.04779515415430069,
epoch = 73, i = 1000, loss = -0.012761248275637627,
epoch = 73, i = 1100, loss = -0.02153024822473526,
epoch = 73, i = 1200, loss = -0.01417416799813509,
epoch = 73, i = 1300, loss = -0.0549839586019516,
evaluating model on dev and test...
dev_acc: 0.524246335029602, dev_rmse: 2.6686112880706787
test_acc: 0.5371999740600586, test_rmse: 2.4016244411468506
learning rate is 0.00023767002100285347
epoch = 74, i = 0, loss = -0.029988430440425873,
epoch = 74, i = 100, loss = 0.10686279088258743,
epoch = 74, i = 200, loss = 0.014888769946992397,
epoch = 74, i = 300, loss = -0.048424240201711655,
epoch = 74, i = 400, loss = -0.03902087360620499,
epoch = 74, i = 500, loss = -0.013820216991007328,
epoch = 74, i = 600, loss = 0.01768476329743862,
epoc

epoch = 82, i = 700, loss = -0.02372567355632782,
epoch = 82, i = 800, loss = -0.012504462152719498,
epoch = 82, i = 900, loss = -0.02701612189412117,
epoch = 82, i = 1000, loss = -0.05833015590906143,
epoch = 82, i = 1100, loss = -0.018416687846183777,
epoch = 82, i = 1200, loss = -0.02954839915037155,
epoch = 82, i = 1300, loss = 0.006083131767809391,
evaluating model on dev and test...
dev_acc: 0.5237326622009277, dev_rmse: 2.669182300567627
test_acc: 0.5386599898338318, test_rmse: 2.3905229568481445
learning rate is 0.00021711566339590582
epoch = 83, i = 0, loss = -0.41653338074684143,
epoch = 83, i = 100, loss = 0.05304264649748802,
epoch = 83, i = 200, loss = -0.01111314445734024,
epoch = 83, i = 300, loss = -0.02552861161530018,
epoch = 83, i = 400, loss = -0.03654424846172333,
epoch = 83, i = 500, loss = -0.018792927265167236,
epoch = 83, i = 600, loss = -0.03309491649270058,
epoch = 83, i = 700, loss = -0.0012835664674639702,
epoch = 83, i = 800, loss = 0.09439542889595032,
ep

epoch = 91, i = 900, loss = -0.01763564720749855,
epoch = 91, i = 1000, loss = -0.05648138374090195,
epoch = 91, i = 1100, loss = 0.009074229747056961,
epoch = 91, i = 1200, loss = -0.05065380036830902,
epoch = 91, i = 1300, loss = -0.040862731635570526,
evaluating model on dev and test...
dev_acc: 0.5251213908195496, dev_rmse: 2.6785037517547607
test_acc: 0.5414999723434448, test_rmse: 2.406200408935547
learning rate is 0.00019833890321101254
epoch = 92, i = 0, loss = -0.03714611381292343,
epoch = 92, i = 100, loss = 0.00032517407089471817,
epoch = 92, i = 200, loss = 0.010322603397071362,
epoch = 92, i = 300, loss = -0.15317194163799286,
epoch = 92, i = 400, loss = 0.02329959347844124,
epoch = 92, i = 500, loss = -0.036776598542928696,
epoch = 92, i = 600, loss = -0.006171189248561859,
epoch = 92, i = 700, loss = -0.016369706019759178,
epoch = 92, i = 800, loss = -0.03490867838263512,
epoch = 92, i = 900, loss = 0.0009636087343096733,
epoch = 92, i = 1000, loss = 0.012013890780508518

In [70]:
model.eval()
dev_acc, dev_rmse = evaluate(model, hmqa_dev_loader)
print("dev_acc: {}, dev_rmse: {}".format(dev_acc, dev_rmse))
test_acc, test_rmse = evaluate(model, hmqa_test_loader)
print("test_acc: {}, test_rmse: {}".format(test_acc, test_rmse))
model.train()
None

dev_acc: 0.5313029289245605, dev_rmse: 2.665912628173828
test_acc: 0.541100025177002, test_rmse: 2.416609287261963


In [71]:
train_acc, train_rmse = evaluate(model, hmqa_train_loader)
train_acc, train_rmse

(tensor(0.7304), tensor(1.2511))

In [166]:
torch.save(model.state_dict(), "./saved_models/hope-epoch-99-acc")

In [156]:
len(test_accs), len(dev_accs)

(100, 100)

In [157]:
top_dev_accs = sorted(zip(dev_accs, test_accs, test_rmses), reverse=True)
top_dev_accs

[(tensor(0.5274), tensor(0.5444), tensor(2.4025)),
 (tensor(0.5271), tensor(0.5389), tensor(2.4144)),
 (tensor(0.5252), tensor(0.5374), tensor(2.3979)),
 (tensor(0.5251), tensor(0.5415), tensor(2.4062)),
 (tensor(0.5249), tensor(0.5384), tensor(2.4131)),
 (tensor(0.5245), tensor(0.5455), tensor(2.4065)),
 (tensor(0.5245), tensor(0.5361), tensor(2.3854)),
 (tensor(0.5245), tensor(0.5396), tensor(2.4072)),
 (tensor(0.5244), tensor(0.5438), tensor(2.4046)),
 (tensor(0.5242), tensor(0.5387), tensor(2.4005)),
 (tensor(0.5242), tensor(0.5372), tensor(2.4016)),
 (tensor(0.5242), tensor(0.5424), tensor(2.3942)),
 (tensor(0.5241), tensor(0.5408), tensor(2.3845)),
 (tensor(0.5239), tensor(0.5345), tensor(2.3803)),
 (tensor(0.5239), tensor(0.5428), tensor(2.4321)),
 (tensor(0.5239), tensor(0.5452), tensor(2.3937)),
 (tensor(0.5239), tensor(0.5425), tensor(2.4486)),
 (tensor(0.5237), tensor(0.5387), tensor(2.3905)),
 (tensor(0.5237), tensor(0.5425), tensor(2.4173)),
 (tensor(0.5237), tensor(0.5443

In [None]:
best_dev_acc, corr_test_acc, corr_test_rmse = top_dev_accs[0]
print("The best dev accuracy is {}. The corresponding test accuracy and test RMSE are {} and {} respectively".format(
    best_dev_acc, corr_test_acc, corr_test_rmse
))