In [1]:
import sys, os, re, json

import pandas as pd
import pickle
import h5py

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from PIL import Image
from utils import imread, img_data_2_mini_batch, imgs2batch

from sklearn import metrics

from naive import Enc, Dec
from data_loader import VQADataSet

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as Data
from torchvision import transforms

%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [2]:
# Size argument handed to VQADataSet (as Q=N); it is also baked into the cache
# file name so that caches built with different sizes do not collide.
# NOTE(review): presumably Q is the number of questions to load -- confirm in data_loader.
N = 2000
dataset_filename = "./data_{}.pkl".format(N)
print(dataset_filename)

dataset = None
if not os.path.exists(dataset_filename):
    # Cache miss: build the dataset from scratch, then pickle it for next time.
    dataset = VQADataSet(Q=N)
    with open(dataset_filename, 'wb') as handle:
        print("writing to " + dataset_filename)
        pickle.dump(dataset, handle)
else:
    # Cache hit: restore the previously pickled dataset.
    # The pickle is one this notebook wrote itself; never point this at an untrusted file.
    with open(dataset_filename, 'rb') as handle:
        print("reading from " + dataset_filename)
        dataset = pickle.load(handle)

# Either branch must have produced a dataset before the notebook continues.
assert dataset is not None

./data_2000.pkl
reading from ./data_2000.pkl


In [3]:
# --- Training-loop settings ---
batch_size = 5    # minibatch size passed to the data loader
n_epochs = 1      # number of epochs (not referenced by the cells shown here)

# --- Model dimensions (passed to Enc / Dec below) ---
embed_size = 16
hidden_size = 16
rnn_layers = 1

# --- Vocabulary sizes, taken from the loaded dataset ---
ques_vocab_size = len(dataset.vocab['question'])
ans_vocab_size = len(dataset.vocab['answer'])


In [4]:

# Run on the first GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Build both halves of the model and move them to the chosen device.
encoder = Enc(embed_size)
encoder = encoder.to(device)
decoder = Dec(embed_size, hidden_size, ques_vocab_size, ans_vocab_size, rnn_layers)
decoder = decoder.to(device)

criterion = nn.CrossEntropyLoss()

# Only the decoder and the encoder's `linear` and `bn` sub-modules are handed
# to the optimizer; any other encoder parameters are left untouched by it.
# NOTE(review): presumably the rest of Enc is a frozen backbone -- confirm in naive.Enc.
params = [
    *decoder.parameters(),
    *encoder.linear.parameters(),
    *encoder.bn.parameters(),
]
optimizer = torch.optim.Adam(params, lr=0.001)

# Uncomment to inspect the architectures:
# print(encoder)
# print(decoder)

In [31]:

def eval_model(data_loader=None, batch_size=batch_size, epoch=1, train=True, verbose=True):
    """Run one pass over ``data_loader``; by default this is a *training* epoch.

    Despite its name, the default behaviour (kept so existing calls behave the
    same) optimises the model: each minibatch is pushed through the
    notebook-level ``encoder`` and ``decoder``, scored with ``criterion`` and
    followed by a ``backward()`` / ``optimizer.step()``. Pass ``train=False``
    for a genuine evaluation pass: autograd is disabled, both networks are put
    in eval mode for the duration of the pass, and no optimizer step is taken.

    Args:
        data_loader: iterable yielding ``(idxs, v, q, a, q_len)`` minibatches.
            When ``None`` the function returns immediately.
        batch_size: unused by this function; retained so existing calls that
            pass it keep working.
        epoch: epoch number, used only in the printed progress line.
        train: when True (default) update the model after every minibatch;
            when False only compute the loss.
        verbose: when True (default) also print the tensor shapes of every
            minibatch. The per-batch loss line is always printed.

    Returns:
        The mean of the per-batch losses as a float, or ``None`` when
        ``data_loader`` is ``None`` or yields no batches.
    """
    if data_loader is None:
        return None

    # For an evaluation pass, remember every sub-module's training flag so it
    # can be restored exactly afterwards; calling .train() on the top-level
    # module would overwrite sub-modules that were deliberately left in eval mode.
    saved_modes = []
    if not train:
        saved_modes = [(module, module.training)
                       for net in (encoder, decoder)
                       for module in net.modules()]
        encoder.eval()
        decoder.eval()

    batch_losses = []
    try:
        for i, minibatch in enumerate(data_loader):
            # extract minibatch (the sample indices are not needed here)
            _, v, q, a, q_len = minibatch

            # convert torch's DataLoader output to proper format.
            # torch gives a List[Tensor_1, ... ] where tensor has been transposed.
            # batchify transposes back.
            v = v.to(device)
            q = VQADataSet.batchify_questions(q).to(device)
            a = a.to(device)
            if verbose:
                print("")
                print('V: ', v.shape)
                print('Q: ', q.shape)
                print('A: ', a.shape)

            # Gradients are only tracked when the model is going to be updated.
            with torch.set_grad_enabled(train):
                img_features = encoder(v)
                if verbose:
                    print("img_features", img_features.shape)

                pred = decoder(img_features, q, q_len)
                if verbose:
                    print("pred", pred.shape)

                loss = criterion(pred, a)

            loss_value = loss.item()
            batch_losses.append(loss_value)
            print('epoch:', epoch, '#', i, 'loss:', loss_value)

            if train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
    finally:
        # Runs even if the pass is interrupted (e.g. KeyboardInterrupt), so an
        # aborted evaluation never leaves the networks stuck in eval mode.
        for module, was_training in saved_modes:
            module.training = was_training

    if not batch_losses:
        return None
    return sum(batch_losses) / len(batch_losses)
            
    



In [32]:
# Build the loader over the training split with the configured minibatch size.
train_loader = dataset.build_data_loader(
    args={'batch_size': batch_size},
    train=True,
)

# One pass over the training data; eval_model performs the optimisation step
# for every minibatch it sees.
eval_model(train_loader, batch_size)

batch_size: 5 shuffle: True

V:  torch.Size([5, 3, 224, 224])
Q:  torch.Size([5, 9])
A:  torch.Size([5])
img_features torch.Size([5, 16])
pred torch.Size([5, 1282])
epoch: 1 # 0 loss: 7.051229000091553

V:  torch.Size([5, 3, 224, 224])
Q:  torch.Size([5, 12])
A:  torch.Size([5])
img_features torch.Size([5, 16])
pred torch.Size([5, 1282])
epoch: 1 # 1 loss: 7.2205657958984375

V:  torch.Size([5, 3, 224, 224])
Q:  torch.Size([5, 11])
A:  torch.Size([5])
img_features torch.Size([5, 16])
pred torch.Size([5, 1282])
epoch: 1 # 2 loss: 7.101626396179199

V:  torch.Size([5, 3, 224, 224])
Q:  torch.Size([5, 11])
A:  torch.Size([5])
img_features torch.Size([5, 16])
pred torch.Size([5, 1282])
epoch: 1 # 3 loss: 7.070573329925537

V:  torch.Size([5, 3, 224, 224])
Q:  torch.Size([5, 11])
A:  torch.Size([5])
img_features torch.Size([5, 16])
pred torch.Size([5, 1282])
epoch: 1 # 4 loss: 7.1009840965271

V:  torch.Size([5, 3, 224, 224])
Q:  torch.Size([5, 13])
A:  torch.Size([5])
img_features torch.Si

KeyboardInterrupt: 