In [9]:
import sys, os, re, json

import pandas as pd
import pickle
import h5py

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import plotting
from PIL import Image
from tqdm import tqdm
from utils import imread, img_data_2_mini_batch, imgs2batch

from sklearn import metrics

from naive import Enc, Dec, EncDec
from data_loader import VQADataSet

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as Data
from torchvision import transforms

%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [10]:
# Number of questions requested from VQADataSet; it also keys the cache file name.
N = 2000
dataset_filename = "./data/data_{}.pkl".format(N)
dataset = None
print(dataset_filename)

if not os.path.exists(dataset_filename):
    # Cache miss: build the dataset, then persist it for later runs.
    dataset = VQADataSet(Q=N)
    with open(dataset_filename, 'wb') as handle:
        print("writing to " + dataset_filename)
        pickle.dump(dataset, handle)
else:
    # Cache hit: reuse the previously pickled dataset.
    # NOTE(review): pickle.load executes arbitrary code; only load cache files
    # produced by this notebook.
    with open(dataset_filename, 'rb') as handle:
        print("reading from " + dataset_filename)
        dataset = pickle.load(handle)

# Either branch must have produced a dataset.
assert dataset is not None

./data/data_2000.pkl
reading from ./data/data_2000.pkl


In [11]:
# --- Model dimensions (passed to EncDec) ---
embed_size = 16
hidden_size = 16
rnn_layers = 1

# --- Vocabulary sizes, read from the loaded dataset ---
ques_vocab_size = len(dataset.vocab['question'])
ans_vocab_size = len(dataset.vocab['answer'])

# --- Optimisation settings (data loader batch size, epoch count, SGD lr/momentum) ---
batch_size = 5
n_epochs = 1
learning_rate = 0.001
momentum = 0.98


In [7]:
def eval_model(data_loader, model, criterion, optimizer, batch_size, training=False):
    """Run one pass of `model` over `data_loader`, optionally updating its weights.

    Args:
        data_loader: iterable yielding `(idxs, v, q, a, q_len)` minibatches.
            If None, nothing is run and None is returned.
        model: module called as `model(v, q, q_len)`; its output is treated as
            logits with the class scores along dim 1.
        criterion: loss callable invoked as `criterion(logits, a)`.
        optimizer: stepped after every minibatch when `training` is True; may
            be None, in which case no weight update is performed.
        batch_size: not used by this function; kept so existing callers that
            pass it keep working.
        training: True puts the model in train mode and applies gradient
            updates; False puts it in eval mode and disables autograd.

    Returns:
        None when `data_loader` is None. Otherwise a tuple
        `(running_loss, final_labels, final_preds)`: the sum (not the mean) of
        the per-minibatch loss values, the flattened target labels, and the
        flattened argmax predictions, in iteration order.
    """
    running_loss = 0.0
    final_labels, final_preds = [], []
    if data_loader is None:
        return

    # Train mode only while training; eval mode otherwise (the two were swapped).
    if training:
        model.train()
    else:
        model.eval()

    # Run on the device that holds the model's weights so the function does not
    # depend on a notebook-level `device` global having been defined first.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    # Autograd bookkeeping is only needed when weights are going to be updated.
    with torch.set_grad_enabled(training):
        for minibatch in data_loader:
            # extract minibatch
            _idxs, v, q, a, q_len = minibatch

            # Convert torch's DataLoader output to the proper format.
            # Original author's note: torch gives a List[Tensor_1, ...] where
            # the tensor has been transposed; batchify transposes it back.
            v = v.to(target_device)
            q = VQADataSet.batchify_questions(q).to(target_device)
            a = a.to(target_device)

            logits = model(v, q, q_len)
            preds = torch.argmax(logits, dim=1)

            # The loss is computed on the raw logits, not on the argmax indices.
            loss = criterion(logits, a)
            running_loss += loss.item()

            if training and optimizer is not None:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            final_labels += a.tolist()
            final_preds += preds.tolist()

    return running_loss, final_labels, final_preds

In [12]:
# Use the first CUDA device when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Build the encoder-decoder from the hyperparameters above and move it to `device`.
model = EncDec(embed_size, hidden_size, ques_vocab_size, ans_vocab_size, rnn_layers)
model = model.to(device)

# Cross-entropy loss, optimised with SGD plus momentum.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
criterion = nn.CrossEntropyLoss()

print("device: {}".format(device))

device: cpu


In [None]:
# Build the train and test loaders from the dataset with the configured batch size.
train_loader = dataset.build_data_loader(train=True, args={'batch_size': batch_size})
test_loader  = dataset.build_data_loader(test=True, args={'batch_size': batch_size})

loss_list = []
total_loss_over_epochs, scores_over_epochs = plotting.get_empty_stat_over_n_epoch_dictionaries()

for epoch in range(n_epochs):

    # One training pass: returns the summed loss plus the labels and predictions seen.
    loss, labels, preds = eval_model(data_loader=train_loader,
                                     model=model,
                                     criterion=criterion,
                                     optimizer=optimizer,
                                     batch_size=batch_size,
                                     training=True)
    # Score the labels/predictions returned by this epoch's pass. The previous
    # names (train_label_list / train_pred_list) were never defined in this notebook.
    scores = metrics.precision_recall_fscore_support(labels, preds, average='weighted')
    