In [1]:
import sys, os, re, json, time

import pandas as pd
import pickle
import h5py

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import plotting
from PIL import Image
from tqdm import tqdm
from utils import imread, img_data_2_mini_batch, imgs2batch

from sklearn import metrics

# from naive import Enc, Dec, EncDec
from attention import Enc, Dec, EncDec
# from rnn_att import EncDec

from data_loader import VQADataSet

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as Data
from torchvision import transforms

%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [2]:
# Load the VQA dataset from an on-disk pickle cache, building the cache on a miss.
# N is forwarded to VQADataSet as Q (presumably the number of questions -- confirm).
N = 2000
dataset_filename = "./data/data_{}.pkl".format(N)
dataset = None
print(dataset_filename)
if not os.path.exists(dataset_filename):
    # Cache miss: construct the dataset once and persist it for later runs.
    dataset = VQADataSet(Q=N)
    with open(dataset_filename, 'wb') as cache_file:
        print("writing to " + dataset_filename)
        pickle.dump(dataset, cache_file)
else:
    # Cache hit. pickle can execute arbitrary code on load, so only point this
    # at cache files produced locally by this notebook.
    with open(dataset_filename, 'rb') as cache_file:
        print("reading from " + dataset_filename)
        dataset = pickle.load(cache_file)

# Either branch must have produced a dataset object.
assert dataset is not None
def debug(v,q,a):
    """Print the `.shape` of the three batch components v, q and a.

    Each argument only needs to expose a `.shape` attribute.
    """
    shapes = (v.shape, q.shape, a.shape)
    print('\nV: {}\nQ: {}\nA: {}'.format(*shapes))


./data/data_2000.pkl
reading from ./data/data_2000.pkl


In [3]:
# Model and training hyperparameters. The two vocabulary sizes are read from
# the loaded dataset; everything else is a hand-picked constant.
embed_size        = 300
hidden_size       = 512
batch_size        = 50
ques_vocab_size   = len(dataset.vocab['question'])
# Must be named `ans_vocab_size`: the print below and the model-construction
# cell both read this name.
ans_vocab_size    = len(dataset.vocab['answer'])
num_layers        = 1
n_epochs          = 30
learning_rate     = 0.01
momentum          = 0.98
attention_size    = 512
# NOTE(review): this rebinding replaces the debug(v, q, a) helper defined in
# the dataset-loading cell with a boolean; rename one of the two if both are needed.
debug             = False


print(ques_vocab_size, ans_vocab_size)

1469 1282


In [4]:
def eval_model(data_loader, model, criterion, optimizer, batch_size, training=False,
               total_loss_over_epochs=None, scores_over_epochs=None):
    """Run one pass over `data_loader`, optionally updating the model.

    Args:
        data_loader: iterable yielding `(idxs, v, q, a, q_len)` minibatches.
            When None, nothing is run and empty results are returned.
        model: callable as `model(v, q, q_len)`; must support `.train()` / `.eval()`.
        criterion: currently unused; the loss is computed with `F.nll_loss`.
            NOTE(review): `F.nll_loss` expects log-probabilities -- confirm the
            model ends in `log_softmax`, otherwise use `criterion` instead.
        optimizer: stepped once per minibatch when `training` is True; may be None.
        batch_size: currently unused; kept for interface compatibility.
        training: True to enable train mode and gradient updates.
        total_loss_over_epochs: dict whose 'train_loss' list receives one float
            loss per minibatch. A fresh dict is used when omitted.
        scores_over_epochs: dict whose 'train_scores' list receives one
            (precision, recall, fscore, support) tuple per minibatch. A fresh
            dict is used when omitted.

    Returns:
        `(running_loss, final_labels, final_preds)`: the summed minibatch loss
        and the flat lists of target and predicted class indices.
    """
    running_loss = 0.
    final_labels, final_preds = [], []
    if data_loader is None:
        # Keep the 3-tuple shape so callers that unpack the result do not crash.
        return running_loss, final_labels, final_preds

    # Fresh containers per call; a shared mutable default would leak state
    # between calls (and a list default cannot be indexed by key anyway).
    if total_loss_over_epochs is None:
        total_loss_over_epochs = {}
    if scores_over_epochs is None:
        scores_over_epochs = {}

    if training:
        model.train()
    else:
        model.eval()

    for minibatch in data_loader:
        t0 = time.time()
        _idxs, v, q, a, q_len = minibatch

        # Torch's DataLoader hands the questions back as a list of tensors;
        # batchify_questions converts them into the layout the model takes.
        v = v.to(device)
        q = VQADataSet.batchify_questions(q).to(device)
        a = a.to(device)

        # Only build the autograd graph when we are actually going to train.
        with torch.set_grad_enabled(training):
            logits = model(v, q, q_len)
            loss = F.nll_loss(logits, a)
        preds = torch.argmax(logits, dim=1)

        if training and optimizer is not None:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Record a plain float: storing the tensor would keep every
        # minibatch's autograd graph alive for the lifetime of the history.
        loss_value = loss.item()
        running_loss += loss_value

        labels = a.tolist()
        predictions = preds.tolist()
        # sklearn's signature is (y_true, y_pred); swapping them exchanges
        # precision and recall.
        score = metrics.precision_recall_fscore_support(labels,
                                                        predictions,
                                                        average='weighted')

        # NOTE(review): stats are filed under the 'train_*' keys even when
        # training is False -- confirm which keys the plotting helpers expect.
        total_loss_over_epochs.setdefault('train_loss', []).append(loss_value)
        scores_over_epochs.setdefault('train_scores', []).append(score)

        final_labels += labels
        final_preds  += predictions
        print("Loss: {} - score: {} - t: {}".format(loss_value, score, time.time()-t0))

    return running_loss, final_labels, final_preds

In [5]:
# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Size of the answer vocabulary, read straight from the dataset so this cell
# works on a fresh kernel.
ans_vocab_size = len(dataset.vocab['answer'])

# The layer count comes from `num_layers` in the hyperparameter cell; the
# previously referenced `rnn_layers` is never defined anywhere in the notebook.
model = EncDec(embed_size,
               hidden_size,
               ques_vocab_size,
               ans_vocab_size,
               num_layers,
               prefix_n=1).to(device)

criterion = nn.CrossEntropyLoss()
# Alternative optimizer kept for reference:
# optimizer = torch.optim.SGD(model.get_parameters(), lr=learning_rate, momentum=momentum)
optimizer = torch.optim.Adam(model.get_parameters(), lr=learning_rate)


In [8]:

# Build the train / test loaders with the configured batch size.
train_loader = dataset.build_data_loader(train=True, args=dict(batch_size=batch_size))
test_loader = dataset.build_data_loader(test=True, args=dict(batch_size=batch_size))

best_score = 0

train_all_loss = []
train_all_labels = []
train_all_preds = []

# Per-minibatch history containers that eval_model appends to.
total_loss_over_epochs, scores_over_epochs = plotting.get_empty_stat_over_n_epoch_dictionaries()

# One training pass over train_loader per epoch.
for epoch in tqdm(range(n_epochs)):
    t0 = time.time()
    tr_loss, tr_labels, tr_preds = eval_model(
        train_loader,
        model,
        criterion,
        optimizer,
        batch_size,
        training=True,
        total_loss_over_epochs=total_loss_over_epochs,
        scores_over_epochs=scores_over_epochs,
    )
    
#     train_scores = metrics.precision_recall_fscore_support(tr_labels,
#                                                            tr_preds,
#                                                            average='weighted')
    
#     total_loss_over_epochs['train_loss'].append(tr_loss)
#     scores_over_epochs['train_scores'].append(train_scores)
    
#     if True:# or epoch%1 == 0:
#         print("#==#"*5 + "epoch: {}".format(epoch) + "#==#"*5)
#         print("time: {}".format(time.time()-t0))
#         print(train_scores)
#     plotting.plot_score_over_n_epochs(scores_over_epochs, score_type='precision', fig_size=(8,5))
#     plotting.plot_loss_over_n_epochs(total_loss_over_epochs, fig_size=(8, 5), title="Loss")
    
    
    
    

  0%|          | 0/25 [00:00<?, ?it/s]

batch_size: 5 shuffle: True
batch_size: 5 shuffle: False
torch.Size([12, 5, 16])
output shape torch.Size([5, 48])





RuntimeError: size mismatch, m1: [5 x 48], m2: [32 x 1282] at /Users/distiller/project/conda/conda-bld/pytorch_1556653492823/work/aten/src/TH/generic/THTensorMath.cpp:961

In [10]:


# Diagnostic: show the decoder's linear layer so its in_features can be
# compared with the RuntimeError raised by the training cell, which reports
# an input of [5 x 48] against a weight of [32 x 1282].
print(model.decoder.linear)

Linear(in_features=32, out_features=1282, bias=True)


In [None]:
# Scratch check: concatenating two (5, 16) tensors along dim=1 gives (5, 32).
a = torch.randn(5, 16)
b = torch.randn(5, 16)
torch.cat([a, b], dim=1).shape