In [2]:
import h5py
from torch.autograd import Variable
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.utils.data
import torchvision.models as models
from tqdm import tqdm

import config
import data
import utils
import resnet as caffe_resnet


class Net(nn.Module):
    """ResNet-18 wrapper whose forward pass yields the output of the `layer4` stage.

    The wrapped network is built with `pretrained=False`. A forward hook on
    `layer4` keeps the most recent activations in `self.buffer`.
    """

    def __init__(self):
        super().__init__()
        self.model = caffe_resnet.resnet18(pretrained=False)

        # The hook only records the activations; it returns nothing, so the
        # wrapped network's own computation is left untouched.
        def _capture(_module, _inputs, activations):
            self.buffer = activations

        self.model.layer4.register_forward_hook(_capture)

    def forward(self, x):
        # The network's final output is discarded; running it is what
        # triggers the hook that fills `self.buffer`.
        _ = self.model(x)
        return self.buffer


def create_vqa_loader(*paths):
    """Build a single, unshuffled DataLoader covering the images of every given path.

    One `data.VSQImages` dataset is created per path, all sharing a transform
    built from `config.image_size` and `config.central_fraction`; the datasets
    are then merged through `data.Composite`.

    Batch size and worker count are read from `config.preprocess_batch_size`
    and `config.data_workers`.
    """
    preprocess = utils.get_transform(config.image_size, config.central_fraction)

    per_path = []
    for location in paths:
        per_path.append(data.VSQImages(location, transform=preprocess))
    combined = data.Composite(*per_path)

    loader_options = {
        'batch_size': config.preprocess_batch_size,
        'num_workers': config.data_workers,
        'shuffle': False,
        'pin_memory': True,
    }
    return torch.utils.data.DataLoader(combined, **loader_options)


def main():
    """Run every image through `Net` once and store the features in an HDF5 file.

    The file at `config.preprocessed_path` receives two datasets:
      * 'features' (float16) with shape
        (num_images, config.output_features, config.output_size, config.output_size)
      * 'ids' (int32) with one entry per image, written in loader order.

    Requires a CUDA device. Raises if either dataset already exists in the
    target file.
    """
    cudnn.benchmark = True

    net = Net().cuda()
    net.eval()

    loader = create_vqa_loader(config.train_path, config.val_path)
    num_images = len(loader.dataset)
    features_shape = (
        num_images,
        config.output_features,
        config.output_size,
        config.output_size
    )

    # The mode is given explicitly: h5py >= 3.0 opens files read-only by
    # default, which makes create_dataset fail. 'a' opens read/write and
    # creates the file when it is missing.
    with h5py.File(config.preprocessed_path, mode='a', libver='latest') as fd:
        features = fd.create_dataset('features', shape=features_shape, dtype='float16')
        vsq_ids = fd.create_dataset('ids', shape=(num_images,), dtype='int32')

        # i is the write offset of the current batch, j the offset after it.
        i = 0
        # Pure inference: skip building autograd graphs to save GPU memory.
        with torch.no_grad():
            for ids, imgs in tqdm(loader):
                imgs = imgs.cuda(non_blocking=True)
                out = net(imgs)

                j = i + imgs.size(0)
                features[i:j, :, :] = out.cpu().numpy().astype('float16')
                vsq_ids[i:j] = ids.numpy().astype('int32')
                i = j


# Start feature extraction when run as a script. Inside a Jupyter kernel
# `__name__` is also '__main__', so executing this cell calls main() right away.
if __name__ == '__main__':
    main()


100%|██████████| 3963/3963 [15:51<00:00,  4.17it/s]


In [3]:
import json_access 
from collections import Counter
import itertools
from data import *
import random
import os
import json
import config

#adapted from: https://github.com/Cyanogenoid/pytorch-vqa/blob/master/preprocess-vocab.py

#split = 'train'
#annFile='Annotations/%s.json'%(split)
#imgDir = 'train'

# initialize VQA api for QA annotations
#vqa=json_access.VQA(annFile)

#imgs = vqa.getImgs()
#anns = vqa.getAnns(imgs=imgs)

def extract_vocab(iterable, top_k=None, start=0):
    """Map each distinct token to an integer index, most frequent token first.

    `iterable` yields sequences of tokens (e.g. the words of a question, or
    the answers given to one question). Tokens are counted over all
    sequences. When `top_k` is truthy only the `top_k` most common tokens are
    kept; otherwise every token is kept.

    Indices begin at `start` and follow descending count. Tokens with equal
    counts are placed in reverse lexicographical order, because the whole
    (count, token) key is sorted in reverse.
    """
    counts = Counter(itertools.chain.from_iterable(iterable))

    if top_k:
        kept = [token for token, _ in counts.most_common(top_k)]
    else:
        kept = list(counts)

    kept.sort(key=lambda token: (counts[token], token), reverse=True)
    return dict((token, index) for index, token in enumerate(kept, start))

def main():
    """Build the question and answer vocabularies from the training split.

    Loads the training question and answer JSON files located via
    `utils.path_for`, preprocesses them with `prepare_questions` /
    `prepare_answers`, and writes `{'question': ..., 'answer': ...}` as JSON
    to `config.vocabulary_path`.
    """
    # Imported here so this cell does not rely on `utils` leaking in through
    # `from data import *` or from a previously executed cell.
    import utils

    questions_path = utils.path_for(train=True, question=True)
    answers_path = utils.path_for(train=True, answer=True)

    with open(questions_path, 'r') as fd:
        questions = json.load(fd)
    with open(answers_path, 'r') as fd:
        answers = json.load(fd)

    questions = list(prepare_questions(questions))
    answers = list(prepare_answers(answers))

    # Question indices start at 1 -- presumably index 0 is reserved (e.g. for
    # padding/unknown words); TODO confirm against the data loader.
    question_vocab = extract_vocab(questions, start=1)
    # TODO(review): confirm config.max_answers is the intended top_k cut-off.
    answer_vocab = extract_vocab(answers, top_k=config.max_answers)

    vocabs = {
        'question': question_vocab,
        'answer': answer_vocab,
    }
    with open(config.vocabulary_path, 'w') as fd:
        json.dump(vocabs, fd)

# Build the vocabulary file when run as a script. Inside a Jupyter kernel
# `__name__` is also '__main__', so executing this cell calls main() right away.
if __name__ == '__main__':
    main()








In [2]:
import sys
import os.path
import math
import json

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.backends.cudnn as cudnn
from tqdm import tqdm

import config
import data
import modelNoAttention
import utils

import h5py


def update_learning_rate(optimizer, iteration):
    """Apply an exponentially decayed learning rate to every parameter group.

    The rate starts at `config.initial_lr` and is halved once every
    `config.lr_halflife` iterations (fractional halvings included).
    """
    halvings = float(iteration) / config.lr_halflife
    decayed = config.initial_lr * 0.5**halvings
    for group in optimizer.param_groups:
        group['lr'] = decayed


# Number of optimizer steps taken so far, kept across epochs. run() passes it
# to update_learning_rate() and increments it after every training step.
total_iterations = 0


def run(net, loader, optimizer, tracker, train=False, prefix='', epoch=0):
    """ Run an epoch over the given loader.

    Args:
        net: model called as ``net(v, q, q_len)``.
        loader: iterable yielding ``(v, q, a, idx, q_len)`` batches.
        optimizer: optimizer stepped once per batch when ``train`` is true.
        tracker: object providing ``track``, ``MovingMeanMonitor`` and
            ``MeanMonitor``; receives the per-batch loss and per-sample accuracy.
        train: if true, update the weights; otherwise only evaluate.
        prefix: label used for the progress bar and the tracker keys.
        epoch: epoch number shown in the progress bar.

    Returns:
        ``None`` when training. When evaluating, a tuple
        ``(answers, accuracies, idxs)`` of lists of per-sample tensors, where
        ``answers`` holds the arg-max prediction of every sample.

    Requires a CUDA device: every batch is moved to the GPU.
    """
    if train:
        net.train()
        tracker_class, tracker_params = tracker.MovingMeanMonitor, {'momentum': 0.99}
    else:
        net.eval()
        tracker_class, tracker_params = tracker.MeanMonitor, {}
        answ = []
        idxs = []
        accs = []

    tq = tqdm(loader, desc='{} E{:03d}'.format(prefix, epoch), ncols=0)
    loss_tracker = tracker.track('{}_loss'.format(prefix), tracker_class(**tracker_params))
    acc_tracker = tracker.track('{}_acc'.format(prefix), tracker_class(**tracker_params))

    # dim is explicit: relying on the implicit dimension is deprecated. The
    # loss below sums over dim 1, so that is the class dimension.
    log_softmax = nn.LogSoftmax(dim=1).cuda()
    fmt = '{:.4f}'.format
    for v, q, a, idx, q_len in tq:
        v = v.cuda()
        q = q.cuda()
        a = a.cuda()
        q_len = q_len.cuda()

        # Only build the autograd graph when it is needed for backward().
        with torch.set_grad_enabled(train):
            out = net(v, q, q_len)
            nll = -log_softmax(out)
            # NOTE(review): the division by 10 presumably normalises answer
            # counts (10 annotators per question) -- confirm against data.py.
            loss = (nll * a / 10).sum(dim=1).mean()
        acc = utils.batch_accuracy(out.data, a.data).cpu()

        if train:
            global total_iterations
            update_learning_rate(optimizer, total_iterations)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_iterations += 1
        else:
            # store information about evaluation of this minibatch
            _, answer = out.data.cpu().max(dim=1)
            answ.append(answer.view(-1))
            accs.append(acc.view(-1))
            idxs.append(idx.view(-1).clone())

        loss_tracker.append(loss.item())
        # Accuracy is tracked per sample; a distinct loop name avoids
        # shadowing the answers tensor `a`.
        for sample_acc in acc:
            acc_tracker.append(sample_acc.item())
        tq.set_postfix(loss=fmt(loss_tracker.mean.value), acc=fmt(acc_tracker.mean.value))

    if not train:
        answ = list(torch.cat(answ, dim=0))
        accs = list(torch.cat(accs, dim=0))
        idxs = list(torch.cat(idxs, dim=0))
        return answ, accs, idxs


def main():
    """Train for `config.epochs` epochs, validating and checkpointing after each.

    After every epoch a dict with the run name, tracker history, config
    values, model weights, validation results and vocabulary is saved to
    ``logs/<name>.pth``, overwriting the previous epoch's file.
    """
    from datetime import datetime

    # this has been changed to run jupyter
    #
    # non jupyter ##############################################################
    if len(sys.argv) > 1:
        name = ' '.join(sys.argv[1:])
    else:
        name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    ############################################################################

    # remove line below if not running on jupyter
    # (under Jupyter sys.argv holds kernel arguments, so a timestamp is forced)
    name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    target_name = os.path.join('logs', '{}.pth'.format(name))
    # Make sure the output directory exists so torch.save does not fail
    # after a whole epoch of training.
    os.makedirs(os.path.dirname(target_name), exist_ok=True)
    print('will save to {}'.format(target_name))

    cudnn.benchmark = True

    train_loader = data.get_loader(train=True)
    val_loader = data.get_loader(val=True)

    # Use the module this cell actually imports. The bare name `models` is
    # not imported here, and elsewhere in the notebook it refers to
    # torchvision.models, which has no `Net`.
    # NOTE(review): assumes modelNoAttention exposes `Net(num_tokens)` -- confirm.
    net = nn.DataParallel(modelNoAttention.Net(train_loader.dataset.num_tokens)).cuda()
    optimizer = optim.Adam([p for p in net.parameters() if p.requires_grad])

    tracker = utils.Tracker()
    config_as_dict = {k: v for k, v in vars(config).items() if not k.startswith('__')}

    for i in range(config.epochs):
        _ = run(net, train_loader, optimizer, tracker, train=True, prefix='train', epoch=i)
        r = run(net, val_loader, optimizer, tracker, train=False, prefix='val', epoch=i)

        results = {
            'name': name,
            'tracker': tracker.to_dict(),
            'config': config_as_dict,
            'weights': net.state_dict(),
            'eval': {
                'answers': r[0],
                'accuracies': r[1],
                'idx': r[2],
            },
            'vocab': train_loader.dataset.vocab,
        }
        torch.save(results, target_name)


# Start training when run as a script. Inside a Jupyter kernel `__name__` is
# also '__main__', so executing this cell calls main() right away.
if __name__ == '__main__':
    main()


will save to logs/2020-12-12_21-40-23.pth


train E000: 100% 156/156 [00:44<00:00,  3.52it/s, acc=0.3275, loss=4.2500]
val E000: 100% 34/34 [00:09<00:00,  3.55it/s, acc=0.3635, loss=2.8808]
train E001: 100% 156/156 [00:44<00:00,  3.50it/s, acc=0.3542, loss=3.5857]
val E001: 100% 34/34 [00:09<00:00,  3.53it/s, acc=0.3803, loss=2.7769]
train E002: 100% 156/156 [00:45<00:00,  3.43it/s, acc=0.4161, loss=3.4322]
val E002: 100% 34/34 [00:09<00:00,  3.57it/s, acc=0.3656, loss=2.7215]
train E003: 100% 156/156 [00:45<00:00,  3.44it/s, acc=0.2874, loss=3.4230]
val E003: 100% 34/34 [00:10<00:00,  3.37it/s, acc=0.3407, loss=2.7068]
train E004: 100% 156/156 [00:47<00:00,  3.30it/s, acc=0.3575, loss=3.2042]
val E004: 100% 34/34 [00:09<00:00,  3.51it/s, acc=0.3432, loss=2.6893]
train E005: 100% 156/156 [00:45<00:00,  3.44it/s, acc=0.2804, loss=3.1921]
val E005: 100% 34/34 [00:09<00:00,  3.57it/s, acc=0.3331, loss=2.6408]
train E006: 100% 156/156 [00:44<00:00,  3.53it/s, acc=0.3473, loss=3.1501]
val E006: 100% 34/34 [00:09<00:00,  3.54it/s, acc

### 