In [2]:
from __future__ import print_function

import argparse
import os
import sys
import json
import numpy as np
import re
import pickle
import utils
import torch
import pickle
import pprint
import tqdm
from utils.types import PathT
# from torch.utils.data import Dataset
import torch.utils.data as data
from typing import Any, Tuple, Dict, List
import torchvision.transforms as transforms
from PIL import Image

In [9]:
class CocoImages(data.Dataset):
    """Dataset of MSCOCO images stored as ``.jpg`` files in a single folder.

    Each item is an ``(image_id, image)`` pair. ``image_id`` is the integer
    parsed from the part of the file name that follows its last underscore,
    and ``image`` is the picture converted to RGB and, when a ``transform`` was
    supplied, passed through it.

    Args:
        path: Directory that contains the ``.jpg`` files.
        transform: Optional callable applied to every loaded RGB image.
    """

    def __init__(self, path, transform=None):
        super(CocoImages, self).__init__()
        self.path = path
        self.transform = transform
        self.id_to_filename = self._find_images()
        # Sorted so iteration order does not depend on the order in which
        # os.listdir happens to return the directory entries.
        self.sorted_ids = sorted(self.id_to_filename.keys())
        print('found {} images in {}'.format(len(self), self.path))

    def _find_images(self):
        """Return a dict mapping image id -> file name for ``self.path``.

        Only names ending in ``.jpg`` are considered.

        Raises:
            ValueError: If the id portion of a ``.jpg`` file name is not an
                integer.
        """
        id_to_filename = {}
        for filename in os.listdir(self.path):
            if not filename.endswith('.jpg'):
                continue
            # e.g. "COCO_train2014_000000000009.jpg" -> "000000000009.jpg" -> 9
            id_and_extension = filename.rsplit('_', 1)[-1]
            image_id = int(id_and_extension.split('.', 1)[0])
            id_to_filename[image_id] = filename
        return id_to_filename

    def __getitem__(self, item):
        """Return ``(image_id, image)`` for position ``item`` in sorted-id order."""
        image_id = self.sorted_ids[item]
        path = os.path.join(self.path, self.id_to_filename[image_id])
        # Release the file handle deterministically; convert() yields a new,
        # fully loaded image that remains usable after the file is closed.
        with Image.open(path) as raw_image:
            img = raw_image.convert('RGB')

        if self.transform is not None:
            img = self.transform(img)
        return image_id, img

    def __len__(self):
        """Number of images found in the folder."""
        return len(self.sorted_ids)

In [10]:
class Composite(data.Dataset):
    """Dataset that chains several datasets into one, e.g. to combine splits.

    Indices run through the sub-datasets in the order they were given: the
    first ``len(datasets[0])`` indices address the first dataset, the next
    ``len(datasets[1])`` the second, and so on. Negative indices count from
    the end of the combined dataset.

    Args:
        datasets: Sequence of objects supporting ``len()`` and integer
            indexing.
    """

    def __init__(self, datasets):
        super(Composite, self).__init__()
        self.datasets = datasets

    def __getitem__(self, item):
        """Return the element at global position ``item``.

        Raises:
            IndexError: If ``item`` lies outside ``[-len(self), len(self))``.
        """
        if item < 0:
            # Translate to a non-negative global index first; otherwise a
            # negative value would be resolved inside the first sub-dataset.
            item += len(self)
            if item < 0:
                raise IndexError('Index out of range for composite dataset')
        for dataset in self.datasets:
            size = len(dataset)
            if item < size:
                return dataset[item]
            item -= size
        raise IndexError('Index too large for composite dataset')

    def __len__(self):
        """Total number of elements across all sub-datasets."""
        return sum(map(len, self.datasets))


In [11]:
def get_transform(target_size, central_fraction=1.0):
    """Build the image preprocessing pipeline.

    The image is resized to ``int(target_size / central_fraction)``,
    center-cropped to ``target_size``, converted to a tensor and normalized
    with the fixed per-channel mean/std listed below.

    Args:
        target_size: Side length passed to the center crop.
        central_fraction: Fraction of the resized image that the crop keeps;
            the resize target is ``target_size`` divided by this value.

    Returns:
        A ``transforms.Compose`` holding the four steps in order.
    """
    return transforms.Compose([
        # transforms.Scale was deprecated in favour of Resize and is no longer
        # available in current torchvision releases.
        transforms.Resize(int(target_size / central_fraction)),
        transforms.CenterCrop(target_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

In [12]:
# Preprocessing shared by both splits: with these arguments get_transform uses
# int(224 / 0.875) == 256 as the scale size and 224 as the crop size.
transform = get_transform(224, 0.875)
paths = ["data/train2014", "data/val2014"]

# NOTE(review): train_dataset stays a plain one-element list because its
# Composite wrapping is disabled, whereas val_dataset is wrapped in a
# Composite. Confirm that this asymmetry is wanted.
train_dataset = [CocoImages(paths[0], transform=transform)]
# train_dataset = Composite(train_dataset)
val_dataset = Composite([CocoImages(paths[1], transform=transform)])

# Disabled DataLoader setup, kept for reference:
# train_data_loader = torch.utils.data.DataLoader(
#     train_dataset,
#     batch_size=16,
#     num_workers=8,
#     shuffle=False,
#     pin_memory=True,
# )
#
# val_data_loader = torch.utils.data.DataLoader(
#     val_dataset,
#     batch_size=16,
#     num_workers=0,
#     shuffle=False,
#     pin_memory=True,
# )



found 82783 images in data/train2014
found 40504 images in data/val2014


In [None]:
train_dataset

In [None]:
train_dataset[0]

In [None]:
# Print the first two entries of train_dataset, then stop.
for i, image in enumerate(train_dataset):
    print("i[0] = ", i)
    print("image = ", image)
    print("image[0] = ", image[0])
    if i == 1:
        # A bare `raise` outside an except block fails with RuntimeError and
        # aborts the cell; `break` ends the inspection cleanly.
        break


In [None]:
# Preview the layout of the raw questions file without dumping all of it into
# the cell output: every top-level field is shown, but list-valued fields are
# cut down to their first three entries.
path = "data/v2_OpenEnded_mscoco_train2014_questions.json"
with open(path, 'r', encoding='utf-8') as q:
    questions_preview = {
        key: value[:3] if isinstance(value, list) else value
        for key, value in json.load(q).items()
    }
pprint.pprint(questions_preview)

In [5]:
def prepare_questions(questions_json):
    """Yield each question of a VQA-format question json as a list of tokens.

    Every question text is lower-cased, its final character is dropped, and the
    remainder is split on single spaces.
    """
    raw_texts = [entry['question'] for entry in questions_json['questions']]
    for text in raw_texts:
        lowered = text.lower()
        without_last_char = lowered[:-1]
        yield without_last_char.split(' ')
        
def _encode_question(question, token_to_index):
    """ Turn a question into a vector of indices and a question length """
    vec = torch.zeros(max_question_length(question)).long()
    for i, token in enumerate(question):
        index = token_to_index.get(token, 0)
        vec[i] = index
    return vec, len(question)

def max_question_length(self):
    """Return the maximum question length.

    If the argument carries a ``_max_length`` attribute, that value is
    returned. Otherwise the result is the largest ``len()`` among the entries
    of the module-level ``questions`` variable, so that variable must be
    defined before calling.

    Args:
        self: Any object; it is only inspected for a ``_max_length``
            attribute.
    """
    if hasattr(self, '_max_length'):
        # Previously this path fell through to an unassigned local and raised
        # UnboundLocalError.
        return self._max_length
    return max(map(len, questions))

def questions_to_dict(questions, questions_json):
    """Map each ``question_id`` in ``questions_json`` to the same-position item of ``questions``.

    The i-th entry of ``questions_json['questions']`` supplies the key and
    ``questions[i]`` supplies the value.
    """
    return {
        entry['question_id']: questions[position]
        for position, entry in enumerate(questions_json['questions'])
    }

In [23]:
# Load the raw training questions and the cached question vocabulary.
# The encoding is given explicitly so the files are read the same way on every
# platform instead of with the locale-dependent default.
with open("data/v2_OpenEnded_mscoco_train2014_questions.json", 'r', encoding='utf-8') as fd:
    questions_json = json.load(fd)
with open("data/cache/question_vocab", 'r', encoding='utf-8') as fd:
    vocab_json = json.load(fd)

In [9]:
# Expose the loaded vocabulary under the names used by the encoding cells.
# NOTE(review): assumes the vocab file is a flat token -> index mapping, since
# it is handed to _encode_question as token_to_index. Confirm against the file.
vocab = vocab_json
token_to_index = vocab

In [29]:
# Display the question_id of the first entry in the loaded questions file.
questions_json['questions'][0]['question_id']
# Example of a single question record, kept for reference:
# {'image_id': 458752,
#    'question': 'What position is this man playing?',
#    'question_id': 458752001},

{'image_id': 458752,
 'question': 'What is this photo taken looking through?',
 'question_id': 458752000}

In [20]:
# Tokenize every question; `questions` becomes a list of token lists.
questions = list(prepare_questions(questions_json))

In [17]:
# Smoke test: encode only the first tokenized question.
# NOTE(review): this rebinds `questions` to a one-element list of
# (vector, length) pairs, so the tokenizing cell has to be re-run before the
# full set can be encoded.
questions = [_encode_question(questions[0], token_to_index)]

In [None]:
# Encode every tokenized question into a (vector, length) pair, with a progress bar.
# NOTE(review): each _encode_question call goes through max_question_length,
# which scans the whole global `questions` list again, so this loop is
# quadratic in the number of questions. It also rebinds `questions`, so it
# expects the tokenized list as input and cannot simply be run twice.
questions = [_encode_question(q, token_to_index) for q in tqdm.tqdm(questions)]

In [18]:
questions

[(tensor([  3,   2,   5,  27,  69,  98, 471,   0,   0,   0,   0,   0,   0,   0,
            0,   0,   0,   0,   0,   0,   0,   0,   0]), 7)]

In [30]:
# questions_to_dict is already defined in the helper cell that also defines
# prepare_questions and _encode_question; the identical copy that used to live
# here only shadowed it, so this cell now just calls the helper.
temp = questions_to_dict(questions, questions_json)


In [None]:
temp

In [6]:
# Load the pickled question dictionary from disk.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file,
# so only load this file if it was produced locally or comes from a trusted
# source.
# NOTE(review): presumably the output of questions_to_dict saved earlier. Confirm.
with open("data/questions_dict_train", 'rb') as handle:
    questions_dict = pickle.load(handle)

In [None]:
questions_dict

In [20]:
# Load the question-id -> image-id lookup. The keys are strings (JSON object
# keys), so look entries up with str(question_id).
# The encoding is given explicitly so the file is read the same way on every
# platform instead of with the locale-dependent default.
with open("data/question_id_to_image_id_train", 'r', encoding='utf-8') as fd:
    question_id_to_image_id = json.load(fd)

In [21]:
question_id_to_image_id['458752000']

458752