In [1]:
import torch
import torchvision
print(f'Torch version: {torch.__version__}')

Torch version: 0.4.0


In [2]:
import numpy as np
from torchvision import transforms

In [3]:
import h5py
import tqdm

In [4]:
import os
import re
from PIL import Image

# Check h5 file format

In [6]:
import sys
sys.path.append('../visdial-challenge-starter-pytorch/')
sys.path.append('../visdial-challenge-starter-pytorch/visdialch/')

In [7]:
from data.readers import ImageFeaturesHdfReader

In [7]:
val_reader = ImageFeaturesHdfReader(
    '../visdial-challenge-starter-pytorch/data/features_faster_rcnn_x101_val.h5')
test_reader = ImageFeaturesHdfReader(
    '../visdial-challenge-starter-pytorch/data/features_faster_rcnn_x101_test.h5')
train_reader = ImageFeaturesHdfReader(
    '../visdial-challenge-starter-pytorch/data/features_faster_rcnn_x101_train.h5')

In [8]:
print(f'Train has {len(train_reader)} samples')
print(f'Valid has {len(val_reader)} samples')
print(f'Test has {len(test_reader)} samples')

Train has 123287 samples
Valid has 2064 samples
Test has 8000 samples


In [9]:
len(set([id for reader in [train_reader, val_reader, test_reader] for id in reader.image_id_list ]))

133351

### VGG features

In [31]:
import h5py
import os

In [84]:
file = h5py.File('../visdial-challenge-starter-pytorch/data/data_img_vgg16_pool5.h5','r')

In [85]:
print(file['images_train'].shape, len(os.listdir('../visdial-challenge-starter-pytorch/data/images/train2014/')))
print(file['images_val'].shape, len(os.listdir('../visdial-challenge-starter-pytorch/data/images/val2014/')))
print(file['images_test'].shape, len(os.listdir('../visdial-challenge-starter-pytorch/data/images/test2014/')))
print(file['images_test'].shape, len(os.listdir('../visdial-challenge-starter-pytorch/data/images/VisualDialog_test2018/')))

(82783, 512, 14, 14) 82783
(40504, 512, 14, 14) 40504
(8000, 512, 14, 14) 40775
(8000, 512, 14, 14) 8000


In [96]:
82783+40504

123287

In [95]:
len(train_reader)

123287

In [None]:
print(file['images_train'].shape, 
      len(os.listdir('../visdial-challenge-starter-pytorch/data/images/train2014/')),
      len(os.listdir('../visdial-challenge-starter-pytorch/data/images/val2014/'))
     )
print(file['images_test'].shape, len(os.listdir('../visdial-challenge-starter-pytorch/data/images/VisualDialog_test2018/')))

In [83]:
(
    len(os.listdir('../visdial-challenge-starter-pytorch/data/images/train2014/'))+
    len(os.listdir('../visdial-challenge-starter-pytorch/data/images/val2014/'))+
    len(os.listdir('../visdial-challenge-starter-pytorch/data/images/VisualDialog_val2018/'))
)

125351

In [65]:
list(file.keys())

['images_test', 'images_train']

In [7]:
reader.__dict__.keys()

dict_keys(['features_hdfpath', '_in_memory', '_split', 'image_id_list', 'features'])

In [10]:
reader[reader.keys()[0]].shape

(4096,)

# Load images

In [10]:
splits = (
        'train2014',
        'test2014',
        'val2014'
        'VisualDialog_test2018',
        'VisualDialog_val2018',
)

In [11]:
available_image_ids = {
    float(re.findall(r'([\d]+)\.jpg', f)[0])
    :
    os.path.join(f'../visdial-challenge-starter-pytorch/data/images/{split}/{f}')
    for split in (
        'train2014',
        'test2014',
        'val2014',
        'VisualDialog_test2018',
        'VisualDialog_val2018',
    )
    for f in os.listdir(f'../visdial-challenge-starter-pytorch/data/images/{split}/')
    }
len(available_image_ids)

174126

In [12]:
img = Image.open(available_image_ids[list(available_image_ids.keys())[40]])

In [13]:
image_ids_with_features = [
    key
    for reader in [
        train_reader, 
        #test_reader,
        #val_reader,
    ]
    for key in reader.keys()
]
len(image_ids_with_features)

123287

# Load Model

In [14]:
checkpoint_path = '../visdial-challenge-starter-pytorch/data/BEST_checkpoint_coco_5_cap_per_img_5_min_word_freq.pth.tar'

In [15]:
model = torch.load(checkpoint_path)



In [16]:
img_encoder = model['encoder']

In [17]:
device = torch.device('cuda')

In [18]:
transform = transforms.Compose([
    transforms.Resize((256, 256)), 
    transforms.ToTensor(), 
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

In [19]:
def get_image_features(img_id):
    img_path = available_image_ids.get(img_id)
    image = Image.open(img_path)
    
    # 1-channel to 3-channel
    if image.mode != 'RGB':
        w, h = image.size
        ima = Image.new('RGB', (w,h))
        data = zip(image.getdata(), image.getdata(), image.getdata())
        ima.putdata(list(data))
        image = ima
        
    features_tensor = img_encoder.resnet(transform(image).to(device).unsqueeze(0))
    return features_tensor.squeeze().detach().cpu().numpy()

In [20]:
def get_images_features(img_ids):
    images_tensors = list()
    for img_id in img_ids:
        img_path = available_image_ids.get(img_id)
        image = Image.open(img_path)

        # 1-channel to 3-channel
        if image.mode != 'RGB':
            w, h = image.size
            ima = Image.new('RGB', (w,h))
            data = zip(image.getdata(), image.getdata(), image.getdata())
            ima.putdata(list(data))
            image = ima
        images_tensors.append(transform(image).to(device).unsqueeze(0))
    images_tensor = torch.cat(images_tensors, dim=0)
    return img_encoder.resnet(images_tensor).squeeze().detach().cpu().numpy()

In [21]:
def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i:i + n]

In [None]:
with h5py.File('../visdial-challenge-starter-pytorch/data/features_thomas_train.h5', mode='w') as h5py_file:
    h5py_file.create_dataset(name='features', shape=(len(train_reader), 2048, 8, 8))
    h5py_file.create_dataset(name='image_id', shape=(len(train_reader),))
    h5py_file.attrs.create(name='split', data=np.string_('train'))
    
    ix = 0
    
    batch_size=16
    total=int(np.ceil(len(train_reader.keys())/batch_size))
    for img_ids in tqdm.tqdm(chunks(train_reader.keys(), batch_size), total=total):
        features = get_images_features(img_ids)
        for img_id, img_features in zip(img_ids, features):
            h5py_file['features'][ix] = img_features
            h5py_file['image_id'][ix] = img_id
            ix += 1

  5%|▍         | 362/7706 [01:57<39:15,  3.12it/s]

In [None]:
with h5py.File('../visdial-challenge-starter-pytorch/data/features_thomas_test.h5', mode='w') as h5py_file:
    h5py_file.create_dataset(name='features', shape=(len(test_reader), 2048, 8, 8))
    h5py_file.create_dataset(name='image_id', shape=(len(test_reader),))
    h5py_file.attrs.create(name='split', data=np.string_('test'))
    
    ix = 0
    
    batch_size=16
    total=int(np.ceil(len(test_reader.keys())/batch_size))
    for img_ids in tqdm.tqdm(chunks(test_reader.keys(), batch_size), total=total):
        features = get_images_features(img_ids)
        for img_id, img_features in zip(img_ids, features):
            h5py_file['features'][ix] = img_features
            h5py_file['image_id'][ix] = img_id
            ix += 1

In [None]:
with h5py.File('../visdial-challenge-starter-pytorch/data/features_thomas_val.h5', mode='w') as h5py_file:
    h5py_file.create_dataset(name='features', shape=(len(val_reader), 2048, 8, 8))
    h5py_file.create_dataset(name='image_id', shape=(len(val_reader),))
    h5py_file.attrs.create(name='split', data=np.string_('val'))
    
    ix = 0
    
    batch_size=16
    total=int(np.ceil(len(val_reader.keys())/batch_size))
    for img_ids in tqdm.tqdm(chunks(val_reader.keys(), batch_size), total=total):
        features = get_images_features(img_ids)
        for img_id, img_features in zip(img_ids, features):
            h5py_file['features'][ix] = img_features
            h5py_file['image_id'][ix] = img_id
            ix += 1

### Test if correct

In [8]:
reader = ImageFeaturesHdfReader('../visdial-challenge-starter-pytorch/data/features_thomas_val.h5')

In [13]:
reader.image_id_list

[18472.0,
 49132.0,
 234760.0,
 390155.0,
 62788.0,
 123577.0,
 409709.0,
 258333.0,
 105002.0,
 125960.0,
 82232.0,
 119945.0,
 300589.0,
 449230.0,
 134052.0,
 406607.0,
 63165.0,
 242822.0,
 193416.0,
 19001.0,
 315349.0,
 98551.0,
 16512.0,
 392052.0,
 351712.0,
 72038.0,
 443125.0,
 344787.0,
 83227.0,
 3587.0,
 270086.0,
 214044.0,
 313977.0,
 382166.0,
 9135.0,
 229536.0,
 464058.0,
 209447.0,
 529106.0,
 40349.0,
 269240.0,
 18807.0,
 297861.0,
 462883.0,
 559632.0,
 242888.0,
 391877.0,
 255172.0,
 451905.0,
 88541.0,
 26012.0,
 466814.0,
 80912.0,
 425740.0,
 195131.0,
 250002.0,
 53772.0,
 459587.0,
 343836.0,
 453147.0,
 17662.0,
 214573.0,
 547737.0,
 545062.0,
 175667.0,
 456824.0,
 504128.0,
 107392.0,
 427711.0,
 573045.0,
 273014.0,
 12712.0,
 572721.0,
 42157.0,
 71000.0,
 376745.0,
 384171.0,
 70221.0,
 293614.0,
 135570.0,
 153121.0,
 100334.0,
 97456.0,
 205033.0,
 579615.0,
 192134.0,
 17823.0,
 244832.0,
 305118.0,
 136775.0,
 502586.0,
 433169.0,
 70131.0,
 3130

In [18]:
tensor = reader[reader.keys()[0]]

In [19]:
tensor.shape

(2048, 8, 8)

# --

In [22]:
tensor = torch.randn(2048, 8, 8)

In [23]:
tensor.shape

torch.Size([2048, 8, 8])

In [28]:
tensor.view((-1, 64)).t().shape

torch.Size([64, 2048])