In [1]:
import pandas as pd
import numpy as np
import os
import cPickle
import skimage
import cv2
import caffe

In [2]:
# Input locations, relative to the notebook's working directory.
flickr_image_path = './flickr30k-images/'             # directory holding the image files
caption_path = './flickr30k/results_20130124.token'   # tab-separated caption file

In [3]:
# The caption file has no header row; each line holds an "<image>#<n>" tag and
# a caption separated by a tab, so the two columns are named explicitly.
captions = pd.read_table(
    caption_path,
    header=None,
    names=['img', 'caption'],
    sep='\t',
)
captions.head()

Unnamed: 0,img,caption
0,1000092795.jpg#0,Two young guys with shaggy hair look at their ...
1,1000092795.jpg#1,"Two young , White males are outside near many ..."
2,1000092795.jpg#2,Two men in green shirts are standing in a yard .
3,1000092795.jpg#3,A man in a blue shirt standing in a garden .
4,1000092795.jpg#4,Two friends enjoy time spent together .


In [4]:
# Split "<file>#<n>" into the file name and the per-image caption number.
# `expand=True` returns a two-column frame, which avoids unpacking the `.str`
# accessor (iteration over it is deprecated/removed in newer pandas), and `n`
# is passed by keyword because newer pandas no longer accepts it positionally.
# NOTE: this overwrites `captions['img']`, so the cell is meant to run once
# per freshly loaded `captions`.
img_parts = captions['img'].str.split('#', n=1, expand=True)
captions['img'] = img_parts[0]
captions['img_num'] = img_parts[1]
captions.head()

Unnamed: 0,img,caption,img_num
0,1000092795.jpg,Two young guys with shaggy hair look at their ...,0
1,1000092795.jpg,"Two young , White males are outside near many ...",1
2,1000092795.jpg,Two men in green shirts are standing in a yard .,2
3,1000092795.jpg,A man in a blue shirt standing in a garden .,3
4,1000092795.jpg,Two friends enjoy time spent together .,4


In [5]:
# Turn each bare file name into a path by prefixing the image directory.
# The column is read and overwritten in place, so re-running this cell
# prefixes the directory a second time.
captions['img'] = captions['img'].astype(str).map(lambda name: flickr_image_path + name)
captions.head()

Unnamed: 0,img,caption,img_num
0,./flickr30k-images/1000092795.jpg,Two young guys with shaggy hair look at their ...,0
1,./flickr30k-images/1000092795.jpg,"Two young , White males are outside near many ...",1
2,./flickr30k-images/1000092795.jpg,Two men in green shirts are standing in a yard .,2
3,./flickr30k-images/1000092795.jpg,A man in a blue shirt standing in a garden .,3
4,./flickr30k-images/1000092795.jpg,Two friends enjoy time spent together .,4


In [None]:
# Image paths as a plain NumPy array, one entry per caption row (so a path
# appears once for every caption that belongs to the same image).
image_list = captions.loc[:, 'img'].values
image_list

In [None]:
def resize_img(x, ht, wd):
    """Load an image and return it scaled and center-cropped to a fixed size.

    The image is scaled uniformly (aspect ratio preserved) until it covers the
    requested size; whatever overflows is then cropped equally from both sides.

    Args:
        x: Image location, handed unchanged to ``skimage.io.imread``.
        ht: Output width in pixels. Despite its name it is used as the first
            (width) element of cv2's ``dsize``; this notebook calls the
            function as ``resize_img(x, width, height)``.
        wd: Output height in pixels (the second element of ``dsize``).

    Returns:
        A float32 array of shape ``(wd, ht, channels)``.
    """
    img = skimage.io.imread(x)
    img = skimage.img_as_float(img).astype(np.float32)

    if img.ndim == 2:
        # Single-plane image: repeat the plane three times along a new last axis.
        img = np.tile(img[:, :, None], 3)
    elif img.ndim == 4:
        # Keep index 0 of the trailing axis so the array is 3-D again.
        img = img[:, :, :, 0]

    img_ht, img_wd, _ = img.shape
    out_wd, out_ht = ht, wd  # `ht` is the output width, see Args

    # The scale must relate the target size to the *image* size. The earlier
    # code divided the target by itself (`float(ht)/ht`), which is always 1,
    # so non-square images were squashed on one axis and cropped on the other.
    # Taking the larger of the two ratios makes the scaled image cover the
    # target on both axes.
    scale = max(float(out_wd) / img_wd, float(out_ht) / img_ht)
    new_wd = max(out_wd, int(round(img_wd * scale)))
    new_ht = max(out_ht, int(round(img_ht * scale)))
    resized_img = cv2.resize(img, (new_wd, new_ht))

    # Center crop: drop the same amount from both sides of the longer axis.
    left = (new_wd - out_wd) // 2
    top = (new_ht - out_ht) // 2
    return resized_img[top:top + out_ht, left:left + out_wd]

In [None]:
# Load the VGG-19 network with caffe and build the input transformer used to
# prepare images for its 'data' blob. File names are relative to the
# notebook's working directory.
vgg19_cnn_model = 'VGG_ILSVRC_19_layers.caffemodel'
vgg19_deploy = 'VGG_ILSVRC_19_layers_deploy.prototxt'
mean = 'ilsvrc_2012_mean.npy'

# caffe.TEST selects the test phase of the network definition.
net = caffe.Net(vgg19_deploy, vgg19_cnn_model, caffe.TEST)

# The transformer is configured from the shape of the network's 'data' blob.
transformer = caffe.io.Transformer({'data':net.blobs['data'].data.shape})
# Axis order (2,0,1): move the last axis of each image to the front
# (presumably H x W x C -> C x H x W, as the network expects).
transformer.set_transpose('data', (2,0,1))
# Averaging the stored mean array twice over axis 1 leaves one value per
# entry of axis 0 (presumably one mean per colour channel -- TODO confirm).
transformer.set_mean('data', np.load(mean).mean(1).mean(1))
# Multiply inputs by 255 (images are converted with img_as_float before
# being preprocessed; presumably they arrive in the 0-1 range).
transformer.set_raw_scale('data', 255)
# Reverse the channel order (presumably RGB -> BGR -- TODO confirm).
transformer.set_channel_swap('data', (2,1,0))

In [None]:
# Run every entry of `image_list` through the network, keep the activations of
# the `layers` blob, and cache the result in ./features.npy so the expensive
# forward passes only happen when the cache file is missing.

width = 224
height = 224
batch_size = 10 # VGG_ILSVRC_19_layers_deploy.prototxt has batch size = 10
layer_sizes = [4096]  # presumably the number of units in the 'fc7' layer
layers='fc7'

if not os.path.exists('./features.npy'):

    # One row per entry of image_list, one column per unit of the layer.
    features = np.zeros([len(image_list)] + layer_sizes)

    for start in range(0, len(image_list), batch_size):
        # `end` may run past the array on the last batch; slicing clamps it.
        end = start + batch_size
        image_batch_file = image_list[start:end]
        # A list comprehension rather than map(): on Python 3 map() returns an
        # iterator, and np.array() of an iterator does not build the batch.
        image_batch = np.array([resize_img(x, width, height) for x in image_batch_file])
        # Same batch with axes reordered (0,3,1,2), i.e. channels moved in
        # front of the two spatial axes, to receive the preprocessed images.
        caffe_in = np.zeros(np.array(image_batch.shape)[[0,3,1,2]], dtype=np.float32)
        for idx, inside in enumerate(image_batch):
            caffe_in[idx] = transformer.preprocess('data', inside)
        out = net.forward_all(blobs=[layers], **{'data':caffe_in})
        features[start:end] = out[layers]

    np.save('./features.npy', features)
else:
    # Cache hit: load the saved array so `features` is defined either way.
    features = np.load('./features.npy')