# Prepare model for Flickr30k

Here we will prepare the model to run caption generation for the paper "Show, Attend and Tell".

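The download links and the feature-extraction cells below use the Flickr8k dataset; the `./data/flickr30k/...` paths in the environment-setup cell point at Flickr30k.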

### Prerequisite 
 - The data is available from 
     - Images http://nlp.cs.illinois.edu/HockenmaierGroup/Framing_Image_Description/Flickr8k_Dataset.zip
     - Texts http://nlp.cs.illinois.edu/HockenmaierGroup/Framing_Image_Description/Flickr8k_text.zip

### Load and Preprocess images

Download the data and downsize each image so that its larger side is 256 pixels, then center-crop it to 224 by 224.



In [1]:
# Initial environment setup: dataset paths, split sizes, and the Caffe / matplotlib imports.

# Raw images, preprocessed images, and the text file listing the image names.
originalImagesPath = './data/flickr30k/flickr30k-images'
preprocessedImagesPath = './data/flickr30k/processedImages'
fileList = './data/flickr30k/imageList.txt'

# NOTE(review): presumably the number of images in the train / test splits;
# neither name is used by the cells shown here -- confirm.
numTrain = 25000
numTest = 2500

# Root of the local Caffe checkout (absolute, machine-specific path).
caffe_root = '/home/intuinno/codegit/caffe/'

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline


import sys
# Put <caffe_root>/python at the front of the module search path; this has to
# happen before the `import caffe` below.
sys.path.insert(0, caffe_root + 'python')

import caffe

# matplotlib defaults: 10x10 figures, nearest-neighbour interpolation, gray colormap.
plt.rcParams['figure.figsize'] = (10, 10)
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

import os


In [None]:
# Read the image names (one per line in `fileList`) and put them in random order.
#
# `shuffle` used to be called without ever being imported, which raises
# NameError on a fresh kernel; the standard-library implementation is now
# imported and called explicitly.
# NOTE(review): no seed is set, so the order differs between runs -- seed the
# generator first if the resulting split has to be reproducible.
import random

# The context manager closes the list file as soon as it has been read.
with open(fileList) as imageFiles:
    imageList = [i.rstrip() for i in imageFiles]
random.shuffle(imageList)





In [2]:
# NOTE(review): this cell repeats the imports and matplotlib settings of the
# initial environment-setup cell; it relies on `caffe_root` defined there.
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline


import sys
# Put <caffe_root>/python at the front of the module search path; this has to
# happen before the `import caffe` below.
sys.path.insert(0, caffe_root + 'python')

import caffe

# matplotlib defaults: 10x10 figures, nearest-neighbour interpolation, gray colormap.
plt.rcParams['figure.figsize'] = (10, 10)
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

import os
# Disabled: download of the reference CaffeNet weights (this notebook loads the
# VGG ILSVRC 19-layer model instead).
# if not os.path.isfile(caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'):
#     print("Downloading pre-trained CaffeNet model...")
#     !../scripts/download_model_binary.py ../models/bvlc_reference_caffenet

Set Caffe to use the GPU. We will use the VGG ILSVRC 19-layer model (`vgg_ilsvrc_19`).

In [4]:
# Select GPU device 1 and switch Caffe to GPU mode.
caffe.set_device(1)
caffe.set_mode_gpu()

# Load the VGG ILSVRC 19-layer network (deploy definition + weights) in TEST phase.
vggModelDir = caffe_root + 'models/vgg_ilsvrc_19/'
net = caffe.Net(
    vggModelDir + 'VGG_ILSVRC_19_layers_deploy.prototxt',
    vggModelDir + 'VGG_ILSVRC_19_layers.caffemodel',
    caffe.TEST,
)

# Input preprocessing for the 'data' blob: only the axis transpose (2,0,1) is applied.
dataBlobShape = net.blobs['data'].data.shape
transformer = caffe.io.Transformer({'data': dataBlobShape})
transformer.set_transpose('data', (2,0,1))
# NOTE(review): mean subtraction, rescaling to [0,255] and the RGB->BGR channel
# swap are all disabled below, so images reach the net as loaded (transposed
# only) -- confirm this is intended for these weights.
# transformer.set_mean('data', np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy').mean(1).mean(1)) # mean pixel
# transformer.set_raw_scale('data', 255)  # the reference model operates on images in [0,255] range instead of [0,1]
# transformer.set_channel_swap('data', (2,1,0))  # the reference model has channels in BGR order instead of RGB

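Extract the `conv5_4` features of every image in the dev, test and train splits and save each split as a sparse float32 matrix. The input batch is left at the size defined in the deploy prototxt: the reshape call carried over from the Caffe classification example is commented out below. (Note that the batch size can also be changed on-the-fly.)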

In [25]:
# Extract the `conv5_4` activations of every image in each Flickr8k split and
# save them, one pickle per split, as a float32 CSR sparse matrix
# (rows = images in list order, columns = flattened feature map).

# files = ['test', 'train', 'dev']
files = [ 'dev','test','train']

import pdb
from sys import stdout
import scipy
import scipy.sparse  # `import scipy` alone does not guarantee that scipy.sparse is loaded
import  pickle

flickr8kDir = '/home/intuinno/project/pointTeach/data/Flicker8k/'

# Length of one flattened conv5_4 feature map, read from the blob instead of
# being hard-coded (it is 100352 in the run recorded below).
featureDim = net.blobs['conv5_4'].data[0].size

for fname in files:
    print(fname)

    # Read the whole image list up front so the list file is closed right away.
    with open(flickr8kDir + 'Flickr_8k.' + fname + 'Images.txt') as f:
        imageList = [i.rstrip() for i in f]
    numImage = len(imageList)

    # The matrix is saved as float32 anyway, so allocating the dense buffer in
    # float32 halves its size without changing what gets written.
    result = np.empty((numImage, featureDim), dtype=np.float32)

    for i, fn in enumerate(imageList):
        image = caffe.io.load_image(flickr8kDir + 'preprocessedImages/' + fn)
        # The single image is broadcast over the whole input batch and only
        # batch element 0 is read back afterwards.
        # NOTE(review): reshaping the 'data' blob to a batch of one would avoid
        # the redundant forward work -- TODO confirm against the prototxt.
        net.blobs['data'].data[...] = transformer.preprocess('data', image)
        net.forward()
        feat = net.blobs['conv5_4'].data[0]
        # Swap the first and last axes, then flatten into one row.
        result[i, :] = np.swapaxes(feat, 0, 2).reshape(-1)

        # In-place progress counter (number of images processed so far).
        stdout.write("\r%d" % (i + 1))
        stdout.flush()

    print(result.shape)

    resultSave = scipy.sparse.csr_matrix(result)
    resultSave32 = resultSave.astype('float32')
    # -1 selects the highest pickle protocol; the context manager closes the
    # file even if dumping fails.
    with open('flicker_8k_feature.' + fname + '.pkl','wb') as fileName:
        pickle.dump(resultSave32, fileName ,-1)



dev
1000(1000, 100352)
test
1000(1000, 100352)
train
6000(6000, 100352)


In [26]:
import pickle as pkl

# Show the highest pickle protocol number this interpreter supports.
print(pkl.HIGHEST_PROTOCOL)

# Single-precision copy of the sparse matrix left in `resultSave` by the
# feature-extraction loop (i.e. the last split it processed).
resultSave32 = resultSave.astype('float32')



2


In [27]:
# Peek at the first row of the float32 sparse matrix; the recorded output shows
# that the result is a 1 x 100352 sparse row.
firstRow = resultSave32[0]
firstRow[0]


<1x100352 sparse matrix of type '<type 'numpy.float32'>'
	with 19374 stored elements in Compressed Sparse Row format>

In [28]:
import sys

def show_sizeof(x, level=0):
    """Print the class, ``sys.getsizeof`` size and value of ``x``, then of its items.

    One line is printed per object. Lines at ``level`` 0 start with a single
    space and nested lines start with one tab per level, which is the layout
    the original Python 2 print statement produced.

    Args:
        x: Object to report on. If it has ``__iter__`` its items are reported
            recursively; for objects with an ``items`` method (e.g. dicts) the
            ``(key, value)`` pairs are what gets iterated.
        level: Nesting depth used for indentation; callers normally leave it at 0.

    Returns:
        None. The report is written to standard output.
    """
    # Format one string and print it with a single argument, so the call gives
    # the same text whether `print` is the Python 2 statement or the function.
    indent = "\t" * level or " "
    print("%s%s %s %s" % (indent, x.__class__, sys.getsizeof(x), x))

    # Text is reported but never descended into: on Python 3 a one-character
    # str iterates to itself, which would recurse without end. (On Python 2
    # str has no __iter__, so it was never descended into there either.)
    if isinstance(x, (str, bytes)):
        return

    if hasattr(x, '__iter__'):
        children = x.items() if hasattr(x, 'items') else x
        for xx in children:
            show_sizeof(xx, level + 1)

In [29]:
# Report the size of None and of progressively larger integers.
sampleValues = (
    None,
    3,
    2**63,
    102947298469128649161972364837164,
    918659326943756134897561304875610348756384756193485761304875613948576297485698417,
)
for sampleValue in sampleValues:
    show_sizeof(sampleValue)

 <type 'NoneType'> 16 None
 <type 'int'> 24 3
 <type 'long'> 36 9223372036854775808
 <type 'long'> 40 102947298469128649161972364837164
 <type 'long'> 60 918659326943756134897561304875610348756384756193485761304875613948576297485698417


In [30]:
import re

# Build `capDict`: image file name -> list of its captions, in file order.
#
# A well-formed line is "<name>.jpg#<digit><whitespace><caption>". The last
# group also matches newlines, so each stored caption keeps the line's
# trailing newline.
captionPattern = re.compile(r'^([\w]+\.jpg)#(\d)\s([\w\W.\s-]+)$')

capDict = {}
# The context manager closes the token file once every line has been read.
with open('/home/intuinno/project/pointTeach/data/Flicker8k/Flickr8k.token.txt') as capFile:
    for line in capFile:
        match = captionPattern.search(line)
        if not match:
            # Show lines that do not parse instead of dropping them silently.
            print(line)
            continue
        # setdefault creates the list the first time an image is seen, so a
        # caption with a non-zero index no longer raises KeyError when it
        # comes before that image's "#0" line. Unlike before, a repeated "#0"
        # line appends instead of resetting the list.
        capDict.setdefault(match.group(1), []).append(match.group(3))

In [31]:
import cPickle as pickle
files = ['test', 'train', 'dev']

# For each split, rewrite its feature pickle so that it holds two objects:
# first the caption list `cap` ([caption, image index] pairs), then the
# feature matrix `feat`.
for name in files:
    featurePath = 'flicker_8k_feature.' + name + '.pkl'

    # NOTE: pickle.load executes code embedded in the file; only use it on the
    # pickles written by this notebook.
    with open(featurePath, 'rb') as featureFile:
        feat = pickle.load(featureFile)
        if isinstance(feat, list):
            # The file was already rewritten by an earlier run of this cell,
            # so the first object is the caption list and the features come
            # second. Without this check a re-run would save the captions
            # twice and lose the features.
            feat = pickle.load(featureFile)

    cap = []
    with open('/home/intuinno/project/pointTeach/data/Flicker8k/Flickr_8k.' + name + 'Images.txt') as filenames:
        # `counter` is the image's position in the split list, i.e. its row in `feat`.
        for counter, imageFile in enumerate(filenames):
            for sen in capDict[imageFile.rstrip()]:
                cap.append([sen.rstrip(), counter])

    # Written in this order: captions first, features second.
    with open(featurePath, 'wb') as saveFile:
        pickle.dump(cap, saveFile, protocol=pickle.HIGHEST_PROTOCOL)
        pickle.dump(feat, saveFile, protocol=pickle.HIGHEST_PROTOCOL)
            
    
    

In [32]:
# NOTE(review): `dictionary` is not defined in any cell shown in this notebook;
# the recorded output of this cell is a NameError. Define it or drop this scratch cell.
dictionary['People']

NameError: name 'dictionary' is not defined

In [None]:
# NOTE(review): `dictionary` is not defined in any cell shown in this notebook,
# so this lookup raises NameError on a fresh kernel -- define it or drop this scratch cell.
dictionary['people']


In [None]:
# Displays the file object left in `saveFile` by the caption/feature merge loop
# (the handle of the last split written, already closed by then).
saveFile