# Prepare COCO image features, captions, and vocabulary for the model


In [10]:
# Setup: filesystem locations and experiment naming used by the cells below.

import os

# Image directories. Only preprocessedImagesPath is read by the visible cells
# (the feature-extraction loop prepends it to each image file name, hence the
# trailing slash).
originalImagesPath = 'data/coco/originalImages'
preprocessedImagesPath = 'data/coco/processedImages/'

# Root of the Caffe checkout. Set the CAFFE_ROOT environment variable to point
# at a different checkout; the historical location is kept as the default.
# os.path.join(..., '') guarantees the trailing separator that the string
# concatenations below rely on.
caffe_root = os.path.join(
    os.environ.get('CAFFE_ROOT', '/home/intuinno/codegit/caffe/'), '')

# VGG-19 network definition and trained weights inside the Caffe checkout.
vgg_ilsvrc_19_layoutFileName = caffe_root + 'models/vgg_ilsvrc_19/VGG_ILSVRC_19_layers_deploy.prototxt'
vgg_ilsvrc_19_modelFileName = caffe_root + 'models/vgg_ilsvrc_19/VGG_ILSVRC_19_layers.caffemodel'

# Dataset root; every generated artefact is written underneath it.
dataPath = 'data/coco/'
annotation_path = dataPath + 'annotations/captions_train2014.json'
# JSON file describing the images and which split each one belongs to.
splitFileName = dataPath + 'dataset_coco.json'

# Suffix inserted into the names of the generated feature/alignment files.
experimentPrefix = '.exp1'



In [11]:
# Import

import pdb
from sys import stdout
import scipy
import  cPickle as pickle

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline


import sys
sys.path.insert(0, caffe_root + 'python')

import caffe

# Notebook-wide matplotlib defaults: figure size (10, 10), no interpolation
# smoothing when showing images, and a grayscale colormap.
plt.rcParams.update({
    'figure.figsize': (10, 10),
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})

import os

import pandas as pd
import nltk

In [12]:
# Run Caffe in GPU mode on device 1.
caffe.set_device(1)
caffe.set_mode_gpu()

# Load the VGG-19 definition and weights in test (inference) phase.
net = caffe.Net(
    vgg_ilsvrc_19_layoutFileName,
    vgg_ilsvrc_19_modelFileName,
    caffe.TEST,
)

# Input preprocessing: 'data' is the name of the input blob (== net.inputs[0]).
inputBlobShape = net.blobs['data'].data.shape
meanImage = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
meanPixel = meanImage.mean(1).mean(1)  # collapse the mean image to one value per channel

transformer = caffe.io.Transformer({'data': inputBlobShape})
# Reorder image axes with the permutation (2, 0, 1), i.e. last axis first.
transformer.set_transpose('data', (2, 0, 1))
# Subtract the mean pixel.
transformer.set_mean('data', meanPixel)
# The reference model operates on images in [0, 255] range instead of [0, 1].
transformer.set_raw_scale('data', 255)
# The reference model has channels in BGR order instead of RGB.
transformer.set_channel_swap('data', (2, 1, 0))

In [13]:
# Create file list 
# coco.devImages.txt 
# coco.trainImages.txt 
# coco.testImages.txt

import json
from pprint import pprint


# Load the split description and write one image-list file per split:
#   coco.devImages.txt, coco.testImages.txt, coco.trainImages.txt
with open(splitFileName) as splitFile:
    data = json.load(splitFile)

# One row per image record.
df = pd.DataFrame(data['images'])

files = ['dev', 'test', 'train']

# Rows of each split, keyed by the name used in this notebook; the 'val'
# label in the split file is called 'dev' here.
dataDict = {
    'dev': df[df['split'] == 'val'],
    'test': df[df['split'] == 'test'],
    'restval': df[df['split'] == 'restval'],
    'train': df[df['split'] == 'train'],
}

# 'restval' is kept in dataDict but no list file is written for it, because
# it is not in `files`.
for splitName in files:
    # header=False is explicit: newer pandas writes a header line by default,
    # and the cells that read these lists treat every line as an image name.
    dataDict[splitName]['filename'].to_csv(
        dataPath + 'coco.' + splitName + 'Images.txt',
        index=False, header=False)
    

def buildCapDict(sentences):
    """Collect the raw caption text of every sentence record.

    sentences: iterable of mappings that each have a u'raw' entry.
    Returns a list (not a dict, despite the name) holding the u'raw' values
    in input order.
    """
    rawCaptions = []
    for sentence in sentences:
        rawCaptions.append(sentence[u'raw'])
    return rawCaptions

# Attach to every image row the list of its raw caption strings.
# Series.apply always yields one list per row; the row-wise DataFrame.apply
# used previously builds a Series per image and, in older pandas, may try to
# expand list results into columns depending on their lengths.
df['captions'] = df['sentences'].apply(buildCapDict)

# Lookup table: image file name -> list of raw captions.
capDict = df.set_index('filename')['captions'].to_dict()

In [14]:
# Build the vocabulary.
# Every whitespace-separated token of every caption is counted (case is not
# normalised), and tokens are numbered from 2 upwards in most_common() order.
# Ids 0 and 1 are left unassigned here - presumably reserved for special
# tokens; confirm with the code that consumes dictionary.pkl.

corpus = df['captions'].values
corpus2 = list(map(' '.join, corpus))   # one string per image
corpus3 = ' '.join(corpus2)             # one string for the whole dataset

tokenCounts = nltk.FreqDist(corpus3.split())
words = tokenCounts.most_common()       # list of (token, count) pairs

wordsDict = {token: tokenId for tokenId, (token, count) in enumerate(words, 2)}

dictionaryPath = dataPath + 'dictionary.pkl'
with open(dictionaryPath, 'wb') as dictionaryFile:
    pickle.dump(wordsDict, dictionaryFile)

In [15]:
# Peek at the ten most frequent tokens. 'a' and 'A' show up as separate
# entries because the captions are split on whitespace without lower-casing.
words[:10]

[(u'a', 660420),
 (u'A', 359312),
 (u'on', 221008),
 (u'of', 211224),
 (u'in', 189904),
 (u'the', 177513),
 (u'with', 160057),
 (u'and', 145571),
 (u'is', 100190),
 (u'man', 71303)]

In [16]:
# Extract conv5_4 activations for every image of every split and save them as
# sparse float32 matrices with one row per image, in list-file order.
#   dev / test -> pickled scipy CSR matrix  (coco_feature.<split><prefix>.pkl)
#   train      -> CSR components via np.savez (coco_feature.train<prefix>.npz)

# `import scipy` alone is not guaranteed to expose the sparse sub-package.
import scipy.sparse

files = ['dev', 'test', 'train']

# Each image is copied into every slot of the input blob and only slot 0 of
# conv5_4 is read back, so shrink the batch dimension to 1 to avoid running
# the network on duplicates. The layers are reshaped on the next forward().
net.blobs['data'].reshape(1, *net.blobs['data'].data.shape[1:])

# Flattened length of one image's conv5_4 activation (100352 in the recorded run).
featureDim = int(np.prod(net.blobs['conv5_4'].data.shape[1:]))

for fname in files:
    print(fname)

    # One image file name per line.
    with open(dataPath + 'coco.' + fname + 'Images.txt') as f:
        imageList = [line.rstrip() for line in f]
    numImage = len(imageList)

    # Allocate directly as float32: the matrix is saved as float32 anyway, and
    # a float64 buffer would double the peak memory of the dense matrix.
    result = np.empty((numImage, featureDim), dtype=np.float32)

    for i, fn in enumerate(imageList):
        image = caffe.io.load_image(preprocessedImagesPath + fn)
        net.blobs['data'].data[...] = transformer.preprocess('data', image)
        net.forward()
        feat = net.blobs['conv5_4'].data[0]

        # Swap the first and last axes of the per-image activation before
        # flattening (presumably channels-first -> channels-last; confirm
        # against the consumer of these features).
        result[i, :] = np.swapaxes(feat, 0, 2).reshape(-1)

        # In-place progress counter.
        stdout.write("\r%d" % (i + 1))
        stdout.flush()

    print(result.shape)

    resultSave = scipy.sparse.csr_matrix(result)
    resultSave32 = resultSave.astype('float32')

    if fname == 'train':
        # The train matrix is stored as raw CSR components instead of a pickle.
        np.savez(dataPath + 'coco_feature.' + fname + experimentPrefix,
                 data=resultSave32.data,
                 indices=resultSave32.indices,
                 indptr=resultSave32.indptr,
                 shape=resultSave.shape)
    else:
        with open(dataPath + 'coco_feature.' + fname + experimentPrefix + '.pkl', 'wb') as featureFile:
            pickle.dump(resultSave32, featureFile, -1)



dev
5000(5000, 100352)
test
5000(5000, 100352)
train
82783(82783, 100352)


NameError: name 'coco_feature' is not defined

In [17]:
# Manual re-save of the features of the last processed split. This relies on
# fname, resultSave and resultSave32 still being in the kernel from the
# extraction loop, so it only works when run right after that cell.
np.savez(
    dataPath + 'coco_feature.' + fname + experimentPrefix,
    data=resultSave32.data,
    indices=resultSave32.indices,
    indptr=resultSave32.indptr,
    shape=resultSave.shape,
)
  


In [18]:
def save_sparse_csr(filename, array):
    """Write the components of a CSR matrix to an uncompressed .npz archive.

    filename: target path handed to np.savez (which adds '.npz' if missing).
    array: object exposing data, indices, indptr and shape attributes.
    The archive contains four entries named after those attributes.
    """
    components = {
        'data': array.data,
        'indices': array.indices,
        'indptr': array.indptr,
        'shape': array.shape,
    }
    np.savez(filename, **components)

def load_sparse_csr(filename):
    """Rebuild a scipy CSR matrix from an archive written by save_sparse_csr.

    filename: path of the .npz archive, or an open binary file object. The
        name originally passed to save_sparse_csr (without the '.npz' that
        np.savez appends) is accepted as well.
    Returns a scipy.sparse.csr_matrix built from the archive's 'data',
    'indices', 'indptr' and 'shape' entries.
    """
    # Imported here because the notebook never imports csr_matrix by name.
    import os
    from scipy.sparse import csr_matrix

    # np.savez adds '.npz' to names that lack it; mirror that on the way in.
    if not hasattr(filename, 'read') and not os.path.exists(filename):
        withExtension = '%s.npz' % (filename,)
        if os.path.exists(withExtension):
            filename = withExtension

    # The archive is a context manager; closing it releases the file handle.
    # Each entry is read fully into memory on access, so the matrix stays
    # valid after the archive is closed.
    with np.load(filename) as loader:
        shape = tuple(int(dim) for dim in loader['shape'])
        return csr_matrix((loader['data'], loader['indices'], loader['indptr']),
                          shape=shape)

In [19]:
# Build the caption/feature alignment files for the test and dev splits.
# Each output pickle holds two consecutive objects: first the list of
# [caption, image_index] pairs, then the sparse feature matrix of the split.
# image_index is the line number of the image in the split's list file.
files = ['test', 'dev']

for name in files:
    # NOTE: unpickling executes arbitrary code; only load files written by
    # this notebook.
    with open(dataPath + 'coco_feature.' + name + experimentPrefix + '.pkl', 'rb') as featFile:
        feat = pickle.load(featFile)

    cap = []
    with open(dataPath + 'coco.' + name + 'Images.txt') as filenames:
        for imageIndex, imageFile in enumerate(filenames):
            for sen in capDict[imageFile.rstrip()]:
                cap.append([sen.rstrip(), imageIndex])

    # `with` closes the handles even if pickling fails part-way.
    with open(dataPath + 'coco_align.' + name + experimentPrefix + '.pkl', 'wb') as saveFile:
        pickle.dump(cap, saveFile, protocol=pickle.HIGHEST_PROTOCOL)
        pickle.dump(feat, saveFile, protocol=pickle.HIGHEST_PROTOCOL)
            
    
    

In [20]:
# Build the caption alignment file for the train split.
# Unlike test/dev, only the list of [caption, image_index] pairs is pickled;
# the train features are stored separately through np.savez by the
# feature-extraction cell.
files = ['train']

for name in files:
    cap = []
    with open(dataPath + 'coco.' + name + 'Images.txt') as filenames:
        # image_index is the line number of the image in the list file.
        for imageIndex, imageFile in enumerate(filenames):
            for sen in capDict[imageFile.rstrip()]:
                cap.append([sen.rstrip(), imageIndex])

    with open(dataPath + 'coco_align.' + name + experimentPrefix + '.pkl', 'wb') as saveFile:
        pickle.dump(cap, saveFile, protocol=pickle.HIGHEST_PROTOCOL)

In [21]:
# Spot-check the id assigned to the capitalised token 'Two' (ids start at 2).
wordsDict['Two']

22

In [22]:
# Inspect the sparse float32 feature matrix left in the kernel by the
# feature-extraction loop (the last split it processed).
resultSave32

<82783x100352 sparse matrix of type '<type 'numpy.float32'>'
	with 502170223 stored elements in Compressed Sparse Row format>