In [1]:
import scipy
%matplotlib inline
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
import pylab
pylab.rcParams['figure.figsize'] = (10.0, 8.0)
import matplotlib.patches as mpatches
import sys
import os

# Import the Refexp Python class.
# Please MAKE SURE that the google_refexp_py_lib directory (appended to
# sys.path below) is on your Python library search path.
sys.path.append("../data/Google_Refexp_toolbox/google_refexp_py_lib/")
from refexp import Refexp

In [2]:
refexp_filename= '../data/Google_Refexp_toolbox/google_refexp_dataset_release/google_refexp_train_201511_coco_aligned.json'
coco_filename= '../data/Google_Refexp_toolbox/external/coco/annotations/instances_train2014.json'
imagesDir = '../data/Google_Refexp_toolbox/external/coco/images'
imagesType = 'train2014'

# Create Refexp instance.
refexp = Refexp(refexp_filename, coco_filename)

loading annotations into memory...
Done (t=34.61s)
creating index...
index created!
Dataset loaded.


In [3]:
# Gather the image ids that belong to the first ten categories, flattened
# into a single list (category order preserved).
catIds = refexp.getCatIds()
imgIds = [
    img_id
    for cat_pos in range(10)
    for img_id in refexp.getImgIds(catIds=catIds[cat_pos])
]

print("Loading train images...")
# Judging by how later cells index the result, loadImgs yields a pair:
# element 0 holds the image records and element 1 the image ids.
# The first 20000 ids form the training split, the remainder the test split.
train_images = refexp.loadImgs(imgIds[:20000])
test_images = refexp.loadImgs(imgIds[20000:])

print("Train images loaded..")
# print len(train_images), len(test_images)

Loading train images...
Train images loaded..


In [4]:
# For every training image id, fetch the list of annotation ids attached to it.
ids = train_images[1]
train_annotations = [refexp.getAnnIds(image_id) for image_id in ids]
print("Training Annotations Loaded..")
# print len(train_annotations)

Training Annotations Loaded..


In [5]:
print("Generating annotation..")
# Keep one annotation per image: look up the first annotation id of each
# image and take the first record loadAnns returns for it.
annotations = [
    refexp.loadAnns(ann_ids[0])[0]
    for ann_ids in train_annotations
]

Generating annotation..


In [6]:
# Number of annotations kept (one per training image).
print(len(annotations))

16208


In [7]:
# Build one 5-D location feature per annotation. Column layout (unchanged,
# because the cropping cell and the saved .mat file rely on it):
#   0: x          left edge
#   1: y + h      bottom edge in image coordinates (rows grow downwards)
#   2: x + w      right edge
#   3: y          top edge in image coordinates
#   4: area / (224 * 224)
# COCO stores boxes as [x, y, width, height] with (x, y) the top-left corner,
# so columns 1 and 3 are the bottom and top rows respectively.
# NOTE(review): the area is divided by 224*224 although the boxes are in
# original-image pixels -- confirm this normalisation is intended.
fiveDVec = np.empty((len(annotations), 5))  # sized from the data, not hard-coded
for row, ann in enumerate(annotations):
    x, y, w, h = ann['bbox'][:4]
    # 224.0 forces true division even if 'area' is an int under Python 2.
    fiveDVec[row] = [x, y + h, x + w, y, ann['area'] / (224.0 * 224)]

In [8]:
# Show the location feature of the first annotation as a spot check.
print(fiveDVec[0])

[  1.53260000e+02   5.65280000e+02   3.16820000e+02   3.09550000e+02
   2.89506402e-01]


In [8]:
import scipy.io

# Persist the location features to a MATLAB file under the key 'out'.
matfile = '5Dvec.mat'
scipy.io.savemat(matfile, {'out': fiveDVec}, oned_as='row')
print("Data Saved")

Data Saved


In [9]:
# Spot check: read the first training image from disk and show its shape.
img = train_images[0][0]
img_path = os.path.join(imagesDir, imagesType, img['file_name'])
I = io.imread(img_path)
print(I.shape)

(640, 359, 3)


In [10]:
import cv2


def _crop_and_pad(image, box, side=224):
    """Crop `box` out of `image` and fit it into a side x side square.

    image: H x W (grayscale) or H x W x C array as returned by io.imread.
    box:   one row of fiveDVec, i.e. [x_left, y_bottom, x_right, y_top, ...].
    side:  edge length of the square output.

    Returns a (C, side, side) channel-first array. The crop is scaled so
    that its longer edge equals `side` and is zero-padded on the right and
    bottom. Raises ValueError when the box selects no pixels.
    """
    # Slice bounds must be integers; the stored box coordinates are floats.
    x_left, y_bottom, x_right, y_top = [int(v) for v in box[:4]]
    crop = image[y_top:y_bottom, x_left:x_right]
    if crop.shape[0] == 0 or crop.shape[1] == 0:
        raise ValueError("empty crop for box %r" % ([x_left, y_bottom, x_right, y_top],))
    if crop.ndim == 2:
        # Grayscale image: replicate the single channel so the result has 3.
        crop = np.dstack([crop] * 3)

    scale = float(side) / max(crop.shape[0], crop.shape[1])
    res = cv2.resize(crop, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
    # Rounding inside cv2.resize may leave the long edge one pixel off;
    # clip and pad both axes so the result is always exactly side x side.
    res = res[:side, :side]
    res = cv2.copyMakeBorder(res, 0, side - res.shape[0], 0, side - res.shape[1],
                             cv2.BORDER_CONSTANT, value=0)
    # HWC -> CHW. ndarray.resize would merely reinterpret the buffer and
    # scramble the pixels; a transpose actually moves the channel axis.
    return res.transpose(2, 0, 1)


# Crop the annotated region out of every training image.
# Image i is paired with row i of fiveDVec.
reg_imgs = []
for i, img in enumerate(train_images[0]):
    try:
        I = io.imread(os.path.join(imagesDir, imagesType, img['file_name']))
        reg_imgs.append(_crop_and_pad(I, fiveDVec[i]))
    except Exception as err:
        # Best effort: report the failing index together with the reason and
        # carry on. Skipped images leave reg_imgs shorter than fiveDVec, so
        # the two are no longer index-aligned after any failure.
        print("skipping image %d: %r" % (i, err))



In [12]:
import scipy.io

# Stack the per-image crops into a single array and report its shape.
reg_imgs = np.asarray(reg_imgs)
print(reg_imgs.shape)

# Write the stacked crops to a MATLAB file under the key 'out'.
matfile = 'baseline_bb.mat'
scipy.io.savemat(matfile, {'out': reg_imgs}, oned_as='row')
print("Data Saved")

(16208, 3, 224, 224)
Data Saved


In [1]:
from keras.layers.core import Flatten
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, RepeatVector
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM
from keras.layers.wrappers import TimeDistributed
from keras.layers import Merge
from keras.optimizers import SGD
from keras.layers.core import Activation
import numpy as np
import h5py

Using Theano backend.
Using gpu device 3: GeForce GTX 1080 (CNMeM is disabled, cuDNN 5105)


In [2]:
# VGG16-style image encoder: five convolutional blocks followed by two
# 4096-unit fully-connected layers (no final classification layer).
# Each entry is (number of filters, number of 3x3 conv layers in the block).
VGG16_CONV_BLOCKS = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

image_model = Sequential()
for nb_filter, nb_conv in VGG16_CONV_BLOCKS:
    for _ in range(nb_conv):
        # Only the very first layer of the model declares the input shape
        # (channels-first 3 x 224 x 224).
        pad_kwargs = {} if image_model.layers else {'input_shape': (3, 224, 224)}
        image_model.add(ZeroPadding2D((1, 1), **pad_kwargs))
        image_model.add(Convolution2D(nb_filter, 3, 3, activation='relu'))
    image_model.add(MaxPooling2D((2, 2), strides=(2, 2)))

image_model.add(Flatten())
image_model.add(Dense(4096, activation='relu'))
image_model.add(Dropout(0.5))
image_model.add(Dense(4096, activation='relu'))

# Copy the pre-trained weights layer by layer. The file is opened read-only
# inside a context manager so it is closed even when set_weights raises.
# NOTE(review): the recorded run failed in set_weights with a Theano
# DeepCopyOp error on the GPU -- possibly GPU memory exhaustion; confirm.
with h5py.File('vgg16_weights.h5', 'r') as f:
    # Load every layer defined above (including both Dense(4096) layers).
    # Layers stored in the savefile beyond those are ignored -- presumably
    # the final classifier; verify against the weights file.
    nb_loadable = min(int(f.attrs['nb_layers']), len(image_model.layers))
    for k in range(nb_loadable):
        g = f['layer_{}'.format(k)]
        weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
        image_model.layers[k].set_weights(weights)
print("VGG Loaded with pre-trained weights.")

  mode='max')
  mode='max')
  mode='max')


ValueError: DeepCopyOp: the copy failed!
Apply node that caused the error: DeepCopyOp(convolution2d_9_W)
Toposort index: 0
Inputs types: [CudaNdarrayType(float32, 4D)]
Inputs shapes: [(512, 512, 3, 3)]
Inputs strides: [(4608, 9, 3, 1)]
Inputs values: ['not shown']
Outputs clients: [['output']]

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.

In [17]:
# Compile the image model with SGD (Nesterov momentum) and a categorical
# cross-entropy loss.
sgd = SGD(
    lr=0.1,
    momentum=0.9,
    decay=1e-6,
    nesterov=True,
)
image_model.compile(loss='categorical_crossentropy', optimizer=sgd)

 <type 'numpy.ndarray'> (224, 224, 3)
