In [2]:
import os
import math
import random

import numpy as np
import tensorflow as tf
import cv2

slim = tf.contrib.slim

In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

In [3]:
import sys
sys.path.append('../')

In [4]:
from nets import ssd_vgg_300, ssd_common, np_methods
from preprocessing import ssd_vgg_preprocessing
import visualization

In [5]:
# TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
isess = tf.InteractiveSession(config=config)

## SSD 300 Model

The SSD 300 network takes 300x300 image inputs. In order to feed any image, the latter is resize to this input shape (i.e.`Resize.WARP_RESIZE`). Note that even though it may change the ratio width / height, the SSD model performs well on resized images (and it is the default behaviour in the original Caffe implementation).

SSD anchors correspond to the default bounding boxes encoded in the network. The SSD net output provides offset on the coordinates and dimensions of these anchors.

In [6]:
# Input placeholder.
net_shape = (300, 300)
data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
# Evaluation pre-processing: resize to SSD net shape.
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input, None, None, net_shape, data_format, resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)

# Define the SSD model.
reuse = True if 'ssd_net' in locals() else None
ssd_net = ssd_vgg_300.SSDNet()
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)

# Restore SSD model.
ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
# ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)

# SSD default anchor boxes.
ssd_anchors = ssd_net.anchors(net_shape)

INFO:tensorflow:Restoring parameters from ../checkpoints/ssd_300_vgg.ckpt


## Post-processing pipeline

The SSD outputs need to be post-processed to provide proper detections. Namely, we follow these common steps:

* Select boxes above a classification threshold;
* Clip boxes to the image shape;
* Apply the Non-Maximum-Selection algorithm: fuse together boxes whose Jaccard score > threshold;
* If necessary, resize bounding boxes to original image shape.

In [7]:
# Main image processing routine.
import pickle
def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
    # Run SSD network.
    rimg, rpredictions, rlocalisations, rbbox_img = isess.run([image_4d, predictions, localisations, bbox_img],
                                                              feed_dict={img_input: img})
    
    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions, rlocalisations, ssd_anchors,
            select_threshold=select_threshold, img_shape=net_shape, num_classes=21, decode=True)
    
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    with open('objs.pkl', 'w') as f:  # Python 3: open(..., 'wb')
        pickle.dump([rclasses,rscores,rbboxes], f)
    return rclasses, rscores, rbboxes

In [8]:
# Test on some demo image and visualize output.
import visualization
def bboxes_draw_on_img(img, classes, scores, bboxes, colors, thickness=2):
    shape = img.shape
    print('saving')
    for i in range(bboxes.shape[0]):
        
        #if ind == i:
            bbox = bboxes[i]
            color = colors[classes[i]]
            # Draw bounding box...
            p1 = (int(bbox[0] * shape[0]), int(bbox[1] * shape[1]))
            p2 = (int(bbox[2] * shape[0]), int(bbox[3] * shape[1]))
            cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness)
            crim = img[p1[::-1][1]:p2[::-1][1],p1[::-1][0]:p2[::-1][0]]
            cv2.imwrite('crop'+str(i)+'.jpg',crim)
            # Draw text...
            s = '%s/%.3f' % (classes[i], scores[i])
            p1 = (p1[0]-5, p1[1])
            cv2.putText(img, s, p1[::-1], cv2.FONT_HERSHEY_DUPLEX, 0.4, color, 1)
    cv2.imwrite('check2.jpg',img)
    return img
path = '../demo/'
image_names = sorted(os.listdir(path))
from IPython.display import Image
img = mpimg.imread('/home/subha/ReferralExp/ObjectRetreival/natural-language-object-retrieval/datasets/ReferIt/ImageCLEF/images/1139.jpg')
Image('/home/subha/ReferralExp/ObjectRetreival/natural-language-object-retrieval/datasets/ReferIt/ImageCLEF/images/7407.jpg')
rclasses, rscores, rbboxes =  process_image(img)
print(rclasses, rscores, rbboxes)
impl = bboxes_draw_on_img(img, rclasses, rscores, rbboxes, visualization.colors_plasma)
# impl = cv2.cvtColor(impl, cv2.COLOR_BGR2RGB)
# plt.imshow(impl)
# plt.show()
# visualization.plt_bboxes(img, rclasses, rscores, rbboxes)

(array([18,  9]), array([ 0.88927776,  0.52018094], dtype=float32), array([[ 0.48663515,  0.66947961,  0.94241518,  0.98651397],
       [ 0.27246457,  0.        ,  0.94371337,  0.13969558]], dtype=float32))
saving


<img src='check2.jpg'>

In [None]:
#rightmost animal
#1871
big boat at bottom
9538
orange boat on the right
8352


#result1 : person on horse - 15765
#result2 : 

In [1]:
#Get the query feature that was already extracted using skip-thought
import json
import pickle
def load_pickle(name):
    print(name)
    with open(name,"rb") as fp:
        l=pickle.load(fp)

    return l
with open('/home/subha/ReferralExp/ObjectRetreival/natural-language-object-retrieval/data/metadata/referit_query_dict.json') as fp:
    d = json.load(fp)
query =[u'left bed']
# print(d)
img_id = d.keys()[d.values().index(query)]
# img_id = d[query/

print(img_id)
qF = load_pickle('/home/subha/RefExpCVPR2018/penseur/textFeaturescache/text_'+str(img_id)+'.txt')
print(qF.shape)
# import os

# # import sys
# # os.chdir('/home/subha/RefExpCVPR2018/penseur')
# # sys.path.insert(0, '/home/subha/RefExpCVPR2018/penseur')
# import json
# # sys.path.append('/home/subha/RefExpCVPR2018/penseur')
# import skipthoughts
# import pickle
# import penseur
# p = penseur.Penseur()
# qF = p.get_vector(query)

print(qF.shape)

#Extract image features
import pickle
import visualization
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
model = VGG16(weights='imagenet', include_top=False)
import cv2
import random
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.cm as mpcm
imF=[]
def bboxes_draw_on_img(img, classes, scores, bboxes, colors, qF):
    shape = img.shape
    dis = 1000
    res_img = img.copy()
    print('saving')
    thickness=2
    for i in range(bboxes.shape[0]):
        bbox = bboxes[i]
        color = colors[classes[i]]
        # Draw bounding box...
        coord = np.array([bbox[0],bbox[1],bbox[2],bbox[3]])
        p1 = (int(bbox[0] * shape[0]), int(bbox[1] * shape[1]))
        p2 = (int(bbox[2] * shape[0]), int(bbox[3] * shape[1]))
        cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness)
        crim = img[p1[::-1][1]:p2[::-1][1],p1[::-1][0]:p2[::-1][0]]
        cv2.imwrite('crop'+str(i)+'.jpg',crim)
        cimg = image.load_img('crop'+str(i)+'.jpg', target_size=(224, 224))
        x=image.img_to_array(cimg)
        x = np.expand_dims(x,axis=0)
        x = preprocess_input(x)
        features=model.predict(x)
        c=imname
        c=c+'_fc7.npy'
        f=np.load('/home/subha/ReferralExp/ObjectRetreival/natural-language-object-retrieval/data/referit_context_features/'+c)
        iF=np.concatenate((features.flatten(),f.flatten()),axis=0)
        iF = np.concatenate((iF,np.array(coord)),axis=0)
        imF.append(iF)
        
        
        
#         correctBox = [p1,p2]
        
        # Draw text...
        s = '%s/%.3f' % (classes[i], scores[i])
        p1 = (p1[0]-5, p1[1])
        cv2.putText(img, s, p1[::-1], cv2.FONT_HERSHEY_DUPLEX, 0.4, color, 1)
    cv2.imwrite('check.jpg',img)
    return img,imF
imname='7407'
img = mpimg.imread('/home/subha/ReferralExp/ObjectRetreival/natural-language-object-retrieval/datasets/ReferIt/ImageCLEF/images/1139.jpg')
with open('objs.pkl') as f:  # Python 3: open(..., 'rb')
    rclasses, rscores, rbboxes = pickle.load(f)
# rclasses, rscores, rbboxes =  process_image(img)
print(rbboxes)
impl,imF= bboxes_draw_on_img(img, rclasses, rscores, rbboxes, visualization.colors_plasma,qF)
imF=np.array(imF)
print(imF.shape)
with open('features.pkl', 'w') as f:  # Python 3: open(..., 'wb')
        pickle.dump([imF,qF], f)


1139_1
/home/subha/RefExpCVPR2018/penseur/textFeaturescache/text_1139_1.txt
(1, 4800)
(1, 4800)


Using TensorFlow backend.


[[ 0.48663515  0.66947961  0.94241518  0.98651397]
 [ 0.27246457  0.          0.94371337  0.13969558]]
saving
(2, 29188)


In [1]:

from __future__ import absolute_import
from __future__ import print_function
import numpy as np
import pickle
import visualization
import random
from keras.layers import Input, Dense
from keras.models import Model
from keras.datasets import mnist
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Input, Lambda
from keras.optimizers import RMSprop
from keras import backend as K
from keras.utils.vis_utils import plot_model
import tensorflow as tf
import cv2
import os
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

config = tf.ConfigProto()
config.gpu_options.allocator_type = 'BFC'
config.gpu_options.per_process_gpu_memory_fraction = 0.90

num_classes = 10
textNames=os.listdir('/home/subha/RefExpCVPR2018/penseur/textFeaturescache')
imgNames=os.listdir('/home/subha/RefExpCVPR2018/penseur/finalTrain/imgCache')


def euclidean_distance(vects):
    x, y = vects
    return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))


def eucl_dist_output_shape(shapes):
    shape1, shape2 = shapes
    return (shape1[0], 1)


def contrastive_loss(y_true, y_pred):
    '''Contrastive loss from Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    '''
    margin = 1
    return K.mean(y_true * K.square(y_pred) +
                  (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))

def bboxPred(model,qF,imF):
#     sz = imF.shape[0]
#     qF =np.tile(qF,(sz,1))
#     sc=model.predict([imF,qF])
#     sc=sc.ravel()
#     print(sc)
#     print(imF[0],imF[1])
    small=100
    smallindex=0
    for i in range(imF.shape[0]):
        temp=np.array([imF[i]])
        sc=model.predict([temp,qF])
        if sc.ravel<small:
            small=sc
            smallindex=i
            
        print(sc)
        return smallindex
        
#     r=sc.argsort()
#     print(r,i)


with open('features.pkl') as f:  # Python 3: open(..., 'rb')
    imF,qF = pickle.load(f)
epochs = 30
# te1,te2,tey =np.load('te1.npy'),np.load('te2.npy'),np.load('tey.npy')/
input_a = Input(shape=(29188,))
input_b = Input(shape=(4800,))

# because we re-use the same instance `base_network`,
# the weights of the network
# will be shared across the two branches
project_a = Input(shape=(29188,))
x = Dense(4800,activation ='relu')(project_a)

shared_fc = Dense(128)
processed_a = shared_fc(x)
processed_b = shared_fc(input_b)

distance = Lambda(euclidean_distance,
                  output_shape=eucl_dist_output_shape)([processed_a, processed_b])

model = Model([ project_a,input_b], [distance])

# train
rms = RMSprop()
model.compile(loss=contrastive_loss, optimizer=rms)
model.load_weights("/home/subha/RefExpCVPR2018/penseur/finalTrain/modelFinal.h5")
ind=bboxPred(model,qF,imF)
# print(ind)
def bboxes_draw_on_img(img, classes, scores, bboxes,ind, colors, thickness=2):
    shape = img.shape
    print('saving',ind)
    for i in range(bboxes.shape[0]):
        
        if ind == i:
            bbox = bboxes[i]
            color = colors[classes[i]]
            # Draw bounding box...
            p1 = (int(bbox[0] * shape[0]), int(bbox[1] * shape[1]))
            p2 = (int(bbox[2] * shape[0]), int(bbox[3] * shape[1]))
            cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness)
            crim = img[p1[::-1][1]:p2[::-1][1],p1[::-1][0]:p2[::-1][0]]
            cv2.imwrite('crop'+str(i)+'.jpg',crim)
            # Draw text...
            s = '%s/%.3f' % (classes[i], scores[i])
            p1 = (p1[0]-5, p1[1])
            cv2.putText(img, s, p1[::-1], cv2.FONT_HERSHEY_DUPLEX, 0.4, color, 1)
    cv2.imwrite('resultbbox2.jpg',img)
    return img
with open('objs.pkl') as f:  # Python 3: open(..., 'rb')
    rclasses, rscores, rbboxes = pickle.load(f)
img = mpimg.imread('/home/subha/ReferralExp/ObjectRetreival/natural-language-object-retrieval/datasets/ReferIt/ImageCLEF/images/1139.jpg')  
impl =bboxes_draw_on_img(img, rclasses, rscores, rbboxes,ind, visualization.colors_plasma)
# y_pred = model.predict([te1,te2])
# te_acc,recall_1 = compute_accuracy(tey, y_pred)
# results=[te_acc,recall_1]
# thefile = open('results_train.txt', 'w')
# for item in results:
#   thefile.write("%s\n" % item)
# print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc))
# print('* Accuracy on test set: %0.2f%%' % (100 * te_acc))
# print('Recall@5: %0.2f%%'%(recall_1))



Using TensorFlow backend.


[[ 0.46277404]]
saving 0


<img src='resultbbox2.jpg'>

In [None]:
#Extract query features


In [None]:
# Test to retrieve