In [None]:
%matplotlib inline
import os, logging
import random

import h5py
import matplotlib.pyplot
import numpy as np
from matplotlib.pyplot import imshow
from PIL import Image
from scipy.spatial import distance
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

from keras.applications.vgg19 import VGG19
from keras.applications.vgg19 import preprocess_input
from keras.models import Model
from keras.preprocessing import image


In [None]:
# Configure the root logger: "<timestamp> <message>" records, DEBUG level and above.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(message)s',
)

In [None]:
# Feature extractor: VGG19 with ImageNet weights, cut at the 'fc1' layer so that
# model.predict() returns that layer's activations.
base_model = VGG19(weights='imagenet')
# Passed positionally: the keyword names differ between Keras releases
# (input/output vs. inputs/outputs), while the positional form is accepted by both.
model = Model(base_model.input, base_model.get_layer('fc1').output)

In [None]:
# Directory holding the images to index; replace the placeholder before running.
images_path = "set your image path"
# Keep regular files only (a sub-directory cannot be loaded as an image) and sort
# the names so the feature rows are produced in a reproducible order.
images = sorted(
    name for name in os.listdir(images_path)
    if os.path.isfile(os.path.join(images_path, name))
)

In [None]:
# Run every listed image through the truncated network and collect one feature row each.
activations = []

for idx, image_path in enumerate(images):
    if idx % 20 == 0:
        # Progress message every 20 images; formatting is left to the logging module.
        logging.info("getting activations for %d/%d %s", idx + 1, len(images), image_path)

    # os.path.join works whether or not images_path ends with a path separator.
    img = image.load_img(os.path.join(images_path, image_path), target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)  # add the leading batch axis
    x = preprocess_input(x)

    # predict() is given a batch of one image; store the single row itself so the
    # collected list stacks to (n_images, n_features) without an extra axis.
    feat = model.predict(x)
    activations.append(feat[0])

In [None]:
#Save the feature file
# NOTE(review): "/feature.hdf5" points at the filesystem root, while the loading cell
# reads "./validate/features/shopfeat.hdf5" - confirm the intended location.
try:
    text_type = unicode  # Python 2 text type
except NameError:
    text_type = str      # Python 3: `unicode` does not exist, `str` is the text type

with h5py.File("/feature.hdf5", "w") as hf:
    # Variable-length string dtype for the image file names.
    dt = h5py.special_dtype(vlen=text_type)
    hf.create_dataset("images", (len(images),), dtype=dt, data=images)
    # Collapse any per-image batch axis so the array really is (n_images, 4096).
    feature_rows = np.asarray(activations).reshape(len(activations), 4096)
    hf.create_dataset("imgfeatures", (len(activations), 4096), data=feature_rows)

In [None]:
# Set your image path
# Placeholder for the directory of query images; replace it with a real path.
# NOTE(review): images_path_1 is not referenced by any other cell visible in this
# notebook - confirm whether it is still needed.
images_path_1 = "image path for query images"

In [None]:
#read the stored feature file
with h5py.File("./validate/features/shopfeat.hdf5", "r") as hf:
    print (list(hf.keys()))
    # File names may come back as bytes depending on the Python/h5py version;
    # decode those so they can be joined with a text directory path later.
    # (On Python 2 `bytes` is `str`, so nothing is decoded there.)
    images = [
        name.decode("utf-8") if isinstance(name, bytes) and not isinstance(name, str) else name
        for name in hf["images"][:]
    ]
    # Read the whole feature matrix in one go, then keep it as a list of rows.
    imgfeat = list(hf["imgfeatures"][:])

In [None]:
# Reduce the number of features with Principal Component Analysis (300 components).
featacts = np.array(imgfeat)
# fit() returns the fitted estimator, so construction and fitting can be chained.
pca = PCA(n_components=300).fit(featacts)
acts = pca.transform(featacts)

In [None]:
# In case you want to save the PCA-reduced activations for later use
# acts.shape
# with h5py.File("features/pca_activations.hdf5", "w") as hf:
# 	dt = h5py.special_dtype(vlen=unicode)
# 	hf.create_dataset("pca_acts", (len(acts),300), data=acts)

In [None]:
def get_concatenated_images(indexes, thumb_height):
    """Return one image array with the selected thumbnails placed side by side.

    indexes: positions into the module-level ``images`` list of file names.
    thumb_height: height in pixels every thumbnail is resized to.

    Returns the thumbnails concatenated along the width axis. An empty
    ``indexes`` makes ``np.concatenate`` raise ValueError.
    """
    thumbs = [
        # os.path.join works whether or not images_path ends with a separator.
        get_image(os.path.join(images_path, images[idx]), thumb_height)
        for idx in indexes
    ]
    return np.concatenate([np.asarray(t) for t in thumbs], axis=1)

def get_image(path, thumb_height):
    """Open ``path`` and return it as an RGB image resized to ``thumb_height`` pixels tall.

    The width is scaled by the same factor as the height.
    """
    img = Image.open(path)
    # Floor division keeps the width an integer on Python 3 as well (true division
    # would yield a float there); at least 1 px so very tall images still resize.
    thumb_width = max(1, img.width * thumb_height // img.height)
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter under
    # its current name. Fall back to ANTIALIAS where LANCZOS is not available.
    resample = Image.LANCZOS if hasattr(Image, 'LANCZOS') else Image.ANTIALIAS
    img = img.resize((thumb_width, thumb_height), resample)
    if img.mode != 'RGB':
        img = img.convert('RGB')
    return img

In [None]:
def get_closest_images(query_image_idx, num_results=7):
    """Return the indices of the images closest to the query image.

    Distances are Euclidean and measured between rows of the module-level
    ``acts`` array.

    query_image_idx: row of ``acts`` used as the query.
    num_results: maximum number of neighbours to return.

    Returns a list of row indices ordered from nearest to farthest, never
    containing ``query_image_idx`` itself.
    """
    query = acts[query_image_idx]
    distances = [distance.euclidean(query, act) for act in acts]
    ranked = sorted(range(len(distances)), key=lambda k: distances[k])
    # Drop the query by index rather than by rank: with duplicate feature vectors
    # the query is not guaranteed to be the first entry of the ranking.
    return [k for k in ranked if k != query_image_idx][:num_results]

In [None]:
def run_query(query_image_idx=None):
    """Show a query image and its nearest neighbours in two matplotlib figures.

    query_image_idx: row of ``acts`` / entry of ``images`` to use as the query.
        When omitted, an index is drawn at random.
    """
    if query_image_idx is None:
        # Draw from the actual number of indexed images instead of a fixed 401.
        query_image_idx = random.randrange(len(acts))
    idx_closest = get_closest_images(query_image_idx)
    query_image = get_concatenated_images([query_image_idx], 300)
    results_image = get_concatenated_images(idx_closest, 200)

    matplotlib.pyplot.figure(figsize=(5, 5))
    imshow(query_image)
    matplotlib.pyplot.title("query image (%d)" % query_image_idx)
    matplotlib.pyplot.figure(figsize=(12, 12))
    imshow(results_image)
    matplotlib.pyplot.title("result images")

In [None]:
# Display a randomly chosen query image together with its closest matches.
run_query()

In [None]:
# query_image = np.asarray(get_image("testImages/saree_girl.jpg", 300))
# print query_image.shape
# thumb_height = 200
# distances = [ distance.euclidean(query_image, act) for act in acts ]
# idx_closest = sorted(range(len(distances)), key=lambda k: distances[k])[1:num_results+1]

In [None]:
# Rebuild the feature extractor, this time cut at the 'fc2' layer.
# NOTE(review): the extraction cell earlier in the notebook builds `model` from 'fc1';
# query features and stored features should come from the same layer for their
# distances to be meaningful - confirm which layer produced the stored feature file.
base_model = VGG19(weights='imagenet')
# Passed positionally: the keyword names differ between Keras releases
# (input/output vs. inputs/outputs), while the positional form is accepted by both.
model = Model(base_model.input, base_model.get_layer('fc2').output)

In [None]:
# Extract the feature vector of a single query image and project it with the fitted PCA.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
img = image.load_img("/home/pankaj/anaconda2/image_experiment/validate/exp1/dog.1028.jpg", target_size=(224, 224))
# Image -> array -> batch of one -> model-specific preprocessing.
x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))
# Flatten the single prediction row to a 4096-element vector.
feat = model.predict(x).reshape(4096,)
# transform() expects a 2-D input, hence the one-element list.
lst = pca.transform([feat])

In [None]:
# Euclidean distance from the projected query vector to every stored image.
distances = [distance.euclidean(lst[0], act) for act in acts]
# Ranks 1..5 of the ordering; rank 0 (the single nearest image) is skipped.
# NOTE(review): if the query image is not part of `acts`, this drops the best match -
# confirm that skipping rank 0 is intended here.
idx_closest = sorted(range(len(distances)), key=distances.__getitem__)[1:6]
results_image = get_concatenated_images(idx_closest, 200)

# First figure: the query image as loaded.
matplotlib.pyplot.figure(figsize=(5, 5))
imshow(img)
matplotlib.pyplot.title("query image original")
# Second figure: the retrieved neighbours side by side.
matplotlib.pyplot.figure(figsize=(12, 12))
imshow(results_image)
matplotlib.pyplot.title("result images")

In [None]:
# Embed the PCA-reduced activations in two dimensions with t-SNE.
# TSNE is provided by sklearn.manifold and is imported in the notebook's import cell.
X = np.array(acts)
# `tsne` holds the embedded coordinates returned by fit_transform, not the estimator.
tsne = TSNE(
    n_components=2,
    learning_rate=150,
    perplexity=30,
    verbose=2,
).fit_transform(X)

In [None]:
# Call form of print: valid on Python 3 and prints the same text on Python 2.
print("Done")

In [None]:
# Min-max scale each t-SNE coordinate column to the interval [0, 1].
tx = tsne[:, 0]
ty = tsne[:, 1]
tx = (tx - tx.min()) / (tx.max() - tx.min())
ty = (ty - ty.min()) / (ty.max() - ty.min())

In [None]:
# Paste a small tile of every image onto one canvas at its normalised t-SNE position.
width = 3000
height = 3000
max_dim = 100  # upper bound for the longer side of a tile, in pixels

# Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter under its
# current name. Fall back to ANTIALIAS where LANCZOS is not available.
resample = Image.LANCZOS if hasattr(Image, 'LANCZOS') else Image.ANTIALIAS

full_image = Image.new('RGB', (width, height))
for name, x, y in zip(images, tx, ty):
    # Open the individual image file for this point, not the directory itself.
    tile = Image.open(os.path.join(images_path, name))
    # Shrink factor so that neither side exceeds max_dim; never below 1, so small
    # images are not enlarged.
    rs = max(1.0, float(tile.width) / max_dim, float(tile.height) / max_dim)
    # resize() needs integer dimensions; keep each side at least 1 px.
    tile_size = (max(1, int(tile.width / rs)), max(1, int(tile.height / rs)))
    tile = tile.resize(tile_size, resample)
    # Offsets are scaled by (canvas - max_dim) so a tile at coordinate 1.0 still fits.
    full_image.paste(tile, (int((width - max_dim) * x), int((height - max_dim) * y)))

matplotlib.pyplot.figure(figsize=(12, 12))
imshow(full_image)