# Deep Learning image search

**Image search engines:**
Generally speaking, a search engine takes a query and returns results. An image search engine takes an input image as the query, finds the "similar" images within its indexed database, and returns them as the search result.

**How to measure similarity between two images?**   
- 1- **Pixel space**: One simple way is to measure the Euclidean distance between the two images in pixel space. Accordingly, if two images have equal or close values for the corresponding pixels, they are considered "similar". This can work sometimes; however, images of a dolphin and an airplane with blue backgrounds will be considered similar from the pixels' point of view, and we do not want that!

- 2- **Feature space**: Another approach is to use the feature space instead of the pixel space when computing the Euclidean distance between the two images. In other words, project the images into a space where images with similar features are close to each other. In this space, dolphins and airplanes are separated despite their pixel-level similarity.

**How to get features from images?**  
Now the question is: how do we project an arbitrary image into a space where images with similar complex content are grouped together? Well, the answer is easy! Just use a pre-trained generic network such as InceptionV3, trained on the well-known ImageNet Large Scale Visual Recognition Challenge (ILSVRC). The network is trained to classify an input image into one of 1000 different classes. Accordingly, if we feed the network an arbitrary image and take the activations just before the output layer, we get a strong feature vector that summarizes the content of the input image. That's it.

For the image search engine, we are going to use the feature vector generated by a pre-trained network (InceptionV3, for instance) instead of the naive pixel-wise approach.

**Dataset:**   
In the following examples we used the Caltech 101 dataset. For simplicity, only 9 classes are used: 

`['airplanes', 'Motorbikes', 'Faces', 'Faces_easy', 'Leopards', 'car_side', 'grand_piano', 'brain', 'butterfly']`




**Implementation Steps:**   
- Download the data and convert it into X (images) and y (labels)  
- Randomize the data and select a subset of the data as the dataset on which we want to conduct the search
- Load the InceptionV3 network with weights and without the top part to get the high level features.
- Compute the feature for all images in the dataset
- For a query image: compute its feature vector, loop over the dataset to find the Euclidean distance between the query image features and each dataset image's features, and return the nearest results.

Note: if the dataset is very large, more optimized methods, such as KDTree and BallTree, can be used to find the nearest candidates to the query image.


In [None]:
from keras.applications.vgg16 import VGG16
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
#from keras.applications.vgg16 import preprocess_input
from keras.applications.inception_v3 import preprocess_input
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

In [None]:
# Feature extractor: InceptionV3 with ImageNet weights and without its top
# (include_top=False), as described in the implementation steps above.
# VGG16 can be loaded the same way: VGG16(weights='imagenet', include_top=False)
model = InceptionV3(include_top=False, weights='imagenet')
model.summary()

In [None]:
# List every layer of the network together with its position.
for layer_index, layer in enumerate(model.layers):
    print(layer_index, layer.name)

# Caltech 101

http://www.vision.caltech.edu/Image_Datasets/Caltech101/

In [None]:
from urllib.request import urlretrieve
import os
import tarfile
import cv2
import pprint
import operator

In [None]:
# Download the Caltech 101 archive into the working directory unless it is
# already there.
URL_CALTECH_101_DATA = 'http://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz'

# '__file__' is a plain string here (notebooks do not define __file__), so
# this resolves to the current working directory.
current_directory = os.path.dirname(os.path.realpath('__file__'))
dataset_file_path = current_directory+"/dataset.tgz"
if os.path.exists(dataset_file_path):
    print("Already downloaded.")
else:
    # Fetch under a temporary name and rename only after the download
    # finished, so an interrupted transfer is never mistaken for a complete
    # archive on the next run.
    partial_file_path = dataset_file_path + ".part"
    urlretrieve(URL_CALTECH_101_DATA, partial_file_path)
    os.replace(partial_file_path, dataset_file_path)
print("Done")

In [None]:
# Unpack the downloaded archive into ./data/.
os.makedirs('./data/', exist_ok=True)
# NOTE(review): extractall() trusts the member paths stored in the archive;
# only run this on an archive from a source you trust.
# The context manager closes the archive even if extraction fails.
with tarfile.open('./dataset.tgz', "r:gz") as tar:
    tar.extractall(path='./data/')
print("Data extracted")

In [None]:
# Count how many files each category folder contains.
data_directory = "./data"
categories_root = data_directory + "/101_ObjectCategories/"
categories = os.listdir(categories_root)
images_per_category = {
    name: len(os.listdir(categories_root + name)) for name in categories
}
# Despite its name, this ends up as a list of (category, count) pairs,
# ordered from the most populated category to the least.
object_images_count_dict = sorted(
    images_per_category.items(),
    key=lambda pair: pair[1],
    reverse=True,
)
print(object_images_count_dict)

In [None]:
# Number of category folders found in the dataset.
len(object_images_count_dict)

In [None]:
def get_images(object_category, data_directory):
    """Return the paths of all files stored for one Caltech 101 category.

    Args:
        object_category: Name of the category sub-folder, e.g. ``'airplanes'``.
        data_directory: Folder that contains ``101_ObjectCategories``.

    Returns:
        A sorted list of file paths inside
        ``<data_directory>/101_ObjectCategories/<object_category>``. If
        ``data_directory`` does not exist, "Data not found!" is printed and an
        empty list is returned, so callers can always iterate over the result.
    """
    if not os.path.exists(data_directory):
        print("Data not found!")
        return []
    obj_category_dir = os.path.join(data_directory, "101_ObjectCategories", object_category)
    # os.listdir() yields entries in arbitrary order; sort for reproducible runs.
    return [os.path.join(obj_category_dir, img) for img in sorted(os.listdir(obj_category_dir))]

def read_image(image_path):
    """Load an image file as an RGB array resized to 300x200 (width x height).

    Args:
        image_path: Path of the image file to read.

    Returns:
        The image decoded by OpenCV, converted from BGR to RGB channel order
        and resized with bicubic interpolation.

    Raises:
        IOError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread() reports failure by returning None instead of raising;
        # fail here with a clear message rather than inside cvtColor().
        raise IOError("Could not read image: " + str(image_path))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # correct colors (OpenCV loads BGR)
    # The size tuple is (width, height): the result has 200 rows and 300 columns.
    return cv2.resize(img, (300, 200), interpolation=cv2.INTER_CUBIC)

In [None]:
# Load every .jpg image of the selected categories into X, with the matching
# category name in Y (X[k] belongs to category Y[k]).
i = 0
# Buffer with room for up to 10000 images of 200x300 RGB pixels; it is
# trimmed to the number of images actually loaded once the loop finishes.
X = np.empty((10000, 200, 300, 3), dtype=np.uint8)
Y = []

selected_cls = ['airplanes', 'Motorbikes', 'Faces', 'Faces_easy', 'Leopards', 'car_side', 'grand_piano', 'brain', 'butterfly']

for category, _ in object_images_count_dict:
    if category not in selected_cls:
        continue
    # Named image_path (not image) so the imported keras `image` module is
    # not overwritten by the loop variable.
    for image_path in get_images(category, data_directory):
        if not image_path.endswith('.jpg'):
            continue
        X[i] = read_image(image_path)
        Y.append(category)
        i += 1
    # i already equals the number of images loaded so far.
    print(str(i) + "  " + category)


X = X[:i]
print("Done")


In [None]:
# Shape of the loaded image array after trimming to the images actually read.
X.shape

In [None]:
# Category name of the first loaded image.
Y[0]

In [None]:
# Display the first loaded image as a visual sanity check.
plt.imshow(X[0])

In [None]:
# Map each category name in Y to an integer class id.
label_encoder = LabelEncoder()
Y_integer_encoded = label_encoder.fit_transform(Y)
Y_integer_encoded

In [None]:
# Expand the integer class ids into one-hot rows.
Y_one_hot = to_categorical(Y_integer_encoded)
Y_one_hot

In [None]:
# Sanity check: decode the first one-hot row back to its category name.
label_encoder.inverse_transform([np.argmax(Y_one_hot[0])])

In [None]:
# Randomize the data and hold out 30% of it (x_test / y_test); the cells
# below use that held-out part as the database to search in.
# random_state is fixed so the shuffle is repeatable.
x_train, x_test, y_train, y_test = train_test_split(X, Y_one_hot, test_size=0.3, random_state=42)

In [None]:
# Preview the first 100 held-out images in a 10x10 grid; each tile is titled
# "<index>:<category>".
rows, columns = 10, 10
fig = plt.figure(figsize=(20, 15))
for image_index in range(rows * columns):
    ax = fig.add_subplot(rows, columns, image_index + 1)
    ax.imshow(x_test[image_index])
    class_id = np.argmax(y_test[image_index])
    class_name = label_encoder.inverse_transform([class_id])[0]
    ax.set_title(str(image_index) + ":" + class_name)
    # Hide both axes: tick marks carry no information for a thumbnail.
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
plt.show()

In [None]:
# Show which category name the encoder assigned to each integer class id.
for class_id, _ in enumerate(selected_cls):
    class_name = label_encoder.inverse_transform([class_id])[0]
    print(class_id, class_name)

In [None]:
# Apply the InceptionV3 input preprocessing to the held-out images.
data_set = preprocess_input(x_test)

In [None]:
# Run the whole search database through the network and keep its outputs as
# the per-image features.
feats = np.squeeze(model.predict(data_set))
print(feats.shape)

In [None]:
# Pick one database image as the query, display it, and extract its features.
Q_id = 10
Query_image = x_test[Q_id]
plt.imshow(Query_image)
Query_image = preprocess_input(Query_image)
# Add a leading axis so the single image is passed to the model as a batch of one.
query_batch = Query_image[np.newaxis, ...]
Query_feats = np.squeeze(model.predict(query_batch))

In [None]:
# Euclidean distance in feature space: `results` holds (distance, index)
# pairs for every database image, sorted so the nearest come first.
query_vector = Query_feats.flatten()
results = sorted(
    (np.linalg.norm(feats[index].flatten() - query_vector), index)
    for index in range(feats.shape[0])
)

In [None]:
# Show the 16 nearest results in a 4x4 grid, closest first.
rows, columns = 4, 4
fig = plt.figure(figsize=(10, 8))
for slot in range(rows * columns):
    _, image_index = results[slot]
    ax = fig.add_subplot(rows, columns, slot + 1)
    ax.imshow(x_test[image_index])
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
plt.show()

In [None]:
# Euclidean distance in raw pixel space (baseline for comparison):
# `results` holds (distance, index) pairs, nearest first.
# The images are stored as uint8, so they are cast to float before
# subtracting; a uint8 difference wraps around modulo 256 instead of going
# negative, which would corrupt the distances.
query_pixels = x_test[Q_id].astype(np.float64).flatten()
results = []
for i in range(x_test.shape[0]):
    d = np.linalg.norm(x_test[i].astype(np.float64).flatten() - query_pixels)
    results.append((d, i))

results = sorted(results)

In [None]:
# Show the 16 nearest results in a 4x4 grid, closest first.
rows, columns = 4, 4
fig = plt.figure(figsize=(10, 8))
for slot in range(rows * columns):
    _, image_index = results[slot]
    ax = fig.add_subplot(rows, columns, slot + 1)
    ax.imshow(x_test[image_index])
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
plt.show()

In [None]:
# http://alexanderfabisch.github.io/t-sne-in-scikit-learn.html
def plot_dataset(X, y, X_embedded, min_dist=10.0):
    """Scatter-plot a 2-D embedding and overlay image thumbnails on it.

    Args:
        X: Images, indexed like ``X_embedded``; each one shown is resized to
            48x48 for its thumbnail.
        y: Per-point values passed to the scatter plot as colours.
        X_embedded: Array of 2-D coordinates, one row per image.
        min_dist: Threshold on the *squared* distance to every thumbnail
            already placed; a point closer than this gets no thumbnail.
            ``None`` disables thumbnails entirely.
    """
    plt.figure(figsize=(15, 15))
    ax = plt.axes(frameon=False)
    plt.subplots_adjust(left=0.0, bottom=0.0, right=1.0, top=0.9, wspace=0.0, hspace=0.0)

    plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=y)

    if min_dist is None:
        return

    from matplotlib import offsetbox

    # Positions that already carry a thumbnail, seeded with the point (15, 15).
    placed = np.array([[15., 15.]])
    # Visit the points in random order, considering at most 5000 of them.
    order = np.arange(X_embedded.shape[0])
    np.random.shuffle(order)
    for idx in order[:5000]:
        point = X_embedded[idx]
        squared_dists = np.sum((point - placed) ** 2, 1)
        if np.min(squared_dists) < min_dist:
            continue
        placed = np.vstack([placed, point])
        thumbnail = cv2.resize(X[idx], dsize=(48, 48), interpolation=cv2.INTER_CUBIC)
        ax.add_artist(offsetbox.AnnotationBbox(offsetbox.OffsetImage(thumbnail), point))

In [None]:
# Number of images (taken from the start of x_test) to embed with t-SNE.
n_tsne = 300

In [None]:
# Flatten each image's feature block into one vector and embed the first
# n_tsne of them in 2-D with t-SNE.
n_images, feat_h, feat_w, feat_c = feats.shape
tsne_feats = np.reshape(feats, (n_images, feat_h * feat_w * feat_c))[:n_tsne]
feature_embedder = TSNE(n_components=2, perplexity=30, verbose=2)
tsne_f = feature_embedder.fit_transform(tsne_feats)
print(tsne_f.shape)

In [None]:
# Scatter the feature-space embedding, coloured by integer class id.
c = y_test[:n_tsne].argmax(axis=1)

plt.figure(figsize=(7, 7))
xs, ys = tsne_f[:, 0], tsne_f[:, 1]
plt.scatter(xs, ys, c=c)

plt.colorbar()
plt.show()

In [None]:
# Overlay image thumbnails on the feature-space embedding.
plot_dataset(x_test[:n_tsne], c[:n_tsne], tsne_f[:n_tsne], min_dist=1.0)

In [None]:
# Flatten each raw image into one pixel vector and embed the first n_tsne of
# them in 2-D with t-SNE, for comparison with the feature-space embedding.
n_images, img_h, img_w, img_c = x_test.shape
tsne_pix = np.reshape(x_test, (n_images, img_h * img_w * img_c))[:n_tsne]
pixel_embedder = TSNE(n_components=2, perplexity=30, verbose=2)
tsne_p = pixel_embedder.fit_transform(tsne_pix)
print(tsne_p.shape)

In [None]:
# Scatter the pixel-space embedding, coloured by integer class id.
c = y_test[:n_tsne].argmax(axis=1)

plt.figure(figsize=(7, 7))
xs, ys = tsne_p[:, 0], tsne_p[:, 1]
plt.scatter(xs, ys, c=c)

plt.colorbar()
plt.show()

In [None]:
# Overlay image thumbnails on the pixel-space embedding.
plot_dataset(x_test[:n_tsne], c[:n_tsne], tsne_p[:n_tsne], min_dist=1.0)