We find similar images in a database by using transfer learning with a pre-trained VGG-19 image classifier. We retrieve the 5 most similar images for each image in the database, and plot the t-SNE embedding of all our image feature vectors.

In [None]:
import sys, os
import numpy as np
from keras.preprocessing import image
from keras.models import Model
sys.path.append("src")
from keras import applications
from imagenet_utils import preprocess_input
from plot_utils import plot_query_answer
from sort_utils import find_topk_unique
from kNN import kNN
from tSNE import plot_tsne

Load pre-trained VGG-19 model and extract features from the deepest convolutional layer: block5_conv4

In [None]:
# Load VGG-19 with ImageNet weights and print its layer listing for reference.
print("Loading VGG-19 pre-trained model...")
base_model = applications.VGG19(weights='imagenet')
base_model.summary()

# Feature extractor: same input as VGG-19, but the output is taken from an
# intermediate layer instead of the classifier head.
# Keras 2 names these keyword arguments `inputs` / `outputs`; the singular
# `input` / `output` spelling is the Keras 1 API and is rejected or warned
# about by newer releases.
model = Model(inputs=base_model.input,
              outputs=base_model.get_layer('block5_conv4').output)  # try extracting from a different layer

Read images and convert them to feature vectors

In [None]:
# Three parallel collections, one entry per JPEG found in `path`:
#   imgs           - the resized image pixels, kept for plotting later
#   filename_heads - file name without its extension, used to label outputs
#   X              - flattened output of `model` for that image
imgs, filename_heads, X = [], [], []
path = "db_new"
print("Reading images from '{}' directory...\n".format(path))

# os.listdir() returns entries in arbitrary order; sorting keeps the row order
# of imgs / filename_heads / X identical from run to run and machine to machine.
for f in sorted(os.listdir(path)):
    # Split "name.ext" and skip anything that is not a JPEG
    head, ext = os.path.splitext(f)
    if ext.lower() not in (".jpg", ".jpeg"):
        continue
    filename_full = os.path.join(path, f)  # full path filename

    # Read image file, resized to the 224x224 input size used with the pre-trained model
    img = image.load_img(filename_full, target_size=(224, 224))
    imgs.append(np.array(img))  # image
    filename_heads.append(head)  # filename head

    # Pre-process for model input: convert to array, add a leading batch axis,
    # then apply the project's preprocess_input helper
    # (presumably the ImageNet normalisation - confirm in src/imagenet_utils).
    batch = image.img_to_array(img)
    batch = np.expand_dims(batch, axis=0)
    batch = preprocess_input(batch)

    # Run the network on this single image and flatten the result to 1-D
    features = model.predict(batch).flatten()
    X.append(features)

X = np.array(X)  # feature vectors, one row per image
imgs = np.array(imgs)  # images, same row order as X
print("imgs.shape = {}".format(imgs.shape))
print("X_features.shape = {}\n".format(X.shape))


Find k-nearest images to each image

In [None]:
# We want 5 recommendations per query; the query image itself is expected to
# come back as its own closest match, so ask for one extra neighbour.
n_recommendations = 5
n_neighbours = n_recommendations + 1

# Configure the project's kNN wrapper, then fit it on the feature matrix.
# Other algorithms / metrics (e.g. minkowski distance) are worth trying here.
knn = kNN()
knn.compile(
    n_neighbors=n_neighbours,
    algorithm="brute",
    metric="cosine",
)
knn.fit(X)

Plot recommendations for each image in database

In [None]:
# Directory that receives one recommendation figure per database image
# (previously "output"/"rec")
output_rec_dir = os.path.join("output_new", "rec_new")
if not os.path.exists(output_rec_dir):
    os.makedirs(output_rec_dir)

n_imgs = len(imgs)

# Every image is reshaped to (-1, height, width, 3) before plotting; the
# height/width are read from the first image.
first_shape = imgs[0].shape
ypixels, xpixels = first_shape[0], first_shape[1]
plot_shape = (-1, ypixels, xpixels, 3)

for ind_query in range(n_imgs):
    query_name = filename_heads[ind_query]
    print("[{}/{}] Plotting similar image recommendations for: {}".format(ind_query+1, n_imgs, query_name))

    # Query the fitted kNN model with this image's feature vector (as a
    # one-row batch), then pass the flattened results through the project's
    # find_topk_unique helper.
    query_vector = np.array([X[ind_query]])
    distances, indices = knn.predict(query_vector)
    indices, distances = find_topk_unique(indices.flatten(), distances.flatten(), n_neighbours)

    # Save the figure: query image alongside its neighbours
    rec_filename = os.path.join(output_rec_dir, "{}_rec_new.png".format(query_name))
    x_query_plot = imgs[ind_query].reshape(plot_shape)
    x_answer_plot = imgs[indices].reshape(plot_shape)
    plot_query_answer(
        x_query=x_query_plot,
        x_answer=x_answer_plot[1:],  # drop the first hit, which is taken to be the query itself
        filename=rec_filename,
    )

Plot tSNE

In [None]:
# Make sure the output directory exists, then hand the images and their
# feature vectors to the project's plot_tsne helper together with the
# destination file name.
output_tsne_dir = "output_new"
if not os.path.exists(output_tsne_dir):
    os.makedirs(output_tsne_dir)

tsne_filename = os.path.join(output_tsne_dir, "tsne_new.png")
print("Plotting tSNE_new to {}...".format(tsne_filename))
plot_tsne(imgs, X, tsne_filename)

It would be better to train (or fine-tune) a model specifically on this set of classes, so that it is more effective at retrieving images that belong to a particular class.