In [None]:
import tensorflow.keras as keras

In [None]:
import os

In [None]:
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.imagenet_utils import decode_predictions, preprocess_input
from tensorflow.keras.models import Model

In [None]:
# Instantiate VGG16 with the 'imagenet' weights; include_top=True keeps the
# network's top (classification) layers in addition to the convolutional base.
model = keras.applications.VGG16(weights='imagenet', include_top=True)

In [None]:

# Print the layer-by-layer summary of the loaded network.
model.summary()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def load_image(path):
    """Read an image from disk and turn it into a one-item input batch for `model`.

    Args:
        path: Location of the image file to load.

    Returns:
        A ``(img, x)`` pair. ``img`` is the object returned by
        ``image.load_img`` after resizing to the model's input height/width;
        ``x`` is the same picture as an array with a leading batch axis added
        and ``preprocess_input`` applied.
    """
    # model.input_shape[1:3] is the pair of spatial dimensions the network expects
    input_size = model.input_shape[1:3]
    img = image.load_img(path, target_size=input_size)
    batch = np.expand_dims(image.img_to_array(img), axis=0)
    return img, preprocess_input(batch)

In [None]:
# Load one sample portrait, inspect the preprocessed batch, and preview the image.
img, x = load_image("Portraits/7.jpg")
print("shape of x: ", x.shape)
print("data type: ", x.dtype)
plt.imshow(img)

In [None]:
# forward the image through the network
predictions = model.predict(x)

# print out the name and probability of each decoded prediction for the first
# (and only) image in the batch
for _, pred, prob in decode_predictions(predictions)[0]:
    print("predicted %s with probability %0.3f" % (pred, prob))

In [None]:
# Reuse the classifier's input but take the output of the layer named "fc2",
# so predict() returns that layer's activations instead of class probabilities.
feat_extractor = Model(inputs=model.input, outputs=model.get_layer("fc2").output)
feat_extractor.summary()

In [None]:
# Extract the "fc2" activations for the sample portrait ...
img, x = load_image("Portraits/7.jpg")
feat = feat_extractor.predict(x)

# ... and plot the single feature vector of the batch as a curve.
plt.figure(figsize=(16,4))
plt.plot(feat[0])

In [None]:
# Imported here because the subsampling branch below needs it and the notebook
# otherwise only imports `random` in a much later cell.
import random

images_path = 'Portraits'
image_extensions = ['.jpg', '.png', '.jpeg']   # compared against the lower-cased file extension
max_num_images = 10000

# Recursively collect every file under images_path whose extension is allowed.
images = [
    os.path.join(dp, f)
    for dp, dn, filenames in os.walk(images_path)
    for f in filenames
    if os.path.splitext(f)[1].lower() in image_extensions
]

# Too many images: keep a random subset of max_num_images, preserving the
# original relative order. (`range` replaces the Python-2-only `xrange`.)
if max_num_images < len(images):
    images = [images[i] for i in sorted(random.sample(range(len(images)), max_num_images))]

print("keeping %d images to analyze" % len(images))

In [None]:
import time

# Use one clock for every reading: time.clock() was removed in Python 3.8, and
# the rest of this cell already measures with time.process_time() (CPU time).
tic = time.process_time()

features = []
for i, image_path in enumerate(images):
    # report progress, and the time spent since the previous report, every 500 images
    if i % 500 == 0:
        toc = time.process_time()
        elap = toc - tic
        print("analyzing image %d / %d. Time: %4.4f seconds." % (i, len(images), elap))
        tic = time.process_time()
    img, x = load_image(image_path)
    # predict() returns a batch of one; keep only that single feature vector
    feat = feat_extractor.predict(x)[0]
    features.append(feat)

print('finished extracting features for %d images' % len(images))

In [None]:
from sklearn.decomposition import PCA

# Stack the per-image feature vectors into one array (one row per image).
features = np.array(features)

# PCA cannot keep more components than min(n_samples, n_features), so cap the
# requested 300 components to what this dataset can actually support.
n_components = min(300, *features.shape)
pca = PCA(n_components=n_components)
pca.fit(features)

In [None]:
# Project every feature vector onto the fitted principal components.
pca_features = pca.transform(features)

In [None]:

import random

# grab a random query image (random.random() is in [0, 1), so the index stays in range)
query_image_idx = int(len(images) * random.random())

# let's display the image at its original size
img = image.load_img(images[query_image_idx])
plt.imshow(img)

In [None]:

from scipy.spatial import distance

# Cosine distance from the query image to every image (itself included).
# NOTE: despite its name, this list holds distances, not indexes.
similar_idx = [ distance.cosine(pca_features[query_image_idx], feat) for feat in pca_features ]

In [None]:
# Rank image indexes by ascending distance and keep ranks 1-5; rank 0 is
# skipped on the assumption that it is the query image itself.
idx_closest = sorted(range(len(similar_idx)), key=lambda k: similar_idx[k])[1:6]

In [None]:
# load all the similarity results as thumbnails of height 100
# (the width is scaled by the same factor and truncated to an integer)
thumbs = []
for idx in idx_closest:
    img = image.load_img(images[idx])
    img = img.resize((int(img.width * 100 / img.height), 100))
    thumbs.append(img)

# concatenate the images side by side (along axis 1) into a single image
concat_image = np.concatenate([np.asarray(t) for t in thumbs], axis=1)

# show the image
plt.figure(figsize = (16,12))
plt.imshow(concat_image)

In [None]:

def get_closest_images(query_image_idx, num_results=5, features=None):
    """Return the indexes of the images most similar to a query image.

    Similarity is measured as cosine distance between feature vectors;
    smaller distance means more similar.

    Args:
        query_image_idx: Row index of the query image in the feature matrix.
        num_results: Maximum number of neighbours to return.
        features: Optional feature matrix (one row per image). When omitted,
            the notebook-level ``pca_features`` array is used.

    Returns:
        A list of at most ``num_results`` row indexes, ordered from closest to
        farthest. The query index itself is never part of the result.
    """
    if features is None:
        features = pca_features
    query_vec = features[query_image_idx]
    distances = [distance.cosine(query_vec, feat) for feat in features]
    ranked = sorted(range(len(distances)), key=lambda k: distances[k])
    # Exclude the query explicitly instead of dropping rank 0: with duplicate
    # (or tied) vectors a different index can sort first, and the query would
    # otherwise be returned as one of its own neighbours.
    idx_closest = [k for k in ranked if k != query_image_idx][:num_results]
    return idx_closest

def get_concatenated_images(indexes, thumb_height):
    """Build one wide image by placing the selected images side by side.

    Args:
        indexes: Positions in the notebook-level ``images`` list of the files
            to load.
        thumb_height: Height in pixels every image is resized to; the width is
            scaled by the same factor and truncated to an integer.

    Returns:
        An array holding the resized images concatenated along axis 1, in the
        order given by ``indexes``.
    """
    def _thumbnail(idx):
        # load one image and shrink/grow it to the requested height
        picture = image.load_img(images[idx])
        scaled_width = int(picture.width * thumb_height / picture.height)
        return picture.resize((scaled_width, thumb_height))

    arrays = [np.asarray(_thumbnail(idx)) for idx in indexes]
    return np.concatenate(arrays, axis=1)

In [None]:

# do a query on a random image
query_image_idx = int(len(images) * random.random())
idx_closest = get_closest_images(query_image_idx)
# the query is rendered at height 300, the strip of results at height 200
query_image = get_concatenated_images([query_image_idx], 300)
results_image = get_concatenated_images(idx_closest, 200)

# display the query image
plt.figure(figsize = (5,5))
plt.imshow(query_image)
plt.title("query image (%d)" % query_image_idx)

# display the resulting images
plt.figure(figsize = (16,12))
plt.imshow(results_image)
plt.title("result images")

In [None]:

# Query images 0-99 in order (not at random) and show each one with its matches.
# The upper bound is clamped so a smaller dataset does not raise IndexError, and
# the loop variable is the query index itself rather than `x`, which would
# overwrite the preprocessed image batch of the same name.
for query_image_idx in range(0, min(100, len(images))):
    idx_closest = get_closest_images(query_image_idx)
    query_image = get_concatenated_images([query_image_idx], 300)
    results_image = get_concatenated_images(idx_closest, 200)

    # display the query image
    plt.figure(figsize = (5,5))
    plt.imshow(query_image)
    plt.title("query image (%d)" % query_image_idx)

    # display the resulting images
    plt.figure(figsize = (16,12))
    plt.imshow(results_image)
    plt.title("result images")

    # render both figures now so hundreds of figures are not kept open at once
    plt.show()

In [None]:

# Query images 100-199 in order (not at random) and show each one with its matches.
# The range starts at 100, not 101, so image 100 is not skipped; the upper bound
# is clamped so a smaller dataset does not raise IndexError. The loop variable is
# the query index itself rather than `x`, which would overwrite the preprocessed
# image batch of the same name.
for query_image_idx in range(100, min(200, len(images))):
    idx_closest = get_closest_images(query_image_idx)
    query_image = get_concatenated_images([query_image_idx], 300)
    results_image = get_concatenated_images(idx_closest, 200)

    # display the query image
    plt.figure(figsize = (5,5))
    plt.imshow(query_image)
    plt.title("query image (%d)" % query_image_idx)

    # display the resulting images
    plt.figure(figsize = (16,12))
    plt.imshow(results_image)
    plt.title("result images")

    # render both figures now so hundreds of figures are not kept open at once
    plt.show()

In [None]:

# Query images 200-299 in order (not at random) and show each one with its matches.
# The range starts at 200, not 201, so image 200 is not skipped; the upper bound
# is clamped so a smaller dataset does not raise IndexError. The loop variable is
# the query index itself rather than `x`, which would overwrite the preprocessed
# image batch of the same name.
for query_image_idx in range(200, min(300, len(images))):
    idx_closest = get_closest_images(query_image_idx)
    query_image = get_concatenated_images([query_image_idx], 300)
    results_image = get_concatenated_images(idx_closest, 200)

    # display the query image
    plt.figure(figsize = (5,5))
    plt.imshow(query_image)
    plt.title("query image (%d)" % query_image_idx)

    # display the resulting images
    plt.figure(figsize = (16,12))
    plt.imshow(results_image)
    plt.title("result images")

    # render both figures now so hundreds of figures are not kept open at once
    plt.show()

In [None]:

# Query images 300-399 in order (not at random) and show each one with its matches.
# The range starts at 300, not 301, so image 300 is not skipped; the upper bound
# is clamped so a smaller dataset does not raise IndexError. The loop variable is
# the query index itself rather than `x`, which would overwrite the preprocessed
# image batch of the same name.
for query_image_idx in range(300, min(400, len(images))):
    idx_closest = get_closest_images(query_image_idx)
    query_image = get_concatenated_images([query_image_idx], 300)
    results_image = get_concatenated_images(idx_closest, 200)

    # display the query image
    plt.figure(figsize = (5,5))
    plt.imshow(query_image)
    plt.title("query image (%d)" % query_image_idx)

    # display the resulting images
    plt.figure(figsize = (16,12))
    plt.imshow(results_image)
    plt.title("result images")

    # render both figures now so hundreds of figures are not kept open at once
    plt.show()

In [None]:

# Query images 400-499 in order (not at random) and show each one with its matches.
# The range starts at 400, not 401, so image 400 is not skipped; the upper bound
# is clamped so a smaller dataset does not raise IndexError. The loop variable is
# the query index itself rather than `x`, which would overwrite the preprocessed
# image batch of the same name.
for query_image_idx in range(400, min(500, len(images))):
    idx_closest = get_closest_images(query_image_idx)
    query_image = get_concatenated_images([query_image_idx], 300)
    results_image = get_concatenated_images(idx_closest, 200)

    # display the query image
    plt.figure(figsize = (5,5))
    plt.imshow(query_image)
    plt.title("query image (%d)" % query_image_idx)

    # display the resulting images
    plt.figure(figsize = (16,12))
    plt.imshow(results_image)
    plt.title("result images")

    # render both figures now so hundreds of figures are not kept open at once
    plt.show()

In [None]:

# Query images 500-576 in order (not at random) and show each one with its matches.
# The range starts at 500, not 501, so image 500 is not skipped; the upper bound
# is clamped so a smaller dataset does not raise IndexError. The loop variable is
# the query index itself rather than `x`, which would overwrite the preprocessed
# image batch of the same name.
# NOTE(review): 577 looks like the dataset size when this was written - confirm,
# or replace it with len(images) to always cover the remaining images.
for query_image_idx in range(500, min(577, len(images))):
    idx_closest = get_closest_images(query_image_idx)
    query_image = get_concatenated_images([query_image_idx], 300)
    results_image = get_concatenated_images(idx_closest, 200)

    # display the query image
    plt.figure(figsize = (5,5))
    plt.imshow(query_image)
    plt.title("query image (%d)" % query_image_idx)

    # display the resulting images
    plt.figure(figsize = (16,12))
    plt.imshow(results_image)
    plt.title("result images")

    # render both figures now so hundreds of figures are not kept open at once
    plt.show()