# Nearest Neighbors with Keras

In [1]:
import glob
from itertools import groupby
from pathlib import Path
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import Dense, Reshape
from tensorflow.keras.models import Model
#from coremltools.converters.keras import convert
#from tqdm import tqdm_notebook as tqdm

In [2]:
IMG_HEIGHT, IMG_WIDTH = (224, 224)
img_folder = Path("./images/101_ObjectCategories/").expanduser()
coreml_model_file = "similarity.mlmodel"

## Base Network for Feature Extraction
We load the model which has been trained on ImageNet. We specify `include_top=False`.  This ensures that we don't load the final layers specific to the classes the model was originall trained to predict. For more information, see the [Keras documentation](https://keras.io/applications/#resnet50)

In [3]:
encoder_model = ResNet50(input_shape=(IMG_HEIGHT,IMG_WIDTH,3), weights='imagenet', include_top=False, pooling='avg')

Downloading data from https://github.com/keras-team/keras-applications/releases/download/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [4]:
np.prod(encoder_model.output.shape.as_list()[1:])

2048

In [5]:
len(encoder_model.get_weights())

318

## Split Data

In [6]:
image_filenames = glob.glob(str(img_folder / '**/*.jpg'))

test_filenames = []
train_filenames = []
for key, items in groupby(sorted(image_filenames), lambda f: Path(f).parts[-2]):
    if key == 'BACKGROUND_Google':
        continue
    test, *train = items
    test_filenames.append(test)
    train_filenames += train

print("test images: ", len(test_filenames))
print("train images", len(train_filenames))

test images:  0
train images 0


## Extract features for all images in the database

In [7]:
def load_encode_images(encoder, filenames):
    batch_size = 16
    encoded_dim = np.prod(encoder.output.shape[1:]).value
    file_count = len(filenames)
    encoded = np.zeros((file_count, encoded_dim))
    for start_index in tqdm(list(range(0, file_count, batch_size))):
        end_index = min(start_index + batch_size, file_count)
        batch_filenames = filenames[start_index:end_index]

        batch_images = load_images(batch_filenames)
        batch_encoded = encoder.predict(batch_images)
        batch_encoded_flat = batch_encoded.reshape(len(batch_images), -1)
        encoded[start_index:end_index, :] = batch_encoded_flat

    return encoded

def load_images(filenames):
    images = np.zeros((len(filenames), IMG_HEIGHT, IMG_WIDTH, 3))
    for i, filename in enumerate(filenames):
        img = image.load_img(filename, target_size=(IMG_HEIGHT,IMG_WIDTH))
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)
        img_array = preprocess_input(img_array)
        images[i, :, :, :] = img_array
    return images

encoded_imgs = load_encode_images(encoder_model, train_filenames).T

AttributeError: 'numpy.int64' object has no attribute 'value'

## Build the k-NN and Joined Model
Here we build the actual k-NN and combine it with the existing network. All there is to the K-NN is the Dense layer. We do not need a bias so we specify `use_bias=False`. It's also important to note that we don't need an activation function, hence `actication='linear'`.

In [None]:
def build_knn(model, output_size):
    # Flatten feature vector
    flat_dim_size = np.prod(model.output_shape[1:])
    x = Reshape(target_shape=(flat_dim_size,),
                name='features_flat')(model.output)
    
    # Dot product between feature vector and reference vectors
    x = Dense(units=output_size,
              activation='linear',
              name='dense_1',
              use_bias=False)(x)   
                
    classifier = Model(inputs=[model.input], outputs=x)
    return classifier

In [None]:
joined_model = build_knn(encoder_model, encoded_imgs.shape[1])
joined_model.summary()

### Normalize Encodings

In [None]:
def normalize_ecnodings(encodings):
    ref_norms = np.linalg.norm(encoded_imgs, axis=0)
    return encodings / ref_norms

In [None]:
encoded_imgs_normalized = normalize_ecnodings(encoded_imgs)

### Set Weights to Extracted Features

In [None]:
temp_weights = joined_model.get_weights()
temp_weights[-1] = encoded_imgs_normalized
joined_model.set_weights(temp_weights)

## Predict

In [None]:
example_filename = test_filenames[0]
print(example_filename)
example_img = image.load_img(example_filename, target_size=(IMG_WIDTH, IMG_HEIGHT))
example_img = image.img_to_array(example_img)
example_img = np.expand_dims(example_img, axis=0)
example_img = preprocess_input(example_img)
prediction = joined_model.predict([example_img]).reshape(-1)

In [None]:
for index in prediction.argsort()[-5:][::-1]:
    print(train_filenames[index])

## Convert to CoreML
We now conver to CoreML. Note that we specify `is_bgr=True`. This is because the weights in the ResNet50 model are learned with Caffe which uses BGR as opposed to RGB. We also set a bias for each color channel. This is to simulate the zero-centering that Keras would normally take care of in the `preprocess_input` function (which we use above when loading the images). From the documentation for that function:

> will convert the images from RGB to BGR,
> then will zero-center each color channel with
> respect to the ImageNet dataset,
> without scaling.

The actual mean values used for centering can be found in the Keras Applications [source code](https://github.com/keras-team/keras-applications/blob/2661dac4dacb717e54640f158cfa9bacae6dd91b/keras_applications/imagenet_utils.py#L64).

In [None]:
def keras_to_coreml(joined_model, coreml_model_file):
    coreml_encoder = convert(joined_model,
                             input_names=['encoder_input'],
                             image_input_names=['encoder_input'],
                             output_names=['dense_1'],
                             is_bgr=True,
                             red_bias=-123.68,
                             green_bias=-116.779,
                             blue_bias=-103.939)
    coreml_encoder.author = 'Soren Lind Kristiansen'
    coreml_encoder.license = 'N/A'
    coreml_encoder.short_description = 'Image similarity.'
    coreml_encoder.output_description['dense_1'] = 'k-NN'
    coreml_encoder.save(coreml_model_file)

keras_to_coreml(joined_model, coreml_model_file)