## Session 10 - Image search with VGG16 and K-Nearest Neighbours

In [4]:
# base tools
import os, sys
sys.path.append(os.path.join(".."))

# data analysis
import numpy as np
from numpy.linalg import norm
from tqdm import tqdm

# tensorflow
import tensorflow_hub as hub
from tensorflow.keras.preprocessing.image import (load_img, 
                                                  img_to_array)
from tensorflow.keras.applications.vgg16 import (VGG16, 
                                                 preprocess_input)
# from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input

# matplotlib
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

ModuleNotFoundError: No module named 'tqdm'

## Helper functions

Q: What kind of preprocessing am I doing here? Why do you think I'm doing it?

In [None]:
def extract_features(img_path, model):
    """
    Extract an L2-normalised feature vector from an image using a pretrained
    model (e.g. VGG16).

    Parameters
    ----------
    img_path : str
        Path to the image file to embed.
    model : keras model
        Pretrained network whose ``predict`` output is used as the embedding.
        The image is resized to 224x224 before prediction, so the model is
        presumably built for that input size.

    Returns
    -------
    numpy.ndarray
        1-D array: the flattened model output divided by its Euclidean norm.
        If the norm is zero the (all-zero) vector is returned unscaled.
    """
    # Define input image shape - the image is resized to this on load
    input_shape = (224, 224, 3)
    # load image from file path, resizing to (height, width)
    img = load_img(img_path, target_size=input_shape[:2])
    # convert to array
    img_array = img_to_array(img)
    # add a leading batch axis, since predict() works on batches of images
    expanded_img_array = np.expand_dims(img_array, axis=0)
    # apply the model-specific preprocessing imported alongside the model
    preprocessed_img = preprocess_input(expanded_img_array)
    # use the predict function to create the feature representation
    features = model.predict(preprocessed_img)
    # flatten to a 1-D vector
    flattened_features = features.flatten()
    # normalise to unit length; guard against dividing by zero
    feature_norm = norm(flattened_features)
    if feature_norm == 0:
        return flattened_features
    # return the normalised vector (previously computed but never returned)
    return flattened_features / feature_norm

# Image search

## Load VGG16

Then we are loading our model.

In [None]:
# Pretrained VGG16 used purely as a feature extractor
model = VGG16(
    input_shape=(224, 224, 3),
    include_top=False,   # leave out the classification layers at the top of the network
    pooling='avg',       # average pooling applied to the output
    weights='imagenet',  # use the weights trained on the ImageNet dataset
)

## Extract features from single image

In [None]:
# Embed a single example image. extract_features flattens the model output,
# so the result is a 1-D array (presumably 512 values for VGG16 with pooling='avg').
features = extract_features(img_path='../data/img/florence.jpg', model=model)


## Iterate over folder

In [None]:
def get_file_list(root_dir):
    """
    Recursively collect the paths of all image files below ``root_dir``.

    Parameters
    ----------
    root_dir : str
        Directory to search; sub-directories are searched as well.

    Returns
    -------
    list of str
        Paths (joined onto ``root_dir``) of every file whose extension is
        .jpg, .jpeg or .png, compared case-insensitively. The order follows
        ``os.walk`` and is not sorted.
    """
    extensions = {".jpg", ".jpeg", ".png"}
    file_list = []
    for current_dir, _, names in os.walk(root_dir):
        for name in names:
            # compare on the lower-cased extension so .JPG / .PNG are found too
            if os.path.splitext(name)[1].lower() in extensions:
                file_list.append(os.path.join(current_dir, name))
    return file_list


# path to the datasets
root_dir = os.path.join("..")  # search everything below the parent directory
# sort so that image indices are stable between runs
filenames = sorted(get_file_list(root_dir))


__Extract features for each image__

In [None]:
# Build the feature "database": one embedding per image, in the same order as `filenames`
feature_list = []
# position/leave control where the progress bar is drawn and whether it stays after the loop
for img_path in tqdm(filenames, position=0, leave=True):
    feature_list.append(extract_features(img_path, model))

`feature_list` is a list of 1360 arrays (one per image), each of which is a feature vector of length 512.

## Nearest neighbours

Once we have our *database* of extracted embeddings, we can then use K-Nearest Neighbours to find similar images.


In [None]:
from sklearn.neighbors import NearestNeighbors

# Initialise the search model and fit it to the extracted features in one go.
neighbors = NearestNeighbors(n_neighbors=10,     # find the 10 nearest neighbours
                             algorithm='brute',  # use the brute force algorithm
                             metric='cosine'     # cosine distance between the feature vectors
                             ).fit(feature_list)

__Calculate nearest neighbours for target__

In [None]:
# Query with the embedding of image 250, wrapped in a list so a batch of one sample is passed.
# The model was created with n_neighbors=10, so 10 neighbours are requested.
query = [feature_list[250]]
distances, indices = neighbors.kneighbors(query)

__Save indices, print data__

In [None]:
# Print distance and index for neighbours 1-5 and remember their indices.
# Position 0 is skipped (presumably the query image itself).
idxs = []
for rank in range(1, 6):
    dist, idx = distances[0][rank], indices[0][rank]
    print(dist, idx)
    idxs.append(idx)

__Plot target image__

In [None]:
# read the target image (index 250) from disk, then display it
target_img = mpimg.imread(filenames[250])
plt.imshow(target_img)

__Plot close images__

In [None]:
# show the closest match found by the search instead of a hard-coded index
plt.imshow(mpimg.imread(filenames[idxs[0]]))

__Plot target and top 3 closest together__

In [None]:
# plot target (image 250, the one used as the query for kneighbors)
plt.imshow(mpimg.imread(filenames[250]))

# plot 3 most similar, side by side
f, axarr = plt.subplots(1, 3)
for ax, idx in zip(axarr, idxs[:3]):
    ax.imshow(mpimg.imread(filenames[idx]))

## Simple style transfer

__Load a quick style transfer model from TF Hub__

You can find more details [here](https://www.tensorflow.org/hub/tutorials/tf2_arbitrary_image_stylization)

```python
# Load TF-Hub module.
hub_handle = 'https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2'
hub_module = hub.load(hub_handle)
```

In [None]:
# load TF-hub module
# Handle of the arbitrary image stylization model from the linked TF Hub tutorial.
hub_handle = 'https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2'
# hub.load returns a callable taking (content_image, style_image), which is how it is
# called below; hub.KerasLayer would treat a second positional argument as `training`.
hub_module = hub.load(hub_handle)

__Load the content image and the style image__

In [None]:
# Load the content image and the style image from disk.
# NOTE(review): st_load is not defined or imported in the visible notebook - confirm
# where it comes from (and what it returns) before running this cell.
content_image = st_load("../data/img/florence.jpg")
style_image = st_load("../data/img/starry_night.jpg")

__Process using the model__

In [None]:
# Run the style transfer module on the content/style pair
outputs = hub_module(content_image, style_image)
# The first element of the output is taken as the stylized image
stylized_image = outputs[0]

__Show content, style, and stylized image__

In [None]:
# Display the three images next to each other with a title each.
# NOTE(review): show_n is not defined or imported in the visible notebook - confirm its source.
panel_images = [content_image, style_image, stylized_image]
panel_titles = ['Original content image', 'Style image', 'Stylized image']
show_n(panel_images, titles=panel_titles)

### Task 1
- Run this same pipeline on the Indo Fashion dataset. How does it perform?

### Task 2
- Take the code in this notebook and turn it into a Python script. You can then add this to the repo for your Assignment 1 solution for doing image search.
  - I.e. your Assignment 1 repo would contain both code for image search using colour histograms *and* for image search using a pretrained CNN.

### Task 3 
- Continue working on Assignment 3 in-class just now.