# Build the Annoy Index for Image Similarity

This notebook will do the following:

- Load the featurized images from the .pkl file
- Add each image to a new AnnoyIndex
- Save the index to the provided filename
 
To create a nearest-neighbor model we use the Annoy package: https://pypi.org/project/annoy/

In [None]:
import pickle
import numpy as np
import pandas as pd
from PIL import Image, ImageFile
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input
from annoy import AnnoyIndex


# Define Constants

In [None]:
# Annoy model parameters
n_trees = 1000  # tree count handed to the index build step

# Destination path for the saved Annoy index (left blank; fill in before running)
annoy_filename = ''

# Path to the pickled, featurized images (left blank; fill in before running)
preprocessed_images_filepath = ''

# Fail fast on an unusable configuration: the tree count must be positive and
# both file paths must have been changed from their empty placeholder values.
assert n_trees > 0, 'Number of trees must be greater than 0'
assert annoy_filename != '', 'Please provide a filepath to save the Annoy model. Example: /model/annoyIndex.ann'
assert preprocessed_images_filepath != '', 'Please provide the file path to your pre-processed images. Example: /data/preprocessed_images.pkl'

# Load Data

In [None]:
# Load the featurized images from disk. The context manager guarantees the
# file handle is closed even if unpickling raises.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load feature files that come from a trusted source.
with open(preprocessed_images_filepath, 'rb') as images_file:
    preprocessed_images = pickle.load(images_file)
print('shape of preprocessed_images %s' % str(preprocessed_images.shape))

# Build the Annoy Index

In [None]:
# The index expects one fixed-length vector per image, so the features must
# form a 2-D (n_images, n_features) array.
assert len(preprocessed_images.shape) == 2, 'preprocessed_images must be a 2-D array of shape (n_images, n_features)'

length = preprocessed_images.shape[1] #should be 2048 for ResNet50

# Name the metric explicitly. Annoy's implicit default is 'angular', but
# omitting the argument is deprecated and emits a FutureWarning; passing it
# keeps the same behaviour and stays compatible with future releases.
ai = AnnoyIndex(length, metric='angular')

# Item ids are the row positions in preprocessed_images, so a neighbor id
# returned by the index maps straight back to its row.
for i in range(preprocessed_images.shape[0]):
    ai.add_item(i, preprocessed_images[i, :])

ai.build(n_trees)  # no further items can be added once the index is built
ai.save(annoy_filename)