# Build the Annoy Index for Image Similarity

This notebook will do the following:

- Load the featurized images from a .pkl file
- Add each image to a new AnnoyIndex
- Save the index to the provided filename
 
To create a nearest neighbor model we use the Annoy package: https://pypi.org/project/annoy/

In [None]:
import pickle
import numpy as np
import pandas as pd
from PIL import Image, ImageFile
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input
from annoy import AnnoyIndex


# Define Constants

In [None]:
# Model parameters
n_trees = 10000  # number of trees handed to AnnoyIndex.build()

# File path the built Annoy index is saved to
annoy_fn = '/mnt/met-results/met_10k.annoy'

# File paths to the featurized images and the ids file
# (the ids file must hold one entry per feature row)
features_fn = '/mnt/met-results/features.pkl'
ids_fn = '/mnt/met-results/ids.pkl'

# Validate the configuration up front so a bad setting fails here
# rather than after the data has been loaded and the index built.
assert n_trees > 0, 'Number of trees must be greater than 0'
assert annoy_fn != '', 'Please provide a filepath to save the Annoy model. Example: /model/annoyIndex.ann'
assert features_fn != '', 'Please provide the file path to your pre-processed images. Example: /data/preprocessed_images.pkl'
assert ids_fn != '', 'Please provide the file path to your ids file. Example: /data/ids.pkl'

# Load Data

In [None]:
# Load the featurized images and their ids from the pickle files.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only point features_fn / ids_fn at files you produced or fully trust.
# The `with` blocks make sure both file handles are closed after reading.
with open(features_fn, 'rb') as features_file:
    features = pickle.load(features_file)
with open(ids_fn, 'rb') as ids_file:
    ids = pickle.load(ids_file)

# Quick visual check of what was loaded.
print(features.shape)
print(len(ids))

# The index is keyed by feature-row position, so there must be exactly
# one id per feature row.
assert len(ids) == features.shape[0], 'Number of ids does not match number of feature rows'

# Build the Annoy Index

In [None]:
# Size the index from the loaded feature matrix: one item per row,
# one dimension per column.
n_items = features.shape[0]
n_dims = features.shape[1]

# Angular-distance index over vectors of length n_dims.
ai = AnnoyIndex(n_dims, metric='angular')

# Register every feature row under its row position as the item id.
for row in range(n_items):
    ai.add_item(row, features[row, :])

# Build the trees, then persist the finished index to disk.
ai.build(n_trees)
ai.save(annoy_fn)