In [None]:
# 5D01 - Image Similarity Challenge : Matching Track
# Chidambar Hunakunti - 01fe19bcs219
# Aditya Vikram - 01fe19bcs220
# Rishab Jain - 01fe19bcs228
# Harshita Hiremath - 01fe19bcs235

In [None]:
# Import necessary libraries
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from tensorflow.keras.applications import EfficientNetB7
from tensorflow.keras import models, Model
from annoy import AnnoyIndex
from scipy import spatial
from tensorflow.keras.models import load_model

In [None]:
# Module-level accumulators that the functions and cells below append to.

# Names of the reference images.
ref_id = list()
# Confidence of each match.
score = list()
# Name of the reference image matched to each query.
reference_id = list()
# Names of the query images.
query_id = list()

In [None]:
def store_query(path):
    """Record the extension-less name of every entry found in ``path``.

    Each name returned by ``os.listdir(path)`` has its final extension removed
    and is appended, in listing order, to the module-level ``query_id`` list.
    Nothing is returned.
    """
    query_id.extend(
        os.path.splitext(os.path.basename(entry))[0]
        for entry in os.listdir(path)
    )


def load_images_from_folder(folder):
    """Load every image file in ``folder`` as a ``(1, 600, 600, 3)`` array.

    Entries are processed in sorted name order so that the position of an
    image in the returned list (and of its name in the module-level ``ref_id``
    list) is the same on every run; ``os.listdir`` alone gives no ordering
    guarantee. Entries that are not regular files (for example a
    ``.ipynb_checkpoints`` directory) are skipped instead of crashing the
    image loader.

    Args:
        folder: Path of the directory holding the reference images.

    Returns:
        A list with one array per loaded image, each with a leading axis of
        length 1 added in front of the ``(600, 600, 3)`` image data.

    Side effects:
        Appends the file name (with extension) of each loaded image to
        ``ref_id``, in the same order as the returned list.
    """
    images = []
    for filename in sorted(os.listdir(folder)):
        file_path = os.path.join(folder, filename)
        if not os.path.isfile(file_path):
            continue
        img = img_to_array(load_img(file_path, target_size=(600, 600)))
        images.append(img.reshape((1,) + img.shape))
        # Record the name only once the image is fully loaded, so ref_id and
        # the returned list can never get out of step.
        ref_id.append(filename)
    return images


def get_all_images():
    """Load the images of the ``'reference'`` folder into a single array.

    The list produced by ``load_images_from_folder`` is wrapped in one more
    list before conversion, so the result carries an extra leading axis of
    length 1 in front of the per-image axis.
    """
    reference_images = load_images_from_folder('reference')
    return np.array([reference_images])


def create_model():
    """Build, save and return the EfficientNetB7-based feature extractor.

    The network is created with its classification top included, and the
    returned model maps the network input to the output of its second-to-last
    layer. The model is also written to ``'effb7.h5'``.
    """
    backbone = EfficientNetB7(include_top=True)
    penultimate_output = backbone.layers[-2].output
    feature_extractor = Model(backbone.input, penultimate_output)
    feature_extractor.save('effb7.h5')
    return feature_extractor


def get_preds(all_imgs_arr, feature_model=None, feature_dim=2560):
    """Compute one feature vector per entry of ``all_imgs_arr``.

    Args:
        all_imgs_arr: Array whose first axis indexes the images; each entry
            ``all_imgs_arr[j]`` is passed to ``predict`` on its own.
        feature_model: Object exposing ``predict``. When omitted, the
            notebook-level global ``model`` is used, which keeps existing
            ``get_preds(arr)`` calls working; passing it explicitly removes
            the dependency on hidden kernel state.
        feature_dim: Length of the vector returned by ``predict`` for one
            image. Defaults to 2560, the value previously hard-coded here.

    Returns:
        A ``(len(all_imgs_arr), feature_dim)`` float array; row ``j`` holds
        the prediction for ``all_imgs_arr[j]``.
    """
    if feature_model is None:
        feature_model = model  # fall back to the notebook-level global
    preds_all = np.zeros((len(all_imgs_arr), feature_dim))
    for j in range(all_imgs_arr.shape[0]):
        preds_all[j] = feature_model.predict(all_imgs_arr[j])
    return preds_all


def load_model_from_path(filepath):
    """Return the model that ``load_model`` reads from ``filepath``."""
    return load_model(filepath)


def load_images_preds(numpy_filepath):
    """Read the ``images`` and ``preds`` arrays from an ``.npz`` archive.

    Args:
        numpy_filepath: Path of an archive containing the keys ``'images'``
            and ``'preds'``.

    Returns:
        The tuple ``(images, preds)``.

    Raises:
        KeyError: If either key is missing from the archive.
    """
    # np.load returns a lazy archive object that keeps the file open until it
    # is closed; this function is called once per query image, so close it
    # deterministically instead of leaking a handle on every call.
    with np.load(numpy_filepath) as data:
        img = data['images']
        preds = data['preds']
    return img, preds


def show_img(array):
    """Display ``array`` as a 600x600 three-channel image with matplotlib.

    The input is reshaped to ``(600, 600, 3)``, passed through
    ``img_to_array`` and cast to unsigned 8-bit before being drawn.
    """
    pixels = img_to_array(array.reshape(600, 600, 3))
    as_bytes = np.uint8(pixels)
    plt.imshow(as_bytes)
    plt.show()


def load_images_from_file(filepath):
    """Load one image, resized to 600x600, with a leading axis of length 1.

    The file is read with ``load_img`` using ``target_size=(600, 600)``,
    converted with ``img_to_array`` and returned with one extra axis in front.
    """
    pixels = img_to_array(load_img(filepath, target_size=(600, 600)))
    return pixels[np.newaxis, ...]

In [None]:
def get_nearest_neighbor_and_similarity(preds1, K, n_trees=10000):
    """Find the nearest neighbours of the LAST row of ``preds1``.

    An Annoy index is built over all rows of ``preds1`` and queried for the
    last row only (callers append the query vector at the end). The previous
    implementation ran a lookup for every row and discarded all but the last
    result; the returned values are the same, without the wasted lookups.

    Args:
        preds1: 2-D array, one feature vector per row; the last row is the
            query.
        K: Number of neighbours wanted in addition to the query itself
            (``K + 1`` items are requested from the index).
        n_trees: Number of trees used to build the index. Defaults to the
            previously hard-coded 10000; smaller values build faster.

    Returns:
        ``(similarities, nearest_neighbors)`` where ``nearest_neighbors`` is
        the list of row indices returned by the index and ``similarities[k]``
        is the cosine similarity between the query and row
        ``nearest_neighbors[k]``, truncated to four decimal places.

    Raises:
        ValueError: If ``preds1`` has no rows.
    """
    n_items = preds1.shape[0]
    if n_items == 0:
        raise ValueError("preds1 must contain at least one feature vector")

    # Take the vector length from the data instead of hard-coding 2560.
    dims = preds1.shape[1]
    n_nearest_neighbors = K + 1

    # Build the ANN index. The metric is named explicitly rather than relying
    # on Annoy's implicit default.
    t = AnnoyIndex(dims, 'angular')
    for i in range(n_items):
        t.add_item(i, preds1[i])
    t.build(n_trees)

    query_index = n_items - 1
    master_vector = preds1[query_index]
    nearest_neighbors = t.get_nns_by_item(query_index, n_nearest_neighbors)

    similarities = []
    for j in nearest_neighbors:
        similarity = 1 - spatial.distance.cosine(master_vector, preds1[j])
        # Truncate (not round) to four decimals, as before.
        similarities.append(int(similarity * 10000) / 10000.0)
    return similarities, nearest_neighbors

In [None]:
def get_similar_images(similarities, nearest_neighbors, images1):
    """Show each neighbour image followed by its similarity value.

    For the k-th index in ``nearest_neighbors`` the image ``images1[index]``
    is displayed with ``show_img`` and ``similarities[k]`` is printed.
    """
    for rank, image_index in enumerate(nearest_neighbors):
        show_img(images1[image_index])
        print(similarities[rank])

In [None]:
def main(new_image_file, model_file, image_pred_file, K):
    """Match one query image against the stored reference predictions.

    The query's feature vector is appended after the stored reference
    vectors, the nearest neighbours of that last row are looked up, and the
    best neighbour that is not the query itself is recorded.

    Args:
        new_image_file: Path of the query image.
        model_file: Path of the saved feature-extraction model.
        image_pred_file: Path of the ``.npz`` archive with the reference
            ``images`` and ``preds`` arrays.
        K: Number of neighbours to retrieve besides the query itself.

    Side effects:
        Appends exactly one entry to each of the module-level ``score`` and
        ``reference_id`` lists (``None`` in both when no neighbour other than
        the query was returned), prints the neighbour indices and displays
        the neighbour images.

    Notes:
        ``ref_id`` is read from module state; it must hold the reference file
        names in the same order as the rows of the stored predictions.
        The model and the archive are reloaded on every call.
    """
    model = load_model_from_path(model_file)
    images, preds = load_images_preds(image_pred_file)
    new_image = load_images_from_file(new_image_file)
    new_image_predict = model.predict(new_image)
    images_arr = np.append(images, new_image.reshape(1, 1, 600, 600, 3), axis=0)
    preds_arr = np.append(preds, new_image_predict, axis=0)
    similarities, nearest_neighbors = get_nearest_neighbor_and_similarity(preds_arr, K)
    print(nearest_neighbors)

    # The query is the last row. Pick the best neighbour that is not the
    # query itself rather than assuming the query always comes back first:
    # on a tie it can be returned second, and its index is not in ref_id.
    query_index = preds_arr.shape[0] - 1
    best_score, best_reference = None, None
    for similarity, neighbor in zip(similarities, nearest_neighbors):
        if neighbor != query_index:
            best_score, best_reference = similarity, ref_id[neighbor]
            break

    # Always record both values. Skipping the score when it equals 1 (as was
    # done before) dropped perfect matches and shifted every later score onto
    # the wrong query row.
    score.append(best_score)
    reference_id.append(best_reference)
    get_similar_images(similarities, nearest_neighbors, images_arr)

In [None]:
def get_metrics(query_id, reference_id, score, output_path='results.csv'):
    """Write the match results to a CSV file.

    Args:
        query_id: Query image names.
        reference_id: Matched reference image names.
        score: Confidence value of each match.
        output_path: Destination file. The content is CSV text, so the
            default now carries a ``.csv`` extension; the former name
            ``results.xlsx`` produced a file spreadsheet tools cannot open
            as a workbook.

    The three inputs become the columns ``query_id``, ``reference_id`` and
    ``score``; the index is not written.
    """
    df = pd.DataFrame()
    df['query_id'] = pd.Series(query_id)
    df['reference_id'] = pd.Series(reference_id)
    df['score'] = pd.Series(score)
    df.to_csv(output_path, index=False)

In [None]:
# Build the reference index: load the reference images, create the feature
# model, compute one prediction per image and persist everything to disk.
# NOTE: get_all_images() also fills the module-level ref_id list (through
# load_images_from_folder). ref_id is not written to disk, and main() reads
# it, so this cell must have run in the current kernel before the query cell.
all_imgs_arr = get_all_images()
# get_all_images() wraps the image list in an extra leading axis; move the
# per-image axis to the front so there is one (1, 600, 600, 3) entry per image.
all_imgs_arr = all_imgs_arr.reshape(all_imgs_arr.shape[1], 1, 600, 600, 3)
# Standalone copy of the image array (np.save adds the .npy extension).
np.save('all_images_effb7', all_imgs_arr)
# `model` must stay a module-level name: get_preds() reads it as a global.
model = create_model()
preds_all = get_preds(all_imgs_arr)
# Archive read back later through load_images_preds (keys 'images' and 'preds').
np.savez('images_preds_effb7', images=all_imgs_arr, preds=preds_all)

In [None]:
# Run every query image through the matcher and write the results file.
model_file_path = r'effb7.h5'
image_pred_path = r'images_preds_effb7.npz'
query_path = r'Query'

# Start from empty accumulators so re-running this cell does not append a
# second copy of every row. clear() keeps the same list objects that main()
# appends to.
query_id.clear()
reference_id.clear()
score.clear()

# List the folder once, in sorted order, and record the query name in the
# same iteration that processes it. This keeps query_id aligned row by row
# with reference_id and score, instead of relying on two separate directory
# listings returning the same order.
for file in sorted(os.listdir(query_path)):
    t_path = os.path.join(query_path, file)
    if not os.path.isfile(t_path):
        # Skip sub-directories such as .ipynb_checkpoints.
        continue
    main(t_path, model_file_path, image_pred_path, 1)
    query_id.append(os.path.splitext(file)[0])

get_metrics(query_id, reference_id, score)