In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import Model
import faiss
import pickle
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.cluster import normalized_mutual_info_score

In [None]:
# Reset Keras' global state before loading, so re-running this cell starts clean.
tf.keras.backend.clear_session()
    
# Load the saved test model from 'test_model_saved'. compile=False: the model is
# not compiled after loading; it is only called for inference in final_fun_1.
# NOTE: final_fun_1 reads `test_model` as a global, so this cell must be run
# before any call to final_fun_1 / final_fun_2.
test_model = tf.keras.models.load_model('test_model_saved', compile=False)

In [2]:
def final_fun_1(X, k=5):
    '''
    Predict the labels of a single image.

    The image is decoded, resized to 64x64 and scaled to [0,1], embedded with
    the global `test_model`, L2-normalized and searched against the faiss
    index stored in "kmeans_trained.index". The ids returned by the search are
    mapped back to labels with the label encoder pickled in "le.pkl".

    Parameters
    ----------
    X : str
        Path of a JPEG image file.
    k : int, optional
        Number of index entries to retrieve, i.e. number of labels returned.
        Defaults to 5 (the value that was previously hard-coded).

    Returns
    -------
    numpy.ndarray
        The k labels, in the order returned by the index search.

    Raises
    ------
    ValueError
        If k is smaller than 1.
    '''

    if k < 1:
        raise ValueError("k must be a positive integer")

    #read image file at the path and decode it as a 3-channel image
    image = tf.io.read_file(X)
    image = tf.image.decode_jpeg(image, channels=3)

    #resize image to the 64x64 input fed to the model
    image = tf.image.resize(image, [64,64])

    #convert to float32
    image = tf.cast(image, tf.float32)

    # normalize image pixels to [0,1] range
    image /= 255.0

    # get embeddings for the test image (batch of one)
    test_embeds = test_model(np.expand_dims(image,0))

    # convert to a C-contiguous float32 array, the layout faiss searches on
    test_embeds = np.ascontiguousarray(test_embeds, dtype=np.float32)

    # L2-normalize each embedding row; an all-zero embedding is left as is
    # instead of being turned into NaNs by a division by zero
    norm = np.linalg.norm(test_embeds, axis=1, keepdims=True)
    norm[norm == 0] = 1.0
    test_embeds = test_embeds / norm

    #load the saved kmeans trained model index (re-read from disk on every call)
    kmeans_index = faiss.read_index("kmeans_trained.index")

    #search for the k nearest entries of the index; only the ids are needed
    _, indices = kmeans_index.search(test_embeds, k)

    # load the saved label encoder
    # NOTE: pickle.load can execute arbitrary code - only load a trusted le.pkl
    with open('le.pkl', "rb") as input_file:
        le = pickle.load(input_file)

    # get the actual outputs using the inverse transform of the saved label encoder
    actual_output_labels = le.inverse_transform(indices[0])

    return actual_output_labels

In [3]:
def final_fun_2(X,y):
    '''
    Return the error metrics for one data point (or a batch of data points).

    Parameters
    ----------
    X : str or sequence of str
        Path of a single image, or a sequence of image paths.
    y : array-like
        True label(s), one per image in X (e.g. np.array([24607]) for a
        single image).

    Returns
    -------
    tuple
        (total_square_error, nmi)
        total_square_error is the value stored in "kmeans_error.pkl"; it is
        loaded as is and does not depend on X or y.
        nmi is the normalized mutual information between y and the top-1
        predicted label of every image.

    Notes
    -----
    NMI compares two groupings of samples, not label values. For a single
    data point both groupings contain one sample, so the score is always 1.0
    whether or not the prediction is correct. Pass several images and labels
    to obtain an informative score.

    Raises
    ------
    ValueError
        Raised by scikit-learn if the number of labels in y differs from the
        number of images in X.
    '''

    # a scalar X is a single path: treat it as a batch of one
    image_paths = [X] if np.ndim(X) == 0 else list(X)
    true_labels = np.ravel(y)

    #call the function 1 to get the predicted labels and keep the top-1 label per image
    top1_labels = np.array([np.ravel(final_fun_1(path))[0] for path in image_paths])

    #get the normalized mutual information between the true and predicted labels
    nmi = normalized_mutual_info_score(true_labels, top1_labels)

    # load the error file
    # NOTE: pickle.load can execute arbitrary code - only load a trusted file
    with open("kmeans_error.pkl", "rb") as f:
        total_square_error = pickle.load(f)

    return total_square_error, nmi

In [4]:
# Path of the input image (a single data point) passed to final_fun_1 / final_fun_2
X = 'hotel-id-2021-fgvc8/train_images/85/8000d59f6a3d765f.jpg'

# True target value for that image, wrapped in a 1-element array as passed to final_fun_2
y = np.array([24607])

In [5]:
# Function call to get the predicted labels for the input data point;
# the result is the array of labels decoded from the 5 nearest index entries.
final_fun_1(X)

array([42350, 59929, 11836, 35324, 39316])

In [6]:
# Function call to get the error metrics for the input data point and its true value.
# Returns (stored k-means error, NMI).
# NOTE(review): NMI between two single-element labelings is trivially 1.0, so the
# 1.0 below does not mean the prediction matched y (none of the labels predicted
# above equals 24607).
final_fun_2(X,y)

(912.0593872070312, 1.0)