In [1]:
# mount gdrive
# Makes the Drive contents available under /content/drive; later cells read the
# project files from /content/drive/MyDrive/...
# NOTE(review): `google.colab` is presumably only importable inside a Colab runtime - confirm before running elsewhere.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# import libraries
import os
import glob
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pickle

import keras
import keras.backend as K
from keras.datasets import cifar100
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, BatchNormalization, Flatten, GlobalMaxPooling2D, GlobalAveragePooling2D, UpSampling2D
from keras.optimizers import Adam, SGD
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical
import time
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tqdm import tqdm
from scipy.spatial.distance import cosine
from tensorflow.python.ops.numpy_ops import np_config
np_config.enable_numpy_behavior()

In [13]:
# load files
!cp "/content/drive/MyDrive/Colab Notebooks/Information Retrieval CSE508/Project/test_images.zip" '/content'
!unzip 'test_images.zip' &> /dev/null
!rm 'test_images.zip'

train_labels = np.loadtxt('/content/drive/MyDrive/Colab Notebooks/Information Retrieval CSE508/Project/train_labels.csv',delimiter=',')
test_labels =  np.loadtxt('/content/drive/MyDrive/Colab Notebooks/Information Retrieval CSE508/Project/test_labels.csv',delimiter=',')

with open('/content/drive/MyDrive/Colab Notebooks/Information Retrieval CSE508/Project/clustering_index.pkl', 'rb') as file:
  index=pickle.load(file)

# load test embeddings
test_embeddings = np.loadtxt('/content/drive/MyDrive/Colab Notebooks/Information Retrieval CSE508/Project/test_embeddings.csv',delimiter=',')

In [14]:
# helper functions     
def retrieveResults(query_embedding, inverted_index=None):
  '''
    Returns the image ids stored under the index term closest to the query,
    ordered from nearest to farthest.

    The closest term is the entry whose first element has the smallest cosine
    distance to the query. Only that entry's posting list is searched; its
    postings are ranked by Euclidean distance to the query.

    Parameters:
      query_embedding : numpy array holding the query embedding.
      inverted_index  : optional index to search. Entry i is read as
                        inverted_index[i][0] (term embedding) and
                        inverted_index[i][1] (posting list whose items carry
                        the image id at [0] and its embedding at [1]).
                        Defaults to the notebook-level `index` loaded from
                        clustering_index.pkl.

    Returns:
      list of image ids, ascending by Euclidean distance to the query;
      an empty list when the index has no entries.

  '''
  if inverted_index is None:
    inverted_index = index
  # np.argmin raises on an empty sequence; an empty index simply has no results
  if len(inverted_index) == 0:
    return []

  flat_query = query_embedding.reshape(-1)
  term_distances = [
      cosine(flat_query, inverted_index[term][0].reshape(-1))
      for term in range(len(inverted_index))
  ]
  # find the closest term
  closest_term = np.argmin(term_distances)

  # score every posting of the closest term against the query embedding
  scored_postings = [
      (posting[0], np.linalg.norm(posting[1] - query_embedding))
      for posting in inverted_index[closest_term][1]
  ]

  # sort in ascending order of distance (list.sort is stable, so ties keep posting order)
  scored_postings.sort(key=lambda scored: scored[1])

  return [image_id for image_id, _ in scored_postings]


def calculateAveragePrecision(query_image_label,retrieved_image_ids,labels=None):
  '''
    Calculates the average precision of a ranked list of retrieved images
    for a given query image from the test set.

    A retrieved image counts as relevant when its label equals the query label.
    Precision@k is taken at every rank k that holds a relevant image, and those
    values are averaged over the number of relevant images *retrieved* (not
    over all relevant images in the collection).

    Parameters:
      query_image_label   : label of the query image.
      retrieved_image_ids : ranked image ids; ids are 1-based, so id n is
                            looked up at labels[n-1].
      labels              : optional label array indexed by image id - 1.
                            Defaults to the notebook-level `train_labels`.
                            Assumes one scalar label per image - TODO confirm.

    Returns:
      the average precision, or 0 when nothing relevant (or nothing at all)
      was retrieved.

  '''
  if labels is None:
    labels = train_labels

  retrieved_labels = np.array([labels[int(image_id)-1] for image_id in retrieved_image_ids])
  is_relevant = retrieved_labels == query_image_label

  relevant_count = np.sum(is_relevant)
  if relevant_count == 0:
    return 0

  # running hit count divided by rank gives precision@k for every k in one pass
  ranks = np.arange(1, is_relevant.shape[0]+1)
  precision_at_k = np.cumsum(is_relevant)/ranks

  return np.sum(precision_at_k[is_relevant])/relevant_count

def calculateMeanAveragePrecision():
  '''
    Mean of the per-query average precision over every row of `test_embeddings`.

    Row i is treated as query i: its label is `test_labels[i]` and its embedding
    is the row without its first column (presumably an id column - TODO confirm).

  '''
  per_query_ap = [
      calculateAveragePrecision(
          test_labels[row],
          retrieveResults(test_embeddings[row][1:]),
      )
      for row in tqdm(range(test_embeddings.shape[0]))
  ]
  return np.mean(per_query_ap)

In [18]:
# calculate MAP on the test dataset
# Runs retrieval for every test query, so this cell is the slow one (see the tqdm bar below).
# NOTE(review): the result is bound to `map`, which shadows the builtin `map` for the
# rest of the kernel session; the name is kept because other cells may read it.
map = calculateMeanAveragePrecision()
print('\nMean Average Precision for the test query dataset is : {:.4f}'.format(map))

100%|██████████| 9984/9984 [02:40<00:00, 62.31it/s]


Mean Average Precision for the test query dataset is : 0.9209



