<a href="https://colab.research.google.com/github/seraogianluca/mircv-exercises/blob/main/Exercise3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#MIRCV 2021
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
#To support GPU
!pip install opencv-python==4.4.0.46
!pip install whoosh



In [3]:
import cv2
import numpy as np
import os
import operator

from whoosh import index
from whoosh.fields import *
from whoosh.analysis import *
from whoosh import qparser

# for showing images in the cell outputs (Jupyter Notebooks / Google Colab)
from IPython.display import display
from ipywidgets import Image

from tqdm.notebook import tqdm

np.random.seed(42)  # makes the random pivot choice reproducible

BASE_DIR = '/content/gdrive/My Drive/mircv2021'
DEEP_PROTO = BASE_DIR + '/data/caffe/train_val.prototxt'
DEEP_MODEL = BASE_DIR + '/data/caffe/bvlc_reference_caffenet.caffemodel'
SRC_FOLDER = BASE_DIR + '/data/coco_img'
OUT_FOLDER = BASE_DIR + '/out'
WHOOSH_FOLDER = OUT_FOLDER + '/whoosh'
PIVOTS_FILE = OUT_FOLDER + '/pivots.txt.gz'
PIVOTS_ID_FILE = OUT_FOLDER + '/pivot_ids.txt'

IMAGE_ID_FILE = OUT_FOLDER + '/image_ids.txt'
FEATURES_FILE = OUT_FOLDER + '/extracted_features.txt.gz'

if not os.path.exists(OUT_FOLDER):
    os.mkdir(OUT_FOLDER)
if not os.path.exists(WHOOSH_FOLDER):
    os.mkdir(WHOOSH_FOLDER)

DEEP_LAYER = 'relu7'
SIZE = (227, 227)
MEAN_VALUES = (104, 117, 123)  # BGR

NUM_PIVOTS = 100
INDEX_FIELD = 'deep'
KX = 10
KQ = 10

K = 8

In [4]:
class DNNExtractor:    
    
    def __init__(self, net_proto_path, trained_model_path, size, mean_values=None):
        self.size = size
        self.mean_values = mean_values

        self.net = cv2.dnn.readNetFromCaffe(net_proto_path, trained_model_path)
        # to enable GPU (this won't work on Colab without recompiling opencv)
        self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
        self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
        
    
    def extract(self, img_file, layer, normalize=False):
        img = cv2.imread(img_file)
        blob = cv2.dnn.blobFromImage(img, 1.0, self.size, self.mean_values, swapRB=False, crop=False)
        self.net.setInput(blob)
        prob = self.net.forward(layer).flatten()

        if normalize:
            prob /= np.linalg.norm(prob)

        return prob
    
# it creates an instance of the DNNExtractor class
dnn = DNNExtractor(DEEP_PROTO, DEEP_MODEL, SIZE, MEAN_VALUES)

In [5]:
def extract_features(img_folder):
    # get the list of image filenames
    filenames = [f for f in os.listdir(img_folder)]

    descriptors = [dnn.extract(os.path.join(SRC_FOLDER, f),
                               DEEP_LAYER, normalize=True) for f in tqdm(filenames)]

    descriptors = np.array(descriptors)
    filenames = np.array(filenames)
    
    return descriptors, filenames

# compute and cache features
if not os.path.exists(FEATURES_FILE):
    descriptors, ids = extract_features(SRC_FOLDER)
    np.savetxt(IMAGE_ID_FILE, ids, fmt='%s')
    np.savetxt(FEATURES_FILE, descriptors)

ids = np.loadtxt(IMAGE_ID_FILE, dtype='str')
descriptors = np.loadtxt(FEATURES_FILE)

In [6]:
#TODO: STEP 0

#make pivots
# get NUM_PIVOTS integers between 0 and len(descriptors)-1 without repetitions
# use the pivot array indices as pivot ids
if not os.path.exists(PIVOTS_FILE):
  sample = np.random.choice(len(descriptors)-1, size=NUM_PIVOTS, replace=False)
  pivots = [descriptors[sample[i]] for i in range(len(sample))]
  pivot_ids = np.arange(NUM_PIVOTS)

  np.savetxt(PIVOTS_FILE, pivots)
  np.savetxt(PIVOTS_ID_FILE, pivot_ids)


In [7]:
#TODO: STEP 1

pivots = np.loadtxt(PIVOTS_FILE)
pivot_ids = np.loadtxt(PIVOTS_ID_FILE)


def search(queryF, dataset, ids, k):
    cos_similarities = np.dot(dataset, queryF)
    result = zip(cos_similarities, ids)
    res = sorted(result, reverse=True)[:k]
    return res

#use pivots to transform features to Surrogare Text Representation (STR)
def features_2_text(img_f, top_k):
  #generate and return the STR of the img_f feature.
  #use the search function to retrieve the top_k nearest neighbor pivots
  res = search(img_f,pivots,pivot_ids,top_k)
  img_str = ""
  for _,id in res:
    img_str = img_str + str(int(id))*top_k + " "
    top_k -= 1
  return img_str

In [8]:
#TODO: STEP 2

#Whoosh indexing
#Initialize the Whoosh index (see Exercise1)
#call features_2_text to transform the image features to STR
#index the STR of all images into the Whoosh index

schema = Schema(desc=TEXT(analyzer=KeywordAnalyzer(), vector=True, stored=True),
                id=ID(stored=True))
ix = index.create_in(WHOOSH_FOLDER, schema)
print('creating index...')

writer = ix.writer()

for descriptor,id in zip(descriptors,ids):
  img = features_2_text(descriptor, KX)
  writer.add_document(desc=img, id=id)

writer.commit()
print('indexing done ')


creating index...
indexing done 


In [9]:
def display_image(filename, score=0):
    """ Displays an image and its corresponding score (optional)
        in Jupyter Notebook / Google Colab
    """
    filepath = os.path.join(SRC_FOLDER , filename)
    image = Image.from_file(filepath, width=300, height=400)
    print('{} - {:.3f}'.format(filename, score))  # :.3f = 3-decimal float
    display(image)


def display_results(results):
    """ Displays a set of results. Takes a list of (score, id) couples """
    for score, filename in results:
        display_image(filename, score)

In [10]:
#TODO: STEP 3

#Whoosh searching
#Extract the features of img_query.
#Transform the features to STR
#Perform a Whoosh search and call display_results to show the search results
#initialize whoosh searcher

img_query = SRC_FOLDER + "/000000321557.jpg"
img = dnn.extract(img_query,DEEP_LAYER, normalize=True)
query_str = features_2_text(img,KQ)

searcher = ix.searcher()
parser = qparser.QueryParser('desc',ix.schema, group=qparser.OrGroup)
txt_query = parser.parse(query_str)
res = searcher.search(txt_query)

results = []
for hit in res:
  results.append((hit.score,hit['id']))
display_results(results)


000000321557.jpg - 57.163


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xdb\x00C\x00\x01\x01\x01\x01\…

000000038829.jpg - 16.594


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xdb\x00C\x00\x01\x01\x01\x01\…

000000297147.jpg - 12.483


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xdb\x00C\x00\x01\x01\x01\x01\…

000000149406.jpg - 12.483


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xdb\x00C\x00\x01\x01\x01\x01\…

000000441586.jpg - 11.864


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xe2\x0cXICC_PROFILE\x00\x01\x…

000000408830.jpg - 11.688


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xdb\x00C\x00\x01\x01\x01\x01\…

000000293245.jpg - 11.688


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xe2\x0cXICC_PROFILE\x00\x01\x…

000000270066.jpg - 11.498


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xdb\x00C\x00\x01\x01\x01\x01\…

000000018737.jpg - 11.236


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xed\x00\x84Photoshop 3.0\x008…

000000343934.jpg - 11.129


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00\xb4\x00\xb4\x00\x00\xff\xe2\x0cXICC_PROFILE\x00…

In [30]:
# Optional reordering task

#get the IDs of images found by whoosh in hit
#transforms IDs into index positions in the results_descriptors
#to get the corresponding image features
#reorder the features using the search and
#show the result
img_ids = [id for _,id in results]
reordered = []
features = []
for _,id in results:
  features.append(descriptors[np.where(ids == id)][0])
reordered = search(img, features, img_ids,KQ)
print(reordered)
display_results(reordered)

[(1.0000001218082937, '000000321557.jpg'), (0.5208934502487856, '000000297147.jpg'), (0.5062005759392438, '000000408830.jpg'), (0.4567090213145806, '000000293245.jpg'), (0.3991721766381607, '000000270066.jpg'), (0.3378918928825403, '000000441586.jpg'), (0.2910694436621779, '000000343934.jpg'), (0.241505143664762, '000000149406.jpg'), (0.2243554475993307, '000000018737.jpg'), (0.2116141455503468, '000000038829.jpg')]
000000321557.jpg - 1.000


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xdb\x00C\x00\x01\x01\x01\x01\…

000000297147.jpg - 0.521


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xdb\x00C\x00\x01\x01\x01\x01\…

000000408830.jpg - 0.506


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xdb\x00C\x00\x01\x01\x01\x01\…

000000293245.jpg - 0.457


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xe2\x0cXICC_PROFILE\x00\x01\x…

000000270066.jpg - 0.399


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xdb\x00C\x00\x01\x01\x01\x01\…

000000441586.jpg - 0.338


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xe2\x0cXICC_PROFILE\x00\x01\x…

000000343934.jpg - 0.291


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00\xb4\x00\xb4\x00\x00\xff\xe2\x0cXICC_PROFILE\x00…

000000149406.jpg - 0.242


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xdb\x00C\x00\x01\x01\x01\x01\…

000000018737.jpg - 0.224


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xed\x00\x84Photoshop 3.0\x008…

000000038829.jpg - 0.212


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xdb\x00C\x00\x01\x01\x01\x01\…