In [None]:
# Copyright 2020 IITK EE604A Image Processing. All Rights Reserved.
# 
# Licensed under the MIT License. Use and/or modification of this code outside of EE604 must reference:
#
# © IITK EE604A Image Processing 
# https://github.com/ee604/ee604_assignments
#
# Author: Shashi Kant Gupta, Chiranjeev Prachand and Prof K. S. Venkatesh, Department of Electrical Engineering, IIT Kanpur

# Task 4 (Bonus Question): Template Matching and Visual Search

In this bonus task, you will study the template matching algorithm, which is used to search for and locate a template image within a larger image. You will then use this method, together with the techniques you learned in task1, task2, and task3, to search for a given "target" image in a large cluttered "search" image. Finding a target image in this way is usually called a visual search task.

In this assignment, you will be using this method in a preliminary setting. But this method can be easily generalized to complex visual search tasks. Instead of directly using the image pixels, you first extract features of those images using some deep learning methods or other computer vision algorithms and then apply the template matching method on extracted features instead of the image pixels.

**Theories:** 
* Template Matching: [http://www.cse.psu.edu/~rtc12/CSE486/lecture07.pdf](http://www.cse.psu.edu/~rtc12/CSE486/lecture07.pdf)
* Template Matching (Optional): [https://docs.adaptive-vision.com/4.7/studio/machine_vision_guide/TemplateMatching.html](https://docs.adaptive-vision.com/4.7/studio/machine_vision_guide/TemplateMatching.html)
* Template Matching Wikipedia: [https://en.wikipedia.org/wiki/Template_matching](https://en.wikipedia.org/wiki/Template_matching)
* Visual Search (Optional): [http://www.scholarpedia.org/article/Visual_search](http://www.scholarpedia.org/article/Visual_search)

---

### Your Task
You will be given a set of visual search tasks consisting of 156 target images, each to be found in its corresponding search image (156 search images in total). All images contain only letters of the English alphabet. You have to write a Python function that returns the (x, y) position of the target in the given search image. You have to use the template matching approach to find the location of the target image. However, there is a problem: the provided dataset got corrupted and now contains a lot of noise. Using image enhancement methods to make the images clearer before applying the template matching algorithm could help. It's up to you how you use the techniques taught in the EE604 lectures to get the best score. Your score will be evaluated as `score = num_correct_prediction/156`

* Assume the target appears in the search image at the same size as the given target image.
* Assume the target appears in the search image in the same orientation as the given target image.
* You are free to use any OpenCV module except `cv2.matchTemplate()`.

**Example of Visual Search Task:**
![](https://github.com/ee604/ee604_assignments/raw/master/assignment_2/imgs/Example.png)

---

In [10]:
%%bash
# Install the course helper package straight from GitHub; it provides the
# `download_dataset` function imported by this notebook.
pip install git+https://github.com/ee604/ee604_plugins

Collecting git+https://github.com/ee604/ee604_plugins
  Cloning https://github.com/ee604/ee604_plugins to /tmp/pip-req-build-o2ay80yo
Building wheels for collected packages: ee604-plugins
  Building wheel for ee604-plugins (setup.py): started
  Building wheel for ee604-plugins (setup.py): finished with status 'done'
  Created wheel for ee604-plugins: filename=ee604_plugins-0.2.2-cp36-none-any.whl size=2313 sha256=688aaf6f3be8cd578ad1e7c43f33db66ad3fd584a4e15b11a76fa0d83b46f606
  Stored in directory: /tmp/pip-ephem-wheel-cache-vjm_p9ac/wheels/34/a8/1d/ae3b7d209ecde89b4800a47ec55a61e7503bb9548bbb975806
Successfully built ee604-plugins


  Running command git clone -q https://github.com/ee604/ee604_plugins /tmp/pip-req-build-o2ay80yo


In [11]:
# Importing required libraries

import cv2
import numpy as np
from IPython.display import display
from PIL import Image
import matplotlib.pyplot as plt

from ee604_plugins import download_dataset

# Fetch the data files for assignment 2, task 4.
# NOTE(review): presumably this creates the data/*.npy files read by this notebook — confirm against ee604_plugins.
download_dataset(assignment_no=2, task_no=4)

Download Complete!


In [12]:
# Dataset arrays, indexed by task number k:
#   X_img[k] - search image, Y_img[k] - target image,
#   M_img[k] - mask that the scoring cell reads at the predicted (x, y).
X_img, Y_img, M_img = (
    np.load("data/search_img.npy"),
    np.load("data/target_img.npy"),
    np.load("data/mask_img.npy"),
)

# Independent copies of the first target/search pair for quick experiments.
sample_target, sample_search = np.copy(Y_img[0]), np.copy(X_img[0])

In [13]:
# Do not change codes inside this cell

def calc_search_score():
    '''
    Score `searchTarget` over every (search image, target image) pair in the dataset.

    For each index k, `searchTarget` is called on copies of `X_img[k]` and `Y_img[k]`
    and the value of `M_img[k]` at the returned (x, y) is added to a running total.

    Returns:
    + the running total divided by the number of search images, rounded to 2 decimals
    '''
    c = 0 
    for k in range(len(X_img)):
        # Copies are passed so that searchTarget cannot modify the dataset arrays
        x, y = searchTarget(np.copy(X_img[k]), np.copy(Y_img[k]))
        # x indexes the first axis of the mask, y the second
        # NOTE(review): presumably the mask is 1 where a prediction counts as correct and 0 elsewhere - confirm
        c += M_img[k][x, y]
    
    return round(c/len(X_img), 2)

In [14]:
def searchTarget(search_img, target_img):
    '''
    Locate `target_img` inside `search_img` by template matching and return the
    centre of the best-matching window.

    Both images are median-filtered, standardised with their own global mean and
    standard deviation, and then cross-correlated. Because the standardised target
    has zero mean, the score of a window is the zero-mean cross-correlation between
    that window and the target; it is NOT additionally normalised by the window's
    own standard deviation.

    Inputs:
    + search_img - image in which you have to search for the target, size = (512, 512) ==> (dim1, dim2)
    + target_img - target image, size = (55, 55)
      (both are assumed to be single-channel 8-bit images, as required by cv2.medianBlur
       for these kernel sizes - TODO confirm against the dataset)

    Outputs:
    + x - index along the first dimension ('dim1') of the search image where the target is present
    + y - index along the second dimension ('dim2') of the search image where the target is present

    Raises:
    + ValueError - if the target image is larger than the search image along either dimension

    Allowed external package:
    + You are free to use any OpenCV/numpy module except 'cv2.matchTemplate()'.

    Hint:
    + Make sure you properly map the index with respect to input image size i.e. (512, 512).
      Generally convolution/ correlation operation will reduce your output size by the size
      of templated image.
    + Correlation operation can be written in terms of convolution operator.
    + Use OpenCV's cv2.matchTemplate() to know which method works best and implement corresponding
      method using other openCV modules.
    + This algorithm can be implemented using the functions you wrote/ used for other three tasks.

    '''
    x, y = 0, 0

    #############################
    # Start your code from here #
    #############################

    H, W = search_img.shape
    h, w = target_img.shape
    if h > H or w > W:
        raise ValueError("target_img must not be larger than search_img")

    # Median filtering for removing salt pepper noise
    filter_search = cv2.medianBlur(search_img, 5)
    filter_target = cv2.medianBlur(target_img, 3)

    # Standardise both images (zero mean, unit variance). A perfectly flat image has
    # zero standard deviation; dividing by 1 instead avoids NaN/inf scores.
    search_std = np.std(filter_search)
    target_std = np.std(filter_target)
    norm_search = (filter_search - np.mean(filter_search)) / (search_std if search_std > 0 else 1.0)
    norm_target = (filter_target - np.mean(filter_target)) / (target_std if target_std > 0 else 1.0)

    # Cross-correlation of the target with every window of the search image.
    # cv2.filter2D computes a correlation (the kernel is not flipped); with the anchor at
    # the kernel's top-left corner, output pixel (i, j) is
    #     sum(norm_search[i:i+h, j:j+w] * norm_target)
    # i.e. exactly the sliding-window sum, but computed in a single vectorised call
    # instead of a Python loop over every window position.
    corr = cv2.filter2D(norm_search, cv2.CV_64F, norm_target,
                        anchor=(0, 0), borderType=cv2.BORDER_CONSTANT)

    # Keep only positions whose window lies completely inside the search image
    # (the remaining positions involve border padding).
    res = corr[:H - h + 1, :W - w + 1].astype(np.float32)

    # minMaxLoc reports locations as (column, row)
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
    top_left = max_loc

    # Convert the window's top-left corner to its centre:
    # column -> second dimension (y), row -> first dimension (x)
    y = top_left[0] + w//2
    x = top_left[1] + h//2

    #############################
    # End your code here ########
    #############################

    return x, y

In [15]:
# Do not change codes inside this cell
# Run searchTarget on every dataset pair and report the resulting score
score = calc_search_score()
print("Your score:", score)

Your score: 0.94
