# HASHING: QUERYING IN FACE DATASETS
Hashing is most commonly used to implement hash tables and for data encryption. A hash table stores key/value pairs in the form of a list, while the hash function maps the elements of the dataset via a key and generates hash values. A real-world recognition system has to cope with several unseen individuals and determine whether a given face image is registered or not, and thus, certain elements in databases can be found much faster-using hashing functions and classification methods.

__Question:__

Implement a basic hashing model from scratch that hashes the images. You can use any dataset of few images and can implement a-hash or any other hashing algorithm of your choice. For a-hash, given any images, first resize the image to a suitable size, followed by grayscale conversion of the image. Then mean normalize the image to obtain a binary image, whose sum can be used as a hash value. Using the hash model, encode all the images present inside your directory and then search for images similar to the query image.

# Sanatkumar Ippalpalli Eckovation 15 Aug 20 Batch -4

In [1]:
# Import sklearn daatasets, model selection, neural network, numpy, matplotlib
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import warnings
import cv2
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings(action='ignore', category=DataConversionWarning)
import matplotlib.image as mpimg
import os

In [2]:
# Reading multiple images from a folder and storing it in a list.
folder = "./Documents/FaceDataset/train/madonna"
images = []
for file in os.listdir(folder):
    img = mpimg.imread(os.path.join(folder, file))
    if img is not None:
        images.append(img)
print(images)

[array([[[135, 137, 134],
        [138, 140, 137],
        [138, 138, 136],
        ...,
        [240, 240, 240],
        [240, 240, 240],
        [240, 240, 240]],

       [[141, 143, 140],
        [127, 129, 126],
        [145, 145, 143],
        ...,
        [240, 240, 240],
        [240, 240, 240],
        [240, 240, 240]],

       [[138, 138, 136],
        [134, 134, 132],
        [146, 146, 144],
        ...,
        [240, 240, 240],
        [240, 240, 240],
        [240, 240, 240]],

       ...,

       [[223, 185, 149],
        [228, 190, 154],
        [233, 197, 163],
        ...,
        [240, 240, 240],
        [240, 240, 240],
        [240, 240, 240]],

       [[223, 185, 149],
        [229, 191, 155],
        [234, 198, 164],
        ...,
        [240, 240, 240],
        [240, 240, 240],
        [240, 240, 240]],

       [[221, 183, 147],
        [228, 190, 154],
        [235, 196, 163],
        ...,
        [240, 240, 240],
        [240, 240, 240],
        [240, 240, 240]

# 



In [3]:
# Vectorizing the images and storing it in a list
image_vector = []
for image in images:
    row,col = image.shape
    img_vec = image.reshape(row*col)
    img_vec_norm = img_vec / np.linalg.norm(img_vec)  # Converting the image vector to a unit vector
    image_vector.append(img_vec_norm)
print(img_vec.shape)
print(len(image_vector))

ValueError: too many values to unpack (expected 2)

In [4]:
def genRandomHashVectors(m, length):  # Generate random unit vectors for Hashing
    hash_vector = []
    for i in range(m):
        v = np.random.uniform(-1,1,length)
        vcap = v / np.linalg.norm(v)
        hash_vector.append(vcap)
    return hash_vector

In [5]:
def ahash(hash_vector ,data): 
    hash_code = []
    for i in range(len(hash_vector)):
        if np.dot(data,hash_vector[i]) > 0:
            hash_code.append('1')
        else:
            hash_code.append('0')
    return ''.join(hash_code)

In [6]:
hash_vector = genRandomHashVectors(20,len(image_vector[0]))
print(ahash(hash_vector,image_vector[0]))

IndexError: list index out of range

In [7]:
# Creating a Image Dictionary using the hash as the keys
img_dict = {}
for i in range(len(image_vector)):
    hash_code = ahash(hash_vector,image_vector[i])
    if hash_code not in img_dict.keys():
        img_dict[hash_code] = [i]
    else:
        img_dict[hash_code].append(i)

In [8]:
keys = list(img_dict.keys())
values = list(img_dict.values())


In [9]:
print(img_dict)

{}


In [10]:
# Plotting images with same hash code
def plotImages(images, img_indices):
    imgs = [images[i] for i in range(len(images)) if i in img_indices]
    fig = plt.figure()
    cols = 2
    n_images = len(imgs)
    for n,image in zip(range(n_images),imgs):
        ax = fig.add_subplot(cols, np.ceil(n_images/float(cols)), n + 1)
        plt.gray()
        plt.imshow(image)
    fig.set_size_inches(np.array(fig.get_size_inches()) * n_images)
    plt.show()

In [11]:
plotImages(images, values[1])

IndexError: list index out of range

In [12]:
pip install utils

Note: you may need to restart the kernel to use updated packages.


In [14]:
from utils import *
import logging
import cv2


def detect_face(folder, cc_path='./Documents/FaceDataset/haarcascade_frontalface_default.xml'):
    """
    Detect the face from the image, return colored face
    """

    cc = cv2.CascadeClassifier(os.path.abspath(cc_path))
    folder = os.path.abspath(folder)
    img = cv2.imread(folder)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    faces = cc.detectMultiScale(gray, 1.3, 5)
    roi_color = None

    if len(faces) == 0:
        logging.exception(folder + ': No face found')
    else:
        x,y,w,h = faces[0]
        _h, _w = compute_size(h, w)
        roi_color = img[y - _h:y + h + _h, x - _w:x + w + _w]

    return roi_color


def generate_faces(src_path, dst_path):
    """
    Generate faces from source directory and store cropped faces in destination directory
    """
    for root, dirs, files in os.walk(src_path):
        for name in dirs:
            dir_name = os.path.join(root, name)
            images = os.listdir(dir_name)
            images = [image for image in images if image.endswith('jpg')]

            _path = os.path.join(dst_path, name)

            if not os.path.exists(_path):
                os.makedirs(_path)

            for image in images:
                face = detect_face(os.path.join(dir_name, image))
                if face is None: continue
                cv2.imwrite(os.path.join(_path, image), face)