### Assignment 3 - Question 3

In [None]:
import cv2
import numpy as np
from matplotlib import pyplot as plt


##### 3.1: Compute image gradient magnitudes and directions

In [None]:
def compute_img_gradient(gray_img, threshold=-1):
    """Compute per-pixel gradient magnitudes and unsigned directions.

    Args:
        gray_img: Image array handed to ``cv2.Sobel`` (expected grayscale).
        threshold: Magnitudes strictly below this value are set to 0. The
            default of -1 disables thresholding, since a magnitude is
            never negative.

    Returns:
        A ``(magnitude, direction)`` tuple of float arrays. ``direction``
        is the gradient angle in radians folded modulo pi, so opposite
        gradients share one orientation.
    """
    # 5x5 Sobel derivatives along x and y; CV_64F keeps negative responses.
    grad_x = cv2.Sobel(gray_img, cv2.CV_64F, 1, 0, ksize=5)
    grad_y = cv2.Sobel(gray_img, cv2.CV_64F, 0, 1, ksize=5)

    # Magnitude is taken from the untouched derivatives, so a flat pixel
    # reports exactly 0 rather than a patched-in epsilon.
    magnitude = np.hypot(grad_x, grad_y)
    magnitude[magnitude < threshold] = 0

    # arctan2 copes with grad_x == 0 without any division; the modulo folds
    # the signed angle onto the unsigned orientation range.
    direction = np.mod(np.arctan2(grad_y, grad_x), np.pi)
    return magnitude, direction

##### 3.2: Create cell grid

In [None]:
def create_grid(img, size=8):
    """Return how many whole ``size`` x ``size`` cells fit inside ``img``.

    Args:
        img: Array whose first two dimensions are (rows, columns). Any
            further dimensions (e.g. colour channels) are ignored.
        size: Edge length of one square cell, in pixels.

    Returns:
        ``(rows // size, cols // size)`` as a tuple of ints.
    """
    # Only the spatial dimensions matter, so multi-channel arrays work too.
    rows, cols = img.shape[:2]
    return rows // size, cols // size

##### 3.3: Accumulated gradient magnitudes & number of occurrences

In [None]:
def hog(mag, dir, size=8, mode=False):
    """Calculate hog features using accumulated gradient magnitudes and number of occurrences"""
    
    h, w = create_grid(dir, size)
    hist = np.zeros((h, w, 6))
    for i in range(h):
        for j in range(w):
            row = i * size + (mag.shape[0]-h*size)//2
            col = j * size + (mag.shape[1]-w*size)//2
            for r in range(row, row+size):
                for c in range(col, col+size):
                    deg = int((dir[r, c]/(np.pi/6))+0.5)%6
                    if mode:
                        hist[i, j, deg] += mag[r, c]
                    else:
                        hist[i, j, deg] += 1
    return hist

def downsample(img, scale_percent=50):
    """Resize ``img`` to ``scale_percent`` percent of its width and height.

    Args:
        img: Image array; ``shape[0]`` is read as height, ``shape[1]`` as width.
        scale_percent: Target size as a percentage of the original.

    Returns:
        The result of ``cv2.resize`` for the scaled ``(width, height)``.
    """
    src_height, src_width = img.shape[0], img.shape[1]
    # cv2.resize expects the target size as (width, height).
    target_size = (
        int(src_width * scale_percent / 100),
        int(src_height * scale_percent / 100),
    )
    return cv2.resize(img, target_size)

In [None]:
def histogram_to_quiver(img, size):
    """Return the arrow anchor coordinates for a per-cell HOG plot.

    The anchors sit at the centres of the whole ``size`` x ``size`` cells
    of a centred crop of ``img`` -- the same crop ``hog`` accumulates over
    -- so arrows stay on their cells even when the image size is not a
    multiple of ``size``.

    Args:
        img: Image array; only its first two dimensions (rows, cols) are used.
        size: Cell edge length in pixels.

    Returns:
        ``(X, Y)`` coordinate grids from ``np.meshgrid``, each of shape
        ``(rows // size, cols // size)``.
    """
    h, w = img.shape[:2]
    m, n = h // size, w // size
    # Border pixels left over after fitting whole cells, split evenly.
    top = (h - m * size) // 2
    left = (w - n * size) // 2

    xs = left + size / 2 + size * np.arange(n)
    ys = top + size / 2 + size * np.arange(m)
    X, Y = np.meshgrid(xs, ys)
    return X, Y

def visualize(img, hog, hog2, size):
    """Plot two HOG arrays side by side as line segments over ``img``.

    Args:
        img: Image shown in grayscale as the background of both panels.
        hog: Histogram array drawn in the left panel; bins 0-5 of its
            last axis are read.
        hog2: Histogram array drawn in the right panel, read the same way.
        size: Cell edge length, forwarded to ``histogram_to_quiver``.
    """
    grid_x, grid_y = histogram_to_quiver(img, size)
    _, axes = plt.subplots(1, 2, figsize=(15, 15))

    # Headless arrows centred on their anchor, i.e. plain line segments.
    stick_style = {
        'color': 'red',
        'linewidth': 0.5,
        'headlength': 0,
        'headwidth': 1,
        'headaxislength': 0,
        'pivot': 'middle',
    }
    panels = (
        ('HOG by Accumulated Gradient Magnitudes', hog),
        ('HOG by Magnitude Occurrences', hog2),
    )

    for axis, (title, hist) in zip(axes, panels):
        axis.set_title(title)
        axis.imshow(img, cmap='gray')
        # One quiver layer per orientation bin, scaled by that bin's value.
        for k in range(6):
            axis.quiver(grid_x, grid_y,
                        np.sin(k*np.pi/6) * hist[:, :, k]*0.001,
                        np.cos(k*np.pi/6) * hist[:, :, k]*0.001,
                        **stick_style)
    plt.show()


In [None]:
def show_hog_result(img_path, threshold=0.01, size=8):
    """Load an image and plot its magnitude- and count-based HOG side by side.

    The image is halved in size, converted to grayscale, and its gradients
    are binned into per-cell histograms twice: once weighted by magnitude
    and once counting pixels.

    Args:
        img_path: Path of the image file to read.
        threshold: Gradient magnitudes below this value are zeroed.
        size: Cell edge length in pixels.

    Raises:
        FileNotFoundError: If the image is missing or cannot be decoded.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f"Image is missing or unreadable: {img_path!r}")
    img = downsample(img)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    magnitude, direction = compute_img_gradient(gray, threshold)
    mag_hist = hog(magnitude, direction, size, mode=True)
    occ_hist = hog(magnitude, direction, size, mode=False)
    visualize(gray, mag_hist, occ_hist, size)

In [None]:
# Plot both HOG variants for einstein.png (gradient threshold 20, 8-pixel cells).
show_hog_result("./einstein.png", 20, 8)

In [None]:
# Plot both HOG variants for image1.png (gradient threshold 20, 8-pixel cells).
show_hog_result("./image1.png", 20, 8)

In [None]:
# Plot both HOG variants for image2.png (gradient threshold 20, 8-pixel cells).
show_hog_result("./image2.png", 20, 8)

##### Comment: 
Comparing the three pairs of images, the accumulated-magnitude method tends to outperform the number-of-occurrences method in object detection accuracy.

##### 3.4: Normalization

Use accumulated gradient magnitudes for the remaining tasks.

In [None]:
def build_descriptor(hog):
    """Build L2-normalized 2x2-block descriptors from per-cell histograms.

    Every block joins the histograms of four neighbouring cells in the
    order top-left, bottom-left, top-right, bottom-right, then divides the
    joined vector by ``sqrt(sum of squares + 0.001)``.

    Args:
        hog: Array of shape ``(M, N, bins)`` holding one histogram per cell.

    Returns:
        Float array of shape ``(M-1, N-1, 4 * bins)``; with 6 bins that is
        ``(M-1) x (N-1) x 24``.
    """
    cells = np.asarray(hog, dtype=np.float64)
    # Shifted views line up each cell with its lower/right neighbours, so a
    # single concatenate assembles every block at once.
    blocks = np.concatenate(
        [cells[:-1, :-1], cells[1:, :-1], cells[:-1, 1:], cells[1:, 1:]],
        axis=-1,
    )
    # The small constant keeps all-zero blocks from dividing by zero.
    norms = np.sqrt(np.sum(blocks * blocks, axis=-1, keepdims=True) + 0.001)
    return blocks / norms

def store_descriptor(img_path, txt_name):
    """Compute an image's normalized HOG descriptor and save it as text.

    The image is halved in size and converted to grayscale; its
    magnitude-weighted histograms are block-normalized and written to
    ``txt_name`` as a single comma-separated row.

    Args:
        img_path: Path of the image file to read.
        txt_name: Path of the text file to write.

    Raises:
        FileNotFoundError: If the image is missing or cannot be decoded.
    """
    # import image and convert to grayscale
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f"Image is missing or unreadable: {img_path!r}")
    img = downsample(img)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # compute image gradient magnitude and directions; compute histograms
    magnitude, direction = compute_img_gradient(gray)
    mag_hist = hog(magnitude, direction, mode=True)
    descriptor = build_descriptor(mag_hist)
    # flatten to one row so the whole descriptor lands on a single line
    np.savetxt(txt_name, descriptor.reshape((1, -1)), delimiter=',')

# Write the normalized descriptors of both test images to text files.
store_descriptor("./image2.png", 'image2.txt')
store_descriptor("./image1.png", 'image1.txt')

##### Flash vs Non-Flash

##### Set up images and calculate hog magnitudes

In [None]:
# Input images for the flash vs. non-flash comparison.
flash_path = "./flash.png"
non_flash_path = "./non_flash.png"


def load_image_hog(img_path, txt_name):
    """Load an image, compute its HOG and descriptor, and save the descriptor.

    The image is shrunk to 30% of its size and converted to grayscale.
    Gradient magnitudes below 20 are zeroed before the magnitude-weighted
    histograms are built. The block-normalized descriptor is written to
    ``txt_name`` as a single comma-separated row.

    Args:
        img_path: Path of the image file to read.
        txt_name: Path of the text file to write the descriptor to.

    Returns:
        ``(gray, mag_hist, descriptor)``: the grayscale image, the per-cell
        histograms, and the block-normalized descriptor.

    Raises:
        FileNotFoundError: If the image is missing or cannot be decoded.
    """
    # load image, downsample it, and convert to grayscale
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f"Image is missing or unreadable: {img_path!r}")
    img = downsample(img, 30)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # compute image gradient magnitude and directions; compute histograms
    magnitude, direction = compute_img_gradient(gray, 20)
    mag_hist = hog(magnitude, direction, mode=True)
    descriptor = build_descriptor(mag_hist)
    np.savetxt(txt_name, descriptor.reshape((1, -1)), delimiter=',')
    return gray, mag_hist, descriptor

# Compute the grayscale image, raw HOG and normalized descriptor for both
# shots; the descriptors are also saved to text files.
flash, flash_hog, flash_norm = load_image_hog(flash_path, 'flash.txt')
non_flash, non_flash_hog, non_flash_norm = load_image_hog(non_flash_path, 'non_flash.txt')

# plot original grayscale pictures
fig, ax = plt.subplots(1, 2, figsize=(15, 13))

# Both panels show the plain grayscale inputs, so the titles name the image
# rather than any magnitude or normalization result.
ax[0].set_title('Flash (grayscale)')
ax[0].imshow(flash, cmap='gray')

ax[1].set_title('Non-Flash (grayscale)')
ax[1].imshow(non_flash, cmap='gray')


In [None]:
def histogram_to_quiver2(img, size, norm=False):
    """Return arrow anchor coordinates for a per-cell or per-block HOG plot.

    Anchors sit at the centres of the whole ``size`` x ``size`` cells of a
    centred crop of ``img`` -- the same crop ``hog`` accumulates over -- so
    arrows stay on their cells even when the image size is not a multiple
    of ``size``.

    Args:
        img: Image array; only its first two dimensions (rows, cols) are used.
        size: Cell edge length in pixels.
        norm: If true, drop the last row and column of anchors so the grid
            matches the ``(M-1) x (N-1)`` layout of block descriptors, each
            block being anchored on its top-left cell.

    Returns:
        ``(X, Y)`` coordinate grids from ``np.meshgrid``.
    """
    h, w = img.shape[:2]
    m, n = h // size, w // size
    # Border pixels left over after fitting whole cells, split evenly.
    top = (h - m * size) // 2
    left = (w - n * size) // 2
    if norm:
        # One anchor fewer per axis: there are only (m-1) x (n-1) blocks.
        m, n = m - 1, n - 1

    xs = left + size / 2 + size * np.arange(n)
    ys = top + size / 2 + size * np.arange(m)
    X, Y = np.meshgrid(xs, ys)
    return X, Y
    
# X, Y = histogram_to_quiver2(flash, 8, norm=True)

def visualize2(img, hog, norm, size):
    """Plot a raw HOG array next to its block-normalized descriptor.

    Args:
        img: Image shown in grayscale as the background of both panels.
        hog: Per-cell histogram array drawn in the left panel.
        norm: Block descriptor array drawn in the right panel; only the
            first six channels of its last axis are read.
        size: Cell edge length, forwarded to ``histogram_to_quiver2``.
    """
    _, axes = plt.subplots(1, 2, figsize=(15, 15))

    # Headless arrows centred on their anchor, i.e. plain line segments.
    stick_style = {
        'color': 'red',
        'linewidth': 0.5,
        'headlength': 0,
        'headwidth': 1,
        'headaxislength': 0,
        'pivot': 'middle',
    }
    # (axis, values to draw, whether to use the reduced block grid)
    panels = (
        (axes[0], hog, False),
        (axes[1], norm, True),
    )

    for axis, values, reduced_grid in panels:
        axis.imshow(img, cmap='gray')
        grid_x, grid_y = histogram_to_quiver2(img, size, norm=reduced_grid)
        # One quiver layer per orientation bin, scaled by that bin's value.
        for k in range(6):
            axis.quiver(grid_x, grid_y,
                        np.sin(k*np.pi/6) * values[:, :, k] * 0.001,
                        np.cos(k*np.pi/6) * values[:, :, k] * 0.001,
                        **stick_style)
    plt.show()


##### (i)-(ii) Visualize original HOG arrays vs. normalized HOG

In [None]:
# Flash image: raw HOG (left panel) vs. normalized descriptor (right panel), 8-pixel cells.
visualize2(flash, flash_hog, flash_norm, 8)

In [None]:
# Non-flash image: raw HOG (left panel) vs. normalized descriptor (right panel), 8-pixel cells.
visualize2(non_flash, non_flash_hog, non_flash_norm, 8)

##### (iii) Compare results: why the normalization of HOG is or is not beneficial

##### Visualization: 
- To compare the results with and without normalization, I visualized a pair of comparison pictures for each of the flash and non-flash images.
- After normalization, there are (m-1)*(n-1) blocks of 2x2 cells; each block contains 24 entries, 6 for each of the 4 cells it covers.
- Within each 2x2 block's entries, the first 6 are the normalized magnitudes of the original 8*8 cell at the block's top-left.
- Therefore, we use these first 6 normalized entries to visualize each cell. The last row and column of cells are ignored since there are only (m-1)*(n-1) 2x2 blocks.

##### Comments:
- Overall, visual comparisons suggest normalization of HOG in this case is not very beneficial.
- After normalization, the resultant magnitudes are more invariant to illumination and shadowing changes.
- The block normalization process has a trade-off:
    - On the one hand, normalization reduces the effects of local variation in the texture of same objects. (e.g. after normalization, detection accuracy for the chopsticks on the bottom-right corner has improved).  
    - On the other hand, normalization can lead to loss of finer details for large magnitudes and added noise. (e.g. plain areas like bowl are harder to detect due to introduced noise).
- Comparing the normalization results on the flash and non-flash images, the flash image seems to have slightly better performance and less noise due to its higher contrast.