# Airplane, Leopard, or Dolphin?

### Using the Visual Bag of Words technique to determine the subject of a photo

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
import cv2
import os
import random
from scipy.spatial.distance import cdist
from scipy import stats
from sklearn import svm

def read_images(image_dir, N=None, resize_val=None):
    """Load the images found in a directory as grayscale arrays.

    Args:
        image_dir: Directory whose entries are read with ``cv2.imread``.
        N: If given, only the first ``N`` directory entries (in
            ``os.listdir`` order) are considered.
        resize_val: If given, scale factor applied to both axes of every
            image (e.g. ``0.5`` halves width and height).

    Returns:
        A list of grayscale images. Entries for which ``cv2.imread``
        returns ``None`` are skipped, so the list may hold fewer than
        ``N`` images.
    """
    img_names = os.listdir(image_dir)
    if N is not None:
        img_names = img_names[:N]

    imgs = []
    for img_name in img_names:
        img = cv2.imread(os.path.join(image_dir, img_name))
        if img is None:
            # Nothing could be loaded for this entry; skip it.
            continue
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        if resize_val is not None:
            # Honour the requested factor instead of a hard-coded 0.5.
            img_gray = cv2.resize(img_gray, (0, 0), fx=resize_val, fy=resize_val)
        imgs.append(img_gray)

    return imgs

# Load up to 600 directory entries per class as grayscale images, passing a
# resize factor of 0.5 to read_images.
airplanes, dolphins, leopards = (
    read_images(class_dir, resize_val=0.5, N=600)
    for class_dir in (
        '..\\OIDv4_ToolKit\\OID\\Dataset\\train\\Airplane',
        '..\\OIDv4_ToolKit\\OID\\Dataset\\train\\Dolphin',
        '..\\OIDv4_ToolKit\\OID\\Dataset\\train\\Leopard',
    )
)

In [None]:
def plot_img_and_keypoints(img):
    """Show an image side by side with its SIFT keypoints drawn on it.

    Args:
        img: Image array. It is displayed with a gray colormap on the left
            and used as the input for SIFT keypoint detection.
    """
    fig, (ax_plain, ax_kp) = plt.subplots(1, 2, figsize=(14, 10))
    ax_plain.imshow(img, cmap='gray')

    sift = cv2.SIFT_create()
    kp, _ = sift.detectAndCompute(img, None)
    # Pass None as the output image so OpenCV allocates a fresh canvas
    # instead of being handed the caller's own array as the output buffer.
    img_kp = cv2.drawKeypoints(img, kp, None, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
    ax_kp.imshow(img_kp)

In [None]:
# Show the airplane image at index 4 next to its SIFT keypoints.
plot_img_and_keypoints(airplanes[4])

In [None]:
# Show the dolphin image at index 0 next to its SIFT keypoints.
plot_img_and_keypoints(dolphins[0])

In [None]:
# Show the leopard image at index 1 next to its SIFT keypoints.
plot_img_and_keypoints(leopards[1])

In [None]:
# Single SIFT detector instance, passed to get_set_descriptors for every class.
sift = cv2.SIFT_create()

def get_set_descriptors(image_set, sift, min_kps=70):
    """Compute keypoints and descriptors per image, keeping keypoint-rich images.

    Args:
        image_set: Iterable of images to process.
        sift: Detector object exposing ``detectAndCompute(image, mask)``.
        min_kps: An image is kept only when it yields strictly more
            keypoints than this value.

    Returns:
        A tuple ``(images, keypoints, descriptors)`` of three parallel
        lists covering only the images that were kept.
    """
    kept_images, kept_keypoints, kept_descriptors = [], [], []
    for image in image_set:
        found_kps, found_des = sift.detectAndCompute(image, None)
        if len(found_kps) <= min_kps:
            # Too few keypoints: leave this image out of all three lists.
            continue
        kept_images.append(image)
        kept_descriptors.append(found_des)
        kept_keypoints.append(found_kps)

    return kept_images, kept_keypoints, kept_descriptors
        
# Extract keypoints and descriptors for each class. Images that do not exceed
# get_set_descriptors' default keypoint threshold are dropped, so the *_imgs
# lists may be shorter than (and indexed differently from) the raw image lists.
airplanes_imgs, airplanes_kp, airplanes_des = get_set_descriptors(airplanes, sift)
dolphins_imgs, dolphins_kp, dolphins_des = get_set_descriptors(dolphins, sift)
leopards_imgs, leopards_kp, leopards_des = get_set_descriptors(leopards, sift)

In [None]:
# Number of images kept per class after the keypoint-count filter.
print(len(airplanes_imgs), len(dolphins_imgs), len(leopards_imgs))
# Smallest keypoint count among the kept images of each class.
print(min(map(len, airplanes_kp)), min(map(len, dolphins_kp)), min(map(len, leopards_kp)))

In [None]:
def argsort(seq):
    """Return the indices of ``seq`` ordered from largest to smallest value.

    Equal values come out in decreasing index order, because the result is
    a stable ascending sort that is then reversed.
    """
    # http://stackoverflow.com/questions/3071415/efficient-method-to-calculate-the-rank-vector-of-a-list-in-python
    order = list(range(len(seq)))
    order.sort(key=lambda idx: seq[idx])
    order.reverse()
    return order

def get_top_kp_desc(keypoints, descriptors, n):
    """Keep, for every image, the ``n`` keypoints with the largest ``size``.

    Args:
        keypoints: Per-image sequences of keypoint objects with a ``size``
            attribute.
        descriptors: Per-image arrays, row-indexed with the same positions
            as the corresponding keypoint sequence.
        n: Maximum number of keypoints (and descriptor rows) kept per image.

    Returns:
        A tuple ``(top_kp, top_desc)``: per-image lists of the selected
        keypoints and the matching descriptor rows, largest size first.
    """
    top_kp, top_desc = [], []
    for image_kps, image_desc in zip(keypoints, descriptors):
        sizes = [kp.size for kp in image_kps]
        # argsort orders indices from largest to smallest size.
        chosen = argsort(sizes)[:n]
        top_kp.append([image_kps[i] for i in chosen])
        top_desc.append(image_desc[chosen, :])
    return top_kp, top_desc

# Number of keypoints retained per image. The images kept by
# get_set_descriptors (default threshold 70) all have more keypoints than
# this, so every image contributes exactly min_kp descriptors.
min_kp = 65
# Per class: the min_kp largest keypoints of each image and their descriptors.
kpa, desca = get_top_kp_desc(airplanes_kp, airplanes_des, min_kp)
kpd, descd = get_top_kp_desc(dolphins_kp, dolphins_des, min_kp)
kpl, descl = get_top_kp_desc(leopards_kp, leopards_des, min_kp)

In [None]:
# kpa/kpd/kpl were computed from the *filtered* image lists (airplanes_imgs,
# dolphins_imgs, leopards_imgs), so index into those lists -- not the raw
# airplanes/dolphins/leopards lists -- to pair each image with its own
# keypoints. None is passed as the output image so the source arrays are not
# handed to OpenCV as the drawing buffer.
imga = cv2.drawKeypoints(airplanes_imgs[4], kpa[4], None, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
imgd = cv2.drawKeypoints(dolphins_imgs[0], kpd[0], None, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
imgl = cv2.drawKeypoints(leopards_imgs[1], kpl[1], None, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
plt.imshow(imga)
plt.show()
plt.imshow(imgd)
plt.show()
plt.imshow(imgl)
plt.show()

In [None]:
# Number of training images taken from each class; the remainder of each
# class becomes the test set.
train_size = 400
# Descriptors, keypoints and images of a class are passed to one
# train_test_split call so that the three lists are partitioned identically.
# No random_state is given, so the partition differs from run to run.
desca_train, desca_test, kpa_train, kpa_test, imga_train, imga_test = train_test_split(desca, kpa, airplanes_imgs, train_size=train_size)
descd_train, descd_test, kpd_train, kpd_test, imgd_train, imgd_test = train_test_split(descd, kpd, dolphins_imgs, train_size=train_size)
descl_train, descl_test, kpl_train, kpl_test, imgl_train, imgl_test  = train_test_split(descl, kpl, leopards_imgs, train_size=train_size)
# Train / test image counts per class.
print(len(desca_train), len(desca_test))
print(len(descd_train), len(descd_test))
print(len(descl_train), len(descl_test))

In [None]:
# Stack the per-image descriptor arrays of each class into one array per
# class, separately for the training and the test split.
desca_train_flat, descd_train_flat, descl_train_flat = (
    np.vstack(per_image) for per_image in (desca_train, descd_train, descl_train)
)
desca_test_flat, descd_test_flat, descl_test_flat = (
    np.vstack(per_image) for per_image in (desca_test, descd_test, descl_test)
)

In [None]:
# Descriptors are concatenated class by class in the order airplane, dolphin,
# leopard; the keypoint and image lists use the same class order so that a
# class index addresses all three consistently.
train_set = np.concatenate((desca_train_flat, descd_train_flat, descl_train_flat))
train_keypoints = [kpa_train, kpd_train, kpl_train]
train_images = [imga_train, imgd_train, imgl_train]
test_set = np.concatenate((desca_test_flat, descd_test_flat, descl_test_flat))
test_keypoints = [kpa_test, kpd_test, kpl_test]
test_images = [imga_test, imgd_test, imgl_test]
# Backward-compatible alias: this list was originally bound to the
# misspelled name `ttest_images`.
ttest_images = test_images
# Print shape for check
print(train_set.shape)
print(test_set.shape)

In [None]:
# Cluster all training descriptors into 300 groups (at most 100 iterations).
# train_labels holds the cluster index assigned to each row of train_set.
kmeans = KMeans(n_clusters=300, max_iter=100)
train_labels = kmeans.fit_predict(train_set)

In [None]:
# Maps a class name to its position in the class-by-class descriptor layout.
img_class_label = {'airplane': 0, 'dolphin': 1, 'leopard': 2}

def get_img_cluster_labels(labels, img_class, img_num, img_per_class, kp_per_img):
    """Return the cluster labels belonging to one image.

    ``labels`` is expected to be laid out class by class, image by image,
    with ``kp_per_img`` consecutive entries per image and ``img_per_class``
    images per class.

    Args:
        labels: Flat, sliceable sequence of cluster labels.
        img_class: Class name; must be a key of ``img_class_label``.
        img_num: Index of the image within its class
            (``0 <= img_num < img_per_class``).
        img_per_class: Number of images stored per class.
        kp_per_img: Number of labels (keypoints) stored per image.

    Returns:
        The slice of ``labels`` holding the image's ``kp_per_img`` labels.

    Raises:
        ValueError: If the class name is unknown or ``img_num`` is out of
            range.
    """
    img_class_num = img_class_label.get(img_class)
    if img_class_num is None or not 0 <= img_num < img_per_class:
        raise ValueError('Bad arguments')
    # The class offset is counted in images, so it has to be scaled by
    # kp_per_img together with the image offset.
    start_idx = (img_class_num * img_per_class + img_num) * kp_per_img
    end_idx = start_idx + kp_per_img
    return labels[start_idx:end_idx]

def get_class_imgnum_from_index(keypoint_index, img_per_class, kp_per_img, num_classes=3):
    """Translate a flat keypoint index into (class, image, keypoint) numbers.

    The flat index is interpreted as class-major, then image, then keypoint,
    with ``kp_per_img`` keypoints per image and ``img_per_class`` images per
    class.

    Args:
        keypoint_index: Non-negative flat index of the keypoint/descriptor.
        img_per_class: Number of images stored per class.
        kp_per_img: Number of keypoints stored per image.
        num_classes: Number of classes in the layout (defaults to 3).

    Returns:
        A tuple ``(class_num, image_num, kp_num)``.

    Raises:
        ValueError: If the index is negative or falls beyond the last class.
    """
    if keypoint_index < 0:
        # Floor division would otherwise turn this into a negative class.
        raise ValueError('Bad arguments; Class num or image num outside range')
    image_index, kp_num = divmod(keypoint_index, kp_per_img)
    class_num, image_num = divmod(image_index, img_per_class)
    if class_num >= num_classes:
        raise ValueError('Bad arguments; Class num or image num outside range')
    return class_num, image_num, kp_num


In [None]:
# For one training image of each class: fetch its cluster labels and count
# how often each cluster occurs. Each *_hist is the tuple
# (unique cluster numbers, counts) returned by np.unique.
img1_labels = get_img_cluster_labels(train_labels, 'airplane', 4, train_size, min_kp)
img1_hist = np.unique(img1_labels, return_counts=True)
img2_labels = get_img_cluster_labels(train_labels, 'dolphin', 0, train_size, min_kp)
img2_hist = np.unique(img2_labels, return_counts=True)
img3_labels = get_img_cluster_labels(train_labels, 'leopard', 1, train_size, min_kp)
img3_hist = np.unique(img3_labels, return_counts=True)

# One bar chart per image (cluster number vs. count) on a shared y axis.
fig, ((ax1, ax2, ax3)) = plt.subplots(1, 3, figsize=(18, 6), sharey=True)
ax1.bar(img1_hist[0], img1_hist[1], width=2)
ax2.bar(img2_hist[0], img2_hist[1], width=2)
ax3.bar(img3_hist[0], img3_hist[1], width=2)

In [None]:
# img1_hist is the (values, counts) tuple returned by
# np.unique(..., return_counts=True); a tuple has no .shape attribute, so
# inspect the shapes of the two arrays it contains.
img1_hist[0].shape, img1_hist[1].shape

In [None]:
# Sanity check on a single cluster: count its members in two ways, pull out
# their descriptors and measure their distance to the cluster's centroid.
cluster_id = 100
print(len(train_labels[train_labels == cluster_id]))
print(len(np.argwhere(train_labels == cluster_id)))
print(train_set.shape)
d = train_set[np.nonzero(train_labels == cluster_id)]
print(d.shape)
# Compare against the centroid of the same cluster the descriptors belong to
# (index cluster_id), not the centroid of cluster 0.
e = np.linalg.norm(d - kmeans.cluster_centers_[cluster_id], axis=1)
print(e.shape)

In [None]:
# Inspect one keypoint object: first class list (kpa_train), training image
# 12, keypoint 45.
train_keypoints[0][12][45]

In [None]:
def get_cluster_images(labels, centroids, descriptors, keypoints, images, patch_area=30):
    """Cut out, for every cluster, a patch around its most central keypoint.

    For each centroid, the descriptor assigned to that cluster with the
    smallest Euclidean distance to the centroid is located and mapped back
    to its class, image and keypoint. A window reaching ``patch_area``
    pixels to each side of the keypoint is then sliced from the zero-padded
    source image.

    The mapping from a descriptor row to (class, image, keypoint) uses the
    module-level ``train_size`` and ``min_kp`` values.

    Args:
        labels: Cluster index of every row of ``descriptors``.
        centroids: Cluster centres; entry ``k`` belongs to cluster ``k``.
        descriptors: Array with one descriptor per row.
        keypoints: Keypoints indexed as ``[class][image][keypoint]``.
        images: Images indexed as ``[class][image]``.
        patch_area: Half the side length of the extracted window, in pixels.

    Returns:
        ``np.asarray`` of the per-cluster patches, in centroid order.
    """
    patches = []
    for cluster_id, center in enumerate(centroids):
        # Rows of `descriptors` that were assigned to this cluster.
        (member_rows,) = np.nonzero(labels == cluster_id)
        member_descs = descriptors[member_rows]

        # The member nearest to the centroid represents the cluster.
        nearest = np.argmin(np.linalg.norm(member_descs - center, axis=1))
        flat_index = member_rows[nearest]

        # Resolve the flat row number into its class / image / keypoint.
        cls_idx, img_idx, kp_idx = get_class_imgnum_from_index(flat_index, train_size, min_kp)
        source_img = images[cls_idx][img_idx]
        source_kp = keypoints[cls_idx][img_idx][kp_idx]

        # Pad so a window around a keypoint near the border stays inside the
        # array, then shift the keypoint coordinates by the padding width.
        kp_x, kp_y = source_kp.pt
        padded = np.pad(source_img, patch_area, mode='constant')
        kp_x, kp_y = kp_x + patch_area, kp_y + patch_area
        row_lo, row_hi = round(kp_y - patch_area), round(kp_y + patch_area)
        col_lo, col_hi = round(kp_x - patch_area), round(kp_x + patch_area)
        patches.append(padded[row_lo:row_hi, col_lo:col_hi])

    return np.asarray(patches)

# One representative image patch per k-means cluster, taken from the training data.
cluster_images = get_cluster_images(train_labels, kmeans.cluster_centers_, train_set, train_keypoints, train_images)


In [None]:
# Shape of the representative patch of cluster 0.
cluster_images[0].shape

In [None]:
def plot_vocabulary(images, labels, class_name, img_num):
    """Plot one training image's cluster histogram with a patch under each bar.

    The labels of the selected image are looked up with
    ``get_img_cluster_labels`` using the module-level ``train_size`` and
    ``min_kp``. One bar is drawn per distinct cluster, and the image
    ``images[cluster]`` is shown beneath the bar for that cluster.

    Args:
        images: Per-cluster images, indexed by cluster number.
        labels: Flat cluster labels of the training descriptors.
        class_name: Class of the image to plot.
        img_num: Index of the image within its class.
    """
    word_labels = get_img_cluster_labels(labels, class_name, img_num, train_size, min_kp)
    words, freqs = np.unique(word_labels, return_counts=True)
    n_words = len(words)

    plt.figure(figsize=(28, 18))
    # Bars sit at even x positions, leaving room for the patches below them.
    plt.bar(range(0, 2 * n_words, 2), freqs, width=0.25)
    for slot, word in enumerate(words):
        left = -.75 + 2*slot
        right = .75 + 2*slot
        plt.imshow(images[word], cmap='gray', extent=(left, right, -1.5, .0))
    plt.xlim(-1, 2 * n_words)
    plt.ylim(-1.5, max(freqs) * 1.15)
    plt.xlabel(words)
    plt.show()
    
# Cluster histogram with representative patches for airplane training image 4.
plot_vocabulary(cluster_images, train_labels, 'airplane', 4)

In [None]:
# Cluster histogram with representative patches for dolphin training image 0.
plot_vocabulary(cluster_images, train_labels, 'dolphin', 0)

In [None]:
# Cluster histogram with representative patches for leopard training image 1.
plot_vocabulary(cluster_images, train_labels, 'leopard', 1)