# Image Data Association

In [7]:
# optional: allow Jupyter to "hot reload" the Python modules I wrote, to avoid restarting the kernel after every change
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Part A: Formulating the Dataset

Note: the cells below do NOT need to be rerun!

### Downloading the Entire Dataset

In [2]:
! mkdir ./Data

mkdir: ./Data: File exists


In [2]:
! git clone https://github.com/openMVG/SfM_quality_evaluation.git 

Cloning into 'SfM_quality_evaluation'...


remote: Enumerating objects: 237, done.[K
remote: Total 237 (delta 0), reused 0 (delta 0), pack-reused 237[K
Receiving objects: 100% (237/237), 254.31 MiB | 13.61 MiB/s, done.
Resolving deltas: 100% (9/9), done.
Updating files: 100% (214/214), done.


### Aggregating the Images

We only care about the `images/` directories in the following subsets of the data:
- `fountain-P11`
- `Herz-Jesus-P8`
- `entry-P10`

In [3]:
import glob

In [4]:
BASE_DATA_PATH = "./SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008"
subset_names = ["entry-P10", "Herz-Jesus-P8", "fountain-P11"]
file_ext_pattern = "*.jpg"

In [5]:
all_img_paths = list()

for subset in subset_names:
    pattern_for_subset_images = "/".join(
        [BASE_DATA_PATH, subset, "images", file_ext_pattern]
    )
    all_img_paths.extend(glob.glob(pattern_for_subset_images))

print(all_img_paths)

['./SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0006.jpg', './SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0007.jpg', './SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0005.jpg', './SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0004.jpg', './SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0000.jpg', './SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0001.jpg', './SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0003.jpg', './SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0002.jpg', './SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0009.jpg', './SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0008.jpg', './SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/Herz-Jesus-P8/images/0006.

Sweet. Lastly, we can load all these images into memory for further processing. 

In [13]:
from util import ops

In [24]:
all_img_arrays = [
    ops.load_image(img_path, return_grayscale=True, return_array=True)
    for img_path in all_img_paths
]

all_imgs = dict(zip(all_img_paths, all_img_arrays))

Dimensions of ./SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0006.jpg: 2048 x 3072
Dimensions of ./SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0007.jpg: 2048 x 3072
Dimensions of ./SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0005.jpg: 2048 x 3072
Dimensions of ./SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0004.jpg: 2048 x 3072
Dimensions of ./SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0000.jpg: 2048 x 3072
Dimensions of ./SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0001.jpg: 2048 x 3072
Dimensions of ./SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0003.jpg: 2048 x 3072
Dimensions of ./SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0002.jpg: 2048 x 3072
Dimensions of ./SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entr

## Part B: Extract Global Descriptors

I will choose to extract a GIST descriptor for each image. I will elect to keep all the images in their current size (since they're already all the same dimensions).

In [40]:
type(cv2.SIFT.detect)

method_descriptor

In [46]:
from typing import List, Union

import cv2
import numpy as np

from util.clustering import KMeans


def _extract_sift_features(
    img: np.ndarray,
    mode: str = "detection",
) -> Union[List[cv2.KeyPoint], np.ndarray]:
    """Feature extraction using SIFT. Can be used for either detection or description"""
    sift = cv2.SIFT_create()
    img_normalized = cv2.normalize(img, None, 0, 255, cv2.NORM_MINMAX).astype("uint8")
    keypoints, descriptors = sift.detectAndCompute(img_normalized, None)
    if mode == "detection":
        return keypoints  # list of keypoint objs
    elif mode == "description":
        return descriptors  # ndarray


def create_global_visual_word_vocabulary(
    imgs: List[np.ndarray],
    n_clusters: int,
) -> KMeans:
    descriptors_list = []
    for img in imgs:
        descriptors = _extract_sift_features(img, mode="description")
        if descriptors is not None:
            descriptors_list.extend(descriptors)

    descriptors_arr = np.array(descriptors_list)

    # create full set of visual words
    kmeans = KMeans(k=n_clusters)
    kmeans.fit(descriptors_arr)
    return kmeans


def build_histograms(imgs: np.ndarray, kmeans: KMeans) -> np.ndarray:
    """Build histograms for EACH image using visual words, acting as a global representation."""
    histograms = []
    for img in imgs:
        descriptors = _extract_sift_features(img, mode="description")
        if descriptors is not None:
            labels = kmeans.predict(descriptors)
            histogram, _ = np.histogram(labels, bins=range(kmeans.num_clusters + 1))
            histograms.append(histogram)
        else:
            histograms.append(np.zeros(kmeans.num_clusters))
    return np.array(histograms)

### Build a Global Visual Word Dictionary (via Clustering)

In [31]:
visual_word_identifier = create_global_visual_word_vocabulary(
    all_img_arrays,
    n_clusters=10,  # just a guess, we can tune later
)

### Compute the Global BoW Descriptor for All Images

In [33]:
all_img_descriptors = build_histograms(all_img_arrays, visual_word_identifier)

In [35]:
print(all_img_descriptors)

[[ 901  721  708 1181  377  558  315   22 2083  953]
 [ 884  696  684 1135  509  581  329   26 2104  986]
 [ 833  692  644 1163  346  511  268   18 1796  769]
 [ 846  697  584 1151  360  492  248   30 1626  647]
 [ 944  695  775 1062  483  575  316   22 2128  822]
 [ 854  674  797 1065  447  568  302   33 2220  864]
 [ 938  692  722 1064  397  475  271   19 1907  785]
 [ 850  678  706 1083  375  493  297   23 2080  805]
 [ 609  478  509  868  451  549  255   29 1720  550]
 [ 742  520  618  978  457  482  250   16 1942  743]
 [ 718  498  546  918  351  565  128   10 1230  359]
 [ 687  484  523  922  357  595  123   20 1398  385]
 [ 453  371  359  724  275  435   91    7 1078  252]
 [ 612  403  479  856  258  484  111    9 1358  425]
 [ 847  537  721 1218  393  643  175   17 1538  695]
 [ 487  355  471  677  239  435  106    8 1147  404]
 [ 688  413  780  959  275  525  121   14 1500  735]
 [ 630  411  653  884  278  515  128    9 1349  604]
 [ 751  437  848  892  267  409  158   10 1526