Bag-of-Words classification of the STL-10 dataset with HOG features and a nearest-neighbor classifier.

In [6]:
import sys
import os
import glob
import cv2
import numpy as np
from scipy import ndimage, spatial

Define main parameters: path to STL-10, list of names for considered classes, number of codebook words (i.e. K-means clusters), type of norm for determining nearest neighbor of BoW histograms.

In [7]:
# Root directory holding the per-class STL-10 images. Set the STL10_DATASET_DIR environment variable to point the
# notebook at another location without editing this cell; the original local path is kept as the default.
dataset_dir = os.environ.get('STL10_DATASET_DIR',
                             '/home/lukas/git/ethz-iacv-2020/pics/STL-10/images_per_class')
# Names of the classes to distinguish.
class_names_input = ['cat', 'ship']
# Number of codebook words, i.e. K-means clusters.
K = 100
# Norm used to determine the nearest neighbor of BoW histograms.
nearest_neighbor_norm = 'L2'

Filter the specified class names to obtain a valid subset of STL-10 classes. If this subset has fewer than two elements, exit with an error status.

In [8]:
STL10_class_names = ['airplane', 'bird', 'car', 'cat', 'deer', 'dog', 'horse', 'monkey', 'ship', 'truck']
# np.unique removes duplicates and returns the names in sorted order.
class_names_input_unique = np.unique(class_names_input)
# Force a boolean mask: for an empty input list NumPy would otherwise build a float64 array, which is not a valid
# index and would raise an IndexError before the explanatory message below could be printed.
is_input_valid = np.array([c in STL10_class_names for c in class_names_input_unique], dtype=bool)
class_names = class_names_input_unique[is_input_valid]

# Tell the user which requested names were dropped instead of discarding them silently.
ignored_class_names = class_names_input_unique[~is_input_valid]
if len(ignored_class_names) > 0:
    print('Ignoring names that are not STL-10 classes: {}'.format(', '.join(str(c) for c in ignored_class_names)))

C = len(class_names)
if C < 2:
    print('Not enough classes to distinguish. Need at least 2 classes from STL-10!')
    sys.exit(1)

Define functions for extraction of HOG features.

In [9]:
def grid_of_feature_points(image, n_points_x, n_points_y, margin_x, margin_y):
    """
    Construct grid of feature points to serve as patch centers for computation of HOG features.
    Inputs:
        |image|: numpy.ndarray whose first two dimensions are (height, width).
        |n_points_x|, |n_points_y|: number of grid points along the horizontal / vertical direction.
        |margin_x|, |margin_y|: distance kept between the outermost grid points and the image border.
    Output:
        Two 1D NumPy arrays: the x (column) coordinates with |n_points_x| elements and the y (row) coordinates
        with |n_points_y| elements. The grid is the Cartesian product of the two.
    """

    # NumPy images are indexed (row, column) = (y, x): the width is shape[1] and the height is shape[0].
    # Using them the other way round only goes unnoticed for square images.
    image_height, image_width = image.shape[:2]

    feature_points_x = np.linspace(margin_x, image_width - margin_x, n_points_x)
    feature_points_y = np.linspace(margin_y, image_height - margin_y, n_points_y)

    # Return the set of feature points as two 1D arrays holding their image coordinates.
    return feature_points_x, feature_points_y


def compute_HOG_descriptors(image, feature_points_x, feature_points_y, cell_width, cell_height):
    """
    Compute the HOG descriptors, as the set of features for an input image, at the specified points.
    Around every grid point a patch of n_cells x n_cells cells is cut out; for each cell an (unweighted) histogram
    of gradient orientations with n_bins bins over [-pi, pi] is computed and the cell histograms are concatenated.
    Inputs:
        |image|: 2D grayscale image, indexed as (row, column).
        |feature_points_x|: 1D array with the x (column) coordinates of the patch centers.
        |feature_points_y|: 1D array with the y (row) coordinates of the patch centers.
        |cell_width|, |cell_height|: size of one cell in pixels.
    Output:
        |HOG_descriptors|: 2D NumPy array of shape (n_points, n_cells * n_cells * n_bins). Patches are ordered with
        the x index as the outer loop and the y index as the inner loop.
    Raises:
        ValueError if a patch does not lie completely inside the image.
    """

    # Define parameters and constants.
    n_bins = 8
    n_cells = 4
    bin_edges = np.linspace(-np.pi, np.pi, num=n_bins + 1)

    # Work on floats: the Sobel filter keeps the input dtype, so unsigned integer images would wrap around on
    # negative gradients.
    image = np.asarray(image, dtype=float)
    image_height, image_width = image.shape[:2]

    # The patch size follows from the cell size instead of being hard-coded to 16x16.
    patch_width = n_cells * cell_width
    patch_height = n_cells * cell_height

    n_points_x = feature_points_x.shape[0]
    n_points_y = feature_points_y.shape[0]
    HOG_descriptor_len = n_cells * n_cells * n_bins
    HOG_descriptors = np.zeros((n_points_x * n_points_y, HOG_descriptor_len))

    patch_no = 0
    for i in range(n_points_x):
        # Derive the far edge from the near edge (left + patch_width) so that floating-point rounding of the
        # center coordinate can never produce a patch that is one pixel too large or too small.
        left = int(np.floor(feature_points_x[i] - patch_width / 2))
        for j in range(n_points_y):  # for all grid points
            top = int(np.floor(feature_points_y[j] - patch_height / 2))
            if left < 0 or top < 0 or left + patch_width > image_width or top + patch_height > image_height:
                raise ValueError('Patch centered at (x={}, y={}) does not fit inside an image of shape {}.'.format(
                    feature_points_x[i], feature_points_y[j], image.shape))

            square_patch = image[top:top + patch_height, left:left + patch_width]

            HOG_descriptor = []
            for cell_top in range(0, patch_height, cell_height):
                for cell_left in range(0, patch_width, cell_width):  # for each cell in patch
                    cell = square_patch[cell_top:cell_top + cell_height, cell_left:cell_left + cell_width]

                    # axis=1 runs along the columns (x direction), axis=0 along the rows (y direction).
                    dx = ndimage.sobel(cell, axis=1)
                    dy = ndimage.sobel(cell, axis=0)

                    # arctan2: array of angles in radians, in the range [-pi, pi].
                    theta = np.arctan2(dy, dx)

                    # Every pixel of the cell casts one vote for its orientation bin.
                    hist, _ = np.histogram(theta, bins=bin_edges)
                    HOG_descriptor.extend(hist)

            assert len(HOG_descriptor) == HOG_descriptor_len, f"{len(HOG_descriptor)} != {HOG_descriptor_len}"
            HOG_descriptors[patch_no] = HOG_descriptor
            patch_no += 1

    return HOG_descriptors


def feature_extraction(image_full_filename):
    """
    Extract HOG features for an input image.
    Inputs:
        |image_full_filename|: full path to the input image file
    Output:
        2D NumPy array of shape (n_points_x * n_points_y, 128)
    Raises:
        IOError if the image file cannot be read.
    """

    # Read the input image into a numpy.ndarray variable of two dimensions (grayscale) for further processing.
    # cv2.imread does not raise on a missing or unreadable file but returns None, so check explicitly.
    image = cv2.imread(image_full_filename, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise IOError('Could not read image file: {}'.format(image_full_filename))
    image = image.astype('float')

    # Define parameters.
    n_points_x = 6
    n_points_y = 6
    cell_width = 4
    cell_height = 4
    # The margin is half the side of a patch of 4x4 cells, so that border patches stay inside the image.
    margin_x = 2 * cell_width
    margin_y = 2 * cell_height

    # Construct grid of feature points.
    feature_points_x, feature_points_y = grid_of_feature_points(image, n_points_x, n_points_y, margin_x, margin_y)

    # Return HOG features at the computed feature points.
    return compute_HOG_descriptors(image, feature_points_x, feature_points_y, cell_width, cell_height)


def image_full_filenames_in_directory(directory):
    """
    Collect the full paths of all PNG images located directly inside a directory.
    Inputs:
        |directory|: path to input directory.
    Output:
        List of full filenames, sorted in lexicographical order (empty if nothing matches).
    """

    extension = '.png'
    # glob returns matches in arbitrary order, hence the explicit sort.
    matches = glob.glob(os.path.join(directory, '*' + extension))
    matches.sort()
    return matches


def class_features(class_directory):
    """
    Compute the HOG features of every image of one class and stack them into a single array.
    Inputs:
        |class_directory|: path to the directory holding the images of the class.
    Output:
        numpy.ndarray with dimensions n_images-by-n_descriptors-by-D.
        ASSUMPTION: all images of the class have equal dimensions, hence an equal number of grid points; only
        then do the per-image feature matrices stack into a regular 3D array.
    """

    image_paths = image_full_filenames_in_directory(class_directory)
    total = len(image_paths)

    per_image_features = []
    for position, image_path in enumerate(image_paths, start=1):
        # Display progress.
        print('Feature extraction for image {:d}/{:d}'.format(position, total))

        # Each image contributes one 2D matrix of descriptors.
        per_image_features.append(feature_extraction(image_path))

    return np.array(per_image_features)


def split_features(dataset_dir, split, class_names):
    """
    Compute the features of a whole dataset split, class by class.
    Inputs:
        |dataset_dir|: path to root dataset directory.
        |split|: name of processed split, e.g. 'train' or 'test'.
        |class_names|: list of names of considered classes.
    Output:
        List with one 3D feature array per class, in the order of |class_names|.
    """

    # All classes of a split live in sub-directories of <dataset_dir>/<split>.
    split_root = os.path.join(dataset_dir, split)

    features_per_class = []
    for class_number, class_name in enumerate(class_names, start=1):
        # Display progress.
        print('Processing {:s} split, class {:d}: {:s}'.format(split, class_number, class_name))

        features_per_class.append(class_features(os.path.join(split_root, class_name)))

    return features_per_class

In [10]:
# Smoke test of the HOG pipeline on a single image.
sample_image_path = os.path.join(dataset_dir, 'test', 'airplane', 'airplane_test_000.png')
image = cv2.imread(sample_image_path, cv2.IMREAD_GRAYSCALE)
# cv2.imread returns None instead of raising when the file is missing or unreadable.
if image is None:
    raise IOError('Could not read image file: {}'.format(sample_image_path))
image = image.astype('float')

# Define parameters.
n_points_x = 6
n_points_y = 6
cell_width = 4
cell_height = 4
margin_x = 2 * cell_width
margin_y = 2 * cell_height

print(f"shape: {image.shape}")

# Construct grid of feature points.
feature_points_x, feature_points_y = grid_of_feature_points(image, n_points_x, n_points_y, margin_x, margin_y)
print(f"feature_points_x = {feature_points_x} feature_points_y = {feature_points_y}")

# One descriptor per grid point is expected.
HOG_descriptors = compute_HOG_descriptors(image, feature_points_x, feature_points_y, cell_width, cell_height)
assert HOG_descriptors.shape[0] == n_points_x * n_points_y
HOG_descriptors.shape

shape: (96, 96)
feature_points_x = [ 8. 24. 40. 56. 72. 88.] feature_points_y = [ 8. 24. 40. 56. 72. 88.]


(36, 128)

**Training** and **testing** - **step 1)** compute HOG features for the entire train and test splits.

In [11]:
train_split = 'train'
HOG_features_train = split_features(dataset_dir, train_split, class_names)

# Concatenate HOG features from all classes of the train split into one 2D matrix with D columns.
# The dimensions are read from the first class; the descriptor length D is the same for every class.
n_images_per_class, n_descriptors_per_image, D = HOG_features_train[0].shape
# Flatten each class to (n_images * n_descriptors, D) and concatenate once, instead of growing the matrix
# class by class, which re-copies all previously collected rows on every step.
HOG_features_train_concatenated = np.concatenate(
    [np.reshape(HOG_features_class, (-1, D)) for HOG_features_class in HOG_features_train])

test_split = 'test'
HOG_features_test = split_features(dataset_dir, test_split, class_names)

Processing train split, class 1: cat
Feature extraction for image 1/500
Feature extraction for image 2/500
Feature extraction for image 3/500
Feature extraction for image 4/500
Feature extraction for image 5/500
Feature extraction for image 6/500
Feature extraction for image 7/500
Feature extraction for image 8/500
Feature extraction for image 9/500
Feature extraction for image 10/500
Feature extraction for image 11/500
Feature extraction for image 12/500
Feature extraction for image 13/500
Feature extraction for image 14/500
Feature extraction for image 15/500
Feature extraction for image 16/500
Feature extraction for image 17/500
Feature extraction for image 18/500
Feature extraction for image 19/500
Feature extraction for image 20/500
Feature extraction for image 21/500
Feature extraction for image 22/500
Feature extraction for image 23/500
Feature extraction for image 24/500
Feature extraction for image 25/500
Feature extraction for image 26/500
Feature extraction for image 27/500


Feature extraction for image 227/500
Feature extraction for image 228/500
Feature extraction for image 229/500
Feature extraction for image 230/500
Feature extraction for image 231/500
Feature extraction for image 232/500
Feature extraction for image 233/500
Feature extraction for image 234/500
Feature extraction for image 235/500
Feature extraction for image 236/500
Feature extraction for image 237/500
Feature extraction for image 238/500
Feature extraction for image 239/500
Feature extraction for image 240/500
Feature extraction for image 241/500
Feature extraction for image 242/500
Feature extraction for image 243/500
Feature extraction for image 244/500
Feature extraction for image 245/500
Feature extraction for image 246/500
Feature extraction for image 247/500
Feature extraction for image 248/500
Feature extraction for image 249/500
Feature extraction for image 250/500
Feature extraction for image 251/500
Feature extraction for image 252/500
Feature extraction for image 253/500
F

Feature extraction for image 450/500
Feature extraction for image 451/500
Feature extraction for image 452/500
Feature extraction for image 453/500
Feature extraction for image 454/500
Feature extraction for image 455/500
Feature extraction for image 456/500
Feature extraction for image 457/500
Feature extraction for image 458/500
Feature extraction for image 459/500
Feature extraction for image 460/500
Feature extraction for image 461/500
Feature extraction for image 462/500
Feature extraction for image 463/500
Feature extraction for image 464/500
Feature extraction for image 465/500
Feature extraction for image 466/500
Feature extraction for image 467/500
Feature extraction for image 468/500
Feature extraction for image 469/500
Feature extraction for image 470/500
Feature extraction for image 471/500
Feature extraction for image 472/500
Feature extraction for image 473/500
Feature extraction for image 474/500
Feature extraction for image 475/500
Feature extraction for image 476/500
F

Feature extraction for image 174/500
Feature extraction for image 175/500
Feature extraction for image 176/500
Feature extraction for image 177/500
Feature extraction for image 178/500
Feature extraction for image 179/500
Feature extraction for image 180/500
Feature extraction for image 181/500
Feature extraction for image 182/500
Feature extraction for image 183/500
Feature extraction for image 184/500
Feature extraction for image 185/500
Feature extraction for image 186/500
Feature extraction for image 187/500
Feature extraction for image 188/500
Feature extraction for image 189/500
Feature extraction for image 190/500
Feature extraction for image 191/500
Feature extraction for image 192/500
Feature extraction for image 193/500
Feature extraction for image 194/500
Feature extraction for image 195/500
Feature extraction for image 196/500
Feature extraction for image 197/500
Feature extraction for image 198/500
Feature extraction for image 199/500
Feature extraction for image 200/500
F

Feature extraction for image 398/500
Feature extraction for image 399/500
Feature extraction for image 400/500
Feature extraction for image 401/500
Feature extraction for image 402/500
Feature extraction for image 403/500
Feature extraction for image 404/500
Feature extraction for image 405/500
Feature extraction for image 406/500
Feature extraction for image 407/500
Feature extraction for image 408/500
Feature extraction for image 409/500
Feature extraction for image 410/500
Feature extraction for image 411/500
Feature extraction for image 412/500
Feature extraction for image 413/500
Feature extraction for image 414/500
Feature extraction for image 415/500
Feature extraction for image 416/500
Feature extraction for image 417/500
Feature extraction for image 418/500
Feature extraction for image 419/500
Feature extraction for image 420/500
Feature extraction for image 421/500
Feature extraction for image 422/500
Feature extraction for image 423/500
Feature extraction for image 424/500
F

Feature extraction for image 122/800
Feature extraction for image 123/800
Feature extraction for image 124/800
Feature extraction for image 125/800
Feature extraction for image 126/800
Feature extraction for image 127/800
Feature extraction for image 128/800
Feature extraction for image 129/800
Feature extraction for image 130/800
Feature extraction for image 131/800
Feature extraction for image 132/800
Feature extraction for image 133/800
Feature extraction for image 134/800
Feature extraction for image 135/800
Feature extraction for image 136/800
Feature extraction for image 137/800
Feature extraction for image 138/800
Feature extraction for image 139/800
Feature extraction for image 140/800
Feature extraction for image 141/800
Feature extraction for image 142/800
Feature extraction for image 143/800
Feature extraction for image 144/800
Feature extraction for image 145/800
Feature extraction for image 146/800
Feature extraction for image 147/800
Feature extraction for image 148/800
F

Feature extraction for image 347/800
Feature extraction for image 348/800
Feature extraction for image 349/800
Feature extraction for image 350/800
Feature extraction for image 351/800
Feature extraction for image 352/800
Feature extraction for image 353/800
Feature extraction for image 354/800
Feature extraction for image 355/800
Feature extraction for image 356/800
Feature extraction for image 357/800
Feature extraction for image 358/800
Feature extraction for image 359/800
Feature extraction for image 360/800
Feature extraction for image 361/800
Feature extraction for image 362/800
Feature extraction for image 363/800
Feature extraction for image 364/800
Feature extraction for image 365/800
Feature extraction for image 366/800
Feature extraction for image 367/800
Feature extraction for image 368/800
Feature extraction for image 369/800
Feature extraction for image 370/800
Feature extraction for image 371/800
Feature extraction for image 372/800
Feature extraction for image 373/800
F

Feature extraction for image 571/800
Feature extraction for image 572/800
Feature extraction for image 573/800
Feature extraction for image 574/800
Feature extraction for image 575/800
Feature extraction for image 576/800
Feature extraction for image 577/800
Feature extraction for image 578/800
Feature extraction for image 579/800
Feature extraction for image 580/800
Feature extraction for image 581/800
Feature extraction for image 582/800
Feature extraction for image 583/800
Feature extraction for image 584/800
Feature extraction for image 585/800
Feature extraction for image 586/800
Feature extraction for image 587/800
Feature extraction for image 588/800
Feature extraction for image 589/800
Feature extraction for image 590/800
Feature extraction for image 591/800
Feature extraction for image 592/800
Feature extraction for image 593/800
Feature extraction for image 594/800
Feature extraction for image 595/800
Feature extraction for image 596/800
Feature extraction for image 597/800
F

Feature extraction for image 795/800
Feature extraction for image 796/800
Feature extraction for image 797/800
Feature extraction for image 798/800
Feature extraction for image 799/800
Feature extraction for image 800/800
Processing test split, class 2: ship
Feature extraction for image 1/800
Feature extraction for image 2/800
Feature extraction for image 3/800
Feature extraction for image 4/800
Feature extraction for image 5/800
Feature extraction for image 6/800
Feature extraction for image 7/800
Feature extraction for image 8/800
Feature extraction for image 9/800
Feature extraction for image 10/800
Feature extraction for image 11/800
Feature extraction for image 12/800
Feature extraction for image 13/800
Feature extraction for image 14/800
Feature extraction for image 15/800
Feature extraction for image 16/800
Feature extraction for image 17/800
Feature extraction for image 18/800
Feature extraction for image 19/800
Feature extraction for image 20/800
Feature extraction for image 2

Feature extraction for image 219/800
Feature extraction for image 220/800
Feature extraction for image 221/800
Feature extraction for image 222/800
Feature extraction for image 223/800
Feature extraction for image 224/800
Feature extraction for image 225/800
Feature extraction for image 226/800
Feature extraction for image 227/800
Feature extraction for image 228/800
Feature extraction for image 229/800
Feature extraction for image 230/800
Feature extraction for image 231/800
Feature extraction for image 232/800
Feature extraction for image 233/800
Feature extraction for image 234/800
Feature extraction for image 235/800
Feature extraction for image 236/800
Feature extraction for image 237/800
Feature extraction for image 238/800
Feature extraction for image 239/800
Feature extraction for image 240/800
Feature extraction for image 241/800
Feature extraction for image 242/800
Feature extraction for image 243/800
Feature extraction for image 244/800
Feature extraction for image 245/800
F

Feature extraction for image 443/800
Feature extraction for image 444/800
Feature extraction for image 445/800
Feature extraction for image 446/800
Feature extraction for image 447/800
Feature extraction for image 448/800
Feature extraction for image 449/800
Feature extraction for image 450/800
Feature extraction for image 451/800
Feature extraction for image 452/800
Feature extraction for image 453/800
Feature extraction for image 454/800
Feature extraction for image 455/800
Feature extraction for image 456/800
Feature extraction for image 457/800
Feature extraction for image 458/800
Feature extraction for image 459/800
Feature extraction for image 460/800
Feature extraction for image 461/800
Feature extraction for image 462/800
Feature extraction for image 463/800
Feature extraction for image 464/800
Feature extraction for image 465/800
Feature extraction for image 466/800
Feature extraction for image 467/800
Feature extraction for image 468/800
Feature extraction for image 469/800
F

Feature extraction for image 667/800
Feature extraction for image 668/800
Feature extraction for image 669/800
Feature extraction for image 670/800
Feature extraction for image 671/800
Feature extraction for image 672/800
Feature extraction for image 673/800
Feature extraction for image 674/800
Feature extraction for image 675/800
Feature extraction for image 676/800
Feature extraction for image 677/800
Feature extraction for image 678/800
Feature extraction for image 679/800
Feature extraction for image 680/800
Feature extraction for image 681/800
Feature extraction for image 682/800
Feature extraction for image 683/800
Feature extraction for image 684/800
Feature extraction for image 685/800
Feature extraction for image 686/800
Feature extraction for image 687/800
Feature extraction for image 688/800
Feature extraction for image 689/800
Feature extraction for image 690/800
Feature extraction for image 691/800
Feature extraction for image 692/800
Feature extraction for image 693/800
F

In [31]:
# Sanity check: features of the first class are stored as (n_images, n_descriptors_per_image, D).
HOG_features_train[0].shape
# Other shapes that can be inspected the same way while debugging:
# HOG_features_train_concatenated.shape
# HOG_features_test[1].shape

(500, 36, 128)

Define functions for codebook construction via K-means clustering, Bag-of-Words histogram representation, nearest-neighbor classification, and evaluation.

In [49]:
def find_nearest_neighbor_L2(points_1, points_2):
    """
    For every point of the first set, find the closest point of the second set under the Euclidean (L2) distance.
    Inputs:
        |points_1|: 2D numpy.ndarray containing the query points as its rows, with dimensions N-by-D.
        |points_2|: 2D numpy.ndarray containing the reference points as its rows, with dimensions K-by-D.
    Output:
        1D NumPy array with N elements; element n is the row index into |points_2| of the nearest neighbor of
        row n of |points_1|.
    """
    n_queries, n_dims = points_1.shape[0], points_1.shape[1]
    # Both sets must live in the same D-dimensional space.
    assert n_dims == points_2.shape[1]

    # Full N-by-K matrix of pairwise distances; the smallest entry of each row marks the nearest neighbor.
    pairwise_distances = spatial.distance.cdist(points_1, points_2, metric='euclidean')
    closest = pairwise_distances.argmin(axis=1)

    assert closest.shape[0] == n_queries
    return closest


def kmeans(points, K, n_iter):
    """
    Cluster the input points into K clusters using K-means with the specified number of iterations and
    output the induced cluster centroids. Randomness is drawn from NumPy's global random state, so results are
    reproducible after np.random.seed(...).
    Inputs:
        |points|: 2D numpy.ndarray containing feature vectors as its rows, with dimensions N-by-D
        |K|: number of clusters
        |n_iter|: number of iterations of K-means algorithm
    Output:
        |centroids|: 2D numpy.ndarray (float) containing the final cluster centroids as its rows, with
        dimensions K-by-D
    Raises:
        ValueError if |points| is not a non-empty 2D array or |K| is smaller than 1.
    """

    # Work on a float copy: centroids are means, and storing them in an integer array would silently truncate.
    points = np.asarray(points, dtype=float)
    if points.ndim != 2 or points.shape[0] == 0:
        raise ValueError('points must be a non-empty 2D array, got shape {}'.format(points.shape))
    if K < 1:
        raise ValueError('K must be at least 1, got {}'.format(K))

    N = points.shape[0]

    # Centroid initialization with randomly selected feature vectors. Sample without replacement so that two
    # centroids never start from the very same data point (which would leave one of the two clusters empty);
    # fall back to sampling with replacement only when there are fewer points than clusters.
    initial_indices = np.random.choice(N, size=K, replace=K > N)
    # Fancy indexing returns a copy, so updating the centroids below does not modify |points|.
    centroids = points[initial_indices]
    assert centroids.shape == (K, points.shape[1])

    # Main K-means loop.
    for i in range(n_iter):
        # 1) Cluster assignment: compute index of nearest centroid for each data point.
        nn_indices = find_nearest_neighbor_L2(points, centroids)
        assert len(nn_indices) == N

        # 2) Centroid update based on current assignment: move each centroid to the mean of all data points
        #    that are currently assigned to its cluster.
        for k in range(K):
            cluster_members = points[nn_indices == k]
            if cluster_members.shape[0] == 0:
                # Empty cluster: randomly re-initialize the centroid with a data point.
                new_dp = np.random.randint(0, N)
                print(f"centroid currently empty, re-initialize with data point #{new_dp}")
                centroids[k] = points[new_dp]
            else:
                centroids[k] = cluster_members.mean(axis=0)

        # Display progress.
        print('Completed K-means iteration {:d}/{:d}'.format(i+1, n_iter))

    return centroids


def bow_histograms_and_labels(HOG_features_split, codebook_words):
    """
    Compute the Bag-of-Words histograms for an entire split of the dataset, using the respective codebook
    with visual words that has been computed with K-means.
    Also create an array of ground truth labels for images in the split.
    Inputs:
        |HOG_features_split|: list of 3D arrays, one for each class, in which each array holds the features for all
        images in the split that belong to that class, e.g. (500, 36, 128)
        |codebook_words|: 2D numpy.ndarray containing codebook words as its rows, with dimensions K-by-D
                          e.g. (100, 128)
    Output:
        |bow_histograms_split|: 2D float array with dimensions n_images_total-by-K; row r counts how many
        descriptors of image r are assigned to each codebook word.
        |labels_split|: 2D int array with dimensions n_images_total-by-1; the label of an image is the index of
        its class in |HOG_features_split|.
    Raises:
        ValueError if the descriptor length of a class does not match the codebook.
    """

    K, D = codebook_words.shape

    # Collect per-class blocks and concatenate once at the end. The empty seed blocks fix the output shapes and
    # dtypes even when there are no classes or no images.
    histogram_blocks = [np.empty((0, K))]
    label_blocks = [np.empty((0, 1), dtype=int)]

    for c, HOG_features_class in enumerate(HOG_features_split):  # for each class
        n_images = HOG_features_class.shape[0]

        # Only the descriptor length has to agree with the codebook; the number of descriptors per image is
        # free, so other grid sizes than 6x6 work as well.
        if n_images > 0 and HOG_features_class.shape[-1] != D:
            raise ValueError('Descriptor length {} of class {} does not match codebook dimension {}.'.format(
                HOG_features_class.shape[-1], c, D))

        # Add labels of current class to overall label array.
        label_blocks.append(np.full((n_images, 1), c, dtype=int))

        bow_histograms_class = np.zeros((n_images, K))

        # Loop over all images in the class and compute BoW histograms.
        for i in range(n_images):
            # |HOG_features_image| is a 2D numpy.ndarray containing all HOG descriptors of the
            #                      current image as its rows.
            HOG_features_image = HOG_features_class[i]

            # Assign each descriptor of the current image to its nearest codebook word.
            nn_indices = find_nearest_neighbor_L2(HOG_features_image, codebook_words)
            assert nn_indices.shape[0] == HOG_features_image.shape[0]

            # Count how many descriptors are assigned to each word; minlength pads unused words with zeros.
            bow_histograms_class[i, :] = np.bincount(nn_indices, minlength=K)

        histogram_blocks.append(bow_histograms_class)

    bow_histograms_split = np.concatenate(histogram_blocks)
    labels_split = np.concatenate(label_blocks)

    return bow_histograms_split, labels_split


def nearest_neighbor_classifier(points_test, points_train, labels_train, norm='L2'):
    """
    Classify test points by assigning to each of them the label of its nearest neighbor point
    from the training set.
    Inputs:
        |points_test|:  2D numpy.ndarray containing the test points as its rows, with dimensions S-by-K.
        |points_train|: 2D numpy.ndarray containing the train points as its rows, with dimensions T-by-K.
        |labels_train|: numpy.ndarray containing the ground truth labels of the train points,
                        with dimensions T-by-1.
        |norm|: norm used to measure distances, 'L2' (Euclidean, default) or 'L1' (Manhattan).
    Output:
        numpy.ndarray with the predicted labels of the test points, with dimensions S-by-1 when |labels_train|
        is T-by-1.
    Raises:
        ValueError if |norm| is not one of the supported norms.
    """
    assert points_test.shape[1] == points_train.shape[1]

    # Compute nearest neighbors.
    if norm == 'L2':
        nn_indices = find_nearest_neighbor_L2(points_test, points_train)
    elif norm == 'L1':
        # The 'cityblock' metric is the L1 (Manhattan) distance.
        l1_distances = spatial.distance.cdist(points_test, points_train, metric='cityblock')
        nn_indices = np.argmin(l1_distances, axis=1)
    else:
        raise ValueError("Unsupported norm {!r}; expected 'L2' or 'L1'.".format(norm))

    # Assign to test points the label of their nearest training neighbor.
    labels_test = labels_train[nn_indices]

    return labels_test


def confusion_matrix(labels_ground_truth, labels_predicted, C):
    """
    Compute the confusion matrix based on the ground truth labels and the respective predictions.
    Inputs:
        |labels_ground_truth|: numpy.ndarray containing the integer ground truth labels in [0, C), with S elements
                               (any shape, e.g. S or S-by-1).
        |labels_predicted|: numpy.ndarray containing the predicted labels, with the same number of elements as
                            |labels_ground_truth|.
        |C|: number of classes.
    Output:
        C-by-C float array; entry (g, p) counts the samples with ground truth label g that were predicted as p.
    Raises:
        ValueError if the two label arrays differ in size or contain negative labels.
    """

    # Flatten both arrays: index arrays of different shapes, e.g. (S, 1) and (S,), would otherwise be broadcast
    # against each other and every ground truth label would be paired with every prediction.
    ground_truth = np.asarray(labels_ground_truth).ravel()
    predicted = np.asarray(labels_predicted).ravel()
    if ground_truth.shape != predicted.shape:
        raise ValueError('Number of ground truth labels ({}) and predictions ({}) differ.'.format(
            ground_truth.size, predicted.size))

    # Initialize confusion matrix to zero values.
    conf = np.zeros((C, C))
    if ground_truth.size == 0:
        return conf

    # Negative indices would silently wrap around to the last classes.
    if ground_truth.min() < 0 or predicted.min() < 0:
        raise ValueError('Labels must be non-negative.')

    # Use definition of confusion matrix to compute its values: rows correspond to ground truth labels, columns to
    # predictions. np.add.at accumulates correctly when the same (row, column) pair occurs several times.
    np.add.at(conf, (ground_truth, predicted), 1)

    return conf


def accuracy_from_confusion_matrix(conf):
    """
    Derive the classification accuracy from a confusion matrix.
    Input:
        |conf|: confusion matrix as a 2D numpy.ndarray, with dimensions C-by-C.
    Output:
        Fraction of correctly classified samples.
    """

    # Correct predictions sit on the main diagonal; all entries together count every evaluated sample.
    n_correct = np.trace(conf)
    n_total = np.sum(conf)

    return n_correct / n_total

In [25]:
# Trial run of K-means with 5 iterations on the concatenated training descriptors.
codebook_words = kmeans(HOG_features_train_concatenated, K, 5)
# The codebook holds one row per cluster, i.e. K rows.
print(codebook_words.shape)

Completed K-means iteration 1/5
Completed K-means iteration 2/5
Completed K-means iteration 3/5
Completed K-means iteration 4/5
Completed K-means iteration 5/5
(100, 128)


In [41]:
# Shape of the first class's training features, as passed to bow_histograms_and_labels.
HOG_features_train[0].shape

(500, 36, 128)

In [44]:
# Trial call on the train split: `b` receives the BoW histograms, `l` the ground truth labels.
b, l = bow_histograms_and_labels(HOG_features_train, codebook_words)

In [42]:
# Labels are stored as a column, one row per image of the split.
l.shape

(1000, 1)

**Training** and **testing** - **step 2)**

In [50]:
# Repeat codebook construction, BoW encoding and nearest-neighbor classification several times, because K-means
# starts from random centroids and therefore yields a different codebook (and accuracy) in every round.
n_evaluation_rounds = 10

# Initialize confusion matrices and array of accuracy values (one entry per evaluation round).
confusion_matrices = np.zeros((n_evaluation_rounds, C, C))
accuracy_values = np.zeros(n_evaluation_rounds)

# Fix random seed to ensure reproducibility of the results. The seed is set once before the loop, so the rounds
# draw different random numbers from the same reproducible stream.
np.random.seed(0)

# Define other parameters.
n_iters_kmeans = 10

# Main loop to repeat training and testing.
for i in range(n_evaluation_rounds):

    print('Running {:d}/{:d} evaluation round for Bag-of-Words classification'.format(i+1, n_evaluation_rounds))

    # TRAINING - STEP 2)i) Construct the codebook of HOG feature vectors by applying K-means to the entire set of
    # training features.
    print('Constructing codebook from training features using K-means...')
    codebook_words = kmeans(HOG_features_train_concatenated, K, n_iters_kmeans)
    print('Codebook constructed.')

    # TRAINING - STEP 2)ii) Compute the Bag-of-Words histogram representation of all training images
    # that is induced by the constructed codebook.
    bow_histograms_train, labels_train = bow_histograms_and_labels(HOG_features_train, codebook_words)

    # TESTING - STEP 2)i) Compute the Bag-of-Words histogram representation of all testing images
    # that is induced by the constructed codebook (the same codebook as for training).
    bow_histograms_test, labels_test_ground_truth = bow_histograms_and_labels(HOG_features_test, codebook_words)

    # TESTING - STEP 2)ii) Predict test labels with nearest-neighbor classifier.
    labels_test_predicted = nearest_neighbor_classifier(bow_histograms_test, 
                                                        bow_histograms_train, 
                                                        labels_train,
                                                        nearest_neighbor_norm)

    # TESTING - STEP 2)iii) Evaluate the predictions of the classifier on the test split against ground truth.
    confusion_matrices[i] = confusion_matrix(labels_test_ground_truth, labels_test_predicted, C)
    accuracy_values[i] = accuracy_from_confusion_matrix(confusion_matrices[i])

# Report cumulative results over all evaluation rounds.
accuracy_average = np.mean(accuracy_values)
# ddof=1 gives the sample standard deviation over the rounds.
accuracy_std = np.std(accuracy_values, ddof=1)
print('%%%%%%%%%%%%%%%%%%%%%%%%%\n\n')
# NOTE: the value printed after "+/-" is three times the sample standard deviation, in percent.
print('Average BoW classification accuracy over {:d} rounds: {:6.2f}% +/- {:5.2f}%'.format(n_evaluation_rounds,
                                                                                           100 * accuracy_average,
                                                                                           100 * (3 * accuracy_std)))

Running 1/10 evaluation round for Bag-of-Words classification
Constructing codebook from training features using K-means...
Completed K-means iteration 1/10
Completed K-means iteration 2/10
Completed K-means iteration 3/10
Completed K-means iteration 4/10
Completed K-means iteration 5/10
Completed K-means iteration 6/10
Completed K-means iteration 7/10
Completed K-means iteration 8/10
Completed K-means iteration 9/10
Completed K-means iteration 10/10
Codebook constructed.
Running 2/10 evaluation round for Bag-of-Words classification
Constructing codebook from training features using K-means...
Completed K-means iteration 1/10
Completed K-means iteration 2/10
Completed K-means iteration 3/10
Completed K-means iteration 4/10
Completed K-means iteration 5/10
Completed K-means iteration 6/10
Completed K-means iteration 7/10
Completed K-means iteration 8/10
Completed K-means iteration 9/10
Completed K-means iteration 10/10
Codebook constructed.
Running 3/10 evaluation round for Bag-of-Words