In [1]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC, SVC
from scipy.cluster.vq import vq
from math import pi, floor

In [3]:
def image_generator(source_path):
    """
    Generate images from a given source location, one at a time together with
    their class label. Every sub-directory of `source_path` is treated as one
    class, named after the sub-directory.
    Entries are visited in sorted order, so that repeated passes over the same
    directory tree enumerate the images in the same order.
    @param source_path: string for the directory to look for images from
    @return: yields a pair image, class_name (a string)
    """
    for directory in sorted(os.listdir(source_path)):
        class_dir = os.path.join(source_path, directory)
        # only directories hold the images of a class: skip stray files such as
        # the ".DS_Store" created by Mac OS X, which would make os.listdir fail
        if directory == ".DS_Store" or not os.path.isdir(class_dir):
            continue
        for image in sorted(os.listdir(class_dir)):
            # deal with an unpleasant feature of Mac OS X
            if image == ".DS_Store":
                continue
            loaded = cv2.imread(os.path.join(class_dir, image))
            # cv2.imread returns None for files it cannot decode; skip them
            # instead of handing None to the feature extractor
            if loaded is None:
                continue
            yield loaded, directory

In [41]:
def get_descriptors(traindata_path, classes=None, return_labels=False):
    """
    Compute and extract descriptors from a set of images.
    Images are numbered 0, 1, 2, ... in the order in which they are generated;
    an image without any keypoint still consumes a number but contributes no rows.
    @param traindata_path: the path to the training images
    @param classes: optional dictionary of class_string: class_number pairs, used to
    translate class names into class numbers when building the labels. If None, the
    class name itself is used as label
    @param return_labels: if True, also return the dictionary mapping each image
    number to its label
    @return: a pandas DataFrame with columns "data" (the descriptor), "image" (the
    image number) and "coords" (the (row, column) position of the keypoint). If
    return_labels is True, a pair (DataFrame, labels dictionary) instead
    """
    # initialize data structures, including SIFT object
    data = list()
    images = list()
    coords = list()
    labels = dict()
    # recent OpenCV builds expose SIFT in the main module; older contrib builds
    # only provide it under xfeatures2d
    if hasattr(cv2, "SIFT_create"):
        sift = cv2.SIFT_create()
    else:
        sift = cv2.xfeatures2d.SIFT_create()

    # for each train image
    for count, (image, class_name) in enumerate(tqdm(image_generator(traindata_path))):
        labels[count] = class_name if classes is None else classes[class_name]
        # extract keypoints and descriptors
        kp, des = sift.detectAndCompute(image, None)
        # no keypoint detected: descriptors are None, nothing to store
        if des is None:
            continue
        # update lists
        data.extend(des)
        images.extend([count] * len(des))
        # (row, column) order, to be used with the spatial pyramid kernel
        coords.extend((point.pt[1], point.pt[0]) for point in kp)

    descriptors = pd.DataFrame({"data": data, "image": images, "coords": coords})

    if return_labels:
        return descriptors, labels
    return descriptors

In [9]:
def get_visual_words(descriptors, k, n_sampled, num_descriptors):
    """
    Cluster a random sample of the descriptors into visual words.
    @param descriptors: a pandas DataFrame with all the descriptors
    @param k: the number of clusters
    @param n_sampled: the number of descriptors to sample
    @param num_descriptors: total number of descriptors
    @return: a fitted object of type sklearn.cluster.KMeans
    """
    # sampling without replacement cannot draw more than the whole set, so cap
    # the fraction at 1 when n_sampled exceeds the number of descriptors
    frac = min(1.0, n_sampled / num_descriptors)
    sample = np.array(descriptors["data"].sample(frac=frac).tolist())
    try:
        kmeans = KMeans(n_clusters=k, n_init=5, n_jobs=-1)
    except TypeError:
        # newer scikit-learn releases no longer accept the n_jobs argument
        kmeans = KMeans(n_clusters=k, n_init=5)
    return kmeans.fit(sample)

In [16]:
def get_histograms(descriptors, count, vocabulary):
    """
    Compute the BoW histograms for all of the training images.
    Images that have no row in `descriptors` keep an all-zero histogram.
    @param descriptors: a pandas DataFrame containing the extracted
    descriptors (columns "data" and "image" are used)
    @param count: the total number of training images
    @param vocabulary: the visual words
    @return: a NumPy array containing the histograms (unnormalized counts),
    of size [n_samples, n_visual_words]
    """
    # get number of visual words and allocate matrix
    k = len(vocabulary)
    histograms = np.zeros((count, k))
    bins = np.arange(k + 1)

    # group the rows by image once, instead of masking the whole frame for
    # every single image
    for image, group in descriptors.groupby("image", sort=False):
        # retrieve its descriptors
        desc = np.array(group["data"].tolist())
        # quantize them into visual words and update the array
        words, _ = vq(desc, vocabulary, check_finite=False)
        histograms[image], _ = np.histogram(words, bins=bins)

    return histograms

In [60]:
def histEMD(hist1, hist2, k=None):
    """
    Compute the Earth Mover's Distance between two histograms.
    @param hist1: the first histogram
    @param hist2: the second histogram
    @param k: the number of elements to match per histogram. If None (default),
    the length of hist1 is used. The default is resolved at call time, so the
    function can be defined before any global `k` exists
    @return: a float for the Earth Mover's Distance
    """
    if k is None:
        k = len(hist1)
    # the OpenCV implementation of the EMD requires two nx2 float32 arrays as input,
    # where the first column are the weights and the second are the 1-d coordinates
    positions = np.arange(k, dtype=np.float32)
    signature1 = np.column_stack((np.asarray(hist1, dtype=np.float32)[:k], positions))
    signature2 = np.column_stack((np.asarray(hist2, dtype=np.float32)[:k], positions))
    D, _, _ = cv2.EMD(signature1, signature2, cv2.DIST_L2)
    return D

In [18]:
def histogram_intersection(hist1, hist2):
    """
    Histogram intersection between two histograms: for every bin take the
    smaller of the two values, then add everything up.
    @param hist1: the first histogram
    @param hist2: the second histogram
    @return: a NumPy array holding a single element, the sum of the bin-wise minima
    """
    stacked = np.stack((hist1, hist2))
    binwise_minimum = stacked.min(axis=0)
    return np.array([binwise_minimum.sum()])

In [34]:
def chisquared_distance(hist1, hist2, k=None):
    """
    Compute half of the Chi-squared distance between two histograms, i.e.
    0.5 * sum_i (t_i - t_i')^2 / (t_i + t_i'), restricted to the first k bins.
    @param hist1: the first histogram
    @param hist2: the second histogram
    @param k: the number of visual words (bins) to consider. If None (default),
    all bins are used. The default is resolved at call time, so the function
    can be defined before any global `k` exists
    @return: the scalar distance (0.0 when every considered bin is empty in both
    histograms)
    """
    first = np.asarray(hist1, dtype=np.float64)[:k]
    second = np.asarray(hist2, dtype=np.float64)[:k]
    # since in the Chi-squared distance we compute ti - ti', square it, then divide by ti + ti', we might
    # encounter a 0/0 issue. Since the tis are non-negative (being the histogram counts), this happens if
    # and only if both ti and ti' are equal to 0: a boolean mask keeps only the "valid" bins. Unlike an
    # index array, the mask is still a legal index when no bin is valid
    valid = ~((first == 0) & (second == 0))
    D = np.sum(np.square(first[valid] - second[valid]) / (first[valid] + second[valid]))  # the Chi-squared distance
    return 0.5 * D

In [35]:
def chisquared_kernel(hist1, hist2, k, A):
    """
    Compute the generalized Gaussian kernel based on the Chi-squared distance
    between two histograms: exp(-(0.5 * D) / A), where
    D = sum_i (t_i - t_i')^2 / (t_i + t_i') over the first k bins.
    @param hist1: the first histogram
    @param hist2: the second histogram
    @param k: the number of visual words
    @param A: the scale parameter (> 0)
    @return: the scalar value of the kernel (1.0 when every considered bin is
    empty in both histograms)
    """
    first = np.asarray(hist1, dtype=np.float64)[:k]
    second = np.asarray(hist2, dtype=np.float64)[:k]
    # since in the Chi-squared distance we compute ti - ti', square it, then divide by ti + ti', we might
    # encounter a 0/0 issue. Since the tis are non-negative (being the histogram counts), this happens if
    # and only if both ti and ti' are equal to 0: a boolean mask keeps only the "valid" bins. Unlike an
    # index array, the mask is still a legal index when no bin is valid
    valid = ~((first == 0) & (second == 0))
    D = np.sum(np.square(first[valid] - second[valid]) / (first[valid] + second[valid]))  # the Chi-squared distance
    # plug into the generalized Gaussian kernel
    return np.exp(- (D * 0.5) / A)

In [19]:
def compute_Gram(data, kernel, *args):
    """
    Build the Gram matrix of a dataset for a given kernel function.
    @param data: the dataset under consideration, of size [n_samples, n_features]
    @param kernel: the kernel used. Called as kernel(row_i, row_j, *args), where
    row_i and row_j are two rows of 'data'
    @param *args: additional positional arguments to be passed to the kernel
    @return: the (symmetric) Gram matrix, of size [n_samples, n_samples]
    """
    n_rows, _ = np.shape(data)
    gram = np.zeros((n_rows, n_rows))
    # the kernel is evaluated on the upper triangle only (diagonal included);
    # each value is mirrored onto the lower triangle
    upper_rows, upper_cols = np.triu_indices(n_rows)
    for i, j in zip(upper_rows, upper_cols):
        value = kernel(data[i, :], data[j, :], *args)
        gram[i, j] = value
        gram[j, i] = value
    return gram

In [20]:
def soft_kernel(descriptor, word, const, sigma_2):
    """
    Gaussian kernel on the Euclidean distance between a descriptor and a visual word,
    i.e. const * exp(-||descriptor - word||^2 / (2 * sigma_2)). It is used to build
    soft-assignment "histograms", where a descriptor contributes to every word
    according to this value. See Van Gemert (2008).
    @param descriptor: the SIFT descriptor under consideration
    @param word: the visual word from the vocabulary under consideration
    @param const: the constant factor in front of the exponential, passed in so that
    it can be computed once by the caller
    @param sigma_2: the variance of the Gaussian kernel, passed in so that it can be
    computed once by the caller
    @return: the value of the kernel between the descriptor and the word
    """
    squared_distance = np.linalg.norm(descriptor - word)**2
    exponent = -squared_distance / (2 * sigma_2)
    return const * np.exp(exponent)

In [56]:
def fit_and_predict_knn(clf, sift, vocabulary, train_data, labels, testdata_path, classes):
    """
    Fit a classifier object on a train set and formulate predictions for images in a test set.
    To be used with the k-neighbors classifier.
    @param clf: a classifier object implementing "fit" and "predict" methods, like any classifier
    conforming to the Scikit-Learn API
    @param sift: a SIFT object exposing "detectAndCompute", to extract the SIFT descriptors
    @param vocabulary: a NumPy array containing the visual words of the vocabulary
    @param train_data: a NumPy array for the train dataset, of size [n_samples, n_features]
    @param labels: a dictionary mapping the training images' numbers to their respective class label
    @param testdata_path: a string for the directory containing the test images to evaluate
    @param classes: a dictionary of class_string: class_number pairs
    @return: a tuple (y_true, y_pred), the former being the list of true labels, the latter being the 
    list of predicted labels
    """
    # allocate lists
    y_pred = list()
    y_true = list()
    # get the number of visual words
    k = len(vocabulary)
    bins = np.arange(k + 1)

    # fit the classifier object using training data; row i of train_data belongs to
    # image number i, so the labels are taken in increasing order of image number
    clf.fit(train_data, np.array([labels[image_id] for image_id in sorted(labels)]))

    # iterate over the images of the test set
    for image, class_name in tqdm(image_generator(testdata_path)):
        # extract SIFT features
        _, des = sift.detectAndCompute(image, None)
        histogram = np.zeros((1, k), dtype=np.float64)
        # descriptors are None when no keypoint is found: the histogram then stays
        # all-zero and is not normalized (that would divide by zero)
        if des is not None and len(des):
            # map each feature to its closest visual word
            words, _ = vq(des, vocabulary, check_finite=False)
            # build a histogram of visual word occurences inside the test image
            histogram[0, :] = np.histogram(words, bins=bins)[0]
            # normalize the histogram
            histogram /= np.sum(histogram)
        # use the classifier to formulate a prediction on the current image
        pred = clf.predict(histogram)
        # update lists; predict returns one label per row, take the only one
        y_pred.append(int(pred[0]))
        y_true.append(classes[class_name])

    return y_true, y_pred

In [39]:
def fit_and_predict_linear_svm(sift, vocabulary, train_data, labels, testdata_path, classes, distance=None, kernel=None):
    """
    Fit a linear SVM on a train set and formulate predictions for images in a test set.
    In particular, the one-vs-rest approach is used and one binary classifier per class is trained.
    Class numbers are expected to be 0, 1, ..., len(classes) - 1.
    @param sift: a SIFT object exposing "detectAndCompute", to extract the SIFT descriptors
    @param vocabulary: a NumPy array containing the visual words of the vocabulary
    @param train_data: a NumPy array for the train dataset, of size [n_samples, n_features]
    @param labels: a dictionary mapping the training images' numbers to their respective class label
    @param testdata_path: a string for the directory containing the test images to evaluate
    @param classes: a dictionary of class_string: class_number pairs
    @param distance: unused, kept so that the signature parallels the kernel SVM one
    @param kernel: unused, kept so that the signature parallels the kernel SVM one
    @return: a tuple (y_true, y_pred), the former being the list of true labels, the latter being the 
    list of predicted labels
    """
    # allocate lists
    y_pred = list()
    y_true = list()
    # get the number of visual words
    k = len(vocabulary)
    bins = np.arange(k + 1)

    # row i of train_data belongs to image number i, so the labels are taken in
    # increasing order of image number
    train_labels = np.array([labels[image_id] for image_id in sorted(labels)])

    # fit one one-vs-rest classifier object per class using the training data
    # passed in (not whatever global happens to be called "histograms")
    num_classes = len(classes)
    classifiers = list()
    for curr_label in range(num_classes):
        clf = SVC(kernel="linear")
        # use positive label for the current "one" class and negative for the "rest" classes
        clf.fit(train_data, np.where(train_labels == curr_label, 1, -1))
        classifiers.append(clf)

    # iterate over the images of the test set
    for image, class_name in tqdm(image_generator(testdata_path)):
        # extract SIFT features
        _, des = sift.detectAndCompute(image, None)
        histogram = np.zeros((1, k), dtype=np.float64)
        # descriptors are None when no keypoint is found: the histogram then stays
        # all-zero and is not normalized (that would divide by zero)
        if des is not None and len(des):
            # map each feature to its closest visual word
            words, _ = vq(des, vocabulary, check_finite=False)
            # build a histogram of visual word occurences inside the test image
            histogram[0, :] = np.histogram(words, bins=bins)[0]
            # normalize the histogram
            histogram /= np.sum(histogram)
        # compute the signed distance from each hyperplane (w . x + b)
        scores = np.array([clf.decision_function(histogram)[0] for clf in classifiers])
        # update lists
        y_pred.append(int(np.argmax(scores)))  # assign image to the class with the highest real-valued output
        y_true.append(classes[class_name])

    return y_true, y_pred

In [38]:
def fit_and_predict_kernel_svm(sift, vocabulary, train_data, labels, testdata_path, classes, distance, kernel):
    """
    Fit a non-linear one-vs-rest SVM on a train set and formulate predictions for images in a test set.
    The Gram matrix is exp(-distance / A), where A is the mean of the pairwise train distances.
    @param sift: a SIFT object exposing "detectAndCompute", to extract the SIFT descriptors
    @param vocabulary: a NumPy array containing the visual words of the vocabulary
    @param train_data: a NumPy array for the train dataset, of size [n_samples, n_features]
    @param labels: a dictionary mapping the training images' numbers to their respective class label
    @param testdata_path: a string for the directory containing the test images to evaluate
    @param classes: a dictionary of class_string: class_number pairs
    @param distance: distance function to compute the kernel from, called as distance(hist1, hist2, k)
    @param kernel: kernel function used to perform the "kernel trick", called as kernel(hist1, hist2, k, A)
    @return: a tuple (y_true, y_pred), the former being the list of true labels, the latter being the 
    list of predicted labels
    """
    # allocate lists
    y_pred = list()
    y_true = list()
    # get the number of visual words and of training samples
    k = len(vocabulary)
    bins = np.arange(k + 1)
    num_train = np.shape(train_data)[0]

    # pre-compute the Gram matrix among training instances to be
    # fed to the "fit" function
    gram = compute_Gram(train_data, distance, k)
    # since this function is supposed to be used with Chi-squared kernel
    A = np.mean(gram[np.triu_indices(num_train)])  # scale parameter
    gram = np.exp(- (gram / A))  # generalized Gaussian kernel

    # fit the one-vs-rest classifiers using training data; row i of train_data belongs
    # to image number i, so the labels are taken in increasing order of image number
    svc = OneVsRestClassifier(SVC(kernel="precomputed"), n_jobs=-1)
    svc = svc.fit(gram, np.array([labels[image_id] for image_id in sorted(labels)]))

    # iterate over the images of the test set
    for image, class_name in tqdm(image_generator(testdata_path)):
        # extract SIFT features
        _, des = sift.detectAndCompute(image, None)
        histogram = np.zeros((1, k), dtype=np.float64)
        # descriptors are None when no keypoint is found: the histogram then stays
        # all-zero and is not normalized (that would divide by zero)
        if des is not None and len(des):
            # map each feature to its closest visual word
            words, _ = vq(des, vocabulary, check_finite=False)
            # build a histogram of visual word occurences inside the test image
            histogram[0, :] = np.histogram(words, bins=bins)[0]
            # normalize the histogram
            histogram /= np.sum(histogram)
        # a precomputed-kernel SVM is fed the kernel values between the test sample and
        # every training sample, i.e. the rows of train_data the Gram matrix was built from
        kernel_row = np.array([kernel(histogram[0, :], train_data[num, :], k, A) for num in range(num_train)]).reshape(1, -1)
        pred = svc.predict(kernel_row)
        # update lists; predict returns one label per row, take the only one
        y_pred.append(int(pred[0]))
        y_true.append(classes[class_name])

    return y_true, y_pred

In [25]:
def soft_assignment(descriptors, sift, labels, testdata_path, classes, count, vocabulary):
    """
    Fit a one-vs-rest SVM with histogram intersection kernel on a train set and formulate
    predictions for images in a test set.
    Histograms are computed using the soft-assignment rule proposed by Van Gemert (2008).
    @param descriptors: a pandas DataFrame containing the train descriptors and metadata
    @param sift: a SIFT object exposing "detectAndCompute", to extract the SIFT descriptors
    @param labels: a dictionary mapping the training images' numbers to their respective class label
    @param testdata_path: a string for the directory containing the test images to evaluate
    @param classes: a dictionary of class_string: class_number pairs
    @param count: the total number of training images
    @param vocabulary: a NumPy array containing the visual words of the vocabulary
    @return: a tuple (y_true, y_pred), the former being the list of true labels, the latter being the 
    list of predicted labels
    """
    # allocate the train histograms
    k = len(vocabulary)
    histograms = np.zeros((count, k))
    # compute kernel invariants; these quantities are fixed for a given sigma in the soft kernel,
    # and so are pre-computed for efficiency reasons. In particular, we follow the advice of the
    # aforementioned paper to set sigma between 100 and 200
    sigma = 150
    sigma_2 = sigma**2
    const = (1 / (sigma * (2 * pi)**(1 / 2)))  # term in front of the Gaussian density

    # iterate over the train images and compute the distance-weighted histograms, where each descriptor
    # contributes to each bin according to the kernel, and not only to the closest one
    for image, group in tqdm(descriptors.groupby("image", sort=False)):
        words = np.array([[soft_kernel(d, word, const, sigma_2) for word in vocabulary] for d in group["data"]])
        histograms[image] = np.sum(words, axis=0)

    # normalize histograms; rows of images without descriptors sum to zero and are
    # left as they are instead of being divided by zero
    sums = np.sum(histograms, axis=1).reshape(count, 1)
    sums[sums == 0] = 1
    histograms /= sums

    # create and fit a histogram intersection SVM to the data; row i of the histograms
    # belongs to image number i, so the labels are taken in increasing order of image number
    svc = OneVsRestClassifier(SVC(kernel="precomputed"), n_jobs=-1)
    gram = compute_Gram(histograms, histogram_intersection)
    svc = svc.fit(gram, np.array([labels[image_id] for image_id in sorted(labels)]))

    y_pred = list()
    y_true = list()

    # iterate over the test set
    for image, class_name in tqdm(image_generator(testdata_path)):
        # extract features
        _, desc = sift.detectAndCompute(image, None)
        histogram = np.zeros((1, k))
        # descriptors are None when no keypoint is found: keep the all-zero histogram
        if desc is not None and len(desc):
            # compute histogram using soft assignment
            words = np.array([[soft_kernel(d, word, const, sigma_2) for word in vocabulary] for d in desc])
            histogram[0, :] = np.sum(words, axis=0)
            # normalize
            total = np.sum(histogram)
            if total > 0:
                histogram /= total
        # predict and update: the classifier is fed the kernel values between the test
        # sample and each of the `count` training samples
        kernel_row = np.array([histogram_intersection(histogram[0, :], histograms[num, :]) for num in range(count)]).reshape(1, -1)
        pred = svc.predict(kernel_row)
        y_pred.append(int(pred[0]))
        y_true.append(classes[class_name])

    return y_true, y_pred

In [40]:
def _spatial_pyramid_histogram(image, sift, vocabulary, levels, weights):
    """
    Compute the weighted, concatenated (unnormalized) visual word histograms of all
    the subregions of an image, level by level and, within a level, row by row.
    @param image: the image, as an array whose first two axes are rows and columns
    @param sift: a SIFT object exposing "detectAndCompute"
    @param vocabulary: a NumPy array containing the visual words of the vocabulary
    @param levels: the number of levels of the pyramid
    @param weights: a dictionary mapping each level to its weight
    @return: a 1-d NumPy array of size k * sum(4**l for l in range(levels))
    """
    k = len(vocabulary)
    bins = np.arange(k + 1)
    height, width = np.shape(image)[:2]
    parts = list()
    for level in range(levels):
        # level l splits the image in a 2**l x 2**l grid
        cells = 2**level
        x_step = height // cells
        y_step = width // cells
        w = weights[level]
        for row in range(cells):
            for col in range(cells):
                x = row * x_step
                y = col * y_step
                region = np.ascontiguousarray(image[x:x+x_step, y:y+y_step])
                des = None
                if region.size:
                    # extract descriptors considering only the subregion
                    _, des = sift.detectAndCompute(region, None)
                # there might be some subregions having no descriptors; just fill the
                # corresponding histogram with zeros and move to the next one
                if des is None or len(des) == 0:
                    parts.append(np.zeros(k))
                    continue
                # quantize descriptors
                words, _ = vq(des, vocabulary, check_finite=False)
                # weigh the counts only: np.histogram also returns the bin edges
                counts, _ = np.histogram(words, bins=bins)
                parts.append(w * counts)
    return np.concatenate(parts)


def spatial_pyramid_kernel(descriptors, sift, labels, classes, count, vocabulary, paths, levels=3, weights=None):
    """
    Fit a one-vs-rest SVM with histogram intersection kernel on a train set and formulate
    predictions for images in a test set.
    The algorithm followed is the Spatial Pyramid Kernel proposed by Lazebnik et al., 2006.
    @param descriptors: a pandas DataFrame containing the train descriptors and metadata
    (not used here: descriptors are re-extracted per subregion)
    @param sift: a SIFT object exposing "detectAndCompute", to extract the SIFT descriptors
    @param labels: a dictionary mapping the training images' numbers to their respective class label
    @param classes: a dictionary of class_string: class_number pairs
    @param count: the total number of training images
    @param vocabulary: a NumPy array containing the visual words of the vocabulary
    @param paths: a tuple of strings for the directories containing the training and test images
    @param levels: the number of levels of the SPK
    @param weights: a dictionary mapping each level to its weight (a scalar NumPy array). If None,
    the weights 0.25, 0.15 and 0.5 are used for levels 0, 1 and 2; with more than 3 levels the
    weights must be given explicitly
    @return: a tuple (y_true, y_pred), the former being the list of true labels, the latter being the 
    list of predicted labels
    """
    if weights is None:
        # NOTE(review): for a 3-level pyramid Lazebnik et al. appear to use 1/4, 1/4, 1/2;
        # the 0.15 of level 1 may be a typo for 0.25 - confirm before changing it
        weights = {0: np.array([0.25]), 1: np.array([0.15]), 2: np.array([0.5])}

    # unpack paths, get number of visual words
    traindata_path, testdata_path = paths
    k = len(vocabulary)
    # compute total number of features across all the subregions
    ext_k = k * sum([4**l for l in range(levels)])

    # allocate and compute the "extended" histograms of the training images
    histograms = np.zeros((count, ext_k))
    for image_id, (image, _) in enumerate(tqdm(image_generator(traindata_path))):
        histograms[image_id, :] = _spatial_pyramid_histogram(image, sift, vocabulary, levels, weights)

    # normalize histograms; rows of images without descriptors sum to zero and are
    # left as they are instead of being divided by zero
    sums = np.sum(histograms, axis=1).reshape(count, 1)
    sums[sums == 0] = 1
    histograms /= sums

    y_pred = list()
    y_true = list()

    # create and fit a histogram intersection SVM to the data; row i of the histograms
    # belongs to image number i, so the labels are taken in increasing order of image number
    svc = OneVsRestClassifier(SVC(kernel="precomputed"), n_jobs=-1)
    gram = compute_Gram(histograms, histogram_intersection)
    svc = svc.fit(gram, np.array([labels[image_id] for image_id in sorted(labels)]))

    # for each test image, perform prediction pipeline
    for image, class_name in tqdm(image_generator(testdata_path)):
        histogram = _spatial_pyramid_histogram(image, sift, vocabulary, levels, weights).reshape(1, ext_k)
        # normalize
        total = np.sum(histogram)
        if total > 0:
            histogram /= total
        # predict using a histogram intersection kernel between the test sample and
        # each of the `count` training samples
        kernel_row = np.array([histogram_intersection(histogram[0, :], histograms[num, :]) for num in range(count)]).reshape(1, -1)
        pred = svc.predict(kernel_row)
        # update
        y_pred.append(int(pred[0]))
        y_true.append(classes[class_name])

    return y_true, y_pred

In [13]:
def build_confusion_matrix(y_true, y_pred, num_classes):
    """
    Count, for every (true class, predicted class) pair, how many test examples fall in it.
    @param y_true: a list with the true labels for each test example
    @param y_pred: a list with the predicted labels for each test example
    @param num_classes: the total number of different classes
    @return: a NumPy array for the confusion matrix, of size [num_classes, num_classes],
    with the true class along the rows and the predicted class along the columns
    """
    counts = np.zeros((num_classes, num_classes))
    for position, true_label in enumerate(y_true):
        predicted_label = y_pred[position]
        counts[true_label, predicted_label] += 1
    return counts

In [14]:
def plot_confusion_matrix(confusion_matrix, classes, name=None, cmap=plt.cm.Blues):
    """
    Produce a plot for a given confusion matrix, normalizing with respect to the true
    labels, with the true class on the y-axis and the predicted class on the x-axis.
    The matrix passed in is left untouched: normalization is done on a copy.
    @param confusion_matrix: the confusion matrix to plot
    @param classes: a dictionary of class_string: class_number pairs
    @param name: the name used to save the plot, including the extension
    @param cmap: the colormap to use when plotting
    @return: None, just plot
    """
    counts = np.asarray(confusion_matrix, dtype=np.float64)
    # each row (true class) is divided by its own total: keepdims makes the totals a
    # column, so that broadcasting divides row i by total i and not column i by total i
    row_sums = np.sum(counts, axis=1, keepdims=True)
    # rows without any example are left at zero instead of being divided by zero
    normalized = np.divide(counts, row_sums, out=np.zeros_like(counts), where=row_sums != 0)
    plt.imshow(normalized, interpolation="nearest", cmap=cmap)
    plt.title("normalized confusion matrix")
    plt.colorbar()
    marks = np.arange(len(classes))
    plt.xticks(marks, list(classes.keys()), rotation=90)
    plt.yticks(marks, list(classes.keys()))
    plt.xlabel("predicted class")
    plt.ylabel("true class")
    if name:
        plt.savefig(name)

In [15]:
def accuracy(confusion_matrix):
    """
    Accuracy of a classifier from its confusion matrix: the sum of the diagonal
    entries (examples whose predicted class equals the true one) divided by the
    sum of all the entries.
    @param confusion_matrix: the output of the evaluation of a classifier on a test set
    @return: the accuracy as a float
    """
    correct = np.sum(np.diag(confusion_matrix))
    total = np.sum(confusion_matrix)
    return correct / total

In [None]:
# MAIN CELL

# paths and important global variables
traindata_path = "./data/train"
testdata_path = "./data/test" 
k = 50
n_sampled = 100000
# one class per sub-directory of the training folder, numbered 0, 1, 2, ...; stray files
# (e.g. Mac OS X's ".DS_Store") are filtered out explicitly instead of assuming that the
# first entry of the listing is one of them
class_names = sorted(name for name in os.listdir(traindata_path) if os.path.isdir(os.path.join(traindata_path, name)))
classes = {class_name: num for num, class_name in enumerate(class_names)}
# SIFT extractor shared by all the test pipelines; recent OpenCV builds expose it in the
# main module, older contrib builds only under xfeatures2d
sift = cv2.SIFT_create() if hasattr(cv2, "SIFT_create") else cv2.xfeatures2d.SIFT_create()

# compute descriptors
descriptors = get_descriptors(traindata_path)
# image number -> class number; images are enumerated through the same generator used to
# extract the descriptors, so that the numbering matches (this re-reads the images from disk)
labels = {image_id: classes[class_name] for image_id, (_, class_name) in enumerate(image_generator(traindata_path))}
# total number of training images
count = len(labels)
# quantize descriptors into visual words
kmeans = get_visual_words(descriptors, k, n_sampled, descriptors.shape[0])
# compute histograms
histograms = get_histograms(descriptors, count, kmeans.cluster_centers_)
num_obs, _ = np.shape(histograms)
bins = np.arange(k + 1)
# normalize; images without descriptors have an all-zero histogram, which is left as it is
sums = np.sum(histograms, axis=1).reshape(num_obs, 1)
sums[sums == 0] = 1
histograms /= sums

# RUN TESTS: UNCOMMENT/COMMENT WHERE NECESSARY
# run KNN test
knn = KNeighborsClassifier(n_neighbors=1, metric=histEMD, n_jobs=-1)
y_true, y_pred = fit_and_predict_knn(knn, sift, kmeans.cluster_centers_, histograms, labels, testdata_path, classes)
# run linear SVM test
#y_true, y_pred = fit_and_predict_linear_svm(sift, kmeans.cluster_centers_, histograms, labels, testdata_path, classes)
# run chisquared SVM test
#y_true, y_pred = fit_and_predict_kernel_svm(sift, kmeans.cluster_centers_, histograms, labels, testdata_path, classes, distance=chisquared_distance, kernel=chisquared_kernel)
# run soft assignment test
#y_true, y_pred = soft_assignment(descriptors, sift, labels, testdata_path, classes, count, kmeans.cluster_centers_)
# run SPK test
#y_true, y_pred = spatial_pyramid_kernel(descriptors, sift, labels, classes, count, kmeans.cluster_centers_, (traindata_path, testdata_path))

# compute confusion matrix and visualize results
cm = build_confusion_matrix(y_true, y_pred, len(classes))
print(accuracy(cm))
plot_confusion_matrix(cm, classes)