In [None]:
# Make the packages installed under this site-packages directory importable.
# Original note: this adds SIFT and SURF functionalities (presumably an
# OpenCV build exposing them is installed there -- TODO confirm).
import sys

_EXTRA_SITE_PACKAGES = "/usr/local/lib/python3.7/site-packages/"

# Only append when missing, so re-running this cell does not pile up
# duplicate entries in sys.path.
if _EXTRA_SITE_PACKAGES not in sys.path:
    sys.path.append(_EXTRA_SITE_PACKAGES)

In [None]:
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import numpy as np
import numpy.linalg as lin

In [None]:
import utils        # Utility to measure code execution
                    # and plot confusion matrices.
import vocabulary   # Read images and produce visual vocabulary.
import classifiers  # Implements different classifiers.

In [None]:
# Reload the `classifiers` module so that edits to its source take effect
# without restarting the kernel. Only `classifiers` is reloaded here;
# `utils` and `vocabulary` keep the version imported earlier.
import importlib
importlib.reload(classifiers)

In [None]:
# Print the documentation of the helper used below to build the
# visual dictionary.
help(vocabulary.k_means_words)

---

# Main

## 1. and 2.: Reading images, computing visual words and histograms

In [None]:
# Locations of the train and test images, relative to the working directory.
train_path, test_path = "./dataset/train", "./dataset/test"

# Settings handed to vocabulary.k_means_words.
n_clusters = 100         # Size of dictionary.
n_descriptors = 100_000  # presumably how many descriptors feed the clustering -- TODO confirm

In [None]:
with utils.codeTimer("Build train set"):

    # Read the training images and compute their descriptors; the result
    # is a dataframe with one row per descriptor.
    train_df = vocabulary.compute_descriptors(train_path)

    # Positional access (.iloc) always inspects the first row, whatever
    # labels the dataframe index happens to carry; a label lookup with [0]
    # would fail or return several rows on a non-unique / non-range index.
    print("Total number of {}-dimensional descriptors: {}"
          .format(len(train_df['descriptor'].iloc[0]), len(train_df)))

    # Compute kmeans clustering using descriptors to obtain the visual
    # dictionary.
    # Note: cluster centers are stored in the returned object and can be
    # obtained using `kmeans.cluster_centers_`.
    kmeans = vocabulary.k_means_words(train_df, n_clusters, n_descriptors)

    # Aggregate descriptor info, making the dataframe more compact: one row
    # per (image_id, label), whose 'descriptor' column now contains the
    # list of that image's descriptors.
    train_df = (
        train_df
        .groupby(['image_id', 'label'], as_index=False)
        .agg({'descriptor': list})
    )

    # Compute histograms and add them to the dataframe.
    train_histograms = vocabulary.compute_histogram(train_df, kmeans)
    train_df['histogram'] = train_histograms

In [None]:
# Build the test-set histograms with the same steps used for the train set,
# reusing the visual words extracted from the training images.

with utils.codeTimer("Build test set"):

    # Compute the descriptors of the test images, then collapse them to one
    # row per (image_id, label) holding the list of that image's descriptors.
    test_df = vocabulary.compute_descriptors(test_path)
    test_df = (
        test_df
        .groupby(['image_id', 'label'], as_index=False)
        .agg({'descriptor': list})
    )

    # `kmeans` is the object fitted on the training descriptors; it is
    # deliberately not recomputed here.
    test_histograms = vocabulary.compute_histogram(test_df, kmeans)
    test_df['histogram'] = test_histograms

----

## 3.: Nearest Neighbor classifier

In [None]:
with utils.codeTimer("NN classifier"):
    # Call through the `classifiers` module, like the other classifier
    # cells: no bare `nn_classifier` name is imported in this notebook,
    # so the unqualified call raises NameError on a fresh kernel.
    true, predicted = classifiers.nn_classifier(train_df, test_df)

# Adding predicted labels to dataframe.
test_df["predicted"] = predicted

In [None]:
# Confusion matrix of the nearest-neighbor predictions computed above.
utils.plot_confusion_matrix(
    true, predicted, title="Confusion matrix: NN classifier"
)

----

## 4. and 5.: Linear SVM

In [None]:
# Run the linear SVM on the train/test dataframes, timing the call.
with utils.codeTimer("Linear SVM classifier"):
    true, predicted = classifiers.linear_SVM_classifier(
        train_df, test_df
    )

# Store the predicted labels next to the test images (this overwrites
# the "predicted" column left by any previous classifier).
test_df["predicted"] = predicted

In [None]:
# Confusion matrix of the linear SVM predictions computed above.
utils.plot_confusion_matrix(
    true, predicted, title="Confusion matrix: linear SVM"
)

----

## 6. and 7.: Gaussian SVM

In [None]:
# Run the Gaussian SVM with dist='chi' (chi squared distance, per the
# timer label), timing the call.
with utils.codeTimer("Gaussian SVM classifier, chi squared distance"):
    true, predicted = classifiers.gaussian_SVM_classifier(
        train_df, test_df, dist='chi'
    )

# Store the predicted labels next to the test images (this overwrites
# the "predicted" column left by any previous classifier).
test_df["predicted"] = predicted

In [None]:
# Confusion matrix of the Gaussian SVM run with the chi squared distance.
# The distance is named in the title so this figure can be told apart from
# the one produced for the earth mover distance run.
utils.plot_confusion_matrix(
    true, predicted,
    title="Confusion matrix: gaussian kernel SVM (chi squared distance)"
)

In [None]:
# Run the Gaussian SVM with dist='emd' (earth mover distance, per the
# timer label), timing the call.
with utils.codeTimer("Gaussian SVM classifier, earth mover distance"):
    true, predicted = classifiers.gaussian_SVM_classifier(
        train_df, test_df, dist='emd'
    )

# Store the predicted labels next to the test images (this overwrites
# the "predicted" column left by any previous classifier).
test_df["predicted"] = predicted

In [None]:
# Confusion matrix of the Gaussian SVM run with the earth mover distance.
# The distance is named in the title so this figure can be told apart from
# the one produced for the chi squared distance run.
utils.plot_confusion_matrix(
    true, predicted,
    title="Confusion matrix: gaussian kernel SVM (earth mover distance)"
)

----

## 8.: Error-Correcting Output Codes (ECOC)

In [None]:
# Run the ECOC classifier with n_classifiers=100, timing the call.
with utils.codeTimer("ECOC SVM classifier"):
    true, predicted = classifiers.ecoc_classifier(
        train_df, test_df, n_classifiers=100
    )

# Store the predicted labels next to the test images (this overwrites
# the "predicted" column left by any previous classifier).
test_df["predicted"] = predicted

In [None]:
# Confusion matrix of the ECOC predictions computed above.
utils.plot_confusion_matrix(
    true, predicted, title="Confusion matrix: ECOC"
)

----