In [None]:
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.preprocessing import normalize

import numpy as np
import cv2
from google.colab.patches import cv2_imshow

import glob
import os
%matplotlib inline 
from matplotlib import pyplot as plt
import random

Upload the `images.zip` file to Colab and unzip it using the cells below. Remember that uploaded files are ephemeral: they only persist for the current session and must be uploaded again in every new session.

In [None]:
# Alternative to reading the archive from Google Drive: upload images.zip
# directly into the Colab session. Left disabled; the cells below mount Drive
# and unzip the archive from there instead.
# from google.colab import files
# uploaded = files.upload()

In [None]:
# Mount Google Drive at /content/drive so that the image archive stored there
# can be accessed by path from this session.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# !unzip images.zip
!unzip /content/drive/MyDrive/images.zip

The **image** folder contains 5 sub-directories each of which contains images from one of the following image classes: airplanes, cars, dog, faces and keyboard. In each class there are 80 images, the first 60 will be used for training and the rest will be used for testing. 

An example of the images can be seen below:

In [None]:
## example of 5 images from each class
# Collect the first 5 image paths of every class folder. Directory listings
# come back in arbitrary order, so both the class folders and the files are
# sorted to show the same examples on every run.
example_img = list()
for root, dirs, _ in os.walk('image/'):
    dirs.sort()  # in-place sort also fixes the order in which os.walk descends
    for class_folder in dirs:
        class_dir = os.path.join(root, class_folder)
        img_files = [
            os.path.join(class_dir, filename)
            for filename in sorted(os.listdir(class_dir))
        ]
        example_img.extend(img_files[:5])

# Load every example and convert OpenCV's BGR channel order to RGB so that
# matplotlib shows the colours correctly.
img_lst = list()
for imgf in example_img:
    img = cv2.imread(imgf)
    if img is None:
        # cv2.imread returns None (it does not raise) when a file cannot be
        # decoded; skip it instead of crashing in cvtColor.
        print("Skipping unreadable image file:", imgf)
        continue
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_lst.append(img)

# Show the examples on a 5x5 grid (zip stops at the shorter of images / axes).
f, axarr = plt.subplots(5,5,figsize=(15, 15))
axarr = axarr.flatten()
for img, ax in zip(img_lst, axarr):
    ax.imshow(img)
plt.show()

SIFT (Scale-Invariant Feature Transform) is a detector used to find interest points in an input image. It allows identification of localized features in images. The following script generates images with keypoints for the above examples.

In [None]:
# Detect SIFT keypoints on every example image and plot them on a 5x5 grid.
f, axarr = plt.subplots(5,5,figsize=(15, 15))
axarr = axarr.flatten()

# One detector is enough for all images. Newer OpenCV builds expose SIFT as
# cv2.SIFT_create; fall back to the contrib location used by older builds.
sift_factory = getattr(cv2, 'SIFT_create', None)
if sift_factory is None:
    sift_factory = cv2.xfeatures2d.SIFT_create
sift = sift_factory()

for img, ax in zip(img_lst, axarr):
    # Converting image to grayscale. The images in img_lst were converted to
    # RGB when they were loaded, so the RGB (not BGR) conversion code applies.
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    # Applying SIFT detector
    kp = sift.detect(gray, None)
    # Marking the keypoint on the image using circles. None is passed as the
    # output image so that a new array is returned; handing in `img` may let
    # OpenCV write into the array stored in img_lst, which would change the
    # inputs seen when this cell is run again.
    img_kp = cv2.drawKeypoints(gray, kp, None, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
    ax.imshow(img_kp)
plt.show()

The first step is to compute the SIFT descriptors for all images in the data directory. We have precomputed the SIFT image descriptors for 900 predefined patches in each image. These can be imported from the `all_features.mat` file.

In [None]:
# Class name -> integer class id. The insertion order matters: list(label) is
# used as the tick labels of the confusion matrices.
label = {
    class_name: class_id
    for class_id, class_name in enumerate(('airplanes', 'cars', 'dog', 'faces', 'keyboard'))
}

In [None]:
from scipy.io import loadmat

# Read the precomputed descriptor matrices for the training and test images.
mat = loadmat('all_features.mat')
train_des, test_des = mat['TrainMat'], mat['TestMat']

# Ground-truth class ids: 60 training and 20 test entries for each of the 5
# classes, in class order (assumes the descriptors are stored class by class
# in the .mat file - TODO confirm).
train_label = [class_id for class_id in range(5) for _ in range(60)]
test_label = [class_id for class_id in range(5) for _ in range(20)]

In [None]:
# Regroup the descriptor matrices into 3-D arrays: the first axis has one entry
# per image (300 training / 100 test), the last axis holds the 128 descriptor
# values, and the middle axis (-1) is inferred as the number of descriptors per
# image. Assumes the rows are stored image by image - TODO confirm.
train_des = train_des.reshape((300, -1, 128))
test_des = test_des.reshape((100, -1, 128))

## 2. Dictionary Creation - Feature Quantization

Task 1: Using [sklearn KMeans](https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html), create a dictionary by clustering a **subset** (eg. 6000) of the extracted descriptors. 

Use a dictionary of 500 words and the `elkan` algorithm to compute the dictionary. 

In [None]:
### YOUR CODE HERE
# Fixed seed so that the sampled subset and the resulting dictionary are the
# same after every kernel restart.
DICTIONARY_SEED = 0

# Step 1: create an array (N x 128) of all descriptors in the training set (not by image)
train_des_2 = train_des.reshape((-1, 128))

# Step 2: use Kmeans to create the Dictionary (the resulting Dictionary should have K words and 128 features)
# Draw the subset without replacement. The size is capped at the number of
# available descriptors because sampling more items than exist raises ValueError.
subset_size = min(60000, train_des_2.shape[0])
random_indices = random.Random(DICTIONARY_SEED).sample(range(train_des_2.shape[0]), k=subset_size)
subset = train_des_2[random_indices]

# To create dictionary from subset (500 visual words, seeded initialisation)
kmeans = KMeans(n_clusters=500, algorithm='elkan', random_state=DICTIONARY_SEED).fit(subset)

Task 2: Assign each image descriptor in the training and test sets, to the nearest codeword cluster.

In [None]:
### YOUR CODE HERE
# Flatten the test descriptors to an (N x 128) array, like train_des_2.
test_des_2 = test_des.reshape((-1, 128))

# Map every descriptor to the index of its nearest codeword, training set first.
train_cluster, test_cluster = (
    kmeans.predict(descriptors) for descriptors in (train_des_2, test_des_2)
)

## 3. Image Representation using BoW
Task 3: Represent each image in the training and the test dataset as a histogram of visual words (i.e. represent each image using the Bag of Words representation). Normalise the histograms by their L1 norm.

In [None]:
### YOUR CODE HERE
# Number of visual words; must match the size of the dictionary that the
# cluster ids in train_cluster / test_cluster were assigned from.
n_words = 500

# Step 1: For each image, create a histogram of the descriptors
# i.e. a histogram of the allocated clusters
# One row of cluster ids per image (300 training / 100 test images).
train_cluster = train_cluster.reshape((300, -1))
test_cluster = test_cluster.reshape((100, -1))

# np.bincount counts how often each id occurs; minlength keeps a bin for words
# that never occur in an image, so every histogram has exactly n_words entries.
train_hist = np.array([np.bincount(ids, minlength=n_words) for ids in train_cluster])
test_hist = np.array([np.bincount(ids, minlength=n_words) for ids in test_cluster])

# Step 2: Normalize by the L1 norm of the vector
# Every histogram is divided by its OWN sum (counts are non-negative, so the
# sum is the L1 norm). Dividing all rows by the sum of the first training
# histogram is only correct while every image has the same descriptor count.
train_hist = train_hist / train_hist.sum(axis=1, keepdims=True)
test_hist = test_hist / test_hist.sum(axis=1, keepdims=True)

## 4. Image Classification using a Nearest Neighbour Classifier
Task 4: Implement the Euclidean distance for the multi-dimensional case. Using sklearn [KNN classifier](https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html) and the distance implemented  (passed using keyword argument `metric`, check KNN API for details), train a model on the train set BoW, for K=1

In [None]:
def euclidean_distance(hist_1: np.array, hist_2: np.array):
    """
      Euclidean (L2) distance between two histograms.

      hist_1: a 1D vector representing a histogram
      hist_2: a 1D vector representing a histogram
      returns:
      The distance between two histograms (float)
    """
    # Square root of the summed squared per-bin differences.
    squared_diff = (hist_2 - hist_1) ** 2
    return np.sqrt(squared_diff.sum())

### YOUR CODE HERE
# 1-nearest-neighbour classifier on the training BoW histograms, using the
# custom Euclidean distance as the metric.
euclidean_classifier = KNeighborsClassifier(
    n_neighbors=1, metric=euclidean_distance
).fit(train_hist, np.array(train_label))
# fit() returns the estimator itself; ending on the bare name keeps the cell output.
euclidean_classifier

Task 5: Implement a method for histogram intersection.

In [None]:
def hist_intersection(hist_1: np.array, hist_2: np.array):
  """
    Histogram-intersection distance between two histograms.

    hist_1: a 1D vector representing a histogram
    hist_2: a 1D vector representing a histogram
    returns:
    The distance between two histograms (float): 1 minus the sum of the
    bin-wise minima of the two histograms
  """
  overlap = np.minimum(hist_1, hist_2).sum()
  return 1 - overlap

Task 6: Train a second classifier using the `hist_intersection()` as the distance metric.

In [None]:
### YOUR CODE HERE
# Same 1-nearest-neighbour setup as before, with histogram intersection as the metric.
hist_intersection_classifier = KNeighborsClassifier(
    n_neighbors=1, metric=hist_intersection
).fit(train_hist, np.array(train_label))
# fit() returns the estimator itself; ending on the bare name keeps the cell output.
hist_intersection_classifier

In [None]:
# Evaluating the euclidean classifier on the test set
euclidean_pred = euclidean_classifier.predict(test_hist)

# accuracy_score returns the fraction of correct predictions by default. With
# normalize=False it returns the raw count, which equals a percentage only
# when there are exactly 100 test samples. Scale the fraction to a percentage
# and round away floating-point noise.
euclidean_score = round(100 * accuracy_score(test_label, euclidean_pred), 2)
print("Accuracy for euclidean classifier is {}%".format(euclidean_score))

# Confusion matrix with the class names (in class-id order) as tick labels.
euclidean_cm = confusion_matrix(test_label, euclidean_pred)
euclidean_cm_display = ConfusionMatrixDisplay(euclidean_cm, display_labels=list(label)).plot()

In [None]:
# Evaluating the histogram intersection based classifier on the test set
hist_intersection_pred = hist_intersection_classifier.predict(test_hist)

# accuracy_score returns the fraction of correct predictions by default. With
# normalize=False it returns the raw count, which equals a percentage only
# when there are exactly 100 test samples. Scale the fraction to a percentage
# and round away floating-point noise.
hist_intersection_score = round(100 * accuracy_score(test_label, hist_intersection_pred), 2)
print("Accuracy for hist_intersection classifier is {}%".format(hist_intersection_score))

# Confusion matrix with the class names (in class-id order) as tick labels.
hist_intersection_cm = confusion_matrix(test_label, hist_intersection_pred)
hist_intersection_cm_display = ConfusionMatrixDisplay(hist_intersection_cm, display_labels=list(label)).plot()

## 5. Dictionary size

Task 7: Repeat steps 1-6 using a very small dictionary size (eg. 5). Compute the accuracy and confusion matrices.

In [None]:
### YOUR CODE HERE
# Step 1
# The 5-word dictionary gets its own name so that the 500-word `kmeans` model
# built earlier is not overwritten; otherwise re-running an earlier cell that
# calls kmeans.predict would silently use the small dictionary.
# random_state makes the clustering repeatable.
kmeans_small = KMeans(n_clusters=5, algorithm='elkan', random_state=0).fit(subset)

# Step 2
train_cluster2 = kmeans_small.predict(train_des_2)
test_cluster2 = kmeans_small.predict(test_des_2)

In [None]:
# Step 3
# Number of visual words in the small dictionary. The histograms need exactly
# one bin per word; allocating 500 bins here would leave 495 bins that can
# never be filled.
n_words_small = 5

# One row of cluster ids per image (300 training / 100 test images).
train_cluster2 = train_cluster2.reshape((300, -1))
test_cluster2 = test_cluster2.reshape((100, -1))

# np.bincount counts how often each id occurs; minlength keeps a bin for words
# that never occur in an image, so every histogram has n_words_small entries.
train_hist2 = np.array([np.bincount(ids, minlength=n_words_small) for ids in train_cluster2])
test_hist2 = np.array([np.bincount(ids, minlength=n_words_small) for ids in test_cluster2])

# Normalize by the L1 norm of the vector
# Every histogram is divided by its OWN sum (counts are non-negative, so the
# sum is the L1 norm) rather than by the sum of the first training histogram.
train_hist2 = train_hist2 / train_hist2.sum(axis=1, keepdims=True)
test_hist2 = test_hist2 / test_hist2.sum(axis=1, keepdims=True)

In [None]:
# Step 4
# 1-NN with the Euclidean metric on the small-dictionary histograms.
euclidean_classifier2 = KNeighborsClassifier(
    n_neighbors=1, metric=euclidean_distance
).fit(train_hist2, np.array(train_label))

# Step 5
# 1-NN with the histogram-intersection metric on the same histograms.
hist_intersection_classifier2 = KNeighborsClassifier(
    n_neighbors=1, metric=hist_intersection
).fit(train_hist2, np.array(train_label))
# fit() returns the estimator itself; ending on the bare name keeps the cell output.
hist_intersection_classifier2

In [None]:
# Step 6 - euclidean classifier
# Evaluating the euclidean classifier on the test set
euclidean_pred2 = euclidean_classifier2.predict(test_hist2)

# accuracy_score returns the fraction of correct predictions by default. With
# normalize=False it returns the raw count, which equals a percentage only
# when there are exactly 100 test samples. Scale the fraction to a percentage
# and round away floating-point noise.
euclidean_score2 = round(100 * accuracy_score(test_label, euclidean_pred2), 2)
print("Accuracy for euclidean classifier when k=5 is {}%".format(euclidean_score2))

# Confusion matrix with the class names (in class-id order) as tick labels.
euclidean_cm2 = confusion_matrix(test_label, euclidean_pred2)
euclidean_cm_display2 = ConfusionMatrixDisplay(euclidean_cm2, display_labels=list(label)).plot()

In [None]:
# Step 6 - histogram intersection classifier
hist_intersection_pred2 = hist_intersection_classifier2.predict(test_hist2)

# accuracy_score returns the fraction of correct predictions by default. With
# normalize=False it returns the raw count, which equals a percentage only
# when there are exactly 100 test samples. Scale the fraction to a percentage
# and round away floating-point noise.
hist_intersection_score2 = round(100 * accuracy_score(test_label, hist_intersection_pred2), 2)
print("Accuracy for hist_intersection classifier when k=5 is {}%".format(hist_intersection_score2))

# Confusion matrix with the class names (in class-id order) as tick labels.
hist_intersection_cm2 = confusion_matrix(test_label, hist_intersection_pred2)
hist_intersection_cm_display2 = ConfusionMatrixDisplay(hist_intersection_cm2, display_labels=list(label)).plot()

## 6. Support Vector Machines
In this section we will train a linear [SVM classifier](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html).


Task 8: Using [Grid Search](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html#sklearn.model_selection.GridSearchCV) select optimal hyperparameters `C` and `gamma`. 

Evaluate SVC classifier on the test set (in terms of accuracy and confusion matrices) and compare to the KNN classifier results.
For each class show some images that are correctly classified and some images that are incorrectly classified.

In [None]:
# Hyperparameter grid: gamma = 2**x with x stepping by 0.1 from -1 (np.arange
# with stop 1.6), and C = 2**x for x = 1..10.
parameters = {'gamma':[2**x for x in np.arange(-1, 1.6, 0.1)], 'C':[2**x for x in range(1, 11)]}

### YOUR CODE HERE
# Cross-validated grid search over the grid above on the training histograms;
# SVC and GridSearchCV are otherwise left at their default settings.
svclassifier = svm.SVC()
clf = GridSearchCV(svclassifier, parameters)
clf.fit(train_hist, train_label)

svc_pred = clf.predict(test_hist)
# accuracy_score returns the fraction of correct predictions by default. With
# normalize=False it returns the raw count, which equals a percentage only
# when there are exactly 100 test samples. Scale the fraction to a percentage
# and round away floating-point noise.
svc_score = round(100 * accuracy_score(test_label, svc_pred), 2)
print("Accuracy for SVM classifier is {}%".format(svc_score))

# Confusion matrix with the class names (in class-id order) as tick labels.
svc_cm = confusion_matrix(test_label, svc_pred)
svc_cm_display = ConfusionMatrixDisplay(svc_cm, display_labels=list(label)).plot()

In [None]:
# Boolean mask of correct SVM predictions, regrouped into one row per class.
classifications = (np.asarray(test_label) == svc_pred).reshape((5, -1))

# Get the index of the good and bad predictions for all the classes
# 61 is added to every index: 60 to skip the training images of the class and
# 1 because the images are labelled starting with 1 and not 0.
# The rows follow the class-id order: airplanes, cars, dog, faces, keyboard.
good_airplanes, good_cars, good_dogs, good_faces, good_keyboards = (
    np.where(class_mask)[0] + 61 for class_mask in classifications
)
bad_airplanes, bad_cars, bad_dogs, bad_faces, bad_keyboards = (
    np.where(~class_mask)[0] + 61 for class_mask in classifications
)

In [None]:
# Print, class by class, the image numbers of the correctly and the wrongly
# classified test images, with a dashed line between consecutive classes.
class_separator = "------------------------------------------"
class_reports = [
    [("Airplanes - correct predictions - image numbers", good_airplanes),
     ("Airplanes - wrong predictions - image numbers", bad_airplanes)],
    [("Cars - correct predictions - image numbers", good_cars),
     ("Cars - wrong predictions - image numbers", bad_cars)],
    [("Dog - correct predictions - image numbers", good_dogs),
     ("Dog - wrong predictions - image numbers", bad_dogs)],
    [("Faces - correct predictions - image numbers", good_faces),
     ("Faces - bad predictions - image numbers", bad_faces)],
    [("Keyboard - correct predictions for images", good_keyboards),
     ("Keyboards - bad predictions - image numbers", bad_keyboards)],
]
for position, report_lines in enumerate(class_reports):
    if position > 0:
        print(class_separator)
    for message, image_numbers in report_lines:
        print(message, image_numbers)

In [None]:
# Get the id of the images that were wrongly classified and the class they were wrongly classified into
# Pair every ground-truth label with its SVM prediction, then regroup the
# pairs into one block per class.
gt_pred = np.stack((np.array(test_label), svc_pred), axis=1).reshape((5, -1, 2))

# Per class: [image number, predicted class] for every misclassified image.
# 61 is added to the within-class index, the same offset used for the image
# numbers printed above.
result = [
    [
        [position + 61, pair[1]]
        for position, pair in enumerate(class_pairs)
        if pair[0] != pair[1]
    ]
    for class_pairs in gt_pred
]

result