# Installation

## 2. Imports

In [1]:
import importlib
import numpy as np
import os
from sklearn.cluster import KMeans
from tqdm import tqdm
import torch
import torch.nn as nn
import math
from torch.nn.modules.activation import ReLU
import torch.nn.functional as F
import datetime
import json
import random

In [None]:
from src.dataset import ImageDataModule

# Dataset

In [5]:
ImageDataModule = _temp.dataset.ImageDataModule

In [6]:
loader = ImageDataModule()

In [7]:
loader.setup()

Loaded samples into dataset with label 'no'=0 and 'yes'=1


# Training

## 1. Hyper-parameters

In [8]:
EPOCH_NUMBER = 5

# 2. Model

In [9]:
def findnn(D1, D2):
    """
    :param D1: NxD matrix containing N feature vectors of dim. D
    :param D2: MxD matrix containing M feature vectors of dim. D
    :return:
        Idx: N-dim. vector containing for each feature vector in D1 the index of the closest feature vector in D2.
        Dist: N-dim. vector containing for each feature vector in D1 the distance to the closest feature vector in D2
    """
    N = D1.shape[0]
    M = D2.shape[0]  # [k]

    # Find for each feature vector in D1 the nearest neighbor in D2
    Idx, Dist = [], []
    for i in range(N):
        minidx = 0
        mindist = np.linalg.norm(D1[i, :] - D2[0, :])
        for j in range(1, M):
            d = np.linalg.norm(D1[i, :] - D2[j, :])

            if d < mindist:
                mindist = d
                minidx = j
        Idx.append(minidx)
        Dist.append(mindist)
    return Idx, Dist

class Sobel(nn.Module):
    # Sourced from https://github.com/chaddy1004/sobel-operator-pytorch/blob/master/model.py
    def __init__(self):
        super().__init__()
        self.filter = nn.Conv2d(in_channels=1, out_channels=2, kernel_size=1, stride=1, padding='same', bias=False)

        Gx = torch.tensor([[2.0, 0.0, -2.0], [4.0, 0.0, -4.0], [2.0, 0.0, -2.0]])
        Gy = torch.tensor([[2.0, 4.0, 2.0], [0.0, 0.0, 0.0], [-2.0, -4.0, -2.0]])
        G = torch.cat([Gx.unsqueeze(0), Gy.unsqueeze(0)], 0)
        G = G.unsqueeze(1)

        self.filter.weight = nn.Parameter(G, requires_grad=False)

    def forward(self, img):
        #Note there is a Gx and Gy on this
        x = self.filter(img)
        ''' 
        x = torch.mul(x, x)
        x = torch.sum(x, dim=1, keepdim=True)
        x = torch.sqrt(x) '''
        return x


def grid_points(img, nPointsX, nPointsY, border):
    """
    :param img: input gray img, numpy array, [h, w]
    :param nPointsX: number of grids in x dimension
    :param nPointsY: number of grids in y dimension
    :param border: leave border pixels in each image dimension
    :return: vPoints: 2D grid point coordinates, numpy array, [nPointsX*nPointsY, 2]
    """
    vPoints = None  # numpy array, [nPointsX*nPointsY, 2]

    vPoints = np.zeros((nPointsX * nPointsY, 2))
    
    w, h = img.shape


    # Are these integers? Should we make x Points integers
    mult_x = (w - border - 1) / nPointsX

    mult_y = (h - border - 1) / nPointsY

    xPoints = [int(i * mult_x + 8) for i in range(nPointsX)]
    yPoints = [int(i * mult_y + 8) for i in range(nPointsY)]
    # Wrap around i*mult_x + 8 the term int

    counter_g = 0
    for x in xPoints:
        for y in yPoints:
            vPoints[counter_g][0] = x
            vPoints[counter_g][1] = y
            counter_g += 1

    return vPoints


def descriptors_hog(img, vPoints, cellWidth, cellHeight):
    nBins = 8
    w = cellWidth
    h = cellHeight
    
    # to calculate the derivatives from an image
    
    grad_x, grad_y = Sobel()(img.view(-1, 128,128))

    # What is cv2.CV_16S? a numerical type in CV

    # tan^-1(dy  / dx)
    orientation = np.arctan2(grad_x, grad_y) * 180 / np.pi

    _, bin_edges_orientation = np.histogram(orientation, bins=nBins)
    # bin_edges_orientation = np.array([i * 45 for i in range(9)])

    descriptors = (
        []
    )  # list of descriptors for the current image, each entry is one 128-d vector for a grid point

    for point in vPoints:
        # nPointsX * nPointsY
        point_descriptor = []
        for pixel_x in range(w):
            # 4 width
            for pixel_y in range(h):
                # 4 height
                x_coord = int(point[0] - 2 + pixel_x)
                y_coord = int(point[1] - 2 + pixel_y)

                selection = orientation[x_coord, y_coord]
                # 8 for histogram size of 8 bins
                histogram, _ = np.histogram(selection, bins=bin_edges_orientation)
                point_descriptor += [histogram]

        descriptors += [point_descriptor]

    descriptors = np.asarray(
        descriptors
    )  # [nPointsX*nPointsY, 128], descriptor for the current image (100 grid points)

    # Checking that we do have the right shape
    descriptors = np.reshape(descriptors, (vPoints.shape[0], 128))

    # CLEARED FOR DEBUGGING
    return descriptors


def create_codebook(k, iterations):
    """
    :param nameDirPos: dir to positive training images
    :param nameDirNeg: dir to negative training images
    :param k: number of kmeans cluster centers
    :param iterations: maximum iteration numbers for kmeans clustering
    :return: vCenters: center of kmeans clusters, numpy array, [k, 128]
    """
    
    cellWidth = 4
    cellHeight = 4
    nPointsX = 10
    nPointsY = 10
    border = 8

    vFeatures = []
    # list for all features of all images (each feature: 128-d, 16 histograms containing 8 bins)
    # Extract features for all image
    for batch in loader.train_dataloader():
        batch = batch[0]
        grey_batch = batch.mean(axis=1)
        
        for img in grey_batch:
          vPoints = grid_points(img, nPointsX, nPointsY, border)
          descriptors = descriptors_hog(img, vPoints, cellWidth, cellHeight)
          vFeatures += [descriptors]

    vFeatures = np.asarray(vFeatures)  # [n_imgs, n_vPoints, 128]
    vFeatures = vFeatures.reshape(-1, vFeatures.shape[-1])  #[n_imgs*n_vPoints, 128]
    print("number of extracted features: ", len(vFeatures))

    # Cluster the features using K-Means
    print("clustering ...")
    kmeans_res = KMeans(n_clusters=k, max_iter=iterations).fit(vFeatures)
    vCenters = kmeans_res.cluster_centers_  # [k, 128]

    return vCenters


def bow_histogram(vFeatures, vCenters):
    """
    :param vFeatures: MxD matrix containing M feature vectors of dim. D
    :param vCenters: NxD matrix containing N cluster centers of dim. D
    :return: histo: N-dim. numpy vector containing the resulting BoW activation histogram.
    """

    # TODO

    histo = np.zeros(vCenters.shape[0])
    for descriptor in vFeatures:
        dist = np.linalg.norm(vCenters - descriptor, axis=1)
        chosen_cluster_center = np.argmin(dist)
        histo[chosen_cluster_center] += 1

    # Return a histogram based on the cluster centers
    # Error was in labeling np.linalg norm with axis = 0 instea of 1
    return histo


def create_bow_histograms(label, vCenters, loader):
    """
    :param label: dir of input images
    :param vCenters: kmeans cluster centers, [k, 128] (k is the number of cluster centers)
    :return: vBoW: matrix, [n_imgs, k]
    """

    cellWidth = 4
    cellHeight = 4
    nPointsX = 10
    nPointsY = 10
    border = 8

    # Extract features for all images in the given directory
    vBoW = []
    for batch in loader:
        
        # print('processing image {} ...'.format(i + 1))
        for i in range(batch[1].shape[0]):
          
          img_label = batch[1][i]
          if img_label != label: continue
          img = batch[0][i]
    
          img = img.mean(axis=0) # [h, w]

          vPoints = grid_points(img, nPointsX, nPointsY, border)
          descriptors = descriptors_hog(img, vPoints, cellWidth, cellHeight)
          # Adding to the histogram

          vBoW += [bow_histogram(descriptors, vCenters)]

    vBoW = np.asarray(vBoW)  # [n_imgs, k]

    return vBoW


def bow_recognition_nearest(histogram, vBoWPos, vBoWNeg):
    """
    :param histogram: bag-of-words histogram of a test image, [1, k]
    :param vBoWPos: bag-of-words histograms of positive training images, [n_imgs, k]
    :param vBoWNeg: bag-of-words histograms of negative training images, [n_imgs, k]
    :return: sLabel: predicted result of the test image, 0(without car)/1(with car)
    """

    DistPos, DistNeg = None, None

    # Find the nearest neighbor in the positive and negative sets and decide based on this neighbor

    # It shows that all elements in negative have 100, 0,0,0,0,0,0,0 Histograms
    """ DistNeg = np.argmin(np.linalg.norm(vBoWNeg - histogram))
    print('Item with the smallest distance', vBoWNeg[DistNeg])
    print('Histogram', histogram)
    print('Rest of items in set', DistNeg) """

    DistNeg = np.min(np.linalg.norm(vBoWNeg - histogram, axis=1))

    DistPos = np.min(np.linalg.norm(vBoWPos - histogram, axis=1))

    if DistPos < DistNeg:
        sLabel = 1
    else:
        sLabel = 0
    return sLabel


In [10]:
# TODO
k = 20
iterations = 99
print("creating codebook ...")
vCenters = create_codebook(k, iterations)


creating codebook ...
number of extracted features:  22200
clustering ...


In [11]:
print("creating bow histograms (pos) ...")
vBoWPos = create_bow_histograms(1, vCenters, loader.train_dataloader())
print("creating bow histograms (neg) ...")
vBoWNeg = create_bow_histograms(0, vCenters, loader.train_dataloader())

# test pos samples
print("creating bow histograms for test set (pos) ...")
vBoWPos_test = create_bow_histograms(1, vCenters, loader.test_dataloader())  # [n_imgs, k]
result_pos = 0

print("testing pos samples ...")
for i in range(vBoWPos_test.shape[0]):
    cur_label_pos = bow_recognition_nearest(vBoWPos_test[i : (i + 1)], vBoWPos, vBoWNeg)
    result_pos = result_pos + cur_label_pos
acc_pos = result_pos / vBoWPos_test.shape[0]
print("test pos sample accuracy:", acc_pos)

# test neg samples
print("creating bow histograms for test set (neg) ...")
vBoWNeg_test = create_bow_histograms(0, vCenters, loader.test_dataloader())  # [n_imgs, k]

result_neg = 0
print("testing neg samples ...")
for i in range(vBoWNeg_test.shape[0]):
    cur_label_neg = bow_recognition_nearest(vBoWNeg_test[i : (i + 1)], vBoWPos, vBoWNeg)
    result_neg = result_neg + cur_label_neg
acc_neg = 1 - result_neg / vBoWNeg_test.shape[0]
print("test neg sample accuracy:", acc_neg)

overall_accuracy = ((acc_neg *vBoWNeg_test.shape[0]) + (acc_pos * vBoWPos_test.shape[0])) / (vBoWPos_test.shape[0] + vBoWNeg_test.shape[0])
print(f"Overall accuracy: {overall_accuracy}")


creating bow histograms (pos) ...
creating bow histograms (neg) ...
creating bow histograms for test set (pos) ...
testing pos samples ...
test pos sample accuracy: 0.875
creating bow histograms for test set (neg) ...
testing neg samples ...
test neg sample accuracy: 1.0
Overall accuracy: 0.9285714285714286
