# Classification.ipynb
Date: November 29th, 2018  
Course: ECSE415, McGill University  
Authors:  
*Shawn Vosburg  
Tristan Bouchard  
Alex Masciotra  
Nayem Alam  
Thomas Philippon *

## NOTE: The `MIO-TCD-Classification` dataset folder must be located in the same directory as this notebook.

In [1]:
#Import necessary libraries. Strategy: compute the HoG features of each image and train an SVM on them.
#For the second classifier, try K-means or K-nearest neighbours.
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
import glob
#from sklearn.neighbors import KNeighborsClassifier

#Declare Constants
CLASSIFY_DIM = (128,128)   #Target size that every training image is resized to (passed to cv2.resize as dsize)

### Loading and resizing images

In [11]:
#Import training images
folder = "./MIO-TCD-Classification/train/"
vehicleDir  = [                                  #Class sub-directories of the training set to load
#    "articulated_truck",        #10346 imgs
#    "background",               #160000 imgs
    "bicycle",                  #2284 imgs
#    "bus",                      #10316 imgs
#    "car",                      #260518 imgs
    "motorcycle",               #1982 imgs
    "non-motorized_vehicle",    #1751 imgs
    "pedestrian",               #6262 imgs
#    "pickup_truck",             #50906 imgs
    "single_unit_truck",        #5120 imgs
    "work_van"]                 #9679 imgs
#Some vehicle types are commented out because they take too long to load. They will be re-enabled once the code is finalized.

vehicleTypes = {}                                #Main hash table: maps each class folder name (string) to an integer label.
vehicleImgArr,vehicleLabels = [],[]              #Parallel lists: resized images and their integer labels.
idx = 0

#Build Hash Table
#os.listdir returns entries in arbitrary order, so sort them to keep the
#label assigned to each class identical across runs and machines.
for bucket in sorted(os.listdir(folder)):
    vehicleTypes[bucket] = idx
    idx +=1

#Loop across directory to fetch all images
for typeName in vehicleDir:
    vehicleTypeDir = folder + typeName+"/"
    imgNames = os.listdir(vehicleTypeDir)        #List the directory once; reused for the count and the loop.
    print("Presently loading " + str(len(imgNames)) + " images from: " + typeName)

    #fetch images
    for imgPath in imgNames:
        img = cv2.imread(vehicleTypeDir+imgPath)
        if img is None:
            #cv2.imread returns None (it does not raise) when a file cannot be read or decoded;
            #skip it instead of letting cv2.resize crash in the middle of a long load.
            print("Skipping unreadable image: " + vehicleTypeDir+imgPath)
            continue
        img = cv2.resize(img,CLASSIFY_DIM)       #Resize images so that they are all CLASSIFY_DIM in size
        vehicleImgArr.append(img)
        vehicleLabels.append(vehicleTypes[typeName])          #Keep a separate list with the labels.

Presently loading 2284 images from: bicycle
Presently loading 1982 images from: motorcycle
Presently loading 1751 images from: non-motorized_vehicle
Presently loading 6262 images from: pedestrian
Presently loading 5120 images from: single_unit_truck
Presently loading 9679 images from: work_van


### Compute HOG features of all images.

In [24]:
def HoGFromImageArr(imgArr,cs,bs,nb):
    """ Compute a per-cell HoG map for every image of an image array.

        For each image, the block-wise HoG descriptor is computed with OpenCV and
        the contributions of all blocks covering a cell are averaged for that cell.

        imgArr = image array. all images must be same resolution (that of imgArr[0]).
        cs = cell size in pixel x pixel (height x width)
        bs = block size in cell x cell (height x width)
        nb = number of orientation bins

        Returns a float array of shape (len(imgArr), rows, cols, nb) where
        rows = image height // cs[0] and cols = image width // cs[1].
        An empty imgArr returns an empty array of shape (0, 0, 0, nb).
    """
    n_imgs = len(imgArr)
    # len() is used (not truthiness) so that numpy arrays are accepted as well as lists.
    if n_imgs == 0:
        return np.empty((0, 0, 0, nb), dtype=float)

    img_h, img_w = imgArr[0].shape[:2]
    n_cells = (img_h // cs[0], img_w // cs[1])                      # (rows, cols) of cells
    n_blocks = (n_cells[0] - bs[0] + 1, n_cells[1] - bs[1] + 1)     # (rows, cols) of blocks, stride = 1 cell

    # create HoG Object
    # winSize is the size of the image cropped to a multiple of the cell size.
    # OpenCV sizes are given as (width, height), hence the swapped indices.
    hog = cv2.HOGDescriptor(_winSize=(n_cells[1] * cs[1],
                                      n_cells[0] * cs[0]),
                            _blockSize=(bs[1] * cs[1],
                                        bs[0] * cs[0]),
                            _blockStride=(cs[1], cs[0]),
                            _cellSize=(cs[1], cs[0]),
                            _nbins=nb)

    # Number of blocks covering each cell. It only depends on the geometry,
    # so it is computed once instead of once per image.
    cell_count = np.zeros((n_cells[0], n_cells[1], 1), dtype=int)
    for off_y in range(bs[0]):
        for off_x in range(bs[1]):
            cell_count[off_y:off_y + n_blocks[0],
                       off_x:off_x + n_blocks[1]] += 1

    # Preallocate the whole output. Growing an array with np.append inside the
    # loop copies everything accumulated so far on every image (quadratic cost).
    hogArr = np.empty((n_imgs, n_cells[0], n_cells[1], nb), dtype=float)

    for i, img in enumerate(imgArr):
        # Compute HoG features
        hog_feats = hog.compute(img)\
                       .reshape(n_blocks[1],
                                n_blocks[0],
                                bs[0], bs[1], nb) \
                       .transpose((1, 0, 2, 3, 4))  # index blocks by rows first

        # hog_feats now contains the gradient amplitudes for each direction,
        # for each cell of its block, for each block. Indexing is by rows then columns.

        # Accumulate directly in this image's slice of the output (a view, no copy).
        gradients = hogArr[i]
        gradients.fill(0)

        #Add each contributions to the histogram.
        for off_y in range(bs[0]):
            for off_x in range(bs[1]):
                gradients[off_y:off_y + n_blocks[0],
                          off_x:off_x + n_blocks[1]] += \
                    hog_feats[:, :, off_y, off_x, :]

        # Average gradients over the number of blocks that contributed to each cell
        gradients /= cell_count
    return hogArr

In [25]:
#Choosing the HoG parameters.
#The HoG features must be (32,32,8) in size: a 32x32 grid of cells, each holding 8 orientation bins.
#The images are 128x128, so each cell side is 128/32 = 4px (1 cell = 4x4 pixels).
#Each block covers a 4x4 cell neighborhood. HoGFromImageArr uses a block stride of one cell,
#so consecutive blocks overlap by 3 of their 4 cells.
#The number of bins is arbitrarily set to 8.

nbins = 8            # number of orientation bins
cell_size = (4, 4)   # h x w in pixels
block_size = (4, 4)  # h x w in cells

vehicleHOGArr = HoGFromImageArr(vehicleImgArr, cell_size, block_size, nbins)

KeyboardInterrupt: 

In [18]:
#Sanity check: number of images currently held in vehicleImgArr.
print(len(vehicleImgArr))

27078
