In [10]:
import numpy as np 
from skimage.transform import pyramid_gaussian
from imutils.object_detection import non_max_suppression
import imutils
from skimage.feature import hog
from sklearn.externals import joblib
from skimage.io import imread
from sklearn.svm import LinearSVC
import cv2
from config import *
from skimage import color
import matplotlib.pyplot as plt 
import os 
import glob

### Path to Directories

In [11]:
# Folders with the training images of each class (paths are relative to the working directory).
pos_im_path = '../data/images/pos_person'
neg_im_path = '../data/images/neg_person'
# Folders where extract_features() stores one ".feat" descriptor file per training image.
pos_feat_ph = '../data/features/pos'
neg_feat_ph = '../data/features/neg'
# Directory that holds the trained classifier file 'svm.model'.
model_path = '../data/models/'

###  HyperParameters 

In [12]:
# Detection window size; sliding_window()/detector() index it as [0] = width, [1] = height.
min_wdw_sz = [64, 128]
# Sliding-window stride; [0] is the step along x and [1] the step along y.
step_size = [10, 10]
# HOG settings handed to skimage's hog() during both feature extraction and detection.
orientations = 9
pixels_per_cell = [6, 6]
cells_per_block = [2, 2]
# NOTE(review): visualize, normalize and threshold are not referenced by any code visible in
# this notebook — confirm whether they are still needed.
visualize = True
normalize = True
threshold = .3

In [13]:
# Line magics must directly follow the "%" sign: newer IPython versions reject "% load_ext"
# (with a space) with "Line magic function `%` not found".
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
# Create the feature and model directories if they do not exist yet.
# exist_ok=True keeps the cell safe to re-run and avoids the check-then-create race of
# "if not isdir: makedirs". model_path is used as a directory by the rest of the notebook,
# so it is created as-is instead of relying on a trailing slash via os.path.split().
for directory in (pos_feat_ph, neg_feat_ph, model_path):
    os.makedirs(directory, exist_ok=True)


## Extract HOG Features

In [15]:
def extract_features():
    """Compute HOG descriptors for all training images and cache them on disk.

    Every file in ``pos_im_path`` / ``neg_im_path`` is read as a grayscale image, described
    with ``hog`` using the notebook-level ``orientations``, ``pixels_per_cell`` and
    ``cells_per_block`` settings, and dumped with joblib to ``pos_feat_ph`` /
    ``neg_feat_ph`` as ``<image name without extension>.feat``.

    Returns
    -------
    None
        The descriptors are only written to disk.
    """
    print ("Calculating the descriptors for the positive samples and saving them")
    _save_hog_features(pos_im_path, pos_feat_ph)
    print ("Positive features saved in {}".format(pos_feat_ph))

    print ("Calculating the descriptors for the negative samples and saving them")
    _save_hog_features(neg_im_path, neg_feat_ph)
    print ("Negative features saved in {}".format(neg_feat_ph))
    print ("Completed calculating features from training images")


def _save_hog_features(im_dir, feat_dir):
    """Write one HOG ``.feat`` file into ``feat_dir`` for every file found in ``im_dir``."""
    for im_path in sorted(glob.glob(os.path.join(im_dir, "*"))):
        if not os.path.isfile(im_path):
            # The "*" pattern also matches sub-directories, which cannot be read as images.
            continue
        # NOTE(review): `as_grey` is the spelling used by the scikit-image version this
        # notebook was run with; newer releases name the keyword `as_gray` — confirm on upgrade.
        im = imread(im_path, as_grey=True)
        fd = hog(im, orientations=orientations, pixels_per_cell=pixels_per_cell,
                 cells_per_block=cells_per_block, block_norm='L1')
        # splitext() removes only the final extension, so "a.1.png" and "a.2.png" no longer
        # collapse onto the same "a.feat" file and overwrite each other.
        stem = os.path.splitext(os.path.basename(im_path))[0]
        joblib.dump(fd, os.path.join(feat_dir, stem + ".feat"))

In [16]:
# Compute the HOG descriptor of every training image and store it as a .feat file.
extract_features()

Calculating the descriptors for the positive samples and saving them


/home/khushal/anaconda2/envs/dl/lib/python3.6/site-packages/skimage/feature/_hog.py:119: skimage_deprecation: Default value of `block_norm`==`L1` is deprecated and will be changed to `L2-Hys` in v0.15
  'be changed to `L2-Hys` in v0.15', skimage_deprecation)


Positive features saved in ../data/features/pos
Calculating the descriptors for the negative samples and saving them
Negative features saved in ../data/features/neg
Completed calculating features from training images


### Total Negative Images = 4146
### Total Positive Images = 2416

## Training the SVM Classifier

In [17]:
def train_svm():
    """Train a linear SVM on the cached HOG descriptors and save it as ``svm.model``.

    Descriptors are loaded from every ``*.feat`` file in ``pos_feat_ph`` (label 1) and
    ``neg_feat_ph`` (label 0). The fitted ``LinearSVC`` is dumped with joblib into the
    ``model_path`` directory.

    Raises
    ------
    ValueError
        If one of the feature directories contains no ``.feat`` files.
    """
    fds = []
    labels = []
    for feat_dir, label in ((pos_feat_ph, 1), (neg_feat_ph, 0)):
        feat_paths = glob.glob(os.path.join(feat_dir, "*.feat"))
        if not feat_paths:
            # Fail early with a clear message instead of an opaque error inside fit().
            raise ValueError("No .feat files found in {}".format(feat_dir))
        for feat_path in feat_paths:
            fds.append(joblib.load(feat_path))
            labels.append(label)
    fds = np.asarray(fds)

    clf = LinearSVC()
    print ("Training a Linear SVM Classifier")
    clf.fit(fds, labels)
    # Build the file path with os.path.join (as detector() does) so the model lands inside
    # model_path even when that setting has no trailing slash.
    joblib.dump(clf, os.path.join(model_path, 'svm.model'))
    print ("Classifier saved to {}".format(model_path))
    
    

        

In [18]:
# Fit the linear SVM on the cached descriptors and write svm.model to model_path.
train_svm()

Training a Linear SVM Classifier
Classifier saved to ../data/models/


### Testing the Detector

In [19]:
# Rebind the window size and stride as tuples for the detection stage; the values are the
# same as in the hyperparameter cell earlier in the notebook.
min_wdw_sz = (64, 128)
step_size = (10, 10)
# Scale factor between successive levels of the Gaussian pyramid built in detector().
downscale = 1.25

In [20]:
def sliding_window(image, window_size, step_size):
    """Generate ``(x, y, patch)`` tuples for a window slid across ``image``.

    ``window_size`` is ``(width, height)`` and ``step_size`` is ``(x step, y step)``.
    Positions are visited row by row. Patches that reach past the right or bottom border
    are simply cut short by the slicing, so callers must check the patch shape themselves.
    """
    n_rows, n_cols = image.shape[0], image.shape[1]
    for top in range(0, n_rows, step_size[1]):
        bottom = top + window_size[1]
        for left in range(0, n_cols, step_size[0]):
            right = left + window_size[0]
            yield (left, top, image[top:bottom, left:right])

In [24]:
def detector(filename, score_threshold=0.5, overlap_thresh=0.3):
    """Detect people in one image with the trained HOG + linear-SVM model and plot them.

    The image is resized to at most 400 px wide and scanned with a sliding window over a
    Gaussian pyramid. Windows whose SVM decision value is positive and above
    ``score_threshold`` are kept. Two figures are shown: all raw detections, and the
    detections that remain after non-maximum suppression.

    Parameters
    ----------
    filename : str
        Path of the image to process.
    score_threshold : float, optional
        Minimum ``decision_function`` value for a window to be kept (default 0.5).
    overlap_thresh : float, optional
        Overlap threshold handed to ``non_max_suppression`` (default 0.3).

    Raises
    ------
    IOError
        If OpenCV cannot read ``filename`` as an image.
    """
    im = cv2.imread(filename)
    if im is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError("Could not read image: {}".format(filename))
    im = imutils.resize(im, width = min(400, im.shape[1]))
    # OpenCV loads images in BGR order, but the training descriptors were computed from
    # skimage's RGB-based grayscale conversion; convert so rgb2gray weights the channels
    # the same way at detection time. `im` stays BGR for drawing.
    im_rgb = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

    # Load the Model
    clf = joblib.load(os.path.join(model_path, 'svm.model'))
    detections = []

    for scale, im_scaled in enumerate(pyramid_gaussian(im_rgb, downscale = downscale)):
        # Stop once the pyramid level is smaller than a single detection window.
        if im_scaled.shape[0] < min_wdw_sz[1] or im_scaled.shape[1] < min_wdw_sz[0]:
            break
        # Factor that maps coordinates of this level back to the resized input image.
        factor = downscale ** scale
        for (x, y, im_window) in sliding_window(im_scaled, min_wdw_sz, step_size):
            # Skip windows truncated by the image border.
            if im_window.shape[0] != min_wdw_sz[1] or im_window.shape[1] != min_wdw_sz[0]:
                continue
            fd = hog(color.rgb2gray(im_window), orientations=orientations,
                     pixels_per_cell=pixels_per_cell, cells_per_block=cells_per_block,
                     block_norm='L1')
            # Evaluate the classifier once per window; a positive decision value is what
            # predict() reports as class 1 for this two-class model.
            score = float(clf.decision_function(fd.reshape(1, -1))[0])
            if score > 0 and score > score_threshold:
                detections.append((int(x * factor), int(y * factor), score,
                                   int(min_wdw_sz[0] * factor),
                                   int(min_wdw_sz[1] * factor)))

    clone = im.copy()

    for (x_tl, y_tl, _, w, h) in detections:
        cv2.rectangle(im, (x_tl, y_tl), (x_tl + w, y_tl + h), (0, 255, 0), thickness = 2)

    sc = [score for (_, _, score, _, _) in detections]
    print ("sc: "+ str(sc))
    if detections:
        rects = np.array([[x, y, x + w, y + h] for (x, y, _, w, h) in detections])
        pick = non_max_suppression(rects, probs = np.array(sc), overlapThresh = overlap_thresh)
    else:
        # non_max_suppression returns a plain list for empty input, which has no .shape.
        pick = np.empty((0, 4), dtype=int)
    print ("shape "+ str(pick.shape))

    for (xA, yA, xB, yB) in pick:
        cv2.rectangle(clone, (int(xA), int(yA)), (int(xB), int(yB)), (0, 255, 0), 2)

    plt.axis("off")
    plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
    plt.title("Raw Detection before NMS")
    plt.show()

    plt.axis("off")
    plt.imshow(cv2.cvtColor(clone, cv2.COLOR_BGR2RGB))
    plt.title("Final Detections after applying NMS")
    plt.show()

In [25]:
def test_folder(foldername):
    """Run ``detector`` on every regular file directly inside ``foldername``.

    Files are processed in sorted order so repeated runs show the figures in the same
    sequence. Sub-directories matched by the ``*`` pattern are skipped because they cannot
    be read as images.
    """
    for filename in sorted(glob.iglob(os.path.join(foldername, '*'))):
        if os.path.isfile(filename):
            detector(filename)

In [1]:
# Run the detector on every file in the folder below (path relative to the working directory).
foldername = 'test_image'
test_folder(foldername)

## HOG 5-STAGE ALGORITHM

The first stage applies an optional global image normalization
(equalization) that is designed to reduce the influence of illumination
effects. In practice we use gamma (power law) compression, either
computing the square root or the log of each color channel.
Image texture strength is typically proportional to the local surface
illumination so this compression helps to reduce the effects of local
shadowing and illumination variations.

The second stage computes first order image gradients. These capture
contour, silhouette and some texture information, while providing
further resistance to illumination variations. The locally dominant
color channel is used, which provides color invariance to a large
extent. Variant methods may also include second order image derivatives,
which act as primitive bar detectors - a useful feature for capturing,
e.g. bar like structures in bicycles and limbs in humans.

The third stage aims to produce an encoding that is sensitive to
local image content while remaining resistant to small changes in
pose or appearance. The adopted method pools gradient orientation
information locally in the same way as the SIFT [Lowe 2004]
feature. The image window is divided into small spatial regions,
called "cells". For each cell we accumulate a local 1-D histogram
of gradient or edge orientations over all the pixels in the
cell. This combined cell-level 1-D histogram forms the basic
"orientation histogram" representation. Each orientation histogram
divides the gradient angle range into a fixed number of
predetermined bins. The gradient magnitudes of the pixels in the
cell are used to vote into the orientation histogram.

The fourth stage computes normalization, which takes local groups of
cells and contrast-normalizes their overall responses before passing
them to the next stage. Normalization introduces better invariance to illumination,
shadowing, and edge contrast. It is performed by accumulating a measure
of local histogram "energy" over local groups of cells that we call
"blocks". The result is used to normalize each cell in the block.
Typically each individual cell is shared between several blocks, but
its normalizations are block dependent and thus different. The cell
thus appears several times in the final output vector with different
normalizations. This may seem redundant but it improves the performance.
We refer to the normalized block descriptors as Histogram of Oriented
Gradient (HOG) descriptors.

The final step collects the HOG descriptors from all blocks of a dense
overlapping grid of blocks covering the detection window into a combined
feature vector for use in the window classifier.

In [None]:
# NOTE(review): feature_vector and normalized_blocks are not defined anywhere in the visible
# notebook, so this cell raises NameError if run. It looks like an illustrative excerpt of the
# final step described above (flattening the block descriptors into one vector) — confirm.
if feature_vector:
    normalized_blocks = normalized_blocks.ravel()

In [None]:
def hog_histograms(double[:, ::1] gradient_columns, double[:, ::1] gradient_rows,int cell_columns, int cell_rows,
                   int size_columns, int size_rows,  int number_of_cells_columns, int number_of_cells_rows,
                   int number_of_orientations, cnp.float64_t[:, :, :] orientation_histogram):
    """Fill ``orientation_histogram`` with the per-cell HOG values of an image.

    For every orientation bin ``i`` and every cell, the value returned by ``cell_hog``
    is written to ``orientation_histogram[r_i, c_i, i]``.

    Parameters
    ----------
    gradient_columns : ndarray
        First order image gradients; used as the second argument of ``arctan2``.
    gradient_rows : ndarray
        First order image gradients; used as the first argument of ``arctan2``.
    cell_columns : int
        Cell size in pixels along the column axis (step of the column index ``c``).
    cell_rows : int
        Cell size in pixels along the row axis (step of the row index ``r``).
    size_columns : int
        Number of columns.
    size_rows : int
        Number of rows.
    number_of_cells_columns : int
        Number of cells along the column axis.
    number_of_cells_rows : int
        Number of cells along the row axis.
    number_of_orientations : int
        Number of orientation bins.
    orientation_histogram : ndarray
        The histogram array which is modified in place, indexed as
        ``[cell row, cell column, orientation bin]``.

    Notes
    -----
    NOTE(review): this cell uses Cython syntax (typed arguments, ``cdef``, ``nogil``) and
    the name ``cnp``, which is not imported in the visible notebook; presumably it is a
    reference excerpt rather than something meant to run in a plain Python cell — confirm.
    """

    # Gradient magnitude and unsigned orientation in degrees, folded into [0, 180).
    cdef double[:, ::1] magnitude = np.hypot(gradient_columns, gradient_rows)
    cdef double[:, ::1] orientation = np.rad2deg(np.arctan2(gradient_rows, gradient_columns)) % 180
    cdef int i, c, r, o, r_i, c_i, cc, cr, c_0, r_0, range_rows_start, range_rows_stop, range_columns_start, \
    range_columns_stop
    cdef float orientation_start, orientation_end, number_of_orientations_per_180

    # (r_0, c_0) is the pixel position used for the first cell: half a cell in from the
    # top-left corner (3 when the cell size is 6).
    r_0 = cell_rows / 2 #3
    c_0 = cell_columns / 2 #3
    # Despite their names, cc is the upper bound of the ROW loop and cr the upper bound of
    # the COLUMN loop below.
    cc = cell_rows * number_of_cells_rows
    cr = cell_columns * number_of_cells_columns
    # Pixel offsets, relative to a cell's (r, c) position, that cell_hog iterates over.
    range_rows_stop = cell_rows / 2
    range_rows_start = -range_rows_stop
    range_columns_stop = cell_columns / 2
    range_columns_start = -range_columns_stop
    # Width of one orientation bin in degrees.
    number_of_orientations_per_180 = 180. / number_of_orientations

    with nogil:
        # compute orientations integral images
        for i in range(number_of_orientations):
            # isolate orientations in this range
            # "start" is the UPPER and "end" the LOWER edge of bin i: cell_hog keeps pixels
            # with orientation_end <= orientation < orientation_start.
            orientation_start = number_of_orientations_per_180 * (i + 1)
            orientation_end = number_of_orientations_per_180 * i
            c = c_0
            r = r_0
            r_i = 0
            c_i = 0

            # Walk over the cells row by row; (r, c) is the pixel position handed to
            # cell_hog and (r_i, c_i) the cell index written to in the histogram.
            while r < cc:
                c_i = 0
                c = c_0

                while c < cr:
                    orientation_histogram[r_i, c_i, i] = cell_hog(magnitude, orientation,
                                 orientation_start, orientation_end,
                                 cell_columns, cell_rows, c, r,
                                 size_columns, size_rows,range_rows_start, range_rows_stop,
                                 range_columns_start, range_columns_stop)
                    c_i += 1
                    c += cell_columns

                r_i += 1
                r += cell_rows

In [None]:
cdef float cell_hog(double[:, ::1] magnitude,
                    double[:, ::1] orientation,
                    float orientation_start, float orientation_end,
                    int cell_columns, int cell_rows,
                    int column_index, int row_index,
                    int size_columns, int size_rows,
                    int range_rows_start, int range_rows_stop,
                    int range_columns_start, int range_columns_stop) nogil:
    """Calculation of the cell's HOG value for one orientation bin.

    Sums the gradient magnitude of every pixel around ``(row_index, column_index)`` whose
    orientation lies in ``[orientation_end, orientation_start)`` and divides the sum by the
    cell area ``cell_rows * cell_columns``.

    Parameters
    ----------
    magnitude : ndarray
        The gradient magnitudes of the pixels.
    orientation : ndarray
        Per-pixel orientation values compared against the bin edges.
    orientation_start : float
        Exclusive upper edge of the orientation bin.
    orientation_end : float
        Inclusive lower edge of the orientation bin.
    cell_columns : int
        Cell size along the column axis; only used in the final normalisation.
    cell_rows : int
        Cell size along the row axis; only used in the final normalisation.
    column_index : int
        Pixel column that the column offsets are added to.
    row_index : int
        Pixel row that the row offsets are added to.
    size_columns : int
        Number of columns; pixel columns at or beyond it are skipped.
    size_rows : int
        Number of rows; pixel rows at or beyond it are skipped.
    range_rows_start : int
        First row offset relative to ``row_index`` (inclusive).
    range_rows_stop : int
        Last row offset relative to ``row_index`` (exclusive).
    range_columns_start : int
        First column offset relative to ``column_index`` (inclusive).
    range_columns_stop : int
        Last column offset relative to ``column_index`` (exclusive).

    Returns
    -------
    total : float
        The summed magnitude of the matching pixels divided by the cell area.
    """
    cdef int cell_column, cell_row, cell_row_index, cell_column_index
    cdef float total = 0.

    for cell_row in range(range_rows_start, range_rows_stop):
        cell_row_index = row_index + cell_row
        # Rows outside the image contribute nothing.
        if (cell_row_index < 0 or cell_row_index >= size_rows):
            continue

        for cell_column in range(range_columns_start, range_columns_stop):
            cell_column_index = column_index + cell_column
            # Skip pixels outside the image and pixels whose orientation is not in
            # [orientation_end, orientation_start).
            if (cell_column_index < 0 or cell_column_index >= size_columns
                    or orientation[cell_row_index, cell_column_index]
                    >= orientation_start
                    or orientation[cell_row_index, cell_column_index]
                    < orientation_end):
                continue

            total += magnitude[cell_row_index, cell_column_index]

    # The divisor is the full cell area, even when some pixels were skipped above.
    return total / (cell_rows * cell_columns)