In [1]:
import glob
import os
import shutil
import cv2
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm
from PIL import Image
from sklearn.svm import LinearSVC
from skimage.feature import hog
from skimage.feature import haar_like_feature
from skimage.feature import local_binary_pattern as lbp
from skimage.io import imread
from skimage.transform import pyramid_gaussian, integral_image
from sklearn.externals import joblib

%matplotlib inline

In [37]:
class Config(object):
    """
        Settings shared by every stage of this notebook: which descriptor
        and classifier to use, where features/models/images live on disk,
        and the numeric parameters of the sliding-window search.
    """
    def __init__(self):

        # ---- General ----
        descriptor = "HAAR"
        self.DES_TYPE = descriptor                    # one of "HOG", "LBP", "HAAR"
        self.PROJECT_ID = "no_hands_" + descriptor    # names the feature/model sub-directories
        self.THRESHOLD = 0.3                          # overlap threshold handed to nms()
        self.CLF_TYPE = "LIN_SVM"
        self.DOWNSCALE = 1.25                         # factor between image-pyramid levels

        # ---- Paths ----
        feature_root = os.path.join("./source/features", self.PROJECT_ID)
        model_dir = os.path.join("./source/models", self.PROJECT_ID)
        test_img_dir = "./source/test_images"
        # Key order matters: the directories are created in this order.
        self.DIR_PATHS = {
            "POS_FEAT_PH": os.path.join(feature_root, "pos"),
            "NEG_FEAT_PH": os.path.join(feature_root, "neg"),
            "MODEL_DIR_PH": model_dir,
            "POS_IMG_PH": "./source/images/pos",
            "NEG_IMG_PH": "./source/images/neg",
            "TEST_IMG_DIR_PH": test_img_dir,
        }
        self.MODEL_PH = os.path.join(model_dir, "svm.model")
        self.TEST_IMG_PH = os.path.join(test_img_dir, "test.jpg")

        # ---- Sliding window / HOG ----
        self.MIN_WDW_SIZE = [64, 64]    # indexed as [0] = width, [1] = height by the detector
        self.STEP_SIZE = [12, 12]       # indexed as [0] = x step, [1] = y step
        self.ORIENTATIONS = 9
        self.PIXELS_PER_CELL = [3, 3]
        self.CELLS_PER_BLOCK = [3, 3]
        self.VISUALIZE = False          # when True the detector shows each window via cv2
        self.NORMALIZE = True

        # ---- LBP ----
        radius = 3
        self.LBP_RADIUS = radius
        self.LBP_POINTS = 8 * radius

# Build the shared configuration object and create any directory it names
# that is not on disk yet, reporting each one that had to be created.
args = Config()
for ph in args.DIR_PATHS.values():
    if os.path.exists(ph):
        continue
    os.makedirs(ph)
    print("==> Directory Tree",ph,"created")

==> Directory Tree ./source/features/no_hands_HAAR/pos created
==> Directory Tree ./source/features/no_hands_HAAR/neg created
==> Directory Tree ./source/models/no_hands_HAAR created


In [38]:
def process_image(image, args=args):
    """
        Compute the feature descriptor selected by `args.DES_TYPE` for `image`.

        * "HOG"  - histogram of oriented gradients, driven by the
                   ORIENTATIONS / PIXELS_PER_CELL / CELLS_PER_BLOCK settings.
        * "LBP"  - local binary pattern map using LBP_POINTS and LBP_RADIUS.
        * "HAAR" - 'type-3-x' Haar-like features of the 5x5 region anchored
                   at the top-left corner of the integral image.

        Returns whatever array the chosen skimage routine produces; callers
        in this notebook flatten it with `reshape` before use.

        Raises KeyError when `args.DES_TYPE` is none of the above.
    """
    if args.DES_TYPE == "HOG":
        # Pass every HOG knob from the config so that editing the config
        # actually changes the descriptor (previously only PIXELS_PER_CELL
        # was honoured and the others silently fell back to library defaults).
        fd = hog(image,
                 orientations=args.ORIENTATIONS,
                 pixels_per_cell=args.PIXELS_PER_CELL,
                 cells_per_block=args.CELLS_PER_BLOCK,
                 block_norm='L2')
    elif args.DES_TYPE == "LBP":
        fd = lbp(image, args.LBP_POINTS, args.LBP_RADIUS)
    elif args.DES_TYPE == "HAAR":
        # NOTE(review): only a fixed 5x5 region at (0, 0) is described,
        # regardless of the window size - confirm this is intended.
        fd = haar_like_feature(integral_image(image), 0, 0, 5, 5, 'type-3-x')
    else:
        raise KeyError("==> The Processing method does not exist!")
    return fd

def _save_descriptors(img_dir, feat_dir, args):
    """Compute a descriptor for every file in `img_dir` and dump it into `feat_dir`."""
    for im_path in tqdm(glob.glob(os.path.join(img_dir, "*"))):
        # `as_gray` is the supported spelling; `as_grey` only emits a
        # deprecation warning on the installed skimage.
        im = imread(im_path, as_gray=True)
        # Forward `args` so a non-default configuration is respected.
        fd = process_image(im, args)
        # splitext keeps dots inside the stem ("img.01.jpg" -> "img.01.feat"),
        # so such files no longer overwrite each other's features.
        fd_name = os.path.splitext(os.path.basename(im_path))[0] + ".feat"
        joblib.dump(fd, os.path.join(feat_dir, fd_name))


def extract_features(args=args):
    """
        Recompute the descriptors of all training images.

        The positive and negative feature directories named in
        `args.DIR_PATHS` are wiped and recreated, then every file found in
        the positive / negative image directories is read as grayscale,
        described with `process_image` and stored as
        `<image stem>.feat` via joblib.

        `args` is the configuration object to use; it is forwarded to
        `process_image`. Returns None.
    """
    pos_feat_dir = args.DIR_PATHS["POS_FEAT_PH"]
    neg_feat_dir = args.DIR_PATHS["NEG_FEAT_PH"]

    # Start from empty feature directories so stale files never leak into training.
    for feat_dir in (pos_feat_dir, neg_feat_dir):
        if os.path.exists(feat_dir):
            shutil.rmtree(feat_dir)
    os.makedirs(pos_feat_dir)
    os.makedirs(neg_feat_dir)

    print("==> Calculating the descriptors for the positive samples and saving them")
    _save_descriptors(args.DIR_PATHS["POS_IMG_PH"], pos_feat_dir, args)
    print("==> Positive features saved in {}".format(pos_feat_dir))

    print("==> Calculating the descriptors for the negative samples and saving them")
    _save_descriptors(args.DIR_PATHS["NEG_IMG_PH"], neg_feat_dir, args)
    print("==> Negative features saved in {}".format(neg_feat_dir))
    print("==> Completed calculating features from training images")

In [39]:
def train_classifier(args=args):
    """
        Train the classifier selected by `args.CLF_TYPE` on the stored features.

        Every `*.feat` file in the positive feature directory is loaded with
        label 1 and every one in the negative directory with label 0; each
        descriptor is flattened to a 1-D vector. For "LIN_SVM" a `LinearSVC`
        is fitted and dumped to `args.MODEL_PH`.

        Raises KeyError when `args.CLF_TYPE` is not a supported classifier
        (previously the function loaded everything and then silently did
        nothing).
    """
    # Compare by value: `is` tests object identity and only appeared to work
    # because CPython happens to intern identical string literals.
    if args.CLF_TYPE != "LIN_SVM":
        raise KeyError("==> The Classifier type does not exist!")

    fds = []
    labels = []
    print("==> Loading the positive features")
    for feat_path in tqdm(glob.glob(os.path.join(args.DIR_PATHS["POS_FEAT_PH"], "*.feat"))):
        fd = joblib.load(feat_path)
        fds.append(fd.reshape(-1))
        labels.append(1)

    print("==> Load the negative features")
    for feat_path in tqdm(glob.glob(os.path.join(args.DIR_PATHS["NEG_FEAT_PH"], "*.feat"))):
        fd = joblib.load(feat_path)
        fds.append(fd.reshape(-1))
        labels.append(0)

    clf = LinearSVC()
    print("==> Training a Linear SVM Classifier")
    clf.fit(fds, labels)
    joblib.dump(clf, args.MODEL_PH)
    print("==> Classifier saved to {}".format(args.MODEL_PH))

In [40]:
def overlapping_area(detection_1, detection_2):
    """
        Return the overlap ratio (intersection over union) of two detections.

        `detection_1` and `detection_2` are sequences in the format ->
        [x-top-left, y-top-left, confidence-of-detections, width-of-detection, height-of-detection]
        Only the position and size entries are read.

        The result is a float between 0 and 1:
        0 means the rectangles do not overlap, 1 means they coincide.
        If both rectangles have zero area the union is empty and 0.0 is
        returned instead of dividing by zero.

        Intersection computed as described in ->
        http://math.stackexchange.com/questions/99565/simplest-way-to-calculate-the-intersect-area-of-two-rectangles
    """
    # Corner co-ordinates of both rectangles
    x1_tl, y1_tl = detection_1[0], detection_1[1]
    x2_tl, y2_tl = detection_2[0], detection_2[1]
    x1_br = x1_tl + detection_1[3]
    y1_br = y1_tl + detection_1[4]
    x2_br = x2_tl + detection_2[3]
    y2_br = y2_tl + detection_2[4]

    # Intersection: clamp at 0 so disjoint rectangles contribute nothing
    x_overlap = max(0, min(x1_br, x2_br) - max(x1_tl, x2_tl))
    y_overlap = max(0, min(y1_br, y2_br) - max(y1_tl, y2_tl))
    overlap_area = x_overlap * y_overlap

    # Each area must use that detection's own width AND height
    # (the first area used to take its height from detection_2).
    area_1 = detection_1[3] * detection_1[4]
    area_2 = detection_2[3] * detection_2[4]
    total_area = area_1 + area_2 - overlap_area
    if total_area == 0:
        return 0.0
    return overlap_area / float(total_area)


def nms(detections, threshold=.5):
    """
        This function performs Non-Maxima Suppression.
        `detections` consists of a list of detections.
        Each detection is in the format ->
        [x-top-left, y-top-left, confidence-of-detections, width-of-detection, height-of-detection]

        Detections are visited from highest to lowest confidence. A detection
        is kept only if its overlap (as given by `overlapping_area`) with
        every detection kept so far is not greater than `threshold`;
        otherwise it is dropped in favour of the higher-scoring one.

        The input list is not modified. The output is a new list of the
        kept detections, ordered by decreasing confidence.
    """
    if len(detections) == 0:
        return []
    # Highest confidence first, so the strongest detection always survives
    ranked = sorted(detections, key=lambda det: det[2], reverse=True)

    # Visit every candidate exactly once. The earlier version deleted items
    # from the list it was enumerating, which made the loop skip every other
    # candidate and so return far fewer (and arbitrary) detections.
    kept = []
    for candidate in ranked:
        suppressed = any(overlapping_area(candidate, winner) > threshold
                         for winner in kept)
        if not suppressed:
            kept.append(candidate)
    return kept

In [41]:
def resize_by_short(img, short_len=256):
    """
        Resize `img` so that its shorter side becomes `short_len` pixels,
        scaling the longer side by the same ratio (truncated to an int).

        `img` must expose `.size` as (width, height) and `.resize((w, h))`,
        as PIL images do. The original size is printed before resizing.
        Returns the object produced by `img.resize`.
    """
    print(img.size)
    width, height = img.size
    if width > height:
        # Landscape: the height is the short side
        new_height = short_len
        new_width = int(width * new_height / height)
    else:
        # Portrait or square: the width is the short side
        new_width = short_len
        new_height = int(height * new_width / width)
    return img.resize((new_width, new_height))


def sliding_window(image, window_size, step_size):
    """
        Generate patches of `image` by sliding a window across it.

        The window starts at the top-left corner (0, 0) and advances by
        `step_size[0]` along x and `step_size[1]` along y, row by row.

        Parameters -
        * `image` - array indexed as image[row, column]
        * `window_size` - (width, height) of the window
        * `step_size` - (x step, y step)

        Yields tuples -
        (x, y, im_window)
        where
        * x is the top-left x co-ordinate
        * y is the top-left y co-ordinate
        * im_window is the slice of `image` starting there

        Windows that run past the right or bottom edge are yielded
        truncated; the caller is responsible for discarding them.
    """
    for top in range(0, image.shape[0], step_size[1]):
        yield from (
            (left, top, image[top:top + window_size[1], left:left + window_size[0]])
            for left in range(0, image.shape[1], step_size[0])
        )


def test_classifier(args=args):
    """
        Run the trained classifier over the test image and show the result.

        The image at `args.TEST_IMG_PH` is loaded as grayscale, resized by
        `resize_by_short`, and searched at every level of a Gaussian pyramid
        with a fixed-size sliding window. Each window classified as positive
        is recorded as
        (x-top-left, y-top-left, confidence, width, height)
        with ALL four geometric values expressed in the co-ordinates of the
        resized input image, so detections from different pyramid levels are
        comparable. The detections are then reduced with `nms`, drawn, shown
        in a cv2 window and written to 'test1.jpg'.

        `args` is the configuration object; it is also forwarded to
        `process_image`. Returns None.
    """
    # Read the Image
    im = Image.open(args.TEST_IMG_PH).convert('L')
    im = np.array(resize_by_short(im))

    clf = joblib.load(args.MODEL_PH) # Load the classifier
    detections = [] # Detections in input-image co-ordinates
    scale = 0 # Index of the current pyramid level

    # Downscale the image and iterate
    for im_scaled in pyramid_gaussian(im, downscale=args.DOWNSCALE):
        cd = [] # Window origins detected at this level, in level co-ordinates
        # If the width or height of the scaled image is less than
        # the width or height of the window, then end the iterations.
        if im_scaled.shape[0] < args.MIN_WDW_SIZE[1] or im_scaled.shape[1] < args.MIN_WDW_SIZE[0]:
            break
        # Factor mapping this level's co-ordinates back to the input image
        factor = args.DOWNSCALE ** scale
        for (x, y, im_window) in sliding_window(im_scaled, args.MIN_WDW_SIZE, args.STEP_SIZE):
            # Skip windows truncated by the image border
            if im_window.shape[0] != args.MIN_WDW_SIZE[1] or im_window.shape[1] != args.MIN_WDW_SIZE[0]:
                continue
            # Describe the window with the configured descriptor
            fd = process_image(im_window, args).reshape([1, -1])
            pred = clf.predict(fd)
            if pred == 1:
                score = clf.decision_function(fd) # 1-element array for a single sample
                print("Detection:: Location -> ({}, {})".format(x, y))
                print("Scale ->  {} | Confidence Score {} \n".format(scale, score))
                # The position must be rescaled exactly like the size;
                # otherwise boxes found at levels > 0 are drawn (and compared
                # by NMS) at the wrong place. The score is stored as a plain
                # float so sorting in nms() compares scalars.
                detections.append((int(x * factor), int(y * factor), float(score[0]),
                                   int(args.MIN_WDW_SIZE[0] * factor),
                                   int(args.MIN_WDW_SIZE[1] * factor)))
                cd.append((x, y))

            # If visualize is set to true, display the working of the sliding window
            if args.VISUALIZE:
                clone = im_scaled.copy()
                for x1, y1 in cd:
                    # Draw the detections at this scale
                    cv2.rectangle(clone, (x1, y1), (x1 + im_window.shape[1], y1 +
                                                    im_window.shape[0]), (0, 0, 0), thickness=2)
                cv2.rectangle(clone, (x, y), (x + im_window.shape[1], y +
                                              im_window.shape[0]), (255, 255, 255), thickness=2)
                cv2.imshow("Sliding Window in Progress", clone)
                cv2.waitKey(30)

        # Move the the next scale
        scale += 1

    # Keep an unannotated copy for the post-NMS drawing
    clone = im.copy()

    # Draw the raw detections
    # NOTE(review): the annotated `im` is never shown or saved - confirm
    # whether a "before NMS" display was intended here.
    for (x_tl, y_tl, _, w, h) in detections:
        cv2.rectangle(im, (x_tl, y_tl), (x_tl + w, y_tl + h), (0, 0, 0), thickness=2)

    detections = nms(detections, args.THRESHOLD) # Perform Non Maxima Suppression

    # Display the results after performing NMS
    for (x_tl, y_tl, _, w, h) in detections:
        # Draw the detections
        cv2.rectangle(clone, (x_tl, y_tl), (x_tl + w, y_tl + h), (0, 0, 0), thickness=2)
    cv2.imshow("Final Detections after applying NMS", clone)
    cv2.imwrite('test1.jpg', clone)
    cv2.waitKey()
    

In [42]:
# Run the whole pipeline in order: descriptors -> classifier -> detection demo.
for run_stage in (extract_features, train_classifier, test_classifier):
    run_stage()

  warn('`as_grey` has been deprecated in favor of `as_gray`')
  3%|▎         | 54/1553 [00:00<00:02, 536.32it/s]

==> Calculating the descriptors for the positive samples and saving them


100%|██████████| 1553/1553 [00:02<00:00, 549.15it/s]
  3%|▎         | 56/2000 [00:00<00:03, 556.62it/s]

==> Positive features saved in ./source/features/no_hands_HAAR/pos
==> Calculating the descriptors for the negative samples and saving them


100%|██████████| 2000/2000 [00:03<00:00, 523.01it/s]
 29%|██▉       | 450/1553 [00:00<00:00, 4494.34it/s]

==> Negative features saved in ./source/features/no_hands_HAAR/neg
==> Completed calculating features from training images
==> Loading the positive features


100%|██████████| 1553/1553 [00:00<00:00, 4475.48it/s]
 23%|██▎       | 460/2000 [00:00<00:00, 4593.29it/s]

==> Load the negative features


100%|██████████| 2000/2000 [00:00<00:00, 4524.74it/s]


==> Training a Linear SVM Classifier




==> Classifier saved to ./source/models/no_hands_HAAR/svm.model
(3088, 2320)
Detection:: Location -> (180, 84)
Scale ->  0 | Confidence Score [0.09203613] 

Detection:: Location -> (192, 84)
Scale ->  0 | Confidence Score [0.26940841] 

Detection:: Location -> (276, 84)
Scale ->  0 | Confidence Score [0.28059458] 

Detection:: Location -> (180, 96)
Scale ->  0 | Confidence Score [0.24278005] 

Detection:: Location -> (168, 108)
Scale ->  0 | Confidence Score [0.26062003] 

Detection:: Location -> (156, 120)
Scale ->  0 | Confidence Score [0.17438816] 

Detection:: Location -> (276, 132)
Scale ->  0 | Confidence Score [0.0971844] 

Detection:: Location -> (168, 144)
Scale ->  0 | Confidence Score [0.18590915] 

Detection:: Location -> (132, 156)
Scale ->  0 | Confidence Score [0.17252411] 

Detection:: Location -> (168, 156)
Scale ->  0 | Confidence Score [0.22050838] 

Detection:: Location -> (216, 156)
Scale ->  0 | Confidence Score [0.2412928] 

Detection:: Location -> (132, 168)
Sca

  warn('The default multichannel argument (None) is deprecated.  Please '
  warn('The default multichannel argument (None) is deprecated.  Please '
  warn('The default multichannel argument (None) is deprecated.  Please '
  warn('The default multichannel argument (None) is deprecated.  Please '
  warn('The default multichannel argument (None) is deprecated.  Please '
  warn('The default multichannel argument (None) is deprecated.  Please '
  warn('The default multichannel argument (None) is deprecated.  Please '


Detection:: Location -> (144, 72)
Scale ->  1 | Confidence Score [0.22021507] 

Detection:: Location -> (132, 96)
Scale ->  1 | Confidence Score [0.17893063] 

Detection:: Location -> (132, 108)
Scale ->  1 | Confidence Score [0.01380688] 

Detection:: Location -> (132, 120)
Scale ->  1 | Confidence Score [0.1719158] 

Detection:: Location -> (132, 132)
Scale ->  1 | Confidence Score [0.10167459] 

Detection:: Location -> (144, 132)
Scale ->  1 | Confidence Score [0.22818731] 

Detection:: Location -> (120, 24)
Scale ->  2 | Confidence Score [0.28145401] 

Detection:: Location -> (108, 60)
Scale ->  2 | Confidence Score [0.00467414] 

Detection:: Location -> (108, 72)
Scale ->  2 | Confidence Score [0.10830081] 

Detection:: Location -> (108, 84)
Scale ->  2 | Confidence Score [0.00497437] 

Detection:: Location -> (108, 96)
Scale ->  2 | Confidence Score [0.18418809] 

Detection:: Location -> (96, 12)
Scale ->  3 | Confidence Score [0.23901622] 

Detection:: Location -> (84, 60)
Scale