In [2]:
import numpy as np
import cv2
import imutils
import os 
import joblib
import matplotlib.pyplot as plt
%matplotlib inline

from imutils.object_detection import non_max_suppression
from skimage.transform import pyramid_gaussian
from skimage.feature import hog, local_binary_pattern
from skimage.color import rgb2gray

import warnings
warnings.filterwarnings('ignore')

In [14]:
# Title of the OpenCV preview window that shows the patch currently being classified.
winname = 'Window'

# Folder that holds the trained model artefacts.
model_path = r'D:\UGM\Semester 4\Computer Vision and Image Analysis\Final Project\model'
svm_file = os.path.join(model_path, 'svm_tuned.sav')

# joblib.load unpickles the file, so only point this at a model file you trust.
clf = joblib.load(svm_file)

In [4]:
def sliding_window(image, window_size, step_size):
    """Lazily walk a window over ``image`` in row-major order.

    Parameters
    ----------
    image : array with at least two dimensions (rows, columns, ...)
    window_size : (width, height) of the window
    step_size : (x_step, y_step) stride between consecutive windows

    Yields
    ------
    (x, y, patch) where ``(x, y)`` is the top-left corner and ``patch`` is the
    slice of ``image`` starting there. Patches touching the right or bottom
    border are clipped by the slicing, so they can be smaller than
    ``window_size``; callers that need full windows must check the shape.
    """
    for top in range(0, image.shape[0], step_size[1]):
        row_band = slice(top, top + window_size[1])
        for left in range(0, image.shape[1], step_size[0]):
            col_band = slice(left, left + window_size[0])
            yield (left, top, image[row_band, col_band])

In [19]:
def _window_features(img_window):
    """Build the single-row HOG + LBP feature matrix for one 128x128 colour window.

    Returns ``(features, lbp)`` where ``features`` has shape ``(1, n_features)``
    and ``lbp`` is the LBP image (kept so the caller can preview it).
    """
    # NOTE(review): the window originates from cv2.imread (BGR channel order) while
    # rgb2gray applies RGB weights. Left unchanged because the classifier was
    # presumably trained on features produced the same way -- confirm against the
    # training notebook before "fixing" this.
    gray = rgb2gray(img_window)
    # visualize is left at its default (False): the rendered HOG image was never used.
    hog_features = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                       cells_per_block=(2, 2))
    lbp = local_binary_pattern(gray, 24, 3, method='uniform')
    features = np.concatenate((hog_features, lbp.flatten())).reshape(1, -1)
    return features, lbp


def detector(filename, show_windows=True, score_threshold=0.5, overlap_thresh=0.3):
    """Run the sliding-window SVM detector over an image and plot the result.

    The image is resized to at most 1000 px wide, turned into a Gaussian
    pyramid (downscale 2.5), and every full 64x64 window (stride 16) of every
    level is upscaled to 128x128, described with HOG + LBP features and
    classified by the module-level ``clf``. Windows predicted as class 1 whose
    decision score exceeds ``score_threshold`` are mapped back to the resized
    image's coordinates and shown twice with matplotlib: all raw hits, and the
    hits that survive non-maximum suppression.

    Parameters
    ----------
    filename : str
        Path of the image to scan.
    show_windows : bool, default True
        Show live OpenCV previews (current patch, scan position, LBP image) for
        every window. This dominates the run time; pass False for a fast run.
    score_threshold : float, default 0.5
        Minimum ``clf.decision_function`` value for a hit to be kept.
    overlap_thresh : float, default 0.3
        Overlap threshold handed to ``non_max_suppression``.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read ``filename``.
    """
    image = cv2.imread(filename)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('Could not read image: {}'.format(filename))
    image = imutils.resize(image, width=min(1000, image.shape[1]))

    min_window_size = (64, 64)
    step_size = (16, 16)
    downscale = 2.5
    full_window_shape = (min_window_size[1], min_window_size[0])

    detections = []

    if show_windows:
        # Create and place the patch preview window once, not once per patch.
        cv2.namedWindow(winname)
        cv2.moveWindow(winname, 40, 200)

    try:
        # channel_axis=-1 keeps the colour axis intact. Without it the pyramid
        # treats the channels as a third spatial axis and shrinks it too, so
        # every level after the first would no longer be a 3-channel image.
        pyramid = pyramid_gaussian(image, downscale=downscale, channel_axis=-1)

        for scale, img_scaled in enumerate(pyramid):
            if img_scaled.shape[0] < min_window_size[1] or \
               img_scaled.shape[1] < min_window_size[0]:
                break

            # Factor that maps this level's coordinates back onto `image`.
            factor = downscale ** scale
            win_w = int(min_window_size[0] * factor)
            win_h = int(min_window_size[1] * factor)

            for (x, y, img_window) in sliding_window(img_scaled, min_window_size, step_size):
                # Skip clipped border windows and anything that is not 3-channel,
                # so a prediction is never reused from a previous window.
                if img_window.shape[:2] != full_window_shape:
                    continue
                if img_window.ndim != 3 or img_window.shape[2] != 3:
                    continue

                img_window = cv2.resize(img_window, (128, 128), interpolation=cv2.INTER_AREA)
                combined_features, lbp = _window_features(img_window)
                pred = clf.predict(combined_features)

                x_full = int(x * factor)
                y_full = int(y * factor)

                if show_windows:
                    cv2.imshow(winname, img_window)
                    clone = image.copy()
                    # Draw in full-image coordinates so the box is also correct
                    # on the coarser pyramid levels.
                    cv2.rectangle(clone, (x_full, y_full),
                                  (x_full + win_w, y_full + win_h), (0, 255, 0), 2)
                    # Clamp the label so it stays visible for windows on the top row.
                    cv2.putText(clone, 'Prediction: {}'.format(pred),
                                (x_full, max(y_full - 10, 25)),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
                    cv2.imshow('Sliding Window', clone)
                    cv2.imshow('LBP', lbp)
                    cv2.waitKey(1)

                if pred == 1:
                    # Evaluate the decision function once and reuse the value.
                    score = clf.decision_function(combined_features)
                    print('Detection:: Location -> ({}, {}) | Scale -> {} | Confidence Score -> {}'.format(x, y, scale, score))
                    confidence = float(np.ravel(score)[0])
                    if confidence > score_threshold:
                        detections.append((x_full, y_full, confidence, win_w, win_h))
    finally:
        if show_windows:
            # Also runs on KeyboardInterrupt, so no frozen HighGUI windows are left behind.
            cv2.destroyAllWindows()

    raw_view = image.copy()
    for (x_tl, y_tl, _, w, h) in detections:
        cv2.rectangle(raw_view, (x_tl, y_tl), (x_tl + w, y_tl + h), (0, 255, 0), thickness=2)

    rects = np.array([[x, y, x + w, y + h] for (x, y, _, w, h) in detections])
    scores = np.array([score for (_, _, score, _, _) in detections])
    pick = non_max_suppression(rects, probs=scores, overlapThresh=overlap_thresh)

    final_view = image.copy()
    for (startX, startY, endX, endY) in pick:
        cv2.rectangle(final_view, (int(startX), int(startY)), (int(endX), int(endY)), (0, 255, 0), 2)

    for view, title in ((raw_view, "Raw Detection before NMS"),
                        (final_view, "Final Detections after NMS")):
        plt.imshow(cv2.cvtColor(view, cv2.COLOR_BGR2RGB))
        plt.axis("off")
        plt.title(title)
        plt.show()

In [20]:
# Run the detector on a single image from the raw dataset folder; the call prints
# each positive window and shows the detections before and after NMS.
filename = "D:/UGM/Semester 4/Computer Vision and Image Analysis/Final Project/dataset/raw/IMG_2392.jpg"
detector(filename)

Detection:: Location -> (0, 0) | Scale -> 0 | Confidence Score -> [1.63484886]
Detection:: Location -> (30, 0) | Scale -> 0 | Confidence Score -> [1.15694686]
Detection:: Location -> (60, 0) | Scale -> 0 | Confidence Score -> [1.7797868]
Detection:: Location -> (90, 0) | Scale -> 0 | Confidence Score -> [1.32175239]
Detection:: Location -> (120, 0) | Scale -> 0 | Confidence Score -> [1.49696441]
Detection:: Location -> (150, 0) | Scale -> 0 | Confidence Score -> [1.10814599]
Detection:: Location -> (180, 0) | Scale -> 0 | Confidence Score -> [1.3952721]
Detection:: Location -> (210, 0) | Scale -> 0 | Confidence Score -> [1.26713658]
Detection:: Location -> (240, 0) | Scale -> 0 | Confidence Score -> [1.29495272]
Detection:: Location -> (270, 0) | Scale -> 0 | Confidence Score -> [1.52525834]
Detection:: Location -> (300, 0) | Scale -> 0 | Confidence Score -> [1.38003961]
Detection:: Location -> (330, 0) | Scale -> 0 | Confidence Score -> [1.28652684]
Detection:: Location -> (360, 0) | 

KeyboardInterrupt: 