In [1]:
%matplotlib inline
from matplotlib import pyplot as plt

import warnings
warnings.filterwarnings("ignore")

In [2]:
import glob
import os
import pickle
import json

import cv2
import numpy as np

In [4]:
from skimage.feature import hog

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn import svm
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix, f1_score 
from sklearn.metrics import accuracy_score, make_scorer

In [5]:
DATA_DIR = os.path.join('../data')
VEHICLES = os.path.join(DATA_DIR, 'vehicles')
NON_VEHICLES = os.path.join(DATA_DIR, 'non-vehicles')

In [None]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.model_selection import GridSearchCV

In [None]:
class HogFeatureExtractor(BaseEstimator, TransformerMixin):
    def __init__(self, color_scheme, orientation, pixels_per_cell, cells_per_block):
        self.color_scheme = color_scheme
        self.orientation = orientation
        self.pixels_per_cell = pixels_per_cell
        self.cells_per_block = cells_per_block
    
    def _hog_feature(self, image):
        return hog(image, orientations=self.orientation, 
                   pixels_per_cell=(self.pixels_per_cell, self.pixels_per_cell), 
                   cells_per_block=(self.cells_per_block, self.cells_per_block), 
                   feature_vector=True)
    
    def fit(self, X, y, **fit_params):
        return self
    
    def transform(self, X):
        X_new = []
        for x in X:
            img = cv2.cvtColor(x, self.color_scheme).astype(np.float32)/255
            # HOG Features
            rhf = self._hog_feature(img[:, :, 0])
            ghf = self._hog_feature(img[:, :, 1])
            bhf = self._hog_feature(img[:, :, 2])
            
            # Histogram Features
            channel1_hist = np.histogram(img[:,:,0], bins=32)
            channel2_hist = np.histogram(img[:,:,1], bins=32)
            channel3_hist = np.histogram(img[:,:,2], bins=32)
            # Concatenate the histograms into a single feature vector
            hist_features = np.concatenate((channel1_hist[0], channel2_hist[0], channel3_hist[0]))
            
            X_new.append(np.hstack((rhf, ghf, bhf, hist_features)))
        return np.array(X_new)
    
    def fit_transform(self, X, y=None, **fit_params):
        return self.fit(X, y).transform(X)

In [19]:
def read(data_dir):
    for img_path in glob.iglob(os.path.join(data_dir, '*', '*.png')):
        image = cv2.imread(img_path)
        yield cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

In [20]:
vehicles = [feature for feature in read(VEHICLES)]

In [21]:
non_vehicles = [feature for feature in read(NON_VEHICLES)]

In [22]:
data = np.vstack((vehicles, non_vehicles))
labels = np.hstack((np.ones(len(vehicles), np.int32), np.zeros(len(non_vehicles), np.int32)))

In [23]:
len(labels) == len(data)

True

In [24]:
data.shape

(17760, 64, 64, 3)

In [25]:
labels.shape

(17760,)

In [26]:
x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.2)

In [27]:
x_train.shape

(14208, 64, 64, 3)

In [None]:
clf = svm.LinearSVC()
scaler = StandardScaler()
extractor = HogFeatureExtractor(cv2.COLOR_BGR2YCrCb, 9, 8, 2)
pipeline = Pipeline([('hog', extractor), ('scaler', scaler), ('svc', clf)])
# pipeline.fit(x_train, y_train)

In [None]:
color_schemes = [cv2.COLOR_BGR2RGB, cv2.COLOR_BGR2HLS, cv2.COLOR_BGR2YCrCb]
orientations = [9, 11, 13]
ppc = [8, 16]
cpb = [2, 3]
param_grid = dict(hog__color_scheme=color_schemes,
                  hog__orientation=orientations, 
                  hog__pixels_per_cell=ppc, 
                  hog__cells_per_block=cpb)

In [None]:
grid = GridSearchCV(pipeline, param_grid=param_grid, 
                    scoring=make_scorer(f1_score))

In [None]:
grid.fit(x_train, y_train)

In [None]:
with open('grid_cv_hist.p', 'wb') as _file:
    pickle.dump(grid, _file)

In [None]:
grid.best_params_

In [None]:
grid.best_score_

In [None]:
import pandas as pd

In [None]:
results = pd.DataFrame(grid.cv_results_)

In [None]:
results

In [None]:
acc = grid.best_estimator_.score(x_test, y_test)

In [None]:
acc

In [None]:
pred = grid.best_estimator_.predict(x_test)

In [None]:
cm = confusion_matrix(y_test, pred)

In [None]:
cm

In [None]:
with open('classifier_hist.p', 'wb') as _file:
    pickle.dump(grid.best_estimator_, _file)

In [None]:
grid.best_estimator_

In [29]:
def hog_feature(image, orientations):
    return hog(image, orientations=orientations, 
               pixels_per_cell=(8, 8), 
               cells_per_block=(2, 2), 
               feature_vector=True)
    
def feature_extractor(image, orientations):
    img = cv2.cvtColor(image, cv2.COLOR_RGB2YCrCb).astype(np.float32)/255
    # HOG Features
    rhf = hog_feature(img[:, :, 0], orientations)
    ghf = hog_feature(img[:, :, 1], orientations)
    bhf = hog_feature(img[:, :, 2], orientations)
    return np.hstack((rhf, ghf, bhf))

def feature_extractor_w_hist(image, orientations):
    img = cv2.cvtColor(image, cv2.COLOR_RGB2YCrCb).astype(np.float32)/255
    # HOG Features
    rhf = hog_feature(img[:, :, 0], orientations)
    ghf = hog_feature(img[:, :, 1], orientations)
    bhf = hog_feature(img[:, :, 2], orientations)
    # Histogram Features
    channel1_hist = np.histogram(img[:,:,0], bins=32)
    channel2_hist = np.histogram(img[:,:,1], bins=32)
    channel3_hist = np.histogram(img[:,:,2], bins=32)
    # Concatenate the histograms into a single feature vector
    hist_features = np.concatenate((channel1_hist[0], channel2_hist[0], channel3_hist[0]))
    return np.hstack((rhf, ghf, bhf, hist_features))
            

In [30]:
x_feat1 = [feature_extractor(x, 13) for x in x_train]
x_test_feat1 = [feature_extractor(x, 13) for x in x_test]

pipe1 = Pipeline([('scaler', StandardScaler()), ('svc', svm.LinearSVC())])
pipe1.fit(x_feat1, y_train)

pred = pipe1.predict(x_test_feat1)
cm = confusion_matrix(pred, y_test)
acc = accuracy_score(pred, y_test)
f1 = f1_score(pred, y_test)
print(acc, cm, f1)

0.990146396396 [[1806   20]
 [  15 1711]] 0.989875614695


In [31]:
x_feat2 = [feature_extractor_w_hist(x, 13) for x in x_train]
x_test_feat2 = [feature_extractor_w_hist(x, 13) for x in x_test]

pipe2 = Pipeline([('scaler', StandardScaler()), ('svc', svm.LinearSVC())])
pipe2.fit(x_feat2, y_train)

pred = pipe2.predict(x_test_feat2)
cm = confusion_matrix(pred, y_test)
acc = accuracy_score(pred, y_test)
f1 = f1_score(pred, y_test)
print(acc, cm, f1)

0.99268018018 [[1810   15]
 [  11 1716]] 0.992481203008


In [32]:
x_feat3 = [feature_extractor(x, 9) for x in x_train]
x_test_feat3 = [feature_extractor(x, 9) for x in x_test]

pipe3 = Pipeline([('scaler', StandardScaler()), ('svc', svm.LinearSVC())])
pipe3.fit(x_feat3, y_train)

pred = pipe3.predict(x_test_feat3)
cm = confusion_matrix(pred, y_test)
acc = accuracy_score(pred, y_test)
f1 = f1_score(pred, y_test)
print(acc, cm, f1)

0.989301801802 [[1802   19]
 [  19 1712]] 0.989023685731


In [33]:
x_feat4 = [feature_extractor_w_hist(x, 9) for x in x_train]
x_test_feat4 = [feature_extractor_w_hist(x, 9) for x in x_test]

pipe4 = Pipeline([('scaler', StandardScaler()), ('svc', svm.LinearSVC())])
pipe4.fit(x_feat4, y_train)

pred = pipe4.predict(x_test_feat4)
cm = confusion_matrix(pred, y_test)
acc = accuracy_score(pred, y_test)
f1 = f1_score(pred, y_test)
print(acc, cm, f1)

0.99268018018 [[1808   13]
 [  13 1718]] 0.992489890237


In [34]:
classifiers = {
    'pipe1': pipe1,
    'pipe2': pipe2,
    'pipe3': pipe3,
    'pipe4': pipe4
}

with open('classifier.p', 'wb') as _file:
    pickle.dump(classifiers, _file)