## Import Packages

In [1]:
import glob
import cv2
import progressbar
import time
from skimage.feature import hog
import numpy as np
from sklearn.preprocessing import StandardScaler

## Loading Data

In [2]:
# Root folders that are searched for the car / non-car PNG images.
# Both paths are relative to the directory the notebook is run from.
vehicle_image_directory = "../data/vehicles"
non_vehicle_image_directory = "../data/non-vehicles"

In [3]:
# `**` only matches "zero or more directories" when recursive=True; without it
# the pattern behaves like a plain `*` and matches exactly one sub-directory
# level. Sorting makes the file order independent of the filesystem's listing
# order, so later steps see the samples in the same order on every machine.
vehicle_files = sorted(glob.glob(vehicle_image_directory + '/**/*.png', recursive=True))
non_vehicle_files = sorted(glob.glob(non_vehicle_image_directory + '/**/*.png', recursive=True))

In [4]:
# Feature vectors and their class labels, kept as two parallel lists.
# NOTE(review): both lists are re-initialised at the top of the loading cell
# further down, so this cell has no effect on the final result.
data = []
labels = []

# Extracting Features

The raw pixel intensities of the saturation channel shouldn't be helpful because of ordering, but for whatever reason they seem to help the model. I'm leaving them in because I'm superstitious.

In [5]:
def extract_features(img):
    """Build the feature vector for a single BGR image.

    The image is converted to HSV, a HOG descriptor is computed for each of
    the three HSV channels, and the raw pixel values of the saturation channel
    are appended at the end.

    Parameters
    ----------
    img : numpy.ndarray
        Colour image in BGR channel order (it is converted with
        ``cv2.COLOR_BGR2HSV``).

    Returns
    -------
    numpy.ndarray
        Concatenation of HOG(H), HOG(S), HOG(V) and the flattened S channel.

    Raises
    ------
    ValueError
        If ``img`` is None.
    """
    if img is None:
        # Fail with a clear message instead of an opaque error inside cvtColor.
        raise ValueError("extract_features received None instead of an image")

    orient = 9
    pixels_per_cell = (8, 8)
    cell_per_block = (2, 2)

    HSV = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    # block_norm is pinned to 'L1' (the default this notebook was run with):
    # skimage warns that the default will become 'L2-Hys', which would silently
    # change the features and invalidate any previously saved model/scaler.
    channel_hogs = [
        hog(
            HSV[:, :, channel],
            orientations=orient,
            pixels_per_cell=pixels_per_cell,
            cells_per_block=cell_per_block,
            transform_sqrt=True,
            block_norm='L1',
        )
        for channel in range(3)
    ]

    # Raw saturation pixels go last, after the three HOG descriptors.
    return np.concatenate(channel_hogs + [HSV[:, :, 1].ravel()])

# Load data into data array

In [7]:
data = []
labels = []

# Both image sets go through the same pipeline; they differ only in the banner
# that is printed and the label that is stored (1 = car, -1 = not a car).
for banner, files, label in (
    ("LOADING CAR IMAGES", vehicle_files, 1),
    ("LOADING NON CAR IMAGES", non_vehicle_files, -1),
):
    print(banner)
    bar = progressbar.ProgressBar()
    for file in bar(files):
        img = cv2.imread(file)
        if img is None:
            # cv2.imread returns None for a missing/corrupt file instead of
            # raising, so name the offending file explicitly.
            raise IOError("Could not read image: " + file)
        features = extract_features(img)
        data.append(features)
        labels.append(label)

N/A% (0 of 8792) |                       | Elapsed Time: 0:00:00 ETA:  --:--:--/home/banner/miniconda3/envs/carnd-term1/lib/python3.5/site-packages/skimage/feature/_hog.py:119: skimage_deprecation: Default value of `block_norm`==`L1` is deprecated and will be changed to `L2-Hys` in v0.15
  'be changed to `L2-Hys` in v0.15', skimage_deprecation)
  0% (9 of 8792) |                         | Elapsed Time: 0:00:00 ETA: 0:01:42

LOADING CAR IMAGES


100% (8792 of 8792) |#####################| Elapsed Time: 0:00:39 Time: 0:00:39
  0% (13 of 8968) |                        | Elapsed Time: 0:00:00 ETA: 0:01:09

LOADING NON CAR IMAGES


100% (8968 of 8968) |#####################| Elapsed Time: 0:00:39 Time: 0:00:39


## Imports for training the model

In [8]:
import _pickle as cPickle
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle

## Normalize the features (the fitted normalizer is saved later)

In [9]:
# Stack the per-image feature vectors into one (n_samples, n_features) matrix.
X = np.vstack(data).astype(np.float64)
# Fit a per-column scaler
# NOTE(review): the scaler is fitted on every sample before the
# train/validation/test split, so statistics of the held-out samples leak into
# the scaling. Fitting on the training split only would remove that leak.
X_scaler = StandardScaler().fit(X)
# Apply the scaler to X
scaled_X = X_scaler.transform(X)

# A fixed seed makes the shuffled order repeatable from run to run.
X, y = shuffle(scaled_X, labels, random_state=0)

(17760, 9388)
17760


# Split data into training, validation, and test data sets

The validation set is used to evaluate the first model and to find false positives for hard negative mining.

In [10]:
# Hold out 33% of the samples for testing, then 33% of the remainder for
# validation. Fixed seeds keep the three subsets (and therefore the reported
# accuracies) identical between runs.
X_1, X_test, y_1, y_test = train_test_split(X, y, test_size=0.33, random_state=0)
X_train, X_validation, y_train, y_validation = train_test_split(X_1, y_1, test_size=0.33, random_state=0)

Fit the model on the training set and predict on the validation set

In [11]:
# Linear-kernel support vector classifier with C = 0.4 and probability
# estimates switched on, trained on the training split only.
model = SVC(kernel="linear", C=0.4, probability=True)
model.fit(X_train, y_train)

SVC(C=0.4, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [12]:
# Predicted labels for the validation samples (labels are 1 = car, -1 = not a car).
preds = model.predict(X_validation)

In [13]:
# Compare the validation predictions against the true validation labels.
accuracy = accuracy_score(y_validation, preds)

In [14]:
# Accuracy of the first model, i.e. before any hard negatives are added.
print("Validation accuracy", accuracy)

Validation accuracy 0.981920040744


## Find examples from the validation set that were false positives and add them to the training set as hard negatives.

In [15]:
# A validation sample is a false positive when the model said "car" (1) but
# the true label is "not a car" (-1).
is_false_positive = [
    predicted == 1 and actual == -1
    for predicted, actual in zip(preds, y_validation)
]

# Positions of the false positives within the validation set.
negatives = [position for position, hit in enumerate(is_false_positive) if hit]
# Number of validation samples that were not false positives.
nn = len(is_false_positive) - len(negatives)

In [16]:
# Pull the false-positive rows (and their labels) out of the validation set.
hard_negative_X = X_validation[negatives]
hard_negative_y = np.array(y_validation)[negatives]

# Append them to the end of the original training data.
new_X = np.concatenate([X_train, hard_negative_X], axis=0)
new_y = np.concatenate([np.array(y_train), hard_negative_y], axis=0)

Retrain model with hard negatives

In [17]:
# Refit the existing `model` object on the training set extended with the hard negatives.
model.fit(new_X, new_y)

SVC(C=0.4, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

## Report accuracy of predictions on test data set. 

In [18]:
# Score the retrained model on the held-out test split.
# Note that `preds` and `accuracy` are reused: from here on they hold the
# test-set values rather than the validation-set ones.
preds = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=preds)

print("Test accuracy", accuracy)

Test accuracy 0.985326736052


## Save the model along with the normalizing function

In [19]:
# Persist the classifier and the scaler it was trained with; both are needed
# to score new images. `with` closes each file even if pickling raises.
with open("classifier.pkl", "wb") as f:
    cPickle.dump(model, f)

with open("normalizer.pkl", "wb") as f:
    cPickle.dump(X_scaler, f)