In [1]:
import glob
import pickle
import time

import cv2
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
from skimage.feature import hog
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
# train_test_split lives in sklearn.model_selection; the old
# sklearn.cross_validation module was removed in scikit-learn 0.20.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC, LinearSVC

from classifier_functions import *


# Collect the paths of every PNG below the vehicle / non-vehicle folders
# (searched recursively).
car_images = glob.glob('final_dataset/vehicles/**/*.png', recursive=True)
notcar_images = glob.glob('final_dataset/non-vehicles/**/*.png', recursive=True)

# Working copies of the two path lists; later code indexes into `cars`
# and passes both lists to the feature extractor.
cars = list(car_images)
notcars = list(notcar_images)
     

### TODO: Tweak these parameters and see how the results change.
color_space = 'YCrCb'     # Colour space: RGB, HSV, LUV, HLS, YUV or YCrCb
orient = 9                # Number of HOG orientation bins
pix_per_cell = 8          # HOG cell size in pixels
cell_per_block = 2        # HOG block size in cells
hog_channel = 'ALL'       # Channel used for HOG: 0, 1, 2, or "ALL"
spatial_size = (16, 16)   # Target size for spatial binning
hist_bins = 32            # Bins per colour histogram
spatial_feat = True       # Include spatial-binning features?
hist_feat = True          # Include colour-histogram features?
hog_feat = True           # Include HOG features?

# Both classes must be described with exactly the same settings, so the
# keyword arguments are gathered once and reused for the two calls below.
feature_kwargs = dict(color_space=color_space,
                      spatial_size=spatial_size,
                      hist_bins=hist_bins,
                      orient=orient,
                      pix_per_cell=pix_per_cell,
                      cell_per_block=cell_per_block,
                      hog_channel=hog_channel,
                      spatial_feat=spatial_feat,
                      hist_feat=hist_feat,
                      hog_feat=hog_feat)

car_features = extract_features(cars, **feature_kwargs)
notcar_features = extract_features(notcars, **feature_kwargs)


# Balance the two classes by topping up whichever one is smaller with
# features extracted from randomly chosen images of that class, using
# extract_features(..., flip=True) (presumably a mirrored copy of the image --
# confirm in classifier_functions).
#
# The shortfall is computed with a sign rather than abs(): always extending
# car_features while the gap is measured with abs() never terminates when the
# car class is the larger one, because every pass makes the gap wider.
if len(car_features) < len(notcar_features):
    minority_paths, minority_features = cars, car_features
    majority_features = notcar_features
else:
    minority_paths, minority_features = notcars, notcar_features
    majority_features = car_features

# Number of extra samples the smaller class still needs.
diff = len(majority_features) - len(minority_features)

if diff > 0 and len(minority_paths) == 0:
    raise ValueError('Cannot balance the classes: the smaller class has no images to augment.')

# Normally one pass is enough; the loop only repeats if the extractor returned
# fewer feature vectors than the number of images it was given.
while diff > 0:

    # Draw `diff` source images (with replacement) from the smaller class.
    picks = np.random.randint(0, len(minority_paths), size=diff)
    random_imgs = [minority_paths[ind] for ind in picks]

    flipped_features = extract_features(random_imgs, color_space=color_space,
                        spatial_size=spatial_size, hist_bins=hist_bins,
                        orient=orient, pix_per_cell=pix_per_cell,
                        cell_per_block=cell_per_block,
                        hog_channel=hog_channel, spatial_feat=spatial_feat,
                        hist_feat=hist_feat, hog_feat=hog_feat, flip=True)

    # Guard against spinning forever if augmentation yields nothing.
    if len(flipped_features) == 0:
        raise RuntimeError('extract_features returned no features for the augmentation batch.')

    minority_features.extend(flipped_features)
    diff = len(majority_features) - len(minority_features)


print(len(car_features))
print(len(notcar_features))

assert len(car_features) == len(notcar_features), 'classes are not balanced'




8968
8968


In [8]:

# Stack the per-image feature vectors into one sample matrix
# (car rows first, then non-car rows).
X = np.vstack((car_features, notcar_features)).astype(np.float64)

# Label vector in the same row order: 1 = car, 0 = not a car.
y = np.hstack((np.ones(len(car_features)), np.zeros(len(notcar_features))))

# Split into shuffled training (80%) and test (20%) sets.
# The seed is fixed: drawing it from np.random on every run made the split,
# the fitted scaler and the pickled artefacts impossible to reproduce.
rand_state = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=rand_state)

print('Feature vector length:', len(X_train[0]))

# Fit the per-column scaler on the training data only, so no statistics of
# the test set leak into training.
X_scaler = StandardScaler().fit(X_train)
# Apply the same scaling to both splits.
X_train = X_scaler.transform(X_train)
X_test = X_scaler.transform(X_test)



# Candidate values for the regularisation strength C: 100 points spaced
# logarithmically between 1e-3 and 1e3.  The log-spaced range follows
# http://scikit-learn.org/stable/auto_examples/svm/plot_rbf_parameters.html
C_range = np.logspace(-3, 3, 100)
param_grid = {'C': C_range}

# Randomised search: 60 of the candidate C values are sampled and
# cross-validated with a linear SVM.
svr = LinearSVC()
grid = RandomizedSearchCV(estimator=svr,
                          param_distributions=param_grid,
                          n_iter=60,
                          verbose=3)

# Only the first 2000 training rows are used for the search.
t = time.time()
grid.fit(X_train[:2000], y_train[:2000])
t2 = time.time()
print(round(t2-t, 2), 'Seconds to find parameters...')

print("The best parameters are %s with a score of %0.2f"
      % (grid.best_params_, grid.best_score_))

# Winning C, used to train the final classifier.
best_C = grid.best_params_['C']

# Train the final linear SVM on the full training set with the C value found
# by the parameter search.
svc = LinearSVC(C=best_C)

# Check the training time for the SVC
t = time.time()
svc.fit(X_train, y_train)
t2 = time.time()
print(round(t2-t, 2), 'Seconds to train SVC...')

# Check the score of the SVC on the held-out test set
print('Test Accuracy of SVC = ', round(svc.score(X_test, y_test), 4))

# Check the prediction time for a small batch of n_predict samples.
# Only the predict() call is timed; the timer used to wrap the print calls as
# well, so the reported figure included console I/O.
n_predict = 10
t = time.time()
predictions = svc.predict(X_test[0:n_predict])
t2 = time.time()
print('My SVC predicts: ', predictions)
print('For these',n_predict, 'labels: ', y_test[0:n_predict])
print(round(t2-t, 5), 'Seconds to predict', n_predict,'labels with SVC')


Feature vector length: 6156
Fitting 3 folds for each of 60 candidates, totalling 180 fits
[CV] C=0.00265608778295 ..............................................
[CV] ...... C=0.00265608778295, score=0.974512743628186, total=   1.0s
[CV] C=0.00265608778295 ..............................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.3s remaining:    0.0s


[CV] ..... C=0.00265608778295, score=0.9775112443778111, total=   0.3s
[CV] C=0.00265608778295 ..............................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    1.7s remaining:    0.0s


[CV] ...... C=0.00265608778295, score=0.984984984984985, total=   0.3s
[CV] C=0.00351119173422 ..............................................
[CV] ...... C=0.00351119173422, score=0.974512743628186, total=   0.7s
[CV] C=0.00351119173422 ..............................................
[CV] ..... C=0.00351119173422, score=0.9775112443778111, total=   0.3s
[CV] C=0.00351119173422 ..............................................
[CV] ...... C=0.00351119173422, score=0.984984984984985, total=   0.3s
[CV] C=4.97702356433 .................................................
[CV] ......... C=4.97702356433, score=0.974512743628186, total=   0.2s
[CV] C=4.97702356433 .................................................
[CV] ........ C=4.97702356433, score=0.9790104947526237, total=   0.3s
[CV] C=4.97702356433 .................................................
[CV] ......... C=4.97702356433, score=0.984984984984985, total=   0.3s
[CV] C=0.265608778295 ................................................
[CV] .

[CV] ..... C=0.00200923300257, score=0.9730134932533733, total=   0.5s
[CV] C=0.00200923300257 ..............................................
[CV] ..... C=0.00200923300257, score=0.9790104947526237, total=   0.2s
[CV] C=0.00200923300257 ..............................................
[CV] ...... C=0.00200923300257, score=0.984984984984985, total=   0.3s
[CV] C=13.2194114847 .................................................
[CV] ......... C=13.2194114847, score=0.974512743628186, total=   0.2s
[CV] C=13.2194114847 .................................................
[CV] ........ C=13.2194114847, score=0.9790104947526237, total=   0.2s
[CV] C=13.2194114847 .................................................
[CV] ......... C=13.2194114847, score=0.984984984984985, total=   0.3s
[CV] C=0.1 ...........................................................
[CV] ................... C=0.1, score=0.974512743628186, total=   0.2s
[CV] C=0.1 ...........................................................
[CV] .

[CV] ...... C=0.0215443469003, score=0.9790104947526237, total=   0.3s
[CV] C=0.0215443469003 ...............................................
[CV] ....... C=0.0215443469003, score=0.984984984984985, total=   0.4s
[CV] C=0.231012970008 ................................................
[CV] ........ C=0.231012970008, score=0.974512743628186, total=   3.9s
[CV] C=0.231012970008 ................................................
[CV] ....... C=0.231012970008, score=0.9790104947526237, total=   0.3s
[CV] C=0.231012970008 ................................................
[CV] ........ C=0.231012970008, score=0.984984984984985, total=   0.5s
[CV] C=0.00174752840001 ..............................................
[CV] ..... C=0.00174752840001, score=0.9730134932533733, total=   0.7s
[CV] C=0.00174752840001 ..............................................
[CV] ..... C=0.00174752840001, score=0.9790104947526237, total=   0.2s
[CV] C=0.00174752840001 ..............................................
[CV] .

[CV] ...... C=0.00705480231072, score=0.984984984984985, total=   0.3s
[CV] C=0.533669923121 ................................................
[CV] ........ C=0.533669923121, score=0.974512743628186, total=   0.2s
[CV] C=0.533669923121 ................................................
[CV] ....... C=0.533669923121, score=0.9790104947526237, total=   0.2s
[CV] C=0.533669923121 ................................................
[CV] ........ C=0.533669923121, score=0.984984984984985, total=   0.3s


[Parallel(n_jobs=1)]: Done 180 out of 180 | elapsed:  1.9min finished


115.52 Seconds to find parameters...
The best parameters are {'C': 4.9770235643321135} with a score of 0.98
4.83 Seconds to train SVC...
Test Accuracy of SVC =  0.9919
My SVC predicts:  [ 1.  0.  0.  0.  0.  1.  1.  0.  1.  1.]
For these 10 labels:  [ 1.  0.  0.  0.  0.  1.  1.  0.  1.  1.]
0.004 Seconds to predict 10 labels with SVC


In [9]:
# Persist the trained classifier.  `pickle` is already imported in the first
# cell.  The context manager guarantees the file is flushed and closed even if
# pickling fails; passing a bare open(...) to pickle.dump left the handle open.
with open("pickle/classifier_hog.p", "wb") as classifier_file:
    pickle.dump(svc, classifier_file)

In [10]:
# Bundle the feature-extraction settings with the fitted scaler so they can be
# saved next to the classifier.
#
# The values are read from the variables set before the features were
# extracted (first cell) instead of being declared a second time here, and the
# three *_feat switches use those variables rather than a hard-coded True.
# This keeps the saved parameters identical to the ones the classifier was
# actually trained with, even after the settings are tweaked.
feature_params = {'color_space': color_space,
                  'orient': orient,
                  'pix_per_cell': pix_per_cell,
                  'cell_per_block': cell_per_block,
                  'hog_channel': hog_channel,
                  'spatial_size': spatial_size,
                  'hist_bins': hist_bins,
                  'spatial_feat': spatial_feat,
                  'hist_feat': hist_feat,
                  'hog_feat': hog_feat,
                  'scaler': X_scaler}

In [11]:
# Persist the feature settings and scaler.  The context manager closes the
# file deterministically; a bare open(...) passed to pickle.dump was never
# closed explicitly.
with open("pickle/feature_params_hog.p", "wb") as params_file:
    pickle.dump(feature_params, params_file)