In [None]:
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import cv2
import glob
import pickle
import time
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from skimage.feature import hog

from utils import extract_features_many_filenames

# NOTE: the next import is only valid for scikit-learn version <= 0.17
# for scikit-learn >= 0.18 use:
# from sklearn.model_selection import train_test_split
from sklearn.cross_validation import train_test_split

In [69]:
# To save as normal python script (easier to git diff)
!jupyter nbconvert --to script classifier.ipynb

[NbConvertApp] Converting notebook classifier.ipynb to script
[NbConvertApp] Writing 4136 bytes to classifier.py


In [63]:
training_folders = {
    'vehicles': [
        'KITTI_extracted',
        'GTI_Right',
        'GTI_MiddleClose',
        'GTI_Left',
        'GTI_Far'
    ],
    'non-vehicles': [
        'GTI',
        'Extras'
    ]
}

training_filenames = {
    'vehicles': [],
    'non-vehicles': []
}

for folder, subfolders in training_folders.items():
    for subfolder in subfolders:
        files = glob.glob(folder + '/' + subfolder + '/' + '*.png')
        training_filenames[folder].extend(files)

In [64]:
# Prepare training data
    # Load image
    # Extract features from image
    # Add features to training set
    # Preprocess data (might not be here?)

In [65]:
# Smaller sample size while preparing pipeline
# sample_size = 500
# training_filenames['vehicles'] = training_filenames['vehicles'][0:sample_size]
# training_filenames['non-vehicles'] = training_filenames['non-vehicles'][0:sample_size]

In [66]:
# PARAMETERS
color_space = 'LUV' # Can be RGB, HSV, LUV, HLS, YUV, YCrCb
orient = 9  # HOG orientations
pix_per_cell = 8 # HOG pixels per cell
cell_per_block = 2 # HOG cells per block
hog_channel = 'ALL' # Can be 0, 1, 2, or "ALL"
spatial_size = (16, 16) # Spatial binning dimensions
hist_bins = 16    # Number of histogram bins
spatial_feat = True # Spatial features on or off
hist_feat = False # Histogram features on or off
hog_feat = False # HOG features on or off
y_start_stop = [400, 700] # Min and max in y to search in slide_window()

In [67]:
%%time
# Feature extraction and preparation
car_features = extract_features_many_filenames(training_filenames['vehicles'], color_space=color_space, 
                        spatial_size=spatial_size, hist_bins=hist_bins, 
                        orient=orient, pix_per_cell=pix_per_cell, 
                        cell_per_block=cell_per_block, 
                        hog_channel=hog_channel, spatial_feat=spatial_feat, 
                        hist_feat=hist_feat, hog_feat=hog_feat)
notcar_features = extract_features_many_filenames(training_filenames['non-vehicles'], color_space=color_space, 
                        spatial_size=spatial_size, hist_bins=hist_bins, 
                        orient=orient, pix_per_cell=pix_per_cell, 
                        cell_per_block=cell_per_block, 
                        hog_channel=hog_channel, spatial_feat=spatial_feat, 
                        hist_feat=hist_feat, hog_feat=hog_feat)

print(np.asarray(car_features).shape, np.asarray(notcar_features).shape)
X = np.vstack((car_features, notcar_features)).astype(np.float64)                        
# Fit a per-column scaler
X_scaler = StandardScaler().fit(X)
# Apply the scaler to X
scaled_X = X_scaler.transform(X)

# Define the labels vector
y = np.hstack((np.ones(len(car_features)), np.zeros(len(notcar_features))))


# Split up data into randomized training and test sets
X_train, X_test, y_train, y_test = train_test_split(scaled_X, y, test_size=0.2)

print('Feature vector length:', len(X_train[0]))

(8792, 768) (8968, 768)
Feature vector length: 768
CPU times: user 23.1 s, sys: 4.45 s, total: 27.6 s
Wall time: 3min 15s


In [68]:
%%time
# Create the classifier
svc = LinearSVC()

# Train the classifier
svc.fit(X_train, y_train)

# Classifier accuracy
print('Test Accuracy of SVC = ', round(svc.score(X_test, y_test), 4))

Test Accuracy of SVC =  0.9133
CPU times: user 17.6 s, sys: 252 ms, total: 17.9 s
Wall time: 19.2 s


In [61]:
# Save the classifier
pickle.dump(svc, open('svc_classifier.pkl', 'wb'))
# Load with:
# clf = joblib.load('filename.pkl') 