In [37]:
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import cv2
import glob
import time
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from skimage.feature import hog
from sklearn.externals import joblib
import json
# NOTE: the next import is only valid for scikit-learn version <= 0.17
# for scikit-learn >= 0.18 use:
# from sklearn.model_selection import train_test_split
from sklearn.model_selection import train_test_split

vehicle_folders = ['GTI_Far', 'GTI_Left', 'GTI_MiddleClose', 'GTI_Right', 'KITTI_extracted']
non_vehicle_folders = ['Extras', 'GTI', 'augmented']
#non_vehicle_folders = ['augmented']
is_car = []
cars = []
notcars = []
for f in vehicle_folders:
    cars = cars +  glob.glob('data/vehicles/{}/*.png'.format(f))
for f in non_vehicle_folders:
    notcars = notcars +  glob.glob('data/non-vehicles/{}/*.png'.format(f))

In [38]:
valid_notcars = []

In [39]:
for file in notcars:
    try:
        img = mpimg.imread(file)
        if img.shape == (64, 64, 3):
            valid_notcars.append(file)
    except:
        pass

In [40]:
img = mpimg.imread(file)

In [41]:
img.shape

(64, 64, 3)

In [42]:
len(valid_notcars)

9430

In [43]:
len(notcars)

9430

In [44]:
len(cars)

8792

In [45]:
len(notcars)

9430

In [46]:
# Get the extract features function
filename = 'lesson_functions.py'
exec(compile(open(filename, "rb").read(), filename, 'exec'))
%matplotlib inline

# Modeling approach

It looks like there are different combinations of models one could try out for this exercise. For example, with / without HOG features, different color spaces etc.  I am going to tune a bunch of models first, and then try to predict whether an image contains a car or not as an ensemble of these different models.

In [47]:
options = [
    {'color_space': 'HLS', 'hog_feat' : True, 'hog_channel' : 'ALL', 'hist_feat' : True},
    {'color_space': 'HSV', 'hog_feat' : True, 'hog_channel' : 'ALL', 'hist_feat' : True},
    {'color_space': 'HSV', 'hog_feat' : True, 'hog_channel': 'ALL', 'hist_feat' : False},
    {'color_space': 'YUV', 'hog_feat' : True, 'hog_channel': 'ALL', 'hist_feat' : False},
]

static_options = {
    'orient': 9,
    'spatial_size': (16, 16),
    'hist_bins': 16,
    'hist_feat': True,
    'spatial_feat': True,
    'pix_per_cell': 8, 
    'cell_per_block': 2}

In [49]:
options

[{'color_space': 'HLS',
  'hist_feat': True,
  'hog_channel': 'ALL',
  'hog_feat': True}]

In [50]:
%pdb off
import copy
model_configurations = []
model_number = 101
for option in options:
    #try:
    t=time.time()
    model_config = copy.deepcopy(option)
    model_config['version'] = model_number
    model_config.update(static_options)
    model_configurations.append(model_config)
    notcar_features = extract_features(notcars, **model_config)
    car_features = extract_features(cars, **model_config)
    X = np.vstack((car_features, notcar_features)).astype(np.float64)                        
    # Fit a per-column scaler
    X_scaler = StandardScaler().fit(X)
    # Apply the scaler to X
    scaled_X = X_scaler.transform(X)
    # Define the labels vector
    y = np.hstack((np.ones(len(car_features)), np.zeros(len(notcar_features))))
    # Split up data into randomized training and test sets
    rand_state = np.random.randint(0, 100)
    X_train, X_test, y_train, y_test = train_test_split(
        scaled_X, y, test_size=0.2, random_state=rand_state)
    print('-' * 20)
    print(model_config)
    print('Model version', model_number)
    print('Feature vector length:', len(X_train[0]))
    # Use a linear SVC 
    svc = LinearSVC()
    # Check the training time for the SVC
    svc.fit(X_train, y_train)
    t2 = time.time()
    print(round(t2-t, 2), 'Seconds to train SVC...')
    # Check the score of the SVC
    test_accuracy = round(svc.score(X_test, y_test), 4)
    print('Test Accuracy of SVC = ', test_accuracy)
    model_config['test_accuracy'] = test_accuracy
    # Check the prediction time for a single sample
    t=time.time()
    model_configurations.append(model_config)
    joblib.dump(svc, 'models/model_{}.pkl'.format(model_number))
    joblib.dump(X_scaler, 'models/X_scaler_{}.pkl'.format(model_number))
    with open('models/model_config_{}.json'.format(model_number), 'w') as f:
        json.dump(model_config, f)
    #except Exception:
    #    pass
    model_number = model_number + 1

Automatic pdb calling has been turned OFF


/Users/krishnakumarselvam/miniconda3/envs/carnd-term1/lib/python3.5/site-packages/skimage/feature/_hog.py:119: skimage_deprecation: Default value of `block_norm`==`L1` is deprecated and will be changed to `L2-Hys` in v0.15
  'be changed to `L2-Hys` in v0.15', skimage_deprecation)


--------------------
{'color_space': 'HLS', 'orient': 9, 'spatial_feat': True, 'version': 101, 'hog_feat': True, 'hog_channel': 'ALL', 'cell_per_block': 2, 'hist_bins': 16, 'spatial_size': (16, 16), 'pix_per_cell': 8, 'hist_feat': True}
Model version 101
Feature vector length: 6108
184.16 Seconds to train SVC...
Test Accuracy of SVC =  0.9877
