In [11]:
import glob
import cv2
import pickle
import numpy as np
import matplotlib.image as mpimg
from skimage.feature import hog
from tqdm import *
from lesson_functions import *

In this notebook an SVM classifier for vehicles is going to be trained.

## Load images

In [2]:
# Append vehicle data to a list
vehicles_data = []
files = glob.glob ("./data/vehicles/**/*.png")
for myFile in tqdm(files):
    image = mpimg.imread (myFile)
    vehicles_data.append(image)

100%|██████████| 8792/8792 [00:32<00:00, 272.14it/s]


In [3]:
# Append non-vehicle data to a list
non_vehicles_data = []
files = glob.glob ("./data/non-vehicles/**/*.png")
for myFile in tqdm(files):
    image = mpimg.imread (myFile)
    non_vehicles_data.append(image)

100%|██████████| 8968/8968 [00:30<00:00, 297.40it/s]


## Extract features from images

In [9]:
orient = 9
pix_per_cell = 8
cell_per_block = 2
spatial_size = (32, 32)
hist_bins = 32

In [16]:
trans_img = convert_color(vehicles_data[0], conv='RGB2YCrCb')
# Separate channels
ch1 = trans_img[:,:,0]
ch2 = trans_img[:,:,1]
ch3 = trans_img[:,:,2]
hog1 = get_hog_features(ch1, orient, pix_per_cell, cell_per_block, feature_vec=False)
hog2 = get_hog_features(ch2, orient, pix_per_cell, cell_per_block, feature_vec=False)
hog3 = get_hog_features(ch3, orient, pix_per_cell, cell_per_block, feature_vec=False)
hog_features = np.hstack((hog1.ravel(), hog2.ravel(), hog3.ravel()))
spatial_features = bin_spatial(trans_img, size=spatial_size)
hist_features = color_hist(trans_img, nbins=hist_bins)
train_features = np.hstack((spatial_features, hist_features, hog_features)).reshape(1, -1)

/home/guillermo/anaconda3/envs/carnd-term1/lib/python3.5/site-packages/skimage/feature/_hog.py:119: skimage_deprecation: Default value of `block_norm`==`L1` is deprecated and will be changed to `L2-Hys` in v0.15
  'be changed to `L2-Hys` in v0.15', skimage_deprecation)


In [19]:
train_features = []
for img in tqdm(vehicles_data + non_vehicles_data):
    trans_img = convert_color(img, conv='RGB2YCrCb')
    # Separate channels
    ch1 = trans_img[:,:,0]
    ch2 = trans_img[:,:,1]
    ch3 = trans_img[:,:,2]
    hog1 = get_hog_features(ch1, orient, pix_per_cell, cell_per_block, feature_vec=False)
    hog2 = get_hog_features(ch2, orient, pix_per_cell, cell_per_block, feature_vec=False)
    hog3 = get_hog_features(ch3, orient, pix_per_cell, cell_per_block, feature_vec=False)
    hog_features = np.hstack((hog1.ravel(), hog2.ravel(), hog3.ravel()))
    spatial_features = bin_spatial(trans_img, size=spatial_size)
    hist_features = color_hist(trans_img, nbins=hist_bins)
    train_features.append(np.hstack((spatial_features, hist_features, hog_features)).reshape(1, -1))
    
X = np.vstack(train_features).astype(np.float64)
y = np.hstack((np.ones(len(vehicles_data)), np.zeros(len(non_vehicles_data))))

  0%|          | 0/17760 [00:00<?, ?it/s]/home/guillermo/anaconda3/envs/carnd-term1/lib/python3.5/site-packages/skimage/feature/_hog.py:119: skimage_deprecation: Default value of `block_norm`==`L1` is deprecated and will be changed to `L2-Hys` in v0.15
  'be changed to `L2-Hys` in v0.15', skimage_deprecation)
100%|██████████| 17760/17760 [10:11<00:00, 29.04it/s]


Subset of data to train the SVM.

In [21]:
np.random.seed(48)
numsamples = 7000
arr = np.arange(len(X))
np.random.shuffle(arr)
Xs = X[arr[:numsamples]]
ys = y[arr[:numsamples]]

In [22]:
from sklearn import svm
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ShuffleSplit

svr = svm.SVC(kernel='linear')
param_grid = dict(svr__C = [0.1, 1, 10])
pipeline = Pipeline([('scaling', StandardScaler()), ('svr',svr)])
clf = GridSearchCV(pipeline,param_grid=param_grid,cv=ShuffleSplit(train_size = 0.8, random_state=0), n_jobs=-1)
clf.fit(Xs,ys)

GridSearchCV(cv=ShuffleSplit(n_splits=10, random_state=0, test_size=0.1, train_size=0.8),
       error_score='raise',
       estimator=Pipeline(steps=[('scaling', StandardScaler(copy=True, with_mean=True, with_std=True)), ('svr', SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))]),
       fit_params={}, iid=True, n_jobs=-1,
       param_grid={'svr__C': [0.1, 1, 10]}, pre_dispatch='2*n_jobs',
       refit=True, return_train_score=True, scoring=None, verbose=0)

In [23]:
clf.best_score_

0.99271428571428566

In [24]:
clf.score(X[arr[numsamples:]], y[arr[numsamples:]])

0.98959107806691449

In [25]:
import pickle
pickle.dump(clf,open('svc.p','wb'))