In [26]:
# Necessary imports

import numpy as np
import cv2
import glob
import pickle
import time
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook as tqdm

from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from skimage.feature import hog

from helper_functions import *

In [27]:
def extract_features(imgs, params):
    if not params:
        raise Exception('ERROR: Please provide a valid params dict')
        
    features = []
    for img_path in tqdm(imgs):
        img = mpimg.imread(img_path)
        
        features.append(single_img_features(
            img,
            color_space=params['color_space'],
            spatial_size=params['spatial_size'],
            hist_bins=params['hist_bins'],
            orient=params['orient'],
            pix_per_cell=params['pix_per_cell'],
            cell_per_block=params['cell_per_block'],
            hog_channel=params['hog_channel'],
            spatial_feat=params['spatial_feat'],
            hist_feat=params['hist_feat'],
            hog_feat=params['hog_feat']
        ))
        
    return features

In [29]:
### Feature Extraction
params = {
    'color_space': 'HSV',   # Can be RGB, HSV, LAB, HLS, YUV, YCrCb
    'orient': 12,             # HOG orientations
    'pix_per_cell': 8,        # HOG pixels per cell
    'cell_per_block': 2,      # HOG cells per block
    'spatial_size': (24, 24), # Spatial binning dimensions
    'hist_bins': 24,          # Number of histogram bins
    'hog_channel': 'ALL',     # Can be 0, 1, 2, or "ALL"
    'spatial_feat': True,    # Spatial features on or off
    'hist_feat': True,        # Histogram features on or off
    'hog_feat': True,         # HOG features on or off
}
     
# Load pickled raw data set
with open('rawdata.p', mode='rb') as f:
    data = pickle.load(f)
    
cars_train = data['cars_train']
notcars_train = data['notcars_train']

cars_valid = data['cars_valid']
notcars_valid = data['notcars_valid']

cars_test = data['cars_test']
notcars_test = data['notcars_test']

# Extract features
print('Extracting features...')
start = time.time()

cars_features_train = extract_features(cars_train, params)
notcars_features_train = extract_features(notcars_train, params)

cars_features_valid = extract_features(cars_valid, params)
notcars_features_valid = extract_features(notcars_valid, params)

cars_features_test = extract_features(cars_test, params)
notcars_features_test = extract_features(notcars_test, params)

end = time.time()
print('Time taken to extract features (Spatial, Color Hist, HOG): {}'.format(end-start))

Extracting features...



/Users/sid/anaconda/envs/carnd-term1/lib/python3.5/site-packages/skimage/feature/_hog.py:119: skimage_deprecation: Default value of `block_norm`==`L1` is deprecated and will be changed to `L2-Hys` in v0.15
  'be changed to `L2-Hys` in v0.15', skimage_deprecation)








Time taken to extract features (Spatial, Color Hist, HOG): 100.40632796287537


In [30]:
### Data set exploratory visualisation and summarisation

print('Length of feature vector: {}'.format(len(cars_features_train[0])))

Length of feature vector: 8856


In [31]:
### Pre-processing

X_train = np.vstack([cars_features_train, notcars_features_train]).astype(np.float64) 
X_valid = np.vstack([cars_features_valid, notcars_features_valid]).astype(np.float64) 
X_test = np.vstack([cars_features_test, notcars_features_test]).astype(np.float64) 

y_train = np.hstack([np.ones(len(cars_features_train)), np.zeros(len(notcars_features_train))])
y_valid = np.hstack([np.ones(len(cars_features_valid)), np.zeros(len(notcars_features_valid))])
y_test = np.hstack([np.ones(len(cars_features_test)), np.zeros(len(notcars_features_test))])

scaler = StandardScaler().fit(X_train)

X_train = scaler.transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)

# Sanity check
assert(len(X_train) == len(y_train))
assert(len(X_valid) == len(y_valid))
assert(len(X_test) == len(y_test))

X_train, y_train = shuffle(X_train, y_train)
X_valid, y_valid = shuffle(X_valid, y_valid)
X_test, y_test = shuffle(X_test, y_test)

In [32]:
### Define and implement model architecture
### Training and evaluation pipeline
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint as sp_randint

clf = LinearSVC()
# parameters = {'C': sp_randint(0, 20)}
# clf = RandomizedSearchCV(lsvc, param_distributions=parameters)

print('Starting training...')
start = time.time()
clf.fit(X_train, y_train)
end = time.time()
print('Training took: {}'.format(end-start))

print('Training accuracy: {:.4f}'.format(clf.score(X_train, y_train)))
print('Validation accuracy: {:.4f}'.format(clf.score(X_valid, y_valid)))

Starting training...
Training took: 4.363361120223999
Training accuracy: 1.0000
Validation accuracy: 0.9910


In [33]:
### Testing accuracy
from sklearn.metrics import confusion_matrix
import pandas as pd

print('Test accuracy: {:.4f}'.format(clf.score(X_test, y_test)))
print()
preds = clf.predict(X_test)
df = pd.DataFrame(confusion_matrix(preds, y_test))
print('Confusion Matrix:')
df

Test accuracy: 0.9691

Confusion Matrix:


Unnamed: 0,0,1
0,863,21
1,34,860


In [35]:
### Once happy, pickle the data
try:
    with open('classifier_data.p', mode='wb') as f:
        pickle.dump({
            'clf': clf,
            'scaler': scaler,
            'orient': params['orient'],
            'pix_per_cell': params['pix_per_cell'],
            'cell_per_block': params['cell_per_block'],
            'spatial_size': params['spatial_size'],
            'hist_bins': params['hist_bins'],
            'color_space': params['color_space']
        }, f)
except Exception as e:
    print('ERROR: Failed to pickle the classifier and its params with exception: {}'.format(e))
    
print('Successfully pickled the classifier data!')

Successfully pickled the classifier data!
