# Vehicle Detection
[![Udacity - Self-Driving Car NanoDegree](https://s3.amazonaws.com/udacity-sdc/github/shield-carnd.svg)](http://www.udacity.com/drive)

This project implements a software pipeline to detect vehicles in a video.  

The Project
---

The goals / steps of this project are the following:

* Perform a Histogram of Oriented Gradients (HOG) feature extraction on a labeled training set of images and train a Linear SVM classifier
* Optionally, you can also apply a color transform and append binned color features, as well as histograms of color, to your HOG feature vector. 
* Note: for those first two steps don't forget to normalize your features and randomize a selection for training and testing.
* Implement a sliding-window technique and use your trained classifier to search for vehicles in images.
* Run your pipeline on a video stream (start with the test_video.mp4 and later implement on full project_video.mp4) and create a heat map of recurring detections frame by frame to reject outliers and follow detected vehicles.
* Estimate a bounding box for vehicles detected.

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import cv2
import importlib
import numpy as np
from sklearn import svm
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from scipy.ndimage.measurements import label
from tqdm import tqdm
import time
import glob
import itertools
import math
import collections
from moviepy.editor import VideoFileClip
from IPython.display import HTML

%matplotlib inline

np.random.seed(12061979) # fixed seed for NumPy's global RNG so np.random draws are repeatable between runs

## Image Plotting

In [None]:
def plot_images_grid(images, labels=None, axis_on=False):
    """Show images in a grid of matplotlib subplots on a 16 inch wide figure.

    The column count depends on the number of images (8 columns for 16 or
    more images, 4 for 8 or more, 2 for 2 or more, otherwise 1); rows are
    added as needed. Grid cells without an image stay empty.

    Args:
        images: sized iterable of images accepted by ``Axes.imshow``;
            ``None`` entries leave their cell empty.
        labels: optional iterable of subplot titles, matched to ``images`` by
            position. Missing or ``None`` labels leave the subplot untitled;
            labels beyond the last grid cell are ignored.
        axis_on: when False (default) the axes of every cell are hidden.
    """
    labels = [] if labels is None else labels
    n_images = len(images)
    if n_images == 0:
        # plt.subplots cannot create a grid with zero rows
        return

    if n_images >= 16:
        grid_cols = 8
    elif n_images >= 8:
        grid_cols = 4
    elif n_images >= 2:
        grid_cols = 2
    else:
        grid_cols = 1

    grid_rows = math.ceil(n_images / float(grid_cols))
    fig_height_inches = math.ceil((16 / float(grid_cols)) * grid_rows)
    # squeeze=False always returns a 2-D array of axes, so ravel() also works
    # for a 1x1 grid (a single image)
    fig, axes = plt.subplots(grid_rows, grid_cols,
                             figsize=(16, fig_height_inches), squeeze=False)
    fig.subplots_adjust(wspace=0.001, hspace=0.2)

    # pad with empty cells so that every axis is visited exactly once
    cells = itertools.chain(itertools.zip_longest(images, labels),
                            itertools.repeat((None, None)))
    for ax, (image, label) in zip(axes.ravel(), cells):
        if image is not None:
            ax.imshow(image)
            if label is not None:
                ax.set_title(label)
        if not axis_on:
            ax.axis('off')


## Load Test Images

In [None]:
import glob

# Read every JPEG found in the test image folder into memory.
image_files = glob.glob('./test_images/*.jpg')
imgs_test = [mpimg.imread(img_file) for img_file in image_files]

# The first test image is the reference image used by later cells.
img_test = imgs_test[0]
plt.imshow(img_test)

## Load Training Data Set
The data set images are loaded into memory to speed up the repeated feature extraction
needed for, e.g., HOG parameter exploration.

In [None]:
def load_img(file):
    """Read a single image file from disk with matplotlib."""
    return mpimg.imread(file)


# Collect the PNG paths of both classes. glob is called without recursive=True,
# so '**' matches exactly one directory level below the class folder.
vehicle_img_files = glob.glob('training_dataset/vehicles/**/*.png')
nonvehicle_img_files = glob.glob('training_dataset/non-vehicles/**/*.png')
print("vehicles: " + str(len(vehicle_img_files)), ", non vehicles: " + str(len(nonvehicle_img_files)))

# Keep all decoded images in memory; tqdm shows the loading progress.
vehicle_imgs = list(map(load_img, tqdm(vehicle_img_files)))
nonvehicle_imgs = list(map(load_img, tqdm(nonvehicle_img_files)))

The number of positive and negative training images in the data set is nearly equal and thus sufficiently balanced already - no need to add / remove samples for a category.

## Vehicle Images from Training Data Set

In [None]:
# Draw 32 random indices (repeats possible) and show the matching vehicle images.
vehicle_sample_idx = np.random.randint(0, len(vehicle_imgs), (32))
plot_images_grid([vehicle_imgs[i] for i in vehicle_sample_idx])

## Non Vehicle Images from Training Data Set

In [None]:
# Draw 32 random indices (repeats possible) and show the matching non-vehicle images.
nonvehicle_sample_idx = np.random.randint(0, len(nonvehicle_imgs), (32))
plot_images_grid([nonvehicle_imgs[i] for i in nonvehicle_sample_idx])

## HOG Feature Extraction

In [None]:
def convert_color_from_rgb(img, color_space_dst):
    """Convert an RGB image to another color space.

    Args:
        img: RGB image; returned unchanged (same object) for ``'RGB'``.
        color_space_dst: one of ``'RGB'``, ``'HSV'``, ``'LUV'``, ``'HLS'``,
            ``'YUV'`` or ``'YCrCb'``.

    Returns:
        The image converted with ``cv2.cvtColor``.

    Raises:
        ValueError: if ``color_space_dst`` is not a supported name. Without
            this check an unknown name would silently yield ``None`` and only
            fail later in the caller.
    """
    if color_space_dst == 'RGB':
        return img

    supported = ('HSV', 'LUV', 'HLS', 'YUV', 'YCrCb')
    if color_space_dst not in supported:
        raise ValueError(
            "unsupported color space: " + repr(color_space_dst) +
            " (expected 'RGB' or one of " + ", ".join(supported) + ")")

    # the OpenCV conversion codes are named COLOR_RGB2<target>
    return cv2.cvtColor(img, getattr(cv2, 'COLOR_RGB2' + color_space_dst))

In [None]:
def hog_features(img, orientation, pixels_per_cell, cells_per_block, visualise):
    """Compute HOG features (and optionally the HOG visualisation) for one channel.

    Args:
        img: single-channel image passed to ``skimage.feature.hog``.
        orientation: number of orientation bins.
        pixels_per_cell: edge length of a square cell in pixels.
        cells_per_block: edge length of a square block in cells.
        visualise: whether the HOG visualisation image is requested as well.

    Returns:
        Tuple ``(features, hog_image)``. ``hog_image`` is ``None`` when
        ``visualise`` is false, because ``hog`` then returns the feature
        vector only and cannot be unpacked into two values.
    """
    # NOTE(review): newer scikit-image releases spell this keyword
    # `visualize` - confirm against the installed version.
    result = hog(
        img,
        orientations=orientation,
        pixels_per_cell=(pixels_per_cell, pixels_per_cell),
        cells_per_block=(cells_per_block, cells_per_block),
        transform_sqrt=False,
        visualise=visualise,
        feature_vector=True)
    if visualise:
        features, hog_image = result
        return features, hog_image
    return result, None

def hog_visualize(img):
    """Return the HOG visualisation image of channel 0 of the YUV-converted image."""
    first_channel = convert_color_from_rgb(img, 'YUV')[:, :, 0]
    hog_result = hog_features(
        first_channel,
        orientation=8,
        pixels_per_cell=8,
        cells_per_block=2,
        visualise=True,
    )
    # hog_features returns (features, visualisation); only the image is needed
    return hog_result[1]
    
# Pick one sample per class and compute its HOG visualisation.
vehicle_img = vehicle_imgs[0]
nonvehicle_img = nonvehicle_imgs[1]
vehicle_hog = hog_visualize(vehicle_img)
nonvehicle_hog = hog_visualize(nonvehicle_img)

# 2x2 figure: originals in the left column, HOG visualisations (gray) on the right.
f, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(7, 7))
f.subplots_adjust(wspace=.2, hspace=.4)
hog_panels = [
    (ax1, vehicle_img, None, 'vehicle image'),
    (ax2, vehicle_hog, 'gray', 'vehicle hog'),
    (ax3, nonvehicle_img, None, 'non vehicle image'),
    (ax4, nonvehicle_hog, 'gray', 'non vehicle hog'),
]
for panel_ax, panel_img, panel_cmap, panel_title in hog_panels:
    panel_ax.imshow(panel_img, cmap=panel_cmap)
    panel_ax.set_title(panel_title, fontsize=16)


## Spatial bins

In [None]:
def bin_spatial(img, size=(32, 32)):
    """Build a spatial-binning feature vector from the first three channels.

    Each channel is resized to ``size`` with ``cv2.resize`` and flattened; the
    flattened channels are then joined in channel order.
    """
    flattened_channels = [
        cv2.resize(img[:, :, channel_idx], size).ravel()
        for channel_idx in range(3)
    ]
    return np.hstack(flattened_channels)

## Color Histogram

In [None]:
def color_hist(img, nbins=32, bins_range=None):
    """Concatenate the histograms of the first three image channels.

    Args:
        img: image array indexed as ``img[:, :, channel]``.
        nbins: number of histogram bins per channel.
        bins_range: optional ``(lower, upper)`` value range shared by all
            channels. With the default ``None`` every channel is binned over
            its own minimum..maximum, so the bin edges depend on the image
            content.

    Returns:
        1-D array of length ``3 * nbins`` holding the bin counts of channel 0,
        1 and 2 (only the counts; bin edges are not returned).
    """
    channel_counts = [
        np.histogram(img[:, :, channel_idx], bins=nbins, range=bins_range)[0]
        for channel_idx in range(3)
    ]
    return np.concatenate(channel_counts)

def color_hist_explore(img, axes):
    """Plot an image next to the histograms of its three RGB channels.

    Args:
        img: RGB image.
        axes: sequence of at least four matplotlib axes; ``axes[0]`` shows the
            image, ``axes[1..3]`` show the 16-bin histograms of channels R, G
            and B.
    """
    axes[0].imshow(img)
    cspace = 'RGB'
    cimg = convert_color_from_rgb(img, cspace)
    for i in range(3):
        ax = axes[1 + i]
        # Axes.hist bins raw samples itself; handing it the (counts, edges)
        # result of np.histogram would plot a histogram of those arrays instead
        ax.hist(cimg[:, :, i].ravel(), bins=16)
        ax.set_title(cspace[i])

# One row per sample: the image followed by its three per-channel histograms
# (top row: a vehicle sample, bottom row: a non-vehicle sample).
f, axes_grid = plt.subplots(2, 4, figsize=(14, 7))
axes1, axes2 = axes_grid
f.subplots_adjust(wspace=.2, hspace=.4)
for sample_img, sample_axes in ((vehicle_imgs[70], axes1), (nonvehicle_imgs[3], axes2)):
    color_hist_explore(sample_img, sample_axes)

## Feature Extraction for Training Data Set

In [None]:
class FeatureExtractionParams(object):
    """Settings that control feature extraction for a single image.

    Attributes mirror the constructor arguments:
        hog_color_space: color space name the image is converted to for HOG.
        hog_orientation: number of HOG orientation bins.
        hog_pixels_per_cell: HOG cell edge length in pixels.
        hog_cells_per_block: HOG block edge length in cells.
        hog_channels: list of channel indices HOG is computed for.
        color_hist_bins: bins per channel of the color histogram.
        color_hist_cspace: color space name used for the color histogram.
        spat_bins: edge length of the spatially binned image.
    """

    def __init__(self, hog_color_space, hog_orientation, hog_pixels_per_cell, hog_cells_per_block, hog_channels,
                 color_hist_bins=16, color_hist_cspace='RGB', spat_bins=32):
        self.hog_color_space = hog_color_space
        self.hog_orientation = hog_orientation
        self.hog_pixels_per_cell = hog_pixels_per_cell
        self.hog_cells_per_block = hog_cells_per_block
        self.hog_channels = hog_channels
        self.color_hist_bins = color_hist_bins
        self.color_hist_cspace = color_hist_cspace
        self.spat_bins = spat_bins

    def __str__(self):
        """Return a one-line, human readable summary of all settings."""
        # str.format converts every value itself; plain '+' concatenation
        # raised TypeError for the integer spat_bins
        return (
            "FeatureExtractionParams[orientation={}"
            ", color space={}"
            ", channels={}"
            ", pixels per cell={}"
            ", cells per block={}"
            ", color histogram bins={}"
            ", color histogram color space={}"
            ", spatial bins={}"
            "]"
        ).format(
            self.hog_orientation,
            self.hog_color_space,
            self.hog_channels,
            self.hog_pixels_per_cell,
            self.hog_cells_per_block,
            self.color_hist_bins,
            self.color_hist_cspace,
            self.spat_bins,
        )


In [None]:
def hog_features_for_image(img_channel, params: FeatureExtractionParams, feature_vec=True):
    """Run ``hog`` on one image channel with the HOG settings from ``params``.

    ``feature_vec`` is forwarded as ``feature_vector``; no visualisation image
    is requested.
    """
    cell_shape = (params.hog_pixels_per_cell,) * 2
    block_shape = (params.hog_cells_per_block,) * 2
    return hog(
        img_channel,
        orientations=params.hog_orientation,
        pixels_per_cell=cell_shape,
        cells_per_block=block_shape,
        transform_sqrt=False,
        visualise=False,
        feature_vector=feature_vec,
    )

In [None]:
def features_for_image(img, params: FeatureExtractionParams, hog_channel_features=None):
    """Build the combined feature vector for one RGB image.

    The vector is the concatenation of, in this order:
      1. HOG features - computed per channel in ``params.hog_channels`` after
         converting to ``params.hog_color_space``, or taken as-is from
         ``hog_channel_features`` when the caller already has them,
      2. the color histogram (only if ``params.color_hist_bins > 0``),
      3. the spatially binned image (only if ``params.spat_bins > 0``).

    Args:
        img: RGB image.
        params: feature extraction settings.
        hog_channel_features: optional precomputed, flattened HOG features.

    Returns:
        1-D feature array.
    """
    features = []
    if hog_channel_features is None:
        img_for_hog = convert_color_from_rgb(img, params.hog_color_space)
        for channel in params.hog_channels:
            features.append(hog_features_for_image(img_for_hog[:, :, channel], params))
    else:
        features.append(hog_channel_features)

    if params.color_hist_bins > 0:
        img_for_hist = convert_color_from_rgb(img, params.color_hist_cspace)
        # honour the configured bin count instead of color_hist's default
        features.append(color_hist(img_for_hist, nbins=params.color_hist_bins))

    if params.spat_bins > 0:
        features.append(bin_spatial(img, (params.spat_bins, params.spat_bins)))

    return np.concatenate(features)


In [None]:
# Duplicated and adapted from udacity lesson code
def features_for_image_list(imgs, params: FeatureExtractionParams):
    """Extract features for every image and for its horizontally flipped copy.

    The result holds two entries per input image: the features of the image
    itself, directly followed by those of ``cv2.flip(img, 1)`` (data
    augmentation).
    """
    features_list = []
    for img in tqdm(imgs):
        for variant in (img, cv2.flip(img, 1)):
            features_list.append(features_for_image(variant, params))
    return features_list

In [None]:
def build_dataset(params: FeatureExtractionParams):
    """Build the feature matrix and label vector from the loaded training images.

    Returns:
        ``(X, y)``: ``X`` stacks the vehicle feature rows above the
        non-vehicle rows as float64; ``y`` holds 1 for every vehicle row and
        0 for every non-vehicle row.
    """
    vehicles_features = features_for_image_list(vehicle_imgs, params)
    nonvehicles_features = features_for_image_list(nonvehicle_imgs, params)

    # float64 for X is needed because StandardScaler expects it
    stacked = np.vstack((vehicles_features, nonvehicles_features))
    X = stacked.astype(np.float64)

    positive_labels = np.ones(len(vehicles_features))
    negative_labels = np.zeros(len(nonvehicles_features))
    y = np.concatenate((positive_labels, negative_labels))

    return X, y

## Parameter Exploration for SVM Training

In [None]:
def train_svm(X_train, y_train, X_test, y_test):
    """Fit a ``LinearSVC`` on the training data and score it on the test data.

    Returns:
        ``(classifier, score)`` where ``score`` is ``classifier.score`` on the
        test set.
    """
    classifier = LinearSVC().fit(X_train, y_train)
    test_score = classifier.score(X_test, y_test)
    return classifier, test_score
    

In [None]:
def explore_feature_extraction_params(params_list):
    """Train and score one linear SVM per parameter set and print a summary line.

    For each entry the data set is rebuilt, split 80/20 with a fixed random
    state, scaled with a ``StandardScaler`` fitted on the training part, and a
    ``LinearSVC`` is trained. The printed line contains the test score, the
    parameters, the feature extraction and training times, and the feature
    vector length per training image.

    Args:
        params_list: iterable of ``FeatureExtractionParams``.
    """
    rand_state = 33
    for params in params_list:
        t_start = time.time()
        X, y = build_dataset(params)
        t_feature_extraction = time.time() - t_start

        # split into train and test set
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=rand_state)

        # scale features
        X_scaler = StandardScaler().fit(X_train)
        X_train = X_scaler.transform(X_train)
        X_test = X_scaler.transform(X_test)

        t_train_start = time.time()
        _, score = train_svm(X_train, y_train, X_test, y_test)
        t_training = time.time() - t_train_start

        # Feature vector length as seen by the classifier. Extracting features
        # from the full-size test image would report a different, much larger
        # count and cost extra time.
        features_per_img = X.shape[1]

        print("score=" + "{0:.2f}".format(score) + " for " + str(params) +
             " in " + "{0:.1f}".format(t_feature_extraction) + " secs feature extraction and " +
             "{0:.1f}".format(t_training) + " secs training with " + str(features_per_img) + " features/img")

# Parameter sets handed to explore_feature_extraction_params (call is disabled below).
explore_params = []
explore_params.append(
    FeatureExtractionParams(
        hog_color_space = 'LUV',
        hog_orientation = 8,
        hog_pixels_per_cell = 8,
        hog_cells_per_block = 2,
        hog_channels=[0])
)
# Further parameter sets, parked inside a bare string literal so that they are
# not constructed; move an entry into explore_params to explore it.
"""
FeatureExtractionParams(
    hog_color_space = 'HSV',
    hog_orientation = 9,
    hog_pixels_per_cell = 8,
    hog_cells_per_block = 2,
    hog_channels=[2]),
FeatureExtractionParams(
    hog_color_space = 'HSV',
    hog_orientation = 9,
    hog_pixels_per_cell = 8,
    hog_cells_per_block = 2,
    hog_channels=[0,1,2]),
FeatureExtractionParams(
    hog_color_space = 'HLS',
    hog_orientation = 9,
    hog_pixels_per_cell = 8,
    hog_cells_per_block = 2,
    hog_channels=[2]),
FeatureExtractionParams(
    hog_color_space = 'HLS',
    hog_orientation = 9,
    hog_pixels_per_cell = 8,
    hog_cells_per_block = 2,
    hog_channels=[0,1,2]),
FeatureExtractionParams(
    hog_color_space = 'YUV',
    hog_orientation = 9,
    hog_pixels_per_cell = 8,
    hog_cells_per_block = 2,
    hog_channels=[0]),
FeatureExtractionParams(
    hog_color_space = 'YUV',
    hog_orientation = 9,
    hog_pixels_per_cell = 8,
    hog_cells_per_block = 2,
    hog_channels=[0,1,2]),
FeatureExtractionParams(
    hog_color_space = 'YCrCb',
    hog_orientation = 9,
    hog_pixels_per_cell = 8,
    hog_cells_per_block = 2,
    hog_channels=[0,1,2]),
FeatureExtractionParams(
        hog_color_space = 'YUV',
        hog_orientation = 11,
        hog_pixels_per_cell = 16,
        hog_cells_per_block = 2,
        hog_channels=[0,1,2]),
FeatureExtractionParams(
        hog_color_space = 'YUV',
        hog_orientation = 11,
        hog_pixels_per_cell = 16,
        hog_cells_per_block = 2,
        hog_channels=[0])
"""
# Disabled: each run rebuilds the data set and retrains an SVM per parameter set.
#explore_feature_extraction_params(explore_params)



|  No. |  Accuracy | Orientations | Color Space  | Channels  | Pixels per Cell  | Cells per Block | Color Histogram Bins | Time Feature Extraction | Time Training | Features/Img | Augmented? |
|---|---|---|---|---|---|---|---|---|---|---|---|
|  1 |  0.96 | 9  | HSV  |  2 | 8  | 2  | - | 22.5 | 5.2 | ? | n |
|  2 |  0.98 | 9  | HSV  |  0,1,2 | 8  | 2  | - | 131.7 | 9 | ? | n |
|  3 |  0.91 | 9  | HLS  |  2 | 8  | 2  | - | 39.6 | 7.2 | ? | n |
|  4 |  0.98 | 9  | HLS  |  0,1,2 | 8  | 2  | - | 72.3 | 19.6 | ? | n |
|  5 |  0.96 | 9  | YUV  |  0 | 8  | 2  | - | 36.6 | 5.1 | ? | n |
|  6 |  0.98 | 9  | YUV  |  0,1,2 | 8  | 2  | - | 112.4 | 8.3 | ? | n |
|  7 |  0.96 | 9  | YCrCb  |  0 | 8  | 2  | - | 35.6 | 5.1 | ? | n |
|  8 |  0.98 | 9  | YCrCb  |  0,1,2 | 8  | 2  | - | 114.8 | 8.6 | ? | n |
|  9 |  0.97 | 11  | YUV  |  0,1,2 | 16  | 2  | - | 49.3 | 4.5 | ? | n |
|  10 |  0.96 | 11  | YUV  |  0 | 16  | 2  | - | 22.6 | 9.3 | ? | n |
|  11 |  0.91 | 11  | HLS  |  0 | 16  | 2  | - | 18.1 | 13 | ? | n |
|  12 |  0.92 | 16  | HLS  |  0 | 8  | 2  | - | 32.6 | 21 | ? | n |
|  13 |  0.98 | 16  | HSV  |  1,2 | 8 | 2  | - | 105.7 | 31.9 | ? | n |
|  14 |  0.96 | 8  | HSV  |  1,2 | 8 | 2  | - | 55.2 | 15.4 | ? | n |
|  15 |  0.95 | 8  | YUV  |  0 | 8 | 2  | - | 24.9 | 10.3 | ? | n |
|  16 |  0.95 | 8  | LUV  |  0 | 8 | 2  | - | 26.0 | 10.9 | ? | n |
|  17 |  0.94 | 8  | LUV  |  0 | 8 | 2  | - | 102.4 | 40.8 | ? | y |
|  18 |  0.96 | 8  | YUV  |  0 | 8 | 2  | 32 | ? | 21.0 | 1664 | y |

## Train Linear SVM with chosen feature extraction parameters 

In [None]:
# Final settings: HOG on channel 0 of YUV plus a 32-bin RGB color histogram;
# spatial binning is switched off (spat_bins=0).
feature_extraction_params = FeatureExtractionParams(
    hog_color_space='YUV',
    hog_orientation=8,
    hog_pixels_per_cell=8,
    hog_cells_per_block=2,
    hog_channels=[0],
    color_hist_bins=32,
    color_hist_cspace='RGB',
    spat_bins=0,
)

# Sanity check on a single training image before processing the whole set.
sample_features = features_for_image(vehicle_imgs[0], feature_extraction_params)
print("feature count per image: " + str(len(sample_features)))

X, y = build_dataset(feature_extraction_params)
print("data set size (images): " + str(len(X)))

In [None]:
# Shuffle the samples and hold back 20% of them as the test subset
# (fixed random_state keeps the split repeatable).
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts
print("feature count per image: " + str(X.shape[1]))

In [None]:
# Fit the scaler on the training subset only, then apply it to both subsets.
X_scaler = StandardScaler()
X_scaler.fit(X_train)
X_train_scaled, X_test_scaled = (
    X_scaler.transform(subset) for subset in (X_train, X_test)
)

In [None]:
# Duplicated and adapted from udacity lesson code
# Trains the final classifier `svc` on the scaled training subset and prints
# timing and accuracy figures.

# Use a linear SVC 
svc = LinearSVC()
# Check the training time for the SVC
print('Feature vector length:', len(X_train_scaled[0]))
t = time.time()
svc.fit(X_train_scaled, y_train)
t2 = time.time()
print(round(t2-t, 2), 'Seconds to train SVC...')
# Check the score of the SVC on the held-out test subset
print('Test Accuracy of SVC = ', round(svc.score(X_test_scaled, y_test), 4))
# Check the prediction time for the first n_predict test samples
# (the measured interval also includes the two print calls)
t=time.time()
n_predict = 10
print('My SVC predicts: ', svc.predict(X_test_scaled[0:n_predict]))
print('For these',n_predict, 'labels: ', y_test[0:n_predict])
t2 = time.time()
print(round(t2-t, 5), 'Seconds to predict', n_predict,'labels with SVC')

## Sliding Windows

In [None]:
# Duplicated from udacity lesson code
def draw_boxes(img, bboxes, colors = [(0,255,0),(0,0,255)], thick=3):
    """Return a copy of ``img`` with one rectangle drawn per bounding box.

    Each box is ``(corner, opposite_corner)``; the colors are used in turn,
    starting again from the first one when the list is exhausted. The input
    image is left untouched.
    """
    canvas = np.copy(img)
    n_colors = len(colors)
    for position, box in enumerate(bboxes):
        box_color = colors[position % n_colors]
        cv2.rectangle(canvas, box[0], box[1], box_color, thick)
    return canvas

In [None]:
# Duplicated from udacity lesson code

def slide_window(img, x_start_stop=(None, None), y_start_stop=(None, None),
                    xy_window=(64, 64), xy_overlap=(0.5, 0.5)):
    """Enumerate overlapping search windows inside a region of an image.

    Args:
        img: image array; only ``img.shape`` is used.
        x_start_stop: ``(start, stop)`` of the search region in x; a ``None``
            entry means the image border (0 / image width).
        y_start_stop: ``(start, stop)`` of the search region in y; a ``None``
            entry means the image border (0 / image height).
        xy_window: window size ``(width, height)`` in pixels.
        xy_overlap: overlap fraction of neighbouring windows in x and y.

    Returns:
        List of windows ``((startx, starty), (endx, endy))``, ordered row by
        row (y outer loop, x inner loop).

    The start/stop arguments are only read. Filling the ``None`` entries in
    place would alter the caller's list and, for the defaults, leak the size
    of the first image into every later call.
    """
    x_start = 0 if x_start_stop[0] is None else x_start_stop[0]
    x_stop = img.shape[1] if x_start_stop[1] is None else x_start_stop[1]
    y_start = 0 if y_start_stop[0] is None else y_start_stop[0]
    y_stop = img.shape[0] if y_start_stop[1] is None else y_start_stop[1]

    # Span of the region to be searched
    xspan = x_stop - x_start
    yspan = y_stop - y_start

    # Pixels per step in x/y (builtin int: the np.int alias no longer exists
    # in current NumPy releases)
    nx_pix_per_step = int(xy_window[0] * (1 - xy_overlap[0]))
    ny_pix_per_step = int(xy_window[1] * (1 - xy_overlap[1]))

    # Number of windows in x/y; the buffer keeps the last window inside the span
    nx_buffer = int(xy_window[0] * xy_overlap[0])
    ny_buffer = int(xy_window[1] * xy_overlap[1])
    nx_windows = int((xspan - nx_buffer) / nx_pix_per_step)
    ny_windows = int((yspan - ny_buffer) / ny_pix_per_step)

    window_list = []
    for ys in range(ny_windows):
        starty = ys * ny_pix_per_step + y_start
        endy = starty + xy_window[1]
        for xs in range(nx_windows):
            startx = xs * nx_pix_per_step + x_start
            endx = startx + xy_window[0]
            window_list.append(((startx, starty), (endx, endy)))
    return window_list

In [None]:
y_start_stop = (350, 700)

# One layer of sliding windows per window size: (vertical search band, window edge length).
window_layer_specs = [
    ((400, 496), 64),
    ((370, 514), 96),
    ((370, 562), 128),
    ((370, 658), 192),
    ((370, 700), 256),
]
windows_layers = [
    slide_window(img_test, x_start_stop=[None, None], y_start_stop=band,
                 xy_window=(edge, edge), xy_overlap=(0.75, 0.75))
    for band, edge in window_layer_specs
]

# Visualise every layer on its own copy of the test image.
windows_imgs = [draw_boxes(img_test, layer) for layer in windows_layers]
plot_images_grid(windows_imgs,
                 ['windows' + str(i) for i in range(5)],
                 axis_on=True)

#cv2.imwrite("./output_images/windows.jpg", window_img)

# Flatten all layers into the single window list used for searching.
windows = [window_box for layer in windows_layers for window_box in layer]
print("window count: " + str(len(windows)))

## Search Vehicles in Windows (HOG on every window)

In [None]:
# Duplicated and adapted from udacity lesson code
def search_windows(img, windows, classifier, feature_extraction_params):
    """Return the windows that the classifier labels as 1.

    Every window ``((x1, y1), (x2, y2))`` is cut out of ``img``, resized to
    64x64, turned into a feature vector, scaled with the module-level
    ``X_scaler`` and passed to ``classifier.predict``. Windows are returned
    in input order.
    """
    def is_positive(window):
        top, bottom = window[0][1], window[1][1]
        left, right = window[0][0], window[1][0]
        patch = cv2.resize(img[top:bottom, left:right], (64, 64))
        patch_features = features_for_image(patch, feature_extraction_params)
        # the scaler and the classifier both expect a 2-D (1, n_features) array
        scaled = X_scaler.transform(np.array(patch_features).reshape(1, -1))
        return classifier.predict(np.array(scaled).reshape(1, -1)) == 1

    return [window for window in windows if is_positive(window)]

In [None]:
def bbox_img(img):
    """Return a copy of ``img`` with every positively classified search window drawn."""
    return draw_boxes(img, search_windows(img, windows, svc, feature_extraction_params))


plot_images_grid(list(map(bbox_img, imgs_test)))

## Search Vehicles in Windows (HOG once per full image)

In [None]:
# Duplicated and adapted from udacity lesson code
def find_cars(img, svc, feature_extraction_params, scale, ystart, ystop, all_windows=False):
    """Search one horizontal band of an image for vehicles at a single scale.

    HOG is computed once for the whole (rescaled) band; the HOG blocks of each
    64x64 search window are then cut out of that result instead of being
    recomputed per window. The remaining features come from the window's
    image patch. Features are scaled with the module-level ``X_scaler``.

    Args:
        img: RGB image.
        svc: trained classifier with a ``predict`` method.
        feature_extraction_params: settings used for training the classifier.
        scale: the band is shrunk by this factor, so a 64 px window covers
            ``64 * scale`` pixels of the original image.
        ystart: first image row of the band.
        ystop: row at which the band ends (exclusive).
        all_windows: if True, return every searched window regardless of the
            prediction.

    Returns:
        List of boxes ``((x1, y1), (x2, y2))`` in coordinates of ``img``.
    """
    img_tosearch = img[ystart:ystop, :, :]
    ctrans_tosearch = convert_color_from_rgb(img_tosearch, feature_extraction_params.hog_color_space)

    if scale != 1:
        imshape = ctrans_tosearch.shape
        # builtin int: the np.int alias no longer exists in current NumPy releases
        scaled_size = (int(imshape[1] / scale), int(imshape[0] / scale))
        ctrans_tosearch = cv2.resize(ctrans_tosearch, scaled_size)
        img_tosearch = cv2.resize(img_tosearch, scaled_size)

    # un-flattened HOG result per configured channel
    hog_per_channel = [
        hog_features_for_image(ctrans_tosearch[:, :, channel],
                               feature_extraction_params, feature_vec=False)
        for channel in feature_extraction_params.hog_channels
    ]

    # Define blocks and steps
    pix_per_cell = feature_extraction_params.hog_pixels_per_cell
    cell_per_block = feature_extraction_params.hog_cells_per_block
    nxblocks = (ctrans_tosearch.shape[1] // pix_per_cell) - cell_per_block + 1
    nyblocks = (ctrans_tosearch.shape[0] // pix_per_cell) - cell_per_block + 1

    # 64 was the original sampling rate, with 8 cells and 8 pix per cell
    window = 64
    nblocks_per_window = (window // pix_per_cell) - cell_per_block + 1
    cells_per_step = 2  # Instead of overlap, define how many cells to step
    nxsteps = (nxblocks - nblocks_per_window) // cells_per_step + 1
    nysteps = (nyblocks - nblocks_per_window) // cells_per_step + 1

    bboxes = []
    for xb in range(nxsteps):
        for yb in range(nysteps):
            ypos = yb * cells_per_step
            xpos = xb * cells_per_step
            xleft = xpos * pix_per_cell
            ytop = ypos * pix_per_cell

            # Extract HOG for this patch
            hog_features_patch = np.concatenate([
                channel_hog[ypos:ypos + nblocks_per_window,
                            xpos:xpos + nblocks_per_window].ravel()
                for channel_hog in hog_per_channel
            ])

            # Extract the image patch
            subimg = cv2.resize(img_tosearch[ytop:ytop + window, xleft:xleft + window], (64, 64))

            features_patch = features_for_image(subimg, feature_extraction_params, hog_features_patch)

            # Scale features and make a prediction
            # TODO float64 for scaler
            test_features = X_scaler.transform(features_patch.reshape(1, -1))
            test_prediction = svc.predict(test_features)

            if test_prediction == 1 or all_windows:
                # map the window back to coordinates of the original image
                xbox_left = int(xleft * scale)
                ytop_draw = int(ytop * scale)
                win_draw = int(window * scale)
                bboxes.append(((xbox_left, ytop_draw + ystart),
                               (xbox_left + win_draw, ytop_draw + win_draw + ystart)))

    return bboxes

In [None]:
def find_cars_multi_scale_layers(img, all_windows=False):
    """Run ``find_cars`` once per configured search band.

    Uses the module-level ``svc`` and ``feature_extraction_params``.

    Returns:
        One list of bounding boxes per search band, in configuration order.
    """
    # Earlier band configuration, kept for reference:
    #   (1.0, 400, 496), (1.5, 370, 514), (2.0, 370, 562),
    #   (2.5, 370, 658), (3.0, 370, 700)

    # (scale, ystart, ystop) of every search band
    search_bands = (
        (1.0, 400, 464),
        (1.0, 416, 480),
        (1.5, 400, 496),
        (1.5, 432, 528),
        (2.0, 400, 528),
        (2.0, 432, 560),
        (3.5, 400, 624),
        (3.5, 455, 679),
    )
    return [
        find_cars(img, svc, feature_extraction_params, scale, ystart, ystop, all_windows)
        for scale, ystart, ystop in search_bands
    ]

In [None]:
# Show every search window of every band on the reference test image.
windows_layers = find_cars_multi_scale_layers(img_test, all_windows=True)
windows_imgs = []
for windows_layer in windows_layers:
    windows_imgs.append(draw_boxes(img_test, windows_layer))
layer_titles = ['windows' + str(i) for i in range(len(windows_imgs))]
plot_images_grid(windows_imgs, layer_titles, axis_on=True)

# Save the visualisations; cv2.imwrite expects BGR channel order.
for idx, img in enumerate(windows_imgs):
    img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    cv2.imwrite("./output_images/windows-" + str(idx) + ".jpg", img_bgr)

In [None]:
def find_cars_multi_scale(img):
    """Return the detections of all search bands as one flat list of boxes."""
    return [
        bbox
        for bboxes_layer in find_cars_multi_scale_layers(img)
        for bbox in bboxes_layer
    ]

In [None]:
def bbox_img2(img):
    """Return a copy of ``img`` with all multi-scale detections drawn in one color."""
    return draw_boxes(img, find_cars_multi_scale(img), colors=[(0,0,255)])


# Detect on every test image, show the results and save them (BGR order for cv2.imwrite).
imgs_test_bboxes = list(map(bbox_img2, imgs_test))
plot_images_grid(imgs_test_bboxes)
for idx, img in enumerate(imgs_test_bboxes):
    img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    cv2.imwrite("./output_images/imgs_test_bboxes_" + str(idx) + ".jpg", img_bgr)

## Filtering False Positives using Heatmap

In [None]:
# Duplicated and adapted from udacity lesson code

def heatmap_add(heatmap, bbox_list):
    """Add 1 to every heat map pixel covered by each box, in place.

    Boxes have the form ``((x1, y1), (x2, y2))``; the second corner is
    exclusive. The modified ``heatmap`` itself is returned.
    """
    for box in bbox_list:
        first_corner, second_corner = box[0], box[1]
        rows = slice(first_corner[1], second_corner[1])
        cols = slice(first_corner[0], second_corner[0])
        heatmap[rows, cols] += 1
    return heatmap
    
def heatmap_apply_threshold(heatmap, threshold):
    """Zero, in place, every heat value that is not above ``threshold``.

    Values equal to the threshold are zeroed as well. The modified
    ``heatmap`` itself is returned.
    """
    too_cold = heatmap <= threshold
    heatmap[too_cold] = 0
    return heatmap

def draw_labeled_bboxes(img, labels):
    """Draw one bounding rectangle per labeled region directly onto ``img``.

    Args:
        img: image that is drawn on (modified in place).
        labels: pair ``(label_map, count)``; regions are numbered
            ``1..count`` inside ``label_map``.

    Returns:
        The same ``img`` object.
    """
    label_map, car_count = labels[0], labels[1]
    for car_number in range(1, car_count + 1):
        # row (y) and column (x) indices of all pixels of this region
        hits = (label_map == car_number).nonzero()
        ys = np.array(hits[0])
        xs = np.array(hits[1])
        corner_min = (np.min(xs), np.min(ys))
        corner_max = (np.max(xs), np.max(ys))
        cv2.rectangle(img, corner_min, corner_max, (0,0,255), 6)
    return img

# Add heat to each box in box list
# (builtin float: the np.float alias no longer exists in current NumPy releases)
test_heatmap = np.zeros_like(img_test[:,:,0]).astype(float)
test_bboxes = find_cars_multi_scale(img_test)
test_heatmap = heatmap_add(test_heatmap, test_bboxes)

# Apply threshold to help remove false positives: pixels with heat <= 3 are
# zeroed. The threshold is applied in place, so test_heat and test_heatmap
# refer to the same array.
test_heat = heatmap_apply_threshold(test_heatmap, 3)

# Find final boxes from heatmap using label function
labels = label(test_heatmap)
test_img_labels = draw_labeled_bboxes(np.copy(img_test), labels)

# Left: detected car positions, right: the thresholded heat map
fig = plt.figure()
plt.subplot(121)
plt.imshow(test_img_labels)
plt.title('Car Positions')
plt.subplot(122)
plt.imshow(test_heatmap, cmap='hot')
plt.title('Heat Map')
fig.tight_layout()

## Video Pipeline

In [None]:
# Heat maps of the most recent frames; the deque drops the oldest entry itself.
heat_history_max_len = 3
heat_history = collections.deque([], heat_history_max_len)
def process_video_image(img):
    """Detect vehicles in one video frame and draw their bounding boxes.

    The heat map of the current frame is summed with the heat maps kept in the
    module-level ``heat_history``. Only pixels whose summed heat exceeds
    ``3 * (heat_history_max_len + 1)`` survive, and each connected surviving
    region is drawn as one box.

    Args:
        img: RGB video frame; the boxes are drawn onto it in place.

    Returns:
        The frame with the bounding boxes drawn.
    """
    bboxes = find_cars_multi_scale(img)

    # TODO Visualize the heatmap when displaying

    # int32 rather than the frame's own dtype: a uint8 heat map would wrap
    # around once the summed heat of a pixel passes 255
    heat_new = heatmap_add(np.zeros(img.shape[:2], dtype=np.int32), bboxes)
    heat = np.copy(heat_new)
    for heat_past in heat_history:
        heat += heat_past
    # store the un-summed heat of this frame only
    heat_history.append(heat_new)
    heat = heatmap_apply_threshold(heat, 3*(heat_history_max_len+1))
    labels = label(heat)
    return draw_labeled_bboxes(img, labels)
    

## Test Video Processing

In [None]:
# Run the detection pipeline on every frame of the test video and save the result.
clip = VideoFileClip("test_video.mp4")
clip_cut = clip  # whole clip; clip.subclip(19, 24) would process an excerpt only
clip_augmented = clip_cut.fl_image(process_video_image)
clip_augmented.write_videofile(
    "test_video_result.mp4",
    audio=False,
    progress_bar=True,
)

# Embed the rendered video in the notebook output.
video_markup = """
<video width="640" height="360" controls>
  <source src="{0}">
</video>
""".format("test_video_result.mp4")
HTML(video_markup)

## Project Video Processing

In [None]:
# Run the detection pipeline on every frame of the project video and save the result.
clip = VideoFileClip("project_video.mp4")
clip_cut = clip  # whole clip; clip.subclip(19, 24) would process an excerpt only
clip_augmented = clip_cut.fl_image(process_video_image)
clip_augmented.write_videofile(
    "project_video_result.mp4",
    audio=False,
    progress_bar=True,
)

# Embed the rendered video in the notebook output.
video_markup = """
<video width="640" height="360" controls>
  <source src="{0}">
</video>
""".format("project_video_result.mp4")
HTML(video_markup)