In [221]:
import itertools
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from scipy.misc import imresize,imsave
import glob
import pickle
from skimage.feature import hog
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV,GridSearchCV,train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC,LinearSVC
from moviepy.editor import VideoFileClip
from IPython.display import HTML
from scipy.ndimage.measurements import label
from collections import deque
import random
import os

matplotlib.rcParams['figure.figsize']=[24.0,16.0]
red = (1.0,0,0)
green = (0,1.0,0)
blue = (0,0,1.0)
white = (1.0,1.0,1.0)

In [222]:
non_vehicles = '/Users/sunilsn/Desktop/self-driving-car-nanodegree/datasets/vehicle-tracking/non-vehicles/'
cars = glob.glob('/Users/sunilsn/Desktop/self-driving-car-nanodegree/datasets/vehicle-tracking/vehicles/*/*.png')
non_cars = glob.glob(non_vehicles+'*/*.png')
cutouts = glob.glob('/Users/sunilsn/Desktop/self-driving-car-nanodegree/datasets/vehicle-tracking/cutouts/cutout*.jpg')
sample_frame = '/Users/sunilsn/Desktop/self-driving-car-nanodegree/datasets/vehicle-tracking/cutouts/bbox-example-image.jpg'
project_video = '/Users/sunilsn/Desktop/self-driving-car-nanodegree/projects/CarND-Vehicle-Detection/project_video.mp4'
test_video = '/Users/sunilsn/Desktop/self-driving-car-nanodegree/projects/CarND-Vehicle-Detection/test_video.mp4'
project_video_output = '/Users/sunilsn/Desktop/self-driving-car-nanodegree/projects/CarND-Vehicle-Detection/project_video_output.mp4'
test_video_output = '/Users/sunilsn/Desktop/self-driving-car-nanodegree/projects/CarND-Vehicle-Detection/test_video_output.mp4'


num_examples = None
cars = cars[:num_examples]
non_cars = non_cars[:num_examples]
cspace = 'YCrCb' #'RGB'
#cspace = 'RGB'
spatial = 32
hist_bins = 32
hist_range = (0,1)
num_orientation_bins = 12
pix_per_cell = 8
cell_per_block = 2

In [223]:
def img_read(img_fname):
    ret = mpimg.imread(img_fname)
    if ret.dtype == np.uint8:
        ret = ret.astype(np.float32)/255
    elif ret.dtype == np.float32:
        pass
    else:
        print('unhandled image type : ',ret.dtype,img_fname)
    if np.max(ret)>1:
        print('max value in image greater than 1 ',img_fname)
    return ret

def img_write(img_fname,img):
    imsave(img_fname,img)
    
def bin_spatial(img, size):
    resized_img = cv2.resize(img, size)
    features = resized_img.ravel() 
    return features

def color_hist(img, nbins, bins_range):
    channel1_hist = np.histogram(img[:,:,0], bins=nbins, range=bins_range)
    channel2_hist = np.histogram(img[:,:,1], bins=nbins, range=bins_range)
    channel3_hist = np.histogram(img[:,:,2], bins=nbins, range=bins_range)
    hist_features = np.concatenate((channel1_hist[0], channel2_hist[0], channel3_hist[0]))
    return hist_features

def get_hog_features(ch, num_orientation_bins, pix_per_cell, cell_per_block, vis=False, feature_vec=True):
    return hog(ch, orientations=num_orientation_bins, pixels_per_cell=(pix_per_cell, pix_per_cell),
                   cells_per_block=(cell_per_block, cell_per_block), transform_sqrt=False, 
                   visualise=vis, feature_vector=feature_vec,block_norm='L2-Hys')
    

def convert_color(image,cspace):
    if cspace != 'RGB':
        flag = {'HSV':cv2.COLOR_RGB2HSV,'HLS':cv2.COLOR_RGB2HLS,'YUV':cv2.COLOR_RGB2YUV,
                'LUV':cv2.COLOR_RGB2LUV,'YCrCb':cv2.COLOR_RGB2YCrCb}
        converted_image = cv2.cvtColor(image,flag[cspace])
    else: converted_image = np.copy(image) 
    return converted_image

def intersects(b1,b2):
    (lx1,ly1),(rx1,ry1) = b1
    (lx2,ly2),(rx2,ry2) = b2
    if ry1 <ly2  or ly1>ry2 or rx1<lx2 or rx2<lx1:
        return False
    else:
        return True

def gen_images(image,augment_dir,num_images=100,exclude_bboxes=[],size=64,image_bbox=None):
    try:
        os.mkdir(augment_dir)
    except:
        print('error creating directory')
    if image_bbox is not None:
        (x1,y1),(x2,y2) = image_bbox
        cimg = image[y1:y2,x1:x2]
    else:
        cimg = image
    for aimg,index in zip(list(itertools.islice(rand_sub_images(cimg,exclude_bboxes,size),num_images)),range(num_images)):
        img_write(augment_dir+'/image_%05d.png'%(index),aimg)
    
def rand_sub_images(img,exclude_bboxes,size):
    ymax = img.shape[1]
    xmax = img.shape[0]
    while True:
        x,y = random.randrange(ymax-size),random.randrange(xmax-size)
        bbox=((x,y),(x+size,y+size))
        if all([intersects(bbox,ebox)==False for ebox in exclude_bboxes]):
            yield img[y:y+size,x:x+size]            
            
def gen_car_images(img,car_bboxes):
    for (x1,y1),(x2,y2) in car_bboxes:
        yield img[y1:y2,x1:x2]
        
    
def image_features(image, cspace, spatial_size, hist_bins, hist_range):
    converted_image = convert_color(image,cspace)
    feature_image = cv2.resize(converted_image,(64,64))
    spatial_features = bin_spatial(feature_image, size=spatial_size)
    hist_features = color_hist(feature_image, nbins=hist_bins, bins_range=hist_range)
    f = lambda ch: get_hog_features(feature_image[:,:,ch],num_orientation_bins,pix_per_cell,cell_per_block)
    hog_features = np.hstack((f(0),f(1),f(2)))
    final_features = np.hstack((spatial_features, hist_features,hog_features))
    return final_features

# Define a single function that can extract features using hog sub-sampling and make predictions
def car_features_for_scale(img, ystart, ystop, scale, orient, 
                           pix_per_cell, cell_per_block, spatial_size, hist_bins):
    car_boxes = []
    img_tosearch = img[ystart:ystop,:,:]
    ctrans_tosearch = convert_color(img_tosearch,'YCrCb')
    if scale != 1:
        imshape = ctrans_tosearch.shape
        ctrans_tosearch = cv2.resize(ctrans_tosearch, (np.int(imshape[1]/scale), np.int(imshape[0]/scale)))
        
    ch1 = ctrans_tosearch[:,:,0]
    ch2 = ctrans_tosearch[:,:,1]
    ch3 = ctrans_tosearch[:,:,2]

    # Define blocks and steps as above
    nxblocks = (ch1.shape[1] // pix_per_cell) - cell_per_block + 1
    nyblocks = (ch1.shape[0] // pix_per_cell) - cell_per_block + 1 
    nfeat_per_block = orient*cell_per_block**2

    # 64 was the orginal sampling rate, with 8 cells and 8 pix per cell
    window = 64
    nblocks_per_window = (window // pix_per_cell) - cell_per_block + 1
    cells_per_step = 2  # Instead of overlap, define how many cells to step
    nxsteps = (nxblocks - nblocks_per_window) // cells_per_step + 1
    nysteps = (nyblocks - nblocks_per_window) // cells_per_step + 1
    # Compute individual channel HOG features for the entire image
    hog1 = get_hog_features(ch1, orient, pix_per_cell, cell_per_block, feature_vec=False)
    hog2 = get_hog_features(ch2, orient, pix_per_cell, cell_per_block, feature_vec=False)
    hog3 = get_hog_features(ch3, orient, pix_per_cell, cell_per_block, feature_vec=False)
    
    for xb in range(nxsteps):
        for yb in range(nysteps):
            ypos = yb*cells_per_step
            xpos = xb*cells_per_step
            # Extract HOG for this patch
            hog_feat1 = hog1[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel() 
            hog_feat2 = hog2[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel() 
            hog_feat3 = hog3[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel() 
            hog_features = np.hstack((hog_feat1, hog_feat2, hog_feat3))
            xleft = xpos*pix_per_cell
            ytop = ypos*pix_per_cell
            # Extract the image patch
            subimg = cv2.resize(ctrans_tosearch[ytop:ytop+window, xleft:xleft+window], (64,64))
          
            # Get color features
            spatial_features = bin_spatial(subimg, size=spatial_size)
            hist_features = color_hist(subimg, nbins=hist_bins, bins_range=hist_range)
            # Scale features and make a prediction
            final_features = np.hstack((spatial_features, hist_features, hog_features)) 
            xbox_left = np.int(xleft*scale)
            ytop_draw = np.int(ytop*scale)
            win_draw = np.int(window*scale)
            yield final_features,((xbox_left, ytop_draw+ystart),(xbox_left+win_draw,ytop_draw+win_draw+ystart))


def find_cars_for_scale(img, ystart, ystop, scale, car_feature_classifier, orient, 
                        pix_per_cell, cell_per_block, spatial_size, hist_bins):
    for features,bbox in car_features_for_scale(img, ystart, ystop, scale, orient, 
                        pix_per_cell, cell_per_block, spatial_size, hist_bins):
        if car_feature_classifier(features)>0.5:
            yield bbox

def visualize_image_features(imgfile,scaler,output_prefix=None):
    print(imgfile)
    fig = plt.figure(figsize=(12,4))
    plt.subplot(221)
    image = img_read(imgfile)
    plt.imshow(image)
    raw_features = image_features(image,cspace=cspace,spatial_size=(spatial,spatial),hist_bins=hist_bins,hist_range=hist_range)
    converted_image = convert_color(image, cspace)
    hog_features,hog_image_0 = get_hog_features(converted_image[:,:,0],
                                              num_orientation_bins,pix_per_cell,cell_per_block,vis=True)
    hog_features1,hog_image_1 = get_hog_features(converted_image[:,:,1],
                                              num_orientation_bins,pix_per_cell,cell_per_block,vis=True)
    hog_features2,hog_image_2 = get_hog_features(converted_image[:,:,2],
                                              num_orientation_bins,pix_per_cell,cell_per_block,vis=True)
    f = lambda x : np.dstack([x/np.max(x)]*3)
    h0 = f(hog_image_0)
    h1 = f(hog_image_1)
    h2 = f(hog_image_2)
    print('sizes of stacked images')
    for i in [h0,h1,h2]:
        print(np.max(i),i.dtype,i.shape)
    print('hog data done')
    if output_prefix is not None:
        img_write(img=hog_image_0,img_fname=output_prefix+'hog_image_0.jpg')
        img_write(img=hog_image_1,img_fname=output_prefix+'hog_image_1.jpg')
        img_write(img=hog_image_2,img_fname=output_prefix+'hog_image_2.jpg')
        img_write(img=concat_images(images=[image,h0,h1,h2]),
                  img_fname=output_prefix+'hog_combined.jpg')
    normalized_features = scaler.transform(raw_features.reshape((1,-1)))
    plt.title('Original Image')
    plt.subplot(222)
    plt.plot(raw_features)
    plt.title('Raw Features')
    plt.subplot(223)
    plt.plot(normalized_features[0])
    plt.title('Normalized Features')
    plt.subplot(224)
    plt.imshow(hog_image_0)
    fig.tight_layout()
    return fig

def tune_hyperparams(X,y):
    #parameters = {'kernel':['linear', 'rbf'], 'C':[100,500,1000,2000],'gamma':[0.0002,0.002,0.02,0.00002]}
    #svr = SVC()
    parameters = {'C':[1000]}
    svr = LinearSVC()
    clf = GridSearchCV(svr, parameters,verbose=3)
    clf.fit(X, y)
    return clf

def accuracy(mdl,X,y):
    pred = mdl.predict(X)
    return accuracy_score(pred,y)

def compute_model():
    def feature_calculator(images):
        return [image_features(img,cspace=cspace,spatial_size=(spatial,spatial),
                               hist_bins=hist_bins,hist_range=hist_range) for img in images]

    features = feature_calculator([img_read(x) for x in cars+non_cars])
    feature_scaler = StandardScaler().fit(features)
    features = feature_scaler.transform(features)
    labels = np.hstack((np.ones(shape=(len(cars),)),np.zeros(shape=(len(non_cars),))))
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.33, random_state=42)
    model = tune_hyperparams(X_train,y_train)
    print('train_accuracy : ',accuracy(model,X_train,y_train),X_train.shape,y_train.shape)
    print('test_accuracy  : ',accuracy(model,X_test,y_test),X_test.shape,y_test.shape)
    return model,feature_scaler

def draw_boxes(img, bboxes, color=blue, thick=6):
    imcopy = np.copy(img)
    for bbox in bboxes:
        cv2.rectangle(imcopy, bbox[0], bbox[1], color, thick)
    return imcopy
    
    
def slide_window(img, x_start_stop=[None, None], y_start_stop=[None, None], 
                    xy_window=(64, 64), xy_overlap=(0.5, 0.5)):
    x_start_stop = list(x_start_stop)
    y_start_stop = list(y_start_stop)
    if x_start_stop[0] == None:
        x_start_stop[0] = 0
    if x_start_stop[1] == None:
        x_start_stop[1] = img.shape[1]
    if y_start_stop[0] == None:
        y_start_stop[0] = 0
    if y_start_stop[1] == None:
        y_start_stop[1] = img.shape[0]
    # Compute the span of the region to be searched    
    xspan = x_start_stop[1] - x_start_stop[0]
    yspan = y_start_stop[1] - y_start_stop[0]
    # Compute the number of pixels per step in x/y
    nx_pix_per_step = np.int(xy_window[0]*(1 - xy_overlap[0]))
    ny_pix_per_step = np.int(xy_window[1]*(1 - xy_overlap[1]))
    # Compute the number of windows in x/y
    nx_buffer = np.int(xy_window[0]*(xy_overlap[0]))
    ny_buffer = np.int(xy_window[1]*(xy_overlap[1]))
    nx_windows = np.int((xspan-nx_buffer)/nx_pix_per_step) 
    ny_windows = np.int((yspan-ny_buffer)/ny_pix_per_step) 
    window_list = []
    for ys in range(ny_windows):
        for xs in range(nx_windows):
            startx = xs*nx_pix_per_step + x_start_stop[0]
            endx = startx + xy_window[0]
            starty = ys*ny_pix_per_step + y_start_stop[0]
            endy = starty + xy_window[1]
            window_list.append(((startx, starty), (endx, endy)))
    return window_list

def subwindow(image,box):
    ((xmin,ymin),(xmax,ymax)) = box
    return image[ymin:ymax,xmin:xmax]

def multires_slide_windows(img):
    slide_window_args=[((None,None),(400,None),(320,320),(0.75,0.75),red),
                       ((None,None),(400,None),(256,256),(0.75,0.75),green),
                       ((None,None),(400,628),(128,128),(0.5,0.5),blue),
                       ((None,None),(400,628),(64,64),(0.5,0.5),white)]
    multires_windows = []
    for x_start_stop,y_start_stop,xy_window,xy_overlap,color in slide_window_args:
        multires_windows.append((xy_window,slide_window(img,x_start_stop,y_start_stop,xy_window,xy_overlap),color))
    return multires_windows

def draw_multires_windows(img,multires_windows):
    return concat_images([draw_boxes(img,boxes,color) for xy_window,boxes,color in multires_windows])

def concat_images(images):
    for i in images:
        print(i.shape)
    num_images_per_row = int(np.sqrt(len(images)))
    if len(images)>num_images_per_row*num_images_per_row:
        num_images_per_row +=1
    num_rows = int(len(images)/num_images_per_row)
    if len(images)>num_rows*num_images_per_row:
        num_rows+=1
    num_blank_images = num_images_per_row *num_rows - len(images)
    images += [np.zeros_like(images[0])]*num_blank_images
    return np.vstack([np.hstack(images[i*num_images_per_row:(i+1)*num_images_per_row]) for i in range(num_rows)])

def is_car(fname,car_classifier):
    img = img_read(fname)
    prediction = car_classifier([img])
    return prediction

        
def find_cars_old(frame,car_image_classifier):
    bboxes = list(itertools.chain(*[boxes for _,boxes,_ in multires_slide_windows(frame)]))
    images = [subwindow(frame,bbox) for bbox in bboxes]
    return [bbox for bbox,label in zip(bboxes,[car_image_classifier(x) for x in images]) if label>0.5]

def find_cars(frame,car_feature_classifier):
    def helper(ystart,ystop,scale):
        return find_cars_for_scale(frame, ystart, ystop, scale, car_feature_classifier, num_orientation_bins, 
                                   pix_per_cell, cell_per_block, (spatial,spatial), hist_bins)
    bounds = [(300,720,4.0),(300,628,3.0),(300,628,2.0),(300,628,1.5),(300,628,1.0),(300,628,0.8)]
    return list(itertools.chain(*[helper(*x) for x in bounds]))

def genvideo(input_video_file,output_video_file,frame_trf):
    test_clip = VideoFileClip(input_video_file)
    new_clip = test_clip.fl_image(frame_trf) #NOTE: this function expects color images!!
    new_clip.write_videofile(output_video_file, audio=False)
    
def showvideo(vfile):    
    vtemplate="""<video width="640" height="300" controls><source src="{0}" type="video/mp4"></video>"""
    return HTML(vtemplate.format(vfile))

def labeled_bboxes(labels):
    for car_number in range(1, labels[1]+1):
        nonzero = (labels[0] == car_number).nonzero()
        nonzeroy = np.array(nonzero[0])
        nonzerox = np.array(nonzero[1])
        bbox = ((np.min(nonzerox), np.min(nonzeroy)), (np.max(nonzerox), np.max(nonzeroy)))
        yield bbox

frame_id = 0        
def find_cars_in_video(input_video_file,output_video_file,car_feature_clf,write_component_images=False):
    global frame_id
    heatmaps = deque(maxlen=5)
    frame_id = 0
    def draw_car_bboxes(frame):
        global frame_id
        frame_id=frame_id+1
        frame_prefix = 'frames/frame_%03d'%(frame_id,)
        frame_float = frame.astype(np.float32)/255
        bboxes = find_cars(frame_float,car_feature_clf)
        heatmaps.append(add_heat(np.zeros_like(frame),bboxes))
        num_frames_available = len(heatmaps)
        cum_heatmap = np.zeros_like(frame_float)
        for i in range(num_frames_available):
            cum_heatmap += heatmaps[-(i+1)]
        cum_heat_image = cum_heatmap*(255/np.max(cum_heatmap))
        cum_heatmap[cum_heatmap<=(num_frames_available*7)] = 0
        cum_heatmap[cum_heatmap!=0]=255
        labels = label(cum_heatmap)
        deduped_bboxes = labeled_bboxes(labels)
        frame_with_bboxes = draw_boxes(frame,bboxes)
        frame_with_deduped_bboxes = draw_boxes(frame,deduped_bboxes)
        heat_img = heatmaps[-1]*(255/np.max(heatmaps[-1]))
        combined_image = concat_images([frame,frame_with_bboxes,cum_heatmap,cum_heat_image,heat_img,frame_with_deduped_bboxes])
        if write_component_images:
            img_write(frame_prefix+'_cum_heat.jpg',cum_heat_image)
            img_write(frame_prefix+'_heat.jpg',heat_img)
            img_write(frame_prefix+'.jpg',frame)
            img_write(frame_prefix+'_bboxes.jpg',frame_with_bboxes)
            img_write(frame_prefix+'_deduped_bboxes.jpg',frame_with_deduped_bboxes)
            img_write(frame_prefix+'_combined.jpg',combined_image)
        return combined_image#frame_with_deduped_bboxes
    genvideo(input_video_file,output_video_file,draw_car_bboxes)
    
def add_heat(heatmap, bbox_list):
    for box in bbox_list:
        heatmap[box[0][1]:box[1][1], box[0][0]:box[1][0]] += 1
    return heatmap

def apply_threshold(heatmap, threshold):
    heatmap[heatmap <= threshold] = 0
    return heatmap



In [224]:
recompute_model = False
recompute_model = True
if recompute_model:
    scaled_feature_classifier,feature_scaler=compute_model()
    with open('model.p','wb') as model_file:
        pickle.dump( scaled_feature_classifier, model_file )
    with open('feature_scaler.p','wb') as scaler_file:
        pickle.dump( feature_scaler, scaler_file)
else:
    with open('model.p','rb') as model_file:    
        scaled_feature_classifier = pickle.load(model_file)    
    with open('feature_scaler.p','rb') as scaler_file:
        feature_scaler = pickle.load(scaler_file)

Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV] C=1000 ..........................................................
[CV] ................. C=1000, score=0.9797363761558135, total= 1.0min
[CV] C=1000 ..........................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.0min remaining:    0.0s


[CV] ................. C=1000, score=0.9820971867007673, total=  46.7s
[CV] C=1000 ..........................................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  1.8min remaining:    0.0s


[CV] ................. C=1000, score=0.9805233130041314, total=  45.1s


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:  2.6min finished


train_accuracy :  1.0 (15249, 10224) (15249,)
test_accuracy  :  0.984023432299 (7511, 10224) (7511,)


In [None]:
visualize_image_features(imgfile=cutouts[5],scaler=feature_scaler,output_prefix='output_images/cutout5_')
visualize_image_features(imgfile=non_cars[10],scaler=feature_scaler,output_prefix='output_images/noncar_')

In [None]:
car_feature_classifier = lambda feature:scaled_feature_classifier.predict(feature_scaler.transform(feature.reshape(1, -1)))
car_image_classifier = lambda image:car_feature_classifier(image_features(image, cspace, (spatial,spatial), hist_bins, hist_range).reshape(1, -1))
find_cars_in_video(test_video,test_video_output,car_feature_classifier,write_component_images=True)
showvideo(test_video_output)
find_cars_in_video(project_video,project_video_output,car_feature_classifier)
showvideo(project_video_output)


In [None]:
frame = img_read(sample_frame)
frame_float = frame.astype(np.float32)/255
bboxes = find_cars(frame_float,car_feature_classifier)
bboxes_old = find_cars_old(car_image_classifier=car_image_classifier,frame=frame_float)
ret_old = draw_boxes(bboxes=bboxes_old,img=frame)
ret = draw_boxes(bboxes=bboxes,img=frame)
plt.imshow(ret)
plt.show()
plt.imshow(ret_old)
plt.show()