In [1]:
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import cv2
import glob
import time
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from scipy.ndimage.measurements import label

%matplotlib inline

In [2]:
# Divide up into cars and notcars
images = glob.glob('data/*/*/*.png')
vehicles = []
notvehicles = []
for image in images:
    if 'non-vehicles' in image:
        notvehicles.append(image)
    elif 'vehicles' in image:
        vehicles.append(image)
    else:
        print("error")

In [3]:
PIX_PER_CELL = 8
CELL_PER_BLOCK = 2
ORIENT = 9

In [4]:
# Define a function to return HOG features and visualization
def get_hog_features(img, orient, pix_per_cell, cell_per_block, vis=False, feature_vec=True):
    
    # Call with two outputs if vis==True
    if vis == True:
        features, hog_image = hog(img, orientations=orient, pixels_per_cell=(pix_per_cell, pix_per_cell),
                                  cells_per_block=(cell_per_block, cell_per_block), block_norm='L2-Hys', transform_sqrt=True, 
                                  visualise=vis, feature_vector=feature_vec)
        
        return features, hog_image
    
    # Otherwise call with one output
    else:
        features = hog(img, orientations=orient, pixels_per_cell=(pix_per_cell, pix_per_cell),
                       cells_per_block=(cell_per_block, cell_per_block), block_norm='L2-Hys', transform_sqrt=True, 
                       visualise=vis, feature_vector=feature_vec)
        
        return features
    
def bin_spatial(img, size=(32, 32)):
    color1 = cv2.resize(img[:,:,0], size).ravel()
    color2 = cv2.resize(img[:,:,1], size).ravel()
    color3 = cv2.resize(img[:,:,2], size).ravel()
    return np.hstack((color1, color2, color3))


def color_hist(img, nbins=32):    #bins_range=(0, 256)
    # Compute the histogram of the color channels separately
    channel1_hist = np.histogram(img[:,:,0], bins=nbins)
    channel2_hist = np.histogram(img[:,:,1], bins=nbins)
    channel3_hist = np.histogram(img[:,:,2], bins=nbins)
    # Concatenate the histograms into a single feature vector
    hist_features = np.concatenate((channel1_hist[0], channel2_hist[0], channel3_hist[0]))
    # Return the individual histograms, bin_centers and feature vector
    return hist_features

# Define a function to extract features from a list of images
# Have this function call bin_spatial() and color_hist()
def extract_features(imgs, orient=ORIENT, pix_per_cell=PIX_PER_CELL, cell_per_block=CELL_PER_BLOCK):
    
    # Create a list to append feature vectors to
    features = []
    
    # Iterate through the list of images
    for file in imgs:
        
        # use png image
        image = mpimg.imread(file)
        # image = (image*255).astype(np.uint8) # 0-1 float32 to 0-255 uint8
        feature_image = cv2.cvtColor(image, cv2.COLOR_RGB2YCrCb)

        # Call get_hog_features() with vis=False, feature_vec=True
        hog_features = []
        
        for channel in range(feature_image.shape[2]):
            hog_features.append(get_hog_features(feature_image[:,:,channel], 
                                                 orient, pix_per_cell, cell_per_block, vis=False, feature_vec=True))
            
        hog_features = np.hstack((hog_features))
        
        # Get color features
        spatial_features = bin_spatial(feature_image)
        hist_features = color_hist(feature_image)

        # Scale features and make a prediction
        stacked = np.hstack((spatial_features, hist_features, hog_features))
        
        features.append(stacked)
        
    # Return list of feature vectors
    return features

In [5]:
orient = ORIENT
pix_per_cell = PIX_PER_CELL
cell_per_block = CELL_PER_BLOCK

t=time.time()
car_features = extract_features(vehicles, orient=orient, 
                        pix_per_cell=pix_per_cell, cell_per_block=cell_per_block)

notcar_features = extract_features(notvehicles, orient=orient, 
                        pix_per_cell=pix_per_cell, cell_per_block=cell_per_block)

t2 = time.time()
print(round(t2-t, 2), 'Seconds to extract HOG features...')

140.08 Seconds to extract HOG features...


In [6]:
# Create an array stack of feature vectors
X = np.vstack((car_features, notcar_features)).astype(np.float64)                        
# Fit a per-column scaler
X_scaler = StandardScaler().fit(X)
# Apply the scaler to X
scaled_X = X_scaler.transform(X)

# Define the labels vector
y = np.hstack((np.ones(len(car_features)), np.zeros(len(notcar_features))))


# Split up data into randomized training and test sets
np.random.seed(10)
rand_state = np.random.randint(0, 100) 
X_train, X_test, y_train, y_test = train_test_split(scaled_X, y, test_size=0.1, random_state=rand_state)

print('Using:',orient,'orientations',pix_per_cell, 'pixels per cell and', cell_per_block,'cells per block')
print('Feature vector length:', len(X_train[0]))

Using: 9 orientations 8 pixels per cell and 2 cells per block
Feature vector length: 8460


In [7]:
# Use a linear SVC
#svc = LinearSVC(C=clf.best_params_['C'], loss=clf.best_params_['loss'], penalty=clf.best_params_['penalty'])
svc = LinearSVC(C=0.0003, loss='hinge', penalty='l2')

# Check the training time for the SVC
t=time.time()
svc.fit(X_train, y_train)
t2 = time.time()
print(round(t2-t, 2), 'Seconds to train SVC...')
# Check the score of the SVC
print('Test Accuracy of SVC = ', round(svc.score(X_test, y_test), 4))
# Check the prediction time for a single sample
t=time.time()
n_predict = 10
print('My SVC predicts: ', svc.predict(X_test[0:n_predict]))
print('For these',n_predict, 'labels: ', y_test[0:n_predict])
t2 = time.time()

print(round(t2-t, 5), 'Seconds to predict', n_predict,'labels with SVC')

5.99 Seconds to train SVC...
Test Accuracy of SVC =  0.9916
My SVC predicts:  [ 1.  1.  1.  1.  1.  0.  0.  1.  0.  1.]
For these 10 labels:  [ 1.  1.  1.  1.  1.  0.  0.  1.  0.  1.]
0.00238 Seconds to predict 10 labels with SVC


In [8]:
def window_search(img, ystart, ystop, scale, svc, X_scaler, orient=ORIENT, pix_per_cell=PIX_PER_CELL, cell_per_block=CELL_PER_BLOCK, spatial_size=(32, 32), hist_bins=32):
    
    car_boxes = []
    # jpg image
    draw_img = np.copy(img)
    
    img_tosearch = img[ystart:ystop,:,:]
    img_tosearch = img_tosearch.astype(np.float32)/255.0

    ctrans_tosearch = cv2.cvtColor(img_tosearch, cv2.COLOR_RGB2YCrCb)
    
    if scale != 1:
        imshape = ctrans_tosearch.shape
        ctrans_tosearch = cv2.resize(ctrans_tosearch, (np.int(imshape[1]/scale), np.int(imshape[0]/scale)))
        
    ch1 = ctrans_tosearch[:,:,0]
    ch2 = ctrans_tosearch[:,:,1]
    ch3 = ctrans_tosearch[:,:,2]

    # Define blocks and steps as above
    nxblocks = (ch1.shape[1] // pix_per_cell) - cell_per_block + 1
    nyblocks = (ch1.shape[0] // pix_per_cell) - cell_per_block + 1
    nfeat_per_block = orient*cell_per_block**2
    
    # 64 was the orginal sampling rate, with 8 cells and 8 pix per cell
    window = 64
    
    nblocks_per_window = (window // pix_per_cell) - cell_per_block + 1
    cells_per_step = 2  # Instead of overlap, define how many cells to step
    nxsteps = (nxblocks - nblocks_per_window) // cells_per_step
    nysteps = (nyblocks - nblocks_per_window) // cells_per_step
    
    # Compute individual channel HOG features for the entire image
    hog1 = get_hog_features(ch1, orient, pix_per_cell, cell_per_block, feature_vec=False)
    hog2 = get_hog_features(ch2, orient, pix_per_cell, cell_per_block, feature_vec=False)
    hog3 = get_hog_features(ch3, orient, pix_per_cell, cell_per_block, feature_vec=False)
    
    for xb in range(nxsteps):
        for yb in range(nysteps):
            ypos = yb*cells_per_step
            xpos = xb*cells_per_step
            
            # Extract HOG for this patch
            hog_feat1 = hog1[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel() 
            hog_feat2 = hog2[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel() 
            hog_feat3 = hog3[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel() 
            hog_features = np.hstack((hog_feat1, hog_feat2, hog_feat3))

            xleft = xpos*pix_per_cell
            ytop = ypos*pix_per_cell

            # Extract the image patch
            subimg = cv2.resize(ctrans_tosearch[ytop:ytop+window, xleft:xleft+window], (64, 64))
          
            # Get color features
            spatial_features = bin_spatial(subimg, size=spatial_size)
            hist_features = color_hist(subimg, nbins=hist_bins)

            # Scale features and make a prediction
            stacked = np.hstack((spatial_features, hist_features, hog_features))

            test_features = X_scaler.transform(stacked.reshape(1, -1))    
            #test_features = X_scaler.transform(np.hstack((shape_feat, hist_feat)).reshape(1, -1))    
            test_prediction = svc.predict(test_features)
            
            if test_prediction == 1:
                xbox_left = np.int(xleft*scale)
                ytop_draw = np.int(ytop*scale)
                win_draw = np.int(window*scale)
                #cv2.rectangle(draw_img,(xbox_left, ytop_draw+ystart),(xbox_left+win_draw,ytop_draw+win_draw+ystart),(0,0,255),6) 
                car_boxes.append(((xbox_left, ytop_draw+ystart),(xbox_left+win_draw,ytop_draw+win_draw+ystart)))
                
    return car_boxes
    #return draw_img

In [9]:
def add_heat(heatmap, bbox_list):
    # Iterate through list of bboxes
    for box in bbox_list:
        # Add += 1 for all pixels inside each bbox
        # Assuming each "box" takes the form ((x1, y1), (x2, y2))
        heatmap[box[0][1]:box[1][1], box[0][0]:box[1][0]] += 1

    # Return updated heatmap
    return heatmap# Iterate through list of bboxes


def apply_threshold(heatmap, threshold):
    # Zero out pixels below the threshold
    heatmap[heatmap <= threshold] = 0
    # Return thresholded map
    return heatmap


def draw_labeled_bboxes(img, labels, svc=None, vis_prob=False):
    
    # Iterate through all detected cars
    for car_number in range(1, labels[1]+1):
        
        # Find pixels with each car_number label value
        nonzero = (labels[0] == car_number).nonzero()
        
        # Identify x and y values of those pixels
        nonzeroy = np.array(nonzero[0])
        nonzerox = np.array(nonzero[1])
        
        # Define a bounding box based on min/max x and y
        bbox = ((np.min(nonzerox), np.min(nonzeroy)), (np.max(nonzerox), np.max(nonzeroy)))
        
        # Draw the box on the image
        cv2.rectangle(img, bbox[0], bbox[1], (0,0,255), 6)
        
        if vis_prob == True:
            target = img[bbox[0][1]:bbox[1][1], bbox[0][0]:bbox[1][0], :]
            prob = predict_window(target, svc)
            message = "{:.3f}".format(prob[0])
            # Draw probability of sub image
            cv2.putText(img, message, bbox[0], cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 3, cv2.LINE_AA)

    # Return the image
    return img

In [10]:
# bbox = ((np.min(nonzerox), np.min(nonzeroy)), (np.max(nonzerox), np.max(nonzeroy)))
def conpare_prev_frame(prev_bboxes, current_bboxes):

    overlap_bboxes = []
    for current_bbox in current_bboxes:

        overlap = 0
        for prev_bbox in prev_bboxes:

            if overlap == 0:

                # check lengthwise direction
                is_include_min_x_prev_bbox = current_bbox[0][0] in range(prev_bbox[0][0], prev_bbox[1][0])
                is_include_max_y_prev_bbox = current_bbox[1][1] in range(prev_bbox[0][1], prev_bbox[1][1])

                if is_include_min_x_prev_bbox and is_include_max_y_prev_bbox:

                    overlap = 1

                # check widthwise direction
                is_include_min_x_prev_bbox = current_bbox[1][1] in range(prev_bbox[0][0], prev_bbox[1][0])
                is_include_max_y_prev_bbox = current_bbox[0][0] in range(prev_bbox[0][1], prev_bbox[1][1])

                if is_include_min_x_prev_bbox and is_include_max_y_prev_bbox:

                    overlap = 1

        if overlap == 1:
            overlap_bboxes.append(current_bbox)

    return overlap_bboxes

In [11]:
# scv : 
# X_scaler :
class Pipeline:
    
    def __init__(self, svc, X_scaler):
        self.previouse_bboxes_1 = []
        #self.previouse_bboxes_2 = []
        #self.previouse_bboxes_3 = []

        #self.ystart = 400
        #self.ystop = 656
        #self.scale = 1.0
        self.svc = svc
        self.X_scaler = X_scaler
        self.check_frame_range = 50
        self.threshold = 1

        
    def __call__(self, image):
        
        # try multi scale window search
        detected_bboxes_1 = window_search(image, 400, 600, 1.1, self.svc, self.X_scaler)
        #detected_bboxes_2 = window_search(image, 400, 656, 1.1, self.svc, self.X_scaler)
        #detected_bboxes_3 = window_search(image, 400, 656, 0.9, self.svc, self.X_scaler)

        try:
            if len(self.previouse_bboxes_1) >= self.check_frame_range:
                
                s1_bboxes = self.previouse_bboxes_1.pop(0)
                #s2_bboxes = self.previouse_bboxes_2.pop(0)
                #s3_bboxes = self.previouse_bboxes_3.pop(0)

            else:
                s1_bboxes = self.previouse_bboxes_1[0]
                #s2_bboxes = self.previouse_bboxes_2[0]
                #s3_bboxes = self.previouse_bboxes_3[0]
                
            s1_compared_bboxes = [conpare_prev_frame(b, detected_bboxes_1) for b in self.previouse_bboxes_1]
            #s2_compared_bboxes = [conpare_prev_frame(b, detected_bboxes_2) for b in self.previouse_bboxes_2]
            #s3_compared_bboxes = [conpare_prev_frame(b, detected_bboxes_3) for b in self.previouse_bboxes_3]

            plot_bboxes = list(set(s1_bboxes).intersection(*s1_compared_bboxes))
            #plot_bboxes.extend(list(set(s2_bboxes).intersection(*s2_compared_bboxes)))
            #plot_bboxes.extend(list(set(s3_bboxes).intersection(*s3_compared_bboxes)))
                
        except:
            
            plot_bboxes = detected_bboxes_1
            #plot_bboxes.extend(detected_bboxes_2)
            #plot_bboxes.extend(detected_bboxes_3)

        heat = np.zeros_like(image[:,:,0]).astype(np.float)

        heat = add_heat(heat, plot_bboxes)

        heat = apply_threshold(heat, self.threshold)
        heatmap = np.clip(heat, 0, 255)

        labels = label(heatmap)
        draw_img = draw_labeled_bboxes(np.copy(image), labels)

        self.previouse_bboxes_1.append(detected_bboxes_1)
        #self.previouse_bboxes_2.append(detected_bboxes_2)
        #self.previouse_bboxes_3.append(detected_bboxes_3)

        return draw_img

In [12]:
from moviepy.editor import VideoFileClip
from IPython.display import HTML

In [13]:
pipeline = Pipeline(svc, X_scaler)
 
def pipe_image(image):
    return pipeline(image)

#white_output = 'result_test_video_window_scale10_12_th1_fixed.mp4'
#white_output = 'result_test_video2_09_10_12_th1_f5_seed10_2.mp4'
white_output = 'tts_11.mp4'

clip1 = VideoFileClip('test_images/test_video3.mp4')
white_clip = clip1.fl_image(pipe_image)
%time white_clip.write_videofile(white_output, audio=False)

[MoviePy] >>>> Building video tts_11.mp4
[MoviePy] Writing video tts_11.mp4


100%|██████████| 47/47 [00:30<00:00,  1.61it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: tts_11.mp4 

CPU times: user 50.3 s, sys: 1.18 s, total: 51.5 s
Wall time: 31.9 s
