# ===== Car detection using SSD start =====#

In [1]:
import cv2
import keras
from keras.applications.imagenet_utils import preprocess_input
from keras.backend.tensorflow_backend import set_session
from keras.models import Model
from keras.preprocessing import image
import matplotlib.pyplot as plt
import numpy as np
from scipy.misc import imread
import tensorflow as tf

# ===== import SSD for keras 2 =====
from ssd_v2 import SSD300v2
from ssd_utils import BBoxUtility
# ===== import SSD for keras 2 =====

# Show matplotlib figures inside the notebook and set default figure options.
%matplotlib inline
plt.rcParams['figure.figsize'] = (8, 8)
plt.rcParams['image.interpolation'] = 'nearest'

# Print numpy floats in fixed-point form instead of scientific notation.
np.set_printoptions(suppress=True)

# Create the TensorFlow session Keras will use, capping this process's share of
# GPU memory at the given fraction (the trailing 0.45 looks like an earlier
# setting that was kept for reference).
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.95#0.45
set_session(tf.Session(config=config))

Using TensorFlow backend.


In [2]:
# Object class names. draw_boxes maps a detection label to a name with
# voc_classes[label - 1], so the order of this list matters.
voc_classes = [
    'Aeroplane', 'Bicycle', 'Bird', 'Boat',
    'Bottle', 'Bus', 'Car', 'Cat',
    'Chair', 'Cow', 'Diningtable', 'Dog',
    'Horse', 'Motorbike', 'Person', 'Pottedplant',
    'Sheep', 'Sofa', 'Train', 'Tvmonitor',
]
# One more class than there are names (presumably the background class --
# confirm against the SSD model definition).
NUM_CLASSES = 1 + len(voc_classes)

In [3]:
# Build the 300x300 SSD network, load its weights by layer name, and create the
# helper object used later to decode the network output into detections.
input_shape = (300, 300, 3)
# The import cell only brings in SSD300v2 (from ssd_v2); the bare name SSD300 is
# never defined in this notebook and raises NameError on a fresh kernel.
model = SSD300v2(input_shape, num_classes=NUM_CLASSES)
model.load_weights('weights_SSD300.hdf5', by_name=True)
bbox_util = BBoxUtility(NUM_CLASSES)

In [7]:
def get_bbox(xmin, ymin, xmax, ymax):
    """Pack four corner coordinates into a new ``[xmin, ymin, xmax, ymax]`` list."""
    corners = (xmin, ymin, xmax, ymax)
    return list(corners)

def center_is_near(prev_bbox, bbox, threshold=30):
    """Tell whether the centers of two boxes are within ``threshold`` of each other.

    Args:
        prev_bbox: Box as a sequence ``[xmin, ymin, xmax, ymax]`` (indices 0-3 are read).
        bbox: Second box in the same layout.
        threshold: Largest center-to-center Euclidean distance that still counts
            as near. Defaults to 30, the value that used to be hard-coded here.

    Returns:
        bool: True when the distance between the two centers is ``<= threshold``.
    """
    prev_center_x = (prev_bbox[0] + prev_bbox[2]) / 2.
    prev_center_y = (prev_bbox[1] + prev_bbox[3]) / 2.
    center_x = (bbox[0] + bbox[2]) / 2.
    center_y = (bbox[1] + bbox[3]) / 2.

    dist = np.sqrt((prev_center_x - center_x) ** 2 + (prev_center_y - center_y) ** 2)

    # bool() keeps the return type a plain Python bool rather than numpy.bool_.
    return bool(dist <= threshold)

def draw_boxes(img, preds, results):
    """Draw smoothed 'Car' detections on ``img`` and update the cross-frame state.

    The function reads and writes four module-level variables, which must be
    initialised before the first call:

    * ``first_frame_has_car`` -- while True, a car box is stored without
      smoothing; it is set to False once a box has been stored.
    * ``prev_bboxes`` -- list of ``[xmin, ymin, xmax, ymax]`` boxes carried over
      from earlier frames.
    * ``prev_bboxes_len`` -- number of car boxes recorded when the state was
      last refreshed.
    * ``bbox_disappear_frame_count`` -- consecutive frames handled by the
      "fewer cars than before" branch.

    Args:
        img: Image array that is drawn on in place; ``img.shape[0]`` and
            ``img.shape[1]`` are used as height and width to scale the boxes.
        preds: Not used; kept so existing callers keep working.
        results: Sequence whose first element holds one row per detection with
            columns ``label, confidence, xmin, ymin, xmax, ymax``. Coordinates
            are multiplied by the image size, so they are presumably normalised
            to 0..1 -- confirm against BBoxUtility.detection_out.

    Returns:
        The same ``img`` object with rectangles drawn on it.
    """
    global first_frame_has_car, prev_bboxes, prev_bboxes_len, bbox_disappear_frame_count

    conf_threshold = 0.6
    box_color = (0, 255, 0)
    box_thickness = 5
    # Weight given to the previous box when blending it with a new detection.
    ratiox = 0.5
    ratioy = 0.65

    # A frame without detections may arrive as an empty sequence instead of an
    # (N, 6) array; column indexing would raise on that, so normalise it first.
    detections = np.asarray(results[0])
    if detections.ndim != 2 or detections.shape[0] == 0:
        detections = np.empty((0, 6))

    img_height, img_width = img.shape[0], img.shape[1]

    bboxes = []
    for det in detections:
        # Keep only detections with confidence of at least conf_threshold.
        if not det[1] >= conf_threshold:
            continue
        label = int(det[0])
        if voc_classes[label - 1] != 'Car':
            continue

        xmin = int(round(det[2] * img_width))
        ymin = int(round(det[3] * img_height))
        xmax = int(round(det[4] * img_width))
        ymax = int(round(det[5] * img_height))

        if first_frame_has_car or len(prev_bboxes) == 0:
            # Nothing to smooth against yet: remember the raw box.
            prev_bboxes.append(get_bbox(xmin, ymin, xmax, ymax))
            first_frame_has_car = False
            prev_bboxes_len = 0
        else:
            has_near_in_prev_bboxes = False
            for prev in prev_bboxes:
                if center_is_near(prev, [xmin, ymin, xmax, ymax]):
                    # Blend with the stored box and write the blend back, so the
                    # stored box follows the detection from frame to frame.
                    xmin = int((1 - ratiox) * xmin + ratiox * prev[0])
                    ymin = int((1 - ratioy) * ymin + ratioy * prev[1])
                    xmax = int((1 - ratiox) * xmax + ratiox * prev[2])
                    ymax = int((1 - ratioy) * ymax + ratioy * prev[3])
                    prev[0], prev[1], prev[2], prev[3] = xmin, ymin, xmax, ymax
                    has_near_in_prev_bboxes = True

            if not has_near_in_prev_bboxes:
                prev_bboxes.append(get_bbox(xmin, ymin, xmax, ymax))

        bboxes.append(get_bbox(xmin, ymin, xmax, ymax))

    if prev_bboxes_len > len(bboxes) and bbox_disappear_frame_count < 5:
        # Fewer cars than before: for up to 5 frames keep drawing stored boxes
        # that have no current box nearby. any() over an empty list is False, so
        # a frame with no current boxes draws every stored box.
        for prev in prev_bboxes:
            if not any(center_is_near(prev, box) for box in bboxes):
                cv2.rectangle(img, (prev[0], prev[1]), (prev[2], prev[3]),
                              box_color, box_thickness)
        bbox_disappear_frame_count += 1
    else:
        bbox_disappear_frame_count = 0
        prev_bboxes_len = len(bboxes)
        prev_bboxes = bboxes

    for box in bboxes:
        cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), box_color, box_thickness)

    # Safety valve: drop the stored boxes once more than 10 have accumulated.
    if len(prev_bboxes) > 10:
        prev_bboxes = []
        bbox_disappear_frame_count = 10

    return img

In [8]:
def process_video(input_img):
    """Run the SSD detector on one frame and return the frame with car boxes drawn.

    The frame is resized to 300x300, preprocessed and passed to ``model``; the
    decoded detections are drawn by ``draw_boxes`` on the full-size
    ``input_img``, which is what gets returned.

    Note: a later cell in this notebook defines another ``process_video`` that
    replaces this one once it has been executed.
    """
    # (An earlier variant cropped input_img[120:720, 680:1280, :] before resizing.)
    resized = cv2.resize(input_img, (300, 300))
    frame_array = image.img_to_array(resized)

    # Preprocess as a batch of one, then rebuild the leading batch axis around
    # its single element.
    batch = preprocess_input(np.array([frame_array.copy()]))
    net_input = np.expand_dims(batch[0], axis=0)

    preds = model.predict(net_input, batch_size=1, verbose=0)
    results = bbox_util.detection_out(preds)

    return draw_boxes(input_img, preds, results)

In [9]:
from moviepy.editor import VideoFileClip
from IPython.display import HTML

# Reset the cross-frame tracking state that draw_boxes reads and updates
# through its `global` declaration.
first_frame_has_car = True
prev_bboxes = []
bbox_disappear_frame_count = 0
prev_bboxes_len = 0

# Apply process_video to every frame of the input clip and write the result
# to `output` without an audio track; %time reports how long the write takes.
output = 'project_video_video_SSD_smooth_disappear.mp4'
clip1 = VideoFileClip("project_video.mp4")
clip = clip1.fl_image(process_video) #NOTE: this function expects color images!!
%time clip.write_videofile(output, audio=False)

[MoviePy] >>>> Building video project_video_video_SSD_smooth_disappear.mp4
[MoviePy] Writing video project_video_video_SSD_smooth_disappear.mp4


100%|█████████▉| 1260/1261 [01:42<00:00, 12.32it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: project_video_video_SSD_smooth_disappear.mp4 

CPU times: user 37.1 s, sys: 3.25 s, total: 40.4 s
Wall time: 1min 44s


# ===== Car detection using SSD end =====#

# Advanced Lane Detection#

In [49]:
### Import
import numpy as np
import cv2
import glob
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pickle
import time

%matplotlib inline

In [52]:
import skimage 
from skimage import io
from skimage.color import rgb2lab
import numpy as np
import scipy.ndimage as ndi
from scipy import signal

In [54]:
### Load Camera Calibration Pickle Data
# Read the camera matrix ("mtx") and distortion coefficients ("dist") from the
# calibration pickle. The file is opened in a `with` block so the handle is
# closed right after loading.
# NOTE: pickle.load can execute arbitrary code -- only load a file you trust.
with open("dist_pickle.p", "rb") as calibration_file:
    dist_data = pickle.load(calibration_file)
camera_mtx = dist_data["mtx"]
camera_dist = dist_data["dist"]

In [55]:
### Perspective Transform

def undistort_img(image):
    """Undistort ``image`` and compute the corner points for the perspective warp.

    Uses the module-level ``camera_mtx`` and ``camera_dist`` calibration data.

    Returns:
        tuple: ``(undist_image, src_corners, dst_corners)``. ``src_corners`` is a
        4x2 float32 trapezoid given as fractions of the image size, ordered
        lower-left, upper-left, upper-right, lower-right. ``dst_corners`` holds
        the four image corners in the same order.
    """
    undist_image = cv2.undistort(image, camera_mtx, camera_dist, None, camera_mtx)

    height, width = undist_image.shape[0], undist_image.shape[1]

    # Trapezoid vertices. "upper" and "low" describe where a point appears on
    # screen, not the size of its y coordinate.
    center_x = width // 2
    upper_y = height // 1.58
    upper_left_x = center_x // 1.18
    upper_right_x = center_x // 0.83
    low_right_x = 2 * center_x

    # Source points, derived from fractions of the image dimensions.
    src_corners = np.float32([
        [0, height],
        [upper_left_x, upper_y],
        [upper_right_x, upper_y],
        [low_right_x, height],
    ])

    # Destination points: the full image rectangle.
    dst_corners = np.float32([
        [0, height],
        [0, 0],
        [width, 0],
        [width, height],
    ])

    return undist_image, src_corners, dst_corners

def perspective_transform(image):
    """Undistort ``image`` and warp it using the corners from ``undistort_img``.

    Returns:
        tuple: ``(warped, M_inv)`` -- the warped image (same width and height as
        the undistorted one) and the matrix mapping destination corners back to
        source corners.
    """
    undist_image, src_corners, dst_corners = undistort_img(image)
    height, width = undist_image.shape[:2]

    forward_matrix = cv2.getPerspectiveTransform(src_corners, dst_corners)
    M_inv = cv2.getPerspectiveTransform(dst_corners, src_corners)

    warped = cv2.warpPerspective(undist_image, forward_matrix, (width, height))

    # Disabled debug aid: mark the source region on the undistorted image.
    # for i in range(4):
    #     cv2.circle(undist_image, (src_corners[i,0], src_corners[i,1]), 6, (255, 0, 0), 6)
    # for i in range(4):
    #     cv2.line(undist_image,
    #              (src_corners[i-1,0], src_corners[i-1,1]),
    #              (src_corners[i,0], src_corners[i,1]),
    #              (0,255,0), 2)

    return warped, M_inv
def color_threshold(image):
    """Build a binary (0/255) lane-pixel mask from an RGB image.

    Four masks are OR-ed together:

    * LAB B channel above 150,
    * LAB L channel above 210,
    * HSV channel index 2 above 230,
    * x-gradient edges: absolute Sobel-x of the gray image, scaled to 0..255,
      above 90 and eroded, AND-ed with a dilated "HSV channel index 2 above 150"
      mask.

    The combined mask is then cleaned with a 3x3 morphological opening followed
    by an 8x8 closing.

    Args:
        image: Colour image in RGB channel order (all conversions below use the
            ``COLOR_RGB2*`` codes).

    Returns:
        Single-channel array with the dtype of the LAB image, holding 0 or 255.
    """
    # Debug rule of thumb: check cv2.cvtColor(blurred_warped, cv2.COLOR_RGB2LAB)
    kernel = np.ones((3, 3), np.uint8)

    # LAB: B channel and L channel.
    undist_lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
    _, binary_lab_B = cv2.threshold(undist_lab[:, :, 2], 150, 255, cv2.THRESH_BINARY)
    _, binary_lab_L = cv2.threshold(undist_lab[:, :, 0], 210, 255, cv2.THRESH_BINARY)

    # HSV: index 2 of OpenCV's HSV output is the V (value) channel, although
    # this code originally called it "S". The channel is left unchanged because
    # the thresholds were chosen for it -- NOTE(review): confirm V is intended.
    undist_hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    hsv_channel2 = undist_hsv[:, :, 2]
    _, binary_hsv_high = cv2.threshold(hsv_channel2, 230, 255, cv2.THRESH_BINARY)

    # Sobel-x edges. The gray conversion uses the RGB code like every other
    # conversion here (it used to be COLOR_BGR2GRAY, which swaps the red and
    # blue weights). CV_32F is used because threshold did not work on CV_64F.
    undist_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    sobel_x = cv2.Sobel(undist_gray, cv2.CV_32F, 1, 0, ksize=5)
    abs_sobel = np.absolute(sobel_x)
    # Scale by the largest magnitude (not the largest signed value), and skip
    # the division for a completely flat image to avoid 0/0.
    peak = abs_sobel.max()
    if peak > 0:
        abs_sobel = abs_sobel / peak * 255.
    abs_sobel = abs_sobel.astype(np.float32)
    _, binary_edges = cv2.threshold(abs_sobel, 90, 255, cv2.THRESH_BINARY)
    binary_edges = cv2.erode(binary_edges, kernel, iterations=1)

    # Keep an edge pixel only where the dilated HSV-channel-2 mask is also set.
    _, binary_hsv_low = cv2.threshold(hsv_channel2, 150, 255, cv2.THRESH_BINARY)
    binary_hsv_low = cv2.dilate(binary_hsv_low, kernel, iterations=1)
    binary_edges_gated = np.zeros_like(hsv_channel2)
    binary_edges_gated[(binary_edges == 255) & (binary_hsv_low == 255)] = 255

    combined_color_binary = np.zeros_like(binary_lab_B)
    combined_color_binary[(binary_lab_B == 255)
                          | (binary_lab_L == 255)
                          | (binary_edges_gated == 255)
                          | (binary_hsv_high == 255)] = 255

    # Remove small specks, then close gaps.
    kernel_open = np.ones((3, 3), np.uint8)
    combined_color_binary = cv2.morphologyEx(combined_color_binary, cv2.MORPH_OPEN, kernel_open)
    kernel_close = np.ones((8, 8), np.uint8)
    combined_color_binary = cv2.morphologyEx(combined_color_binary, cv2.MORPH_CLOSE, kernel_close)

    return combined_color_binary

def find_boxes(image, img_warped):
    """Find lane-line x positions in 16 horizontal bands of a binary image.

    For each band the column sums are median-filtered and searched for peaks,
    separately in the left and right halves of the frame. In each half the
    right-most peak is kept, a box is drawn around it on a copy of
    ``img_warped``, and the box center is recorded.

    Args:
        image: 2-D binary image; ``image.shape`` gives (height, width).
        img_warped: Image that is copied and used as the canvas for the boxes.

    Returns:
        tuple: ``(left_peaks_xy, right_peaks_xy, img_draw)``. The first two are
        lists of ``np.array([x, y])`` box centers in pixel coordinates; the last
        is the annotated copy of ``img_warped``.
    """
    img_draw = np.array(img_warped)

    steps = 16
    win_size = image.shape[0] // steps
    half_frame = image.shape[1] // 2
    medianfilt_kernel_size = 45

    left_peaks_xy = []
    right_peaks_xy = []

    for i in range(steps):
        top, bottom = i * win_size, (i + 1) * win_size
        histogram = np.sum(image[top:bottom, :], axis=0)
        histogram_smooth = signal.medfilt(histogram, medianfilt_kernel_size)

        # Find left/right peak(s); right-half indices are shifted back to
        # full-frame coordinates.
        left_peaks = np.array(signal.find_peaks_cwt(histogram_smooth[:half_frame], np.arange(10, 50)))
        right_peaks = np.array(signal.find_peaks_cwt(histogram_smooth[half_frame:], np.arange(5, 20))) + half_frame

        for peaks, peaks_xy in ((left_peaks, left_peaks_xy), (right_peaks, right_peaks_xy)):
            # Test for emptiness with .size: .any() is False for a lone peak at
            # x == 0, which would silently drop a valid detection.
            if peaks.size == 0:
                continue
            # If there are several peaks, choose the one furthest to the right.
            # int() hands cv2.rectangle plain Python ints.
            peak_x = int(peaks.max())
            cv2.rectangle(img_draw,
                          (peak_x - win_size // 2, top),
                          (peak_x + win_size // 2, bottom),
                          (0, 255, 0), 3)
            # (x, y): center of the box in pixel coordinates (width, height).
            peaks_xy.append(np.array([peak_x, (2 * i + 1) * win_size // 2]))

    return left_peaks_xy, right_peaks_xy, img_draw


def _fit_quadratic(peaks_xy):
    """Fit ``x = a*y**2 + b*y + c`` through ``(x, y)`` points and return ``[a, b, c]``."""
    xs = [point[0] for point in peaks_xy]
    ys = [point[1] for point in peaks_xy]
    return np.polyfit(ys, xs, 2)


def param_polyfit(img_height, left_peaks_xy, right_peaks_xy):
    """Fit and temporally smooth a quadratic for each lane line, then sample it.

    Uses the module-level ``frame_count``, ``prev_left_fit`` and
    ``prev_right_fit``. When ``frame_count`` is 0 the fresh fit is used as-is;
    afterwards an empty point list re-uses the previous fit, and otherwise the
    result is ``0.85 * previous + 0.15 * fresh``. ``frame_count`` is incremented
    on every call and wraps back to 1 once it reaches 1e6.

    Args:
        img_height: Image height; sampling rows run from 0 to ``img_height``.
        left_peaks_xy: List of ``(x, y)`` points for the left line.
        right_peaks_xy: List of ``(x, y)`` points for the right line.

    Returns:
        tuple: ``(left_fit_x, right_fit_x, y_steps)`` -- the x value of each
        fitted curve at every row in ``y_steps``.
    """
    global prev_left_fit, prev_right_fit
    global frame_count

    steps = 16
    damp_ratio = 0.85

    # Sample rows: every img_height // steps pixels, plus the bottom edge.
    y_steps = np.append(np.arange(0, img_height, img_height // steps), img_height)

    def sample(coeffs):
        return coeffs[0] * y_steps ** 2 + coeffs[1] * y_steps + coeffs[2]

    is_first_frame = frame_count == 0

    # Left line.
    if is_first_frame:
        left_fit = _fit_quadratic(left_peaks_xy)
    elif len(left_peaks_xy) == 0:
        left_fit = prev_left_fit
    else:
        fresh_left = _fit_quadratic(left_peaks_xy)
        left_fit = damp_ratio * prev_left_fit + (1 - damp_ratio) * fresh_left
    prev_left_fit = left_fit
    left_fit_x = sample(left_fit)

    # Right line.
    if is_first_frame:
        right_fit = _fit_quadratic(right_peaks_xy)
    elif len(right_peaks_xy) == 0:
        right_fit = prev_right_fit
    else:
        fresh_right = _fit_quadratic(right_peaks_xy)
        right_fit = damp_ratio * prev_right_fit + (1 - damp_ratio) * fresh_right
    prev_right_fit = right_fit
    right_fit_x = sample(right_fit)

    frame_count += 1
    if frame_count >= 1e6:
        frame_count = 1

    return left_fit_x, right_fit_x, y_steps
        
def draw_line_and_poly(image, left_fit_x, right_fit_x, y_steps):
    """Render the lane area and both lane lines on a blank image shaped like ``image``.

    For each pair of consecutive rows in ``y_steps`` a quadrilateral between the
    left and right curves is filled with (50, 255, 50), then the left and right
    edge segments are drawn in (0, 0, 255) with thickness 30.

    Returns:
        The overlay image (zeros everywhere except the drawn lane).
    """
    overlay = np.zeros_like(image)
    fill_color = (50, 255, 50)
    edge_color = (0, 0, 255)
    edge_thickness = 30

    def point(xs, k):
        # Integer pixel coordinate of curve `xs` at sample row k.
        return (int(xs[k]), int(y_steps[k]))

    for upper in range(len(left_fit_x) - 1):
        lower = upper + 1
        # Corners in drawing order: left/upper, right/upper, right/lower, left/lower.
        quad = [point(left_fit_x, upper), point(right_fit_x, upper),
                point(right_fit_x, lower), point(left_fit_x, lower)]
        cv2.fillConvexPoly(overlay, np.array(quad), fill_color)
        # Left edge, then right edge, drawn after the fill of this segment.
        cv2.line(overlay, quad[0], quad[3], edge_color, edge_thickness)
        cv2.line(overlay, quad[1], quad[2], edge_color, edge_thickness)

    return overlay

def draw_on_original_img(image, img_line_poly, M_inv):
    """Warp the lane overlay with ``M_inv`` to the size of ``image`` and blend it in.

    The blend is ``image * 1 + warped_overlay * 0.5``.
    """
    height, width = image.shape[0], image.shape[1]
    unwarped_overlay = cv2.warpPerspective(img_line_poly, M_inv, (width, height))
    return cv2.addWeighted(image, 1, unwarped_overlay, 0.5, 0)

In [56]:
### Video pipeline
def process_video(input_img):
    """Annotate one video frame with the detected lane area and car boxes.

    Lane stage: perspective warp, Gaussian blur, colour/edge thresholding, peak
    search, polynomial fit, and overlay of the lane back onto the frame.
    Car stage: the original frame is resized to 300x300 and run through the SSD
    ``model``; ``draw_boxes`` draws the decoded detections on the lane-annotated
    frame.

    Note: this definition replaces the SSD-only ``process_video`` defined in an
    earlier cell of the notebook.
    """
    frame_height = input_img.shape[0]

    # --- Lane detection ---
    warped, M_inv = perspective_transform(input_img)
    lane_mask = color_threshold(cv2.GaussianBlur(warped, (5, 5), 0))
    left_peaks_xy, right_peaks_xy, _ = find_boxes(lane_mask, warped)
    left_fit_x, right_fit_x, y_steps = param_polyfit(frame_height, left_peaks_xy, right_peaks_xy)
    lane_overlay = draw_line_and_poly(input_img, left_fit_x, right_fit_x, y_steps)
    annotated = draw_on_original_img(input_img, lane_overlay, M_inv)

    # --- Car detection ---
    frame_array = image.img_to_array(cv2.resize(input_img, (300, 300)))
    # Preprocess as a batch of one, then rebuild the batch axis around its element.
    batch = preprocess_input(np.array([frame_array.copy()]))
    net_input = np.expand_dims(batch[0], axis=0)

    preds = model.predict(net_input, batch_size=1, verbose=0)
    results = bbox_util.detection_out(preds)

    return draw_boxes(annotated, preds, results)

In [57]:
import warnings
# Silence numpy's RankWarning for the rest of the session -- presumably the one
# np.polyfit emits in param_polyfit when a fit is poorly conditioned (confirm).
warnings.simplefilter('ignore', np.RankWarning)

In [61]:
from joblib import Parallel, delayed
import multiprocessing

In [59]:
### Process video
# Import everything needed to edit/save/watch video clips
from moviepy.editor import VideoFileClip
from IPython.display import HTML

right_fit_count = 0
frame_count = 0

output = 'project_video_comb.mp4'
clip1 = VideoFileClip("project_video.mp4")
clip = clip1.fl_image(process_video) #NOTE: this function expects color images!!
%time clip.write_videofile(output, audio=False)

[MoviePy] >>>> Building video project_video_comb.mp4
[MoviePy] Writing video project_video_comb.mp4


100%|█████████▉| 1260/1261 [15:53<00:00,  1.34it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: project_video_comb.mp4 

CPU times: user 15min 38s, sys: 8.07 s, total: 15min 46s
Wall time: 15min 55s


In [None]:
def test_appaned(x):
    x.append(10)
xx = []
test_appaned(xx)