In [1]:
# Importing the required libraries
import argparse
import matplotlib.pyplot as plt
import numpy as np
import PIL
import tensorflow as tf
from keras import backend as K
from keras.layers import Input, Lambda, Conv2D
from keras.models import load_model, Model
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping
from YAD2K.yad2k.models.keras_yolo import preprocess_true_boxes, yolo_body, yolo_head, yolo_loss, yolo_boxes_to_corners
from YAD2K.yad2k.utils.draw_boxes import draw_boxes
import pandas as pd
import numpy as np
from glob import glob
import os
%matplotlib inline
import cv2

Using TensorFlow backend.


In [2]:
# Loading the labels table from 'labels.csv' (path is relative to the
# notebook's working directory).
# NOTE(review): `labels` is not referenced by any later cell visible in this
# notebook - confirm it is still needed.
labels = pd.read_csv('labels.csv')

In [4]:
# Default anchor boxes: five two-value rows, returned by get_anchors() when
# no anchors file is available.
YOLO_ANCHORS = np.array([
    [0.57273, 0.677385],
    [1.87446, 2.06253],
    [3.33843, 5.47434],
    [7.88282, 3.52778],
    [9.77052, 9.16828],
])

In [5]:
def get_classes(classpath):
    """
    Loads the class names stored on the first line of the classes file.

    Arguments:
    classpath: Path of a text file whose first line holds the class names
               separated by whitespace.

    Returns:

    List containing the class names (empty list if the file is empty).
    """
    with open(classpath) as f:
        first_line = f.readline()
    # split() without an argument drops the trailing newline and tolerates
    # repeated spaces, so the last class name never ends up as 'truck\n'.
    return first_line.split()

In [6]:
# Sanity check: display the class names parsed from data/classes.txt.
get_classes('data/classes.txt')

['bike', 'bus', 'car', 'motor', 'person', 'rider', 'train', 'truck']

In [7]:
def get_anchors(anchor_path):
    """
    Loads the anchors from a file.

    Arguments:
    anchor_path: Path of a text file whose first line is a comma-separated
                 list of numbers.

    Returns:
    Array with the numbers of that line reshaped to two columns. When
    anchor_path is not an existing file, a warning is issued and the default
    YOLO_ANCHORS are returned instead.
    """
    # Local import so the function does not depend on the notebook's import cell.
    import warnings

    if not os.path.isfile(anchor_path):
        # warnings.warn actually emits the message; merely constructing a
        # Warning object would be a silent no-op.
        warnings.warn('Could not open anchors file, using default')
        return YOLO_ANCHORS

    with open(anchor_path) as f:
        first_line = f.readline()
    values = [float(x) for x in first_line.split(',')]
    return np.array(values).reshape(-1, 2)

In [8]:
def get_boxes(box_path):
    """
    Loads the saved box coordinates.

    Arguments:
    box_path: Path of an archive readable by np.load that maps names to
              arrays (e.g. a file written with np.savez).

    Returns:
    The array stored under the last name in the archive, converted to a list.

    Raises:
    ValueError: if the archive contains no arrays.
    """
    # The context manager closes the underlying file once the data is read.
    with np.load(box_path) as archive:
        names = archive.files
        if not names:
            raise ValueError('No arrays found in {}'.format(box_path))
        # Only the last entry is used, so only that entry is read.
        return list(archive[names[-1]])

In [9]:
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold=0.6):
    """
    Keeps only the boxes whose best class score reaches the threshold.

    Arguments:
    box_confidence: Probability of the box containing an object.
    boxes: Box coordinates, one entry per candidate box (yolo_eval passes the
           corner-form boxes produced by yolo_boxes_to_corners).
    box_class_probs: Probability of all the classes for each box.
    threshold: Minimum value of (box confidence * class probability) a box
               must reach to be kept.

    Returns (in this order):
    scores: best class score of every kept box.
    classes: index of the best class of every kept box.
    boxes: coordinates of every kept box.
    """
    # Score of every class for every box.
    class_scores = box_confidence * box_class_probs

    # Winning class per box and the score it achieved.
    best_class = K.argmax(class_scores)
    best_score = K.max(class_scores, axis=-1)

    # True where the winning score reaches the threshold.
    keep = best_score >= threshold

    def _select(tensor):
        # Drop every entry whose box did not pass the threshold.
        return tf.boolean_mask(tensor=tensor, mask=keep)

    return _select(best_score), _select(best_class), _select(boxes)

In [10]:
def process_data(images, boxes=None, dtype=np.float64):
    """
    Prepares images (and optionally their ground-truth boxes) for the model.

    Every image is resized to 416x416 with bicubic interpolation and scaled to
    the range [0, 1].

    Arguments:
    images: Non-empty sequence of image arrays accepted by PIL.Image.fromarray.
            The size of the FIRST image is used to normalise all boxes, so all
            images are expected to share that size.
    boxes: Optional sequence with one flat array per image, laid out as
           repeated (class, x_min, y_min, x_max, y_max) groups.
    dtype: Floating point type used while converting the images. The default
           float64 matches the former `np.float` alias (removed in NumPy 1.24);
           pass np.float32 to halve the memory footprint.

    Returns:
    The processed images as one array when boxes is None, otherwise a tuple
    (processed_images, boxes) where every row of boxes is
    (x_center, y_center, width, height, class) with the coordinates divided by
    the original image width/height, zero-padded so every image has the same
    number of rows.

    Raises:
    ValueError: if images is empty.
    """
    pil_images = [PIL.Image.fromarray(img) for img in images]
    if not pil_images:
        raise ValueError('process_data needs at least one image')

    # (width, height) of the first image, shaped (1, 2) so it broadcasts over box rows.
    orig_size = np.array([[pil_images[0].width, pil_images[0].height]])

    # Resize, convert and rescale in one pass so only one full-size list of
    # float images is alive at a time (the original kept several).
    processed_images = np.array([
        np.array(img.resize((416, 416), PIL.Image.BICUBIC), dtype=dtype) / 255.
        for img in pil_images
    ])

    if boxes is None:
        return processed_images

    # Original boxes are stored as a 1D list of class, x_min, y_min, x_max, y_max.
    boxes = [np.asarray(box).reshape((-1, 5)) for box in boxes]

    normalized = []
    for box in boxes:
        mins, maxs = box[:, 1:3], box[:, 3:5]
        centers = 0.5 * (maxs + mins) / orig_size
        sizes = (maxs - mins) / orig_size
        # x_center, y_center, box_width, box_height, class
        normalized.append(np.concatenate((centers, sizes, box[:, 0:1]), axis=-1))

    # Zero-pad every image's boxes up to the largest box count so the result
    # can be stacked into a single array for training.
    max_boxes = max([b.shape[0] for b in normalized] or [0])
    for i, b in enumerate(normalized):
        if b.shape[0] < max_boxes:
            zero_padding = np.zeros((max_boxes - b.shape[0], 5), dtype=np.float32)
            normalized[i] = np.vstack((b, zero_padding))

    return processed_images, np.array(normalized)

In [11]:
def get_detector_mask(boxes, anchors):
    """
    Precomputes the detector masks and matching true boxes used for training.

    preprocess_true_boxes is called once per image with that image's boxes,
    the anchors and an image size of [416, 416]. Per the original notes, the
    detector mask is 1 for each spatial position / anchor of the final conv
    layer that should be active for the given boxes and 0 otherwise, and the
    matching true boxes hold the regression targets of the ground truth box
    that activated a detector (0 otherwise).

    Returns:
    Tuple (detectors_mask, matching_true_boxes), each an array with one entry
    per element of boxes.
    """
    masks = []
    matches = []
    for image_boxes in boxes:
        mask, match = preprocess_true_boxes(image_boxes, anchors, [416, 416])
        masks.append(mask)
        matches.append(match)
    return np.array(masks), np.array(matches)


In [12]:
def create_model(anchors, class_names, load_pretrained=True, freeze_body = True):
    """
    Builds the YOLOv2 network with a new output layer, plus a training model
    whose output is the YOLO loss.

    Arguments:
    anchors: anchor boxes; their count sizes the output layer and they are
             forwarded to yolo_loss.
    class_names: list of class names; its length sizes the output layer and is
                 forwarded to yolo_loss as num_classes.
    load_pretrained: whether or not to load the pretrained model or initialize all weights.
                     Weights are read from model_data/yolo_topless.h5, which is
                     first created from model_data/yolo.h5 if it does not exist.
    freeze_body: whether or not to freeze all weights except for the last layer's

    Returns:
    model_body : YOLOv2 with new output layer
    model : YOLOv2 with custom loss Lambda layer

    """
    # Shapes of the two precomputed training inputs (see get_detector_mask).
    # NOTE(review): the 5 in the third position presumably has to equal
    # len(anchors) - confirm before using a different number of anchors.
    detector_mask_shape = (13, 13, 5, 1)
    matching_boxes_shape = (13, 13, 5, 5)
    
    # Create model input layers
    image_input = Input(shape=(416,416,3))
    boxes_input = Input(shape=(None, 5))
    detector_mask_input = Input(shape=detector_mask_shape)
    matching_boxes_input = Input(shape=matching_boxes_shape)
    
    # Create model body: the full YOLO network cut off before its last layer.
    yolo_model = yolo_body(image_input, len(anchors), len(class_names))
    topless_yolo = Model(yolo_model.input, yolo_model.layers[-2].output)
    
    if load_pretrained == True:
        # Save topless yolo: on first use, derive the weights of the truncated
        # network from the complete pretrained model and cache them on disk.
        topless_yolo_path = os.path.join('model_data', 'yolo_topless.h5')
        if not os.path.exists(topless_yolo_path):
            print('Creating Topless weights file')
            yolo_path = os.path.join('model_data', 'yolo.h5')
            model_body = load_model(yolo_path)
            model_body = Model(model_body.inputs, model_body.layers[-2].output)
            model_body.save_weights(topless_yolo_path)
        topless_yolo.load_weights(topless_yolo_path)
        
    if freeze_body:
        # Only the new output layer created below stays trainable.
        for layer in topless_yolo.layers:
            layer.trainable = False
    
    # New 1x1 output convolution: (5 + number of classes) values per anchor.
    final_layer = Conv2D(len(anchors)*(5 + len(class_names)), (1, 1), activation='linear')(topless_yolo.output)
    model_body = Model(image_input, final_layer)
    
    # Place model loss on CPU to reduce GPU memory usage.
    with tf.device('/cpu:0'):
        model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss', arguments={
            'anchors': anchors, 
            'num_classes': len(class_names)})([model_body.output, boxes_input, detector_mask_input, matching_boxes_input])
    
    # Training model: takes the image plus the three target inputs and outputs the loss value.
    model = Model([model_body.input, boxes_input, detector_mask_input, matching_boxes_input], model_loss)
    return model_body, model

In [13]:
def train(model, class_name, anchors, image_data, boxes, detectors_mask, matching_true_boxes, validation_split=0.1, epochs = 5):
    """
    Trains the model and keeps the weights with the lowest validation loss.

    Arguments:
    model: training model returned by create_model (its output is the loss).
    class_name: unused; kept so existing positional calls keep working.
    anchors: unused here; kept for the same reason.
    image_data, boxes, detectors_mask, matching_true_boxes: the four model inputs.
    validation_split: fraction of the data held out for validation.
    epochs: maximum number of epochs.

    Side effects:
    - model_data/model.best.h5 receives the weights of the epoch with the
      lowest val_loss (written by ModelCheckpoint).
    - model_data/model.last.h5 receives the weights after the final epoch.
    - TensorBoard logs are written to the callback's default location.
    """
    best_weights_path = 'model_data/model.best.h5'
    last_weights_path = 'model_data/model.last.h5'

    # The Lambda layer named 'yolo_loss' already outputs the loss, so the
    # Keras loss function simply passes the prediction through.
    model.compile(optimizer='adam', loss={'yolo_loss': lambda y_true, y_pred: y_pred})

    logging = TensorBoard()
    checkpoint = ModelCheckpoint(best_weights_path, monitor='val_loss', save_weights_only=True, save_best_only=True)
    # With patience=15 early stopping can only trigger when epochs > 15.
    earlystopping = EarlyStopping(monitor='val_loss', patience=15, verbose=1)

    # The targets are dummy zeros: the loss is computed inside the model itself.
    model.fit([image_data, boxes, detectors_mask, matching_true_boxes], np.zeros(len(image_data)), validation_split=validation_split, batch_size=16, epochs=epochs, callbacks=[logging, checkpoint, earlystopping])

    # Keep the final weights in their own file. Writing them to the best-weights
    # path (as before) replaced the best checkpoint with last-epoch weights.
    model.save_weights(last_weights_path)
    # Without validation data there is no val_loss, so ModelCheckpoint never
    # writes the best file; fall back to the final weights so load_yolo's
    # default path holds weights from this run.
    if not validation_split or not os.path.exists(best_weights_path):
        model.save_weights(best_weights_path)

In [14]:
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold=0.6):
    """
    Filters YOLO boxes by thresholding on object and class confidence.

    Arguments:
    box_confidence: Probability of the box containing an object.
    boxes: Coordinates of the candidate boxes.
    box_class_probs: Probability of all the classes for each box.
    threshold: Minimum combined score (confidence * class probability) for a
               box to survive.

    Returns (in this order):
    scores: the winning class score of each surviving box.
    classes: the index of the winning class of each surviving box.
    boxes: the coordinates of each surviving box.
    """
    # NOTE: this cell re-defines yolo_filter_boxes, which an earlier cell
    # already defines with the same logic; this later definition is the one in
    # effect after a full run, so one of the two can be removed.

    # Combined score of every class for every box.
    per_class = box_confidence * box_class_probs

    # Index and value of the highest-scoring class per box.
    winner_index = K.argmax(per_class)
    winner_score = K.max(per_class, axis=-1)

    # Boolean mask of the boxes whose winning score reaches the threshold.
    passes = winner_score >= threshold

    # Apply the same mask to the scores, the class indices and the boxes.
    scores, classes, boxes = [
        tf.boolean_mask(tensor=candidate, mask=passes)
        for candidate in (winner_score, winner_index, boxes)
    ]
    return scores, classes, boxes

In [15]:
def non_max_suppression(scores, classes, boxes, max_boxes=10, iou_threshold = 0.5):
    """
    Removes multiple detections of the same object with non-maximal suppression:
    the highest-scoring box is kept and every box overlapping it with an IoU
    above iou_threshold is suppressed, repeated until at most max_boxes remain.

    scores        -> score of each box selected by the threshold filter.
    classes       -> class index of each of those boxes.
    boxes         -> coordinates of each of those boxes.
    max_boxes     -> maximum number of boxes returned after NMS filtering.
    iou_threshold -> overlap above which a lower-scoring box is suppressed.

    Returns:
    scores  -> score of each kept box.
    classes -> class of each kept box.
    boxes   -> coordinates of each kept box.
    """
    # tf.image.non_max_suppression takes the box limit as a tensor, so wrap
    # max_boxes in a variable and initialise it in the current session.
    limit = K.variable(max_boxes, dtype='int32')
    K.get_session().run(tf.variables_initializer([limit]))

    # Indices of the boxes that survive suppression.
    keep = tf.image.non_max_suppression(
        boxes=boxes,
        scores=scores,
        max_output_size=limit,
        iou_threshold=iou_threshold)

    # Pick the surviving entries out of each tensor with tf.gather.
    kept_scores = tf.gather(scores, keep)
    kept_classes = tf.gather(classes, keep)
    kept_boxes = tf.gather(boxes, keep)

    return kept_scores, kept_classes, kept_boxes

In [16]:
def scale_boxes(boxes, image_shape):
    """
    Scales the predicted boxes so they can be drawn on the image.

    Each box is multiplied element-wise by (height, width, height, width),
    where height = image_shape[0] and width = image_shape[1].
    """
    rows, cols = image_shape[0], image_shape[1]
    # Shape (1, 4) so the factors broadcast over every box.
    factors = K.reshape(K.stack([rows, cols, rows, cols]), [1, 4])
    return boxes * factors

In [17]:
def yolo_eval(yolo_outputs, image_shape = (720., 1280.), max_boxes = 10, score_threshold = 0.6, iou_threshold = 0.5):
    """
    Turns the output of the YOLO head into final detections by applying the
    score threshold and then non-maximal suppression.

    Arguments:
    yolo_outputs    -> tuple (box_xy, box_wh, box_confidence, box_class_probs).
    image_shape     -> (height, width) the boxes are scaled to.
    max_boxes       -> maximum number of boxes returned after NMS filtering.
    score_threshold -> boxes whose best class score is below this value are discarded.
    iou_threshold   -> 'Intersection over Union' threshold used for NMS filtering.

    Returns (in this order):
    scores  -> predicted score for each box.
    boxes   -> predicted box coordinates.
    classes -> predicted class for each box.
    """
    xy, wh, confidence, class_probs = yolo_outputs

    # Centre/size form -> corner form, as expected by the filtering functions.
    corners = yolo_boxes_to_corners(xy, wh)

    # Drop low-scoring boxes.
    scores, classes, corners = yolo_filter_boxes(confidence, corners, class_probs, score_threshold)

    # Scale the boxes to the requested image shape.
    corners = scale_boxes(corners, image_shape)

    # Remove overlapping detections of the same object.
    scores, classes, corners = non_max_suppression(scores, classes, corners, max_boxes, iou_threshold)

    return scores, corners, classes

In [18]:
def load_yolo(model_body, class_names, anchors, weights_name='model_data/model.best.h5',):
    """
    Loads saved weights into the YOLO model and builds the evaluation tensors.

    Arguments:
    model_body   -> model returned by create_model.
    class_names  -> list of class names (only its length is used).
    anchors      -> anchor boxes passed on to yolo_head.
    weights_name -> path of the weights file to load.

    Returns:
    scores     -> score tensor of the detected boxes.
    boxes      -> coordinate tensor of the detected boxes.
    classes    -> class index tensor of the detected boxes.
    model_body -> the same model, now holding the loaded weights.
    input_image_shape -> placeholder of shape (2,) for the image shape that
                         the boxes are scaled to.
    """
    model_body.load_weights(weights_name)

    num_classes = len(class_names)
    head_outputs = yolo_head(model_body.output, anchors, num_classes)

    # Fed at prediction time; yolo_eval uses it to scale the boxes.
    input_image_shape = K.placeholder(shape=(2, ))
    detections = yolo_eval(head_outputs, input_image_shape)
    scores, boxes, classes = detections

    return scores, boxes, classes, model_body, input_image_shape

In [19]:
def draw(model_body, scores, boxes, classes, input_image_shape, image_data, image_set = 'val', out_path='data/output/', save_all=True, real_time=False):
    """
    Runs the detector on image_data and draws the predicted bounding boxes.

    Arguments:
    model_body, scores, boxes, classes, input_image_shape: values returned by load_yolo.
    image_data: processed image(s) as returned by process_data.
    image_set:  'real' -> image_data is treated as a single batch (e.g. one video frame),
                'val'  -> only the last 10% of image_data is used,
                'all'  -> every image in image_data is used.
    out_path:   directory the annotated images are written to as '<index>.png';
                it is created on demand.
    save_all:   when False, images without any detection are not saved.
    real_time:  when truthy, the first annotated image is returned instead of
                being saved.

    Returns:
    The annotated image when real_time is set, otherwise None.

    NOTE: the class names passed to draw_boxes are read from the notebook-level
    variable `class_names`, which must be defined before calling this function.
    """
    # Every entry of image_data becomes a batch of one image for the model.
    if image_set == 'real':
        image_data = np.expand_dims(image_data, axis=0)
    elif image_set == 'val':
        image_data = np.array([np.expand_dims(image, axis=0)
            for image in image_data[int(len(image_data)*.9):]])
    elif image_set == 'all':
        image_data = np.array([np.expand_dims(image, axis=0)
            for image in image_data])

    sess = K.get_session()

    for i in range(len(image_data)):
        out_scores, out_boxes, out_classes = sess.run([scores, boxes, classes],
                                                      feed_dict={model_body.input: image_data[i],
                                                                 input_image_shape: [image_data.shape[2], image_data.shape[3]],
                                                                 K.learning_phase():0
                                                                })
        print('Found {} boxes for image'.format(len(out_boxes)))
        print(out_boxes)
        # Draw the predicted boxes, classes and scores onto the image.
        image_with_boxes = draw_boxes(image_data[i][0], out_boxes, out_classes,
                                    class_names, out_scores)

        if real_time:
            return image_with_boxes

        if save_all or (len(out_boxes) > 0):
            # Create the output directory lazily; saving into a missing
            # directory would otherwise raise.
            if not os.path.isdir(out_path):
                os.makedirs(out_path)
            image = PIL.Image.fromarray(image_with_boxes)
            image.save(os.path.join(out_path,str(i)+'.png'))

In [43]:
# Number of leading samples used for training. Images and boxes are matched
# by position, so both must be truncated to the same length.
NUM_TRAIN_SAMPLES = 1000

# Loading the image data and the box coordinates.
# Truncating only the boxes (as before) left every image to be processed and
# produced inputs of different lengths.
# NOTE(review): if boxes.npy stores a ragged object array, NumPy >= 1.16.3
# needs allow_pickle=True to load it - only enable that for files you trust.
image_data = np.load('data/image_data.npy')[:NUM_TRAIN_SAMPLES]
boxes = list(np.load('data/boxes.npy'))[:NUM_TRAIN_SAMPLES]

In [44]:
# Preprocess the image data and box coordinates to be fed to the model.
# NOTE: this cell overwrites its own inputs, so it is not idempotent -
# re-run the loading cell before running it a second time.
image_data, boxes =  process_data(image_data, boxes)

MemoryError: 

In [45]:
# Loading anchors and classes: the default anchors are used directly
# (get_anchors is not called here) and the class names come from data/classes.txt.
anchors= YOLO_ANCHORS
class_names = get_classes('data/classes.txt')

In [46]:
# Extracting detector mask and matching true boxes.
# `boxes` must be the processed boxes returned by process_data; if the
# preprocessing cell failed, `boxes` still holds the raw box list.
detectors_mask, matching_true_boxes = get_detector_mask(boxes, anchors)

IndexError: tuple index out of range

In [47]:
# Creating the yolo model with pre-trained weights (create_model defaults:
# load_pretrained=True, freeze_body=True). This reads model_data/yolo_topless.h5,
# which create_model first builds from model_data/yolo.h5 if it is missing.
model_body, model = create_model(anchors, class_names)

In [48]:
# Uncomment to train the network
# Trains the pre-trained YOLO model on our image dataset of 1000 images.
# train(model, class_names, anchors, image_data, boxes, detectors_mask, matching_true_boxes, epochs=20)

### Predicting on the validation dataset

In [None]:
# Validation set is the last 10% of image_data (selected inside draw via image_set='val').
# NOTE: `boxes` is rebound here from the ground-truth boxes to the predicted-box tensor.
scores, boxes, classes, model_body, input_image_shape = load_yolo(model_body, class_names, anchors)
# Predicting the classes and box coordinates for the validation images;
# with save_all=False only images with at least one detection are saved.
draw(model_body, scores, boxes, classes,input_image_shape, image_data, image_set='val',save_all=False)

### Object Detection on test image data

In [None]:
# Loading the paths of the test images.
# glob returns files in arbitrary order; sorting keeps the index-based output
# file names ('0.png', '1.png', ...) tied to the same input on every run.
test = sorted(glob('data/test/*.jpg'))

In [None]:
# Reading every test image into memory, in the order of the paths in `test`
test_data = [plt.imread(image_path) for image_path in test]

In [None]:
# Processing the test image data (resize to 416x416 and scale to [0, 1]).
# NOTE: `test_data` is overwritten with the processed array, so re-run the
# loading cell before running this cell again.
test_data = process_data(test_data)

In [None]:
# Building the score, box and class tensors for prediction.
# load_yolo reloads the weights from its default path (model_data/model.best.h5).
# NOTE(review): this repeats the load_yolo call of the validation section -
# confirm whether the tensors created there could simply be reused.
scores, boxes, classes, model_body, input_image_shape = load_yolo(model_body, class_names, anchors)

In [None]:
# Drawing the bounding boxes on every test image (image_set='all');
# annotated images with at least one detection are written to data/test/output/.
draw(model_body, scores, boxes, classes,input_image_shape, test_data, image_set='all', out_path='data/test/output/',save_all=False)

# Real-Time Object Detection

In [None]:
# Path of the stored video file used for the frame-by-frame detection demo
videopath = 'data/real_time/bdd-videos-sample.mp4'

In [None]:
# Build the prediction tensors used by the video loop (weights are reloaded
# from load_yolo's default path, model_data/model.best.h5).
scores, boxes, classes, model_body, input_image_shape = load_yolo(model_body, class_names, anchors)

In [None]:
# Open the video file for frame-by-frame reading.
vc = cv2.VideoCapture(videopath)

In [None]:
# Run detection frame by frame until the video ends or 'q' is pressed.
# try/finally guarantees the capture and the window are released even when a
# frame fails to process.
try:
    while True:
        check, frame = vc.read()
        if not check:
            # read() reports failure once no further frame can be grabbed
            # (end of the video or an unreadable file); `frame` is then unusable.
            break
        # NOTE(review): OpenCV decodes frames in BGR order while the test images
        # are read with plt.imread (RGB) - confirm which channel order the
        # model expects and convert here if necessary.
        frame = process_data(np.expand_dims(frame, axis=0))
        img_data = draw(model_body, scores, boxes, classes, input_image_shape, frame, image_set='real', save_all=False, real_time=True)
        img_data = np.array(img_data)
        cv2.imshow('Capture:', img_data)
        # Mask to the low byte so the comparison also works on platforms where
        # waitKey returns extra modifier bits.
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break
finally:
    vc.release()
    cv2.destroyAllWindows()