In [1]:
# Importing the required libraries
import argparse
import matplotlib.pyplot as plt
import numpy as np
import PIL
import tensorflow as tf
from keras import backend as K
from keras.layers import Input, Lambda, Conv2D
from keras.models import load_model, Model
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping
from YAD2K.yad2k.models.keras_yolo import preprocess_true_boxes, yolo_body, yolo_head, yolo_loss, yolo_boxes_to_corners
from YAD2K.yad2k.utils.draw_boxes import draw_boxes
import pandas as pd
import numpy as np
from glob import glob
import os
%matplotlib inline
import cv2

Using TensorFlow backend.


In [19]:
# Loading the labels
# NOTE(review): the CSV path is absolute and machine-specific, and `labels` is not
# referenced by any other cell visible in this notebook - confirm it is still needed.
#PATH = 'C:/FluxAuto/berk_data/images/100k/train/'
labels = pd.read_csv('C:/FluxAuto/berk_data/labels.csv')

In [3]:
# Default anchor boxes: one row per anchor, two values per row (5 x 2 array).
# Used by get_anchors() as the fallback when no anchors file is available.
YOLO_ANCHORS = np.array([
    [0.57273, 0.677385],
    [1.87446, 2.06253],
    [3.33843, 5.47434],
    [7.88282, 3.52778],
    [9.77052, 9.16828],
])

In [4]:
def get_classes(classpath):
    """
    Loads the class names stored in the classes.txt file.

    The file is expected to hold whitespace-separated class names. Splitting
    on any whitespace (instead of splitting only the first line on single
    spaces) keeps a trailing newline from being glued onto the last class
    name, and also reads files that list one class per line.

    Arguments:
    classpath: Path to the text file containing the class names.

    Returns:

    List containing the classes (empty list if the file holds no names).
    """
    with open(classpath) as f:
        return f.read().split()

In [5]:
# Display the class names parsed from the classes file as a quick sanity check.
get_classes('data/classes.txt')

['bike', 'bus', 'car', 'motor', 'person', 'rider', 'train', 'truck']

In [6]:
def get_anchors(anchor_path):
    """
    Loads the anchors from a file

    Arguments:
    anchor_path: Path to a text file whose first line holds the anchor
                 values as a comma-separated list of numbers.

    Returns:
    Array of shape (num_anchors, 2) built from the first line of the file,
    or the default YOLO_ANCHORS (with a warning) when anchor_path is not an
    existing file.
    """
    import warnings

    if not os.path.isfile(anchor_path):
        # warnings.warn actually emits the message; merely instantiating
        # Warning(...) creates an exception object and discards it.
        warnings.warn('Could not open anchors file, using default')
        return YOLO_ANCHORS

    with open(anchor_path) as f:
        first_line = f.readline()
    anchors = [float(x) for x in first_line.split(',')]
    # Consecutive values are paired up: one row per anchor.
    return np.array(anchors).reshape(-1, 2)

In [7]:
def get_boxes(box_path):
    """
    Loads the saved box coordinates

    Arguments:
    box_path: Path to an .npz archive (as written by np.savez).

    Returns:
    The last array stored in the archive, converted to a list of its
    first-axis entries.

    Raises:
    ValueError: if the archive contains no arrays.
    """
    # The archive object keeps the underlying file open; the context manager
    # makes sure it is closed once the array has been read.
    with np.load(box_path) as load_boxes:
        names = load_boxes.files
        if not names:
            raise ValueError('No arrays found in {}'.format(box_path))
        # Equivalent to iterating over every (name, array) pair and keeping
        # the last one, but without iteritems(), a Python 2 era method that
        # NumPy deprecated and later removed from NpzFile.
        return list(load_boxes[names[-1]])

In [8]:
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold=0.6):
    """
    Keeps only the boxes whose best class score reaches the threshold.

    Arguments:
    box_confidence: Probability of each box containing an object.
    boxes: Box coordinates for every candidate box; they are only filtered
           here, never transformed.
    box_class_probs: Probability of all the classes for each box.
    threshold: Minimum (confidence * class probability) a box needs to be kept.

    Returns (in this order):
    scores: best class score of every kept box.
    classes: index of the class that achieved that score.
    boxes: coordinates of the kept boxes.
    """
    # Score of every class for every box = objectness * class probability.
    class_scores = box_confidence * box_class_probs

    # Winning class per box and the score it reached.
    best_class = K.argmax(class_scores)
    best_score = K.max(class_scores, axis=-1)

    # Boolean mask of the boxes that pass the threshold.
    keep = best_score >= threshold

    def select(tensor):
        # Drop every entry whose box did not pass the threshold.
        return tf.boolean_mask(tensor=tensor, mask=keep)

    return select(best_score), select(best_class), select(boxes)

In [9]:
def process_data(images, boxes=None):
    """
    Resizes and normalises the images and, when boxes are given, converts the
    boxes to the layout used for training.

    Arguments:
    images: Sequence of image arrays accepted by PIL.Image.fromarray.
    boxes: Optional list with one array per image. Each array holds
           class, x_min, y_min, x_max, y_max entries (flat or as rows of 5).

    Returns:
    - boxes is None: array of the processed images (416 x 416, divided by 255).
    - otherwise: tuple (processed_images, boxes) where every box row is
      x_center, y_center, box_width, box_height, class with the coordinates
      divided by the size of the FIRST image, and every per-image array is
      zero-padded to the largest box count.
    """
    images = [PIL.Image.fromarray(i) for i in images]

    # Image preprocessing: bicubic resize to 416 x 416, then divide by 255.
    # np.float64 replaces the np.float alias, which newer NumPy releases no
    # longer provide (it was the same 64-bit float type).
    processed_images = [
        np.array(image.resize((416, 416), PIL.Image.BICUBIC), dtype=np.float64) / 255.
        for image in images
    ]

    if boxes is None:
        return np.array(processed_images)

    # Only needed for the boxes; all images are assumed to share the size of
    # the first one.
    orig_size = np.array([images[0].width, images[0].height])
    orig_size = np.expand_dims(orig_size, axis=0)

    # Box preprocessing
    # Original boxes stored as 1D list of class, x_min, y_min, x_max, y_max
    boxes = [box.reshape((-1, 5)) for box in boxes]

    # Box parameters as x_center, y_center, box_width, box_height, class,
    # with centre and size expressed relative to the original image size.
    boxes_xy = [0.5 * (box[:, 3:5] + box[:, 1:3]) / orig_size for box in boxes]
    boxes_wh = [(box[:, 3:5] - box[:, 1:3]) / orig_size for box in boxes]
    boxes = [
        np.concatenate((xy, wh, box[:, 0:1]), axis=-1)
        for xy, wh, box in zip(boxes_xy, boxes_wh, boxes)
    ]

    # Zero-pad every image's boxes up to the largest box count so the result
    # can be stacked into a single array for training.
    max_boxes = max((boxz.shape[0] for boxz in boxes), default=0)
    for i, boxz in enumerate(boxes):
        if boxz.shape[0] < max_boxes:
            zero_padding = np.zeros((max_boxes - boxz.shape[0], 5), dtype=np.float32)
            boxes[i] = np.vstack((boxz, zero_padding))

    return np.array(processed_images), np.array(boxes)

In [10]:
def get_detector_mask(boxes, anchors):
    """
    Builds the per-image training targets by running preprocess_true_boxes on
    every image's boxes (with a fixed 416 x 416 input size).

    Per the original notes: the detectors mask is 1 for each spatial position
    in the final conv layer and anchor that should be active for the given
    boxes and 0 otherwise; matching true boxes holds the regression targets
    for the ground truth box that caused a detector to be active, or 0.

    Returns:
    Tuple (detectors_mask, matching_true_boxes), each stacked over the images.
    """
    masks = []
    targets = []
    for image_boxes in boxes:
        mask, target = preprocess_true_boxes(image_boxes, anchors, [416, 416])
        masks.append(mask)
        targets.append(target)
    return np.array(masks), np.array(targets)


In [11]:
def create_model(anchors, class_names, load_pretrained=True, freeze_body = True):
    """
    Builds the YOLOv2 network with a new output layer plus a training wrapper
    whose output is the YOLO loss.

    Arguments:
    anchors: Anchor array; its length sets the number of anchors per cell.
    class_names: List of class names; its length sets the number of classes.
    load_pretrained: whether or not to load the pretrained model or initialize all weights
    freeze_body: whether or not to freeze all weights except for the last layer's

    Returns:
    model_body : YOLOv2 with new output layer
    model : YOLOv2 with custom loss Lambda layer

    Side effects:
    When load_pretrained is set and model_data/yolo_topless.h5 does not exist,
    it is created from model_data/yolo.h5.
    """
    num_anchors = len(anchors)
    num_classes = len(class_names)

    # The anchor axis follows the anchors actually passed in, so these inputs
    # stay consistent with the output layer below for any anchor count.
    # NOTE(review): 13 x 13 is presumably the output grid for a 416 x 416 input - confirm.
    detector_mask_shape = (13, 13, num_anchors, 1)
    matching_boxes_shape = (13, 13, num_anchors, 5)

    # Create model input layers
    image_input = Input(shape=(416,416,3))
    boxes_input = Input(shape=(None, 5))
    detector_mask_input = Input(shape=detector_mask_shape)
    matching_boxes_input = Input(shape=matching_boxes_shape)

    # Create model body; "topless" = everything up to the second-to-last layer.
    yolo_model = yolo_body(image_input, num_anchors, num_classes)
    topless_yolo = Model(yolo_model.input, yolo_model.layers[-2].output)

    if load_pretrained:
        # Save topless yolo weights once, then reuse the cached file.
        topless_yolo_path = os.path.join('model_data', 'yolo_topless.h5')
        if not os.path.exists(topless_yolo_path):
            print('Creating Topless weights file')
            yolo_path = os.path.join('model_data', 'yolo.h5')
            model_body = load_model(yolo_path)
            model_body = Model(model_body.inputs, model_body.layers[-2].output)
            model_body.save_weights(topless_yolo_path)
        topless_yolo.load_weights(topless_yolo_path)

    if freeze_body:
        for layer in topless_yolo.layers:
            layer.trainable = False

    # New output layer: (5 + num_classes) values for every anchor.
    final_layer = Conv2D(num_anchors*(5 + num_classes), (1, 1), activation='linear')(topless_yolo.output)
    model_body = Model(image_input, final_layer)

    # Place model loss on CPU to reduce GPU memory usage.
    with tf.device('/cpu:0'):
        model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss', arguments={
            'anchors': anchors,
            'num_classes': num_classes})([model_body.output, boxes_input, detector_mask_input, matching_boxes_input])

    model = Model([model_body.input, boxes_input, detector_mask_input, matching_boxes_input], model_loss)
    return model_body, model

In [12]:
def train(model, class_name, anchors, image_data, boxes, detectors_mask, matching_true_boxes, validation_split=0.1, epochs = 5):
    """
    Trains the model and saves the weights with the lowest loss value.

    Arguments:
    model: Training model whose output is the loss (layer named 'yolo_loss').
    class_name, anchors: Not used by this function; kept so existing calls keep working.
    image_data, boxes, detectors_mask, matching_true_boxes: Model inputs.
    validation_split: Fraction of the samples held out for validation.
    epochs: Maximum number of epochs.

    Side effects:
    Writes model_data/model.best.h5 and TensorBoard logs.
    """
    best_weights_path = 'model_data/model.best.h5'

    # The model already outputs the loss, so the Keras loss just passes it through.
    model.compile(optimizer='adam', loss={'yolo_loss': lambda y_true, y_pred: y_pred})

    logging = TensorBoard()
    checkpoint = ModelCheckpoint(best_weights_path, monitor='val_loss', save_weights_only=True, save_best_only=True)
    earlystopping = EarlyStopping(monitor='val_loss', patience=15, verbose=1)

    # Targets are dummies (zeros); see the pass-through loss above.
    model.fit([image_data, boxes, detectors_mask, matching_true_boxes], np.zeros(len(image_data)), validation_split=validation_split, batch_size=16, epochs=epochs, callbacks=[logging, checkpoint, earlystopping])

    # Do not overwrite the best checkpoint with the last-epoch weights.
    # The final weights are written only as a fallback, when no validation
    # data was used or the checkpoint file was never produced.
    if not validation_split or not os.path.exists(best_weights_path):
        model.save_weights(best_weights_path)

In [13]:
# NOTE(review): this cell repeats the yolo_filter_boxes definition from an
# earlier cell of the notebook; this later definition is the one in effect.
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold=0.6):
    """
    Filters YOLO boxes by thresholding on object and class confidence.

    Arguments:
    box_confidence: Probability of the box containing the object.
    boxes: Box coordinates of every candidate box (filtered, not modified).
    box_class_probs: Probability of all the classes for each box.
    threshold: Threshold value for the box score.

    Returns (in this order):
    scores: class score of each selected box.
    classes: index of the class detected by each selected box.
    boxes: box coordinates of the selected boxes.
    """
    # Box score of every class: confidence times class probability.
    box_scores = box_confidence * box_class_probs

    # Index of the best class for each box, and the score of that class.
    top_class_idx, top_class_score = K.argmax(box_scores), K.max(box_scores, axis=-1)

    # True where the best score reaches the threshold.
    passes = top_class_score >= threshold

    # Apply the same mask to scores, classes and boxes (in that order).
    filtered = [
        tf.boolean_mask(tensor=candidate, mask=passes)
        for candidate in (top_class_score, top_class_idx, boxes)
    ]
    return tuple(filtered)

In [14]:
def non_max_suppression(scores, classes, boxes, max_boxes=10, iou_threshold = 0.5):
    """
    Removes multiple detections of the same object with non-maximal suppression.

    scores        -> class score of each box left after threshold masking.
    classes       -> class index of each of those boxes.
    boxes         -> coordinates of each of those boxes.
    max_boxes     -> maximum number of boxes to return after NMS filtering.
    iou_threshold -> IoU threshold handed to tf.image.non_max_suppression.

    Returns:
    scores  -> predicted score for each kept box.
    classes -> predicted class for each kept box.
    boxes   -> predicted box coordinates.
    """
    # The box cap is handed to TensorFlow as an initialised int32 variable.
    max_boxes_tensor = K.variable(max_boxes, dtype='int32')
    K.get_session().run(tf.variables_initializer([max_boxes_tensor]))

    # Indices of the boxes that survive suppression.
    keep = tf.image.non_max_suppression(boxes=boxes, scores=scores, max_output_size=max_boxes_tensor, iou_threshold=iou_threshold)

    # Pick the surviving entries out of each tensor.
    return tf.gather(scores, keep), tf.gather(classes, keep), tf.gather(boxes, keep)

In [27]:
def scale_boxes(boxes, image_shape):
    """
    Scales the predicted boxes in order to be drawable on the image.

    Every box row is multiplied element-wise by
    (height, width, height, width), taken from image_shape[0] and image_shape[1].
    """
    height, width = image_shape[0], image_shape[1]
    # Shape [1, 4] so the factors broadcast over all box rows.
    image_dims = K.reshape(K.stack([height, width, height, width]), [1, 4])
    return boxes * image_dims

In [15]:
def yolo_eval(yolo_outputs, image_shape = (720., 1280.), max_boxes = 10, score_threshold = 0.6, iou_threshold = 0.5):
    """
    Turns the output of the YOLO encoding/model into final detections by
    applying the score threshold and then non-maximal suppression.

    Arguments:
    yolo_outputs    -> Tuple (box_xy, box_wh, box_confidence, box_class_probs).
    image_shape     -> (height, width) the boxes are scaled to.
    max_boxes       -> Maximum number of predicted boxes to be returned after NMS filtering.
    score_threshold -> Boxes whose best class score is below this value are discarded.
    iou_threshold   -> 'Intersection over Union' threshold used for NMS filtering.

    Returns (in this order):
    scores  -> predicted score for each box.
    boxes   -> predicted box coordinates.
    classes -> predicted class for each box.
    """
    box_xy, box_wh, box_confidence, box_class_probs = yolo_outputs

    # Centre/size representation -> corner representation for the filters.
    corners = yolo_boxes_to_corners(box_xy, box_wh)

    # Step 1: drop low-scoring boxes.
    scores, classes, kept = yolo_filter_boxes(box_confidence, corners, box_class_probs, score_threshold)

    # Step 2: scale the remaining boxes to the requested image shape.
    scaled = scale_boxes(kept, image_shape)

    # Step 3: non-max suppression.
    scores, classes, final_boxes = non_max_suppression(scores, classes, scaled, max_boxes, iou_threshold)

    return scores, final_boxes, classes

In [16]:
def load_yolo(model_body, class_names, anchors, weights_name='model_data/model.best.h5',):
    """
    Loads saved weights into the yolo model and builds the prediction tensors.

    Returns (in this order):
    scores     -> class score of each predicted box.
    boxes      -> coordinates of each predicted box.
    classes    -> class index of each predicted box.
    model_body -> the yolo model with the saved weights loaded.
    input_image_shape -> placeholder of shape (2,) for the shape of the input image.
    """
    model_body.load_weights(weights_name)

    head_outputs = yolo_head(model_body.output, anchors, len(class_names))
    # Fed at prediction time with the shape the boxes should be scaled to.
    shape_placeholder = K.placeholder(shape=(2, ))

    detections = yolo_eval(head_outputs, shape_placeholder)
    return detections + (model_body, shape_placeholder)

In [17]:
def draw(model_body, scores, boxes, classes, input_image_shape, image_data, image_set = 'val', out_path='data/output/', save_all=True, real_time=False):
    """
    Draw the predicted bounding boxes on the image data

    Arguments:
    model_body: Model whose input the images are fed to.
    scores, boxes, classes: Prediction tensors evaluated for every image.
    input_image_shape: Placeholder fed with the image height and width.
    image_data: Processed images.
    image_set: 'val'  -> only the last 10% of image_data is used,
               'all'  -> every image is used,
               'real' -> image_data gets one extra leading axis and is used as is.
    out_path: Directory the annotated images are saved to as '<index>.png'.
    save_all: Save every image; when False only images with at least one box.
    real_time: Return the first annotated image instead of saving anything.

    Returns:
    The annotated image of the first input when real_time is True, otherwise None.

    NOTE: the class names are read from the notebook-level variable
    `class_names`, which must be defined before this function is called.
    """
    if image_set == 'real':
        image_data = np.expand_dims(image_data, axis=0)

    if image_set == 'val':
        image_data = np.array([np.expand_dims(image, axis=0)
            for image in image_data[int(len(image_data)*.9):]])

    elif image_set == 'all':
        image_data = np.array([np.expand_dims(image, axis=0)
            for image in image_data])

    sess = K.get_session()

    for i in range(len(image_data)):
        out_scores, out_boxes, out_classes = sess.run([scores, boxes, classes],
                                                      feed_dict={model_body.input: image_data[i],
                                                                 input_image_shape: [image_data.shape[2], image_data.shape[3]],
                                                                 K.learning_phase():0
                                                                })
        print('Found {} boxes for image'.format(len(out_boxes)))
        print(out_boxes)
        # Draw the predicted boxes, class names and scores on the image
        image_with_boxes = draw_boxes(image_data[i][0], out_boxes, out_classes,
                                    class_names, out_scores)

        if real_time == True:
            return image_with_boxes

        elif save_all or (len(out_boxes) > 0):
            # Create the output directory only when something is actually
            # saved, so real-time use never touches the file system and a
            # missing directory no longer makes the save fail.
            os.makedirs(out_path, exist_ok=True)
            image = PIL.Image.fromarray(image_with_boxes)
            image.save(os.path.join(out_path,str(i)+'.png'))

In [18]:
# Loading the image data and the box coordinates
image_data = np.load('data/image_data.npy')
boxes = np.load('data/boxes.npy')
boxes = list(boxes)
# Selecting the first 1000 images together with their box coordinates.
# Both are truncated the same way so that image i always matches boxes[i].
image_data = image_data[:1000]
boxes = boxes[:1000]

In [22]:
# Preprocess the image data and box coordinates to be fed to the model
# NOTE: this rebinds image_data and boxes to their processed versions; re-running the
# cell would feed already-processed data back into process_data, so reload the raw
# arrays first.
image_data, boxes =  process_data(image_data, boxes)

In [23]:
# Loading anchors and classes
# The default anchors are used directly (get_anchors is not called here).
# class_names is also read as a notebook-level variable inside draw().
anchors= YOLO_ANCHORS
class_names = get_classes('data/classes.txt')

In [24]:
# Extracting detector mask and matching true boxes
# (training targets computed per image from the processed boxes and the anchors)
detectors_mask, matching_true_boxes = get_detector_mask(boxes, anchors)

In [25]:
# Creating the yolo model with pre-trained weights
# model_body -> network with the new output layer (used for prediction)
# model      -> training wrapper whose output is the yolo loss
# Defaults apply: load_pretrained=True, freeze_body=True.
model_body, model = create_model(anchors, class_names)

In [26]:
# Uncomment to train the network
# Trains the pre-trained YOLO model on our image dataset of 1000 images.
# train(model, class_names, anchors, image_data, boxes, detectors_mask, matching_true_boxes, epochs=20)

Train on 900 samples, validate on 100 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


### Predicting on the validation dataset

In [28]:
# Validation set is the last 10% of image_data (selected inside draw() via image_set='val')
# NOTE: `boxes` is rebound here from the training box array to the predicted-box tensor.
scores, boxes, classes, model_body, input_image_shape = load_yolo(model_body, class_names, anchors)
# Predicting the classes and box coordinates for the input image
# With save_all=False only images with at least one detected box are saved (default out_path).
draw(model_body, scores, boxes, classes,input_image_shape, image_data, image_set='val',save_all=False)

Found 10 boxes for image
[[  60.32064819   99.14643097   91.3787384   122.12823486]
 [  52.26750565  263.3788147    62.58306885  274.50674438]
 [  53.013237    230.26971436   63.08942413  239.21534729]
 [  63.0400238    73.10009003   80.92043304   85.21637726]
 [  62.64461136   43.70405197   81.20243073   59.07641602]
 [  55.64930725  169.48071289   65.19117737  176.00042725]
 [  51.77373886  199.23583984   60.78482819  206.52658081]
 [  55.75138474  265.81900024   76.56145477  281.29394531]
 [  54.07904816  140.91677856   65.76027679  149.54672241]
 [  64.2984848    -8.33102608  136.17704773   27.68081856]]
car 0.96 (99, 60) (122, 91)
car 0.89 (263, 52) (275, 63)
car 0.87 (230, 53) (239, 63)
car 0.84 (73, 63) (85, 81)
car 0.82 (44, 63) (59, 81)
car 0.78 (169, 56) (176, 65)
car 0.78 (199, 52) (207, 61)
car 0.73 (266, 56) (281, 77)
car 0.71 (141, 54) (150, 66)
car 0.68 (0, 64) (28, 136)
Found 10 boxes for image
[[ 182.58631897  231.14955139  220.64202881  257.2409668 ]
 [ 179.59642029  

Found 10 boxes for image
[[ 111.36930847  121.70019531  302.66479492  243.21902466]
 [ 131.63168335  261.66680908  233.33016968  354.81210327]
 [ 149.99452209   75.5063324   180.13314819   94.20606995]
 [ 153.10548401  101.30319214  186.5112915   120.27676392]
 [ 137.03779602  225.14375305  198.38685608  266.60269165]
 [ 139.29512024  228.45381165  165.21038818  249.31652832]
 [ 140.19960022  256.64614868  161.98709106  271.90231323]
 [ 145.13034058  107.54278564  164.77171326  120.77571106]
 [ 141.0536499    93.77857208  203.31855774  132.47348022]
 [ 151.68740845  226.16651917  180.38587952  248.20697021]]
car 0.99 (122, 111) (243, 303)
car 0.95 (262, 132) (355, 233)
car 0.94 (76, 150) (94, 180)
car 0.87 (101, 153) (120, 187)
car 0.87 (225, 137) (267, 198)
car 0.87 (228, 139) (249, 165)
car 0.80 (257, 140) (272, 162)
car 0.74 (108, 145) (121, 165)
car 0.71 (94, 141) (132, 203)
car 0.67 (226, 152) (248, 180)
Found 8 boxes for image
[[ 123.03779602    4.35627747  308.90795898  108.7546

Found 10 boxes for image
[[ 164.91120911  166.16259766  200.52366638  195.8062439 ]
 [ 188.39839172    6.80933142  202.8679657    15.93231392]
 [ 172.89901733  141.90849304  197.16781616  159.04959106]
 [ 160.43359375  222.89112854  185.85639954  241.7175293 ]
 [ 181.68264771   40.48762512  194.02027893   48.87094116]
 [ 146.12808228  316.36071777  186.16349792  392.00921631]
 [ 160.77154541  206.5615387   183.40184021  220.39382935]
 [ 156.13754272  287.66131592  190.11987305  298.88702393]
 [ 182.51234436   12.1380558   192.2250824    19.14156342]
 [ 178.21240234   78.96287537  195.62312317   88.80329132]]
car 0.91 (166, 165) (196, 201)
car 0.89 (7, 188) (16, 203)
car 0.86 (142, 173) (159, 197)
car 0.85 (223, 160) (242, 186)
car 0.80 (40, 182) (49, 194)
car 0.73 (316, 146) (392, 186)
car 0.70 (207, 161) (220, 183)
person 0.68 (288, 156) (299, 190)
car 0.66 (12, 183) (19, 192)
car 0.63 (79, 178) (89, 196)
Found 3 boxes for image
[[ 158.9385376   138.3221283   176.74963379  151.9179840

Found 10 boxes for image
[[ 144.29078674    2.65441442  348.33099365   90.03051758]
 [ 211.26150513  133.30979919  241.63131714  157.35266113]
 [ 197.7440033   263.14450073  265.13613892  301.74343872]
 [ 209.23492432  169.90682983  228.70878601  180.12013245]
 [ 180.53479004  303.87890625  297.49304199  376.75665283]
 [ 216.99031067  166.50151062  235.28862     180.01069641]
 [ 204.33525085  222.49987793  235.27134705  241.72787476]
 [ 208.21629333  138.1260376   225.86392212  149.63183594]
 [ 200.91064453  185.71081543  252.13728333  224.83319092]
 [ 189.35223389  278.20849609  285.72665405  319.3510437 ]]
car 0.99 (3, 144) (90, 348)
car 0.97 (133, 211) (157, 242)
car 0.95 (263, 198) (302, 265)
car 0.93 (170, 209) (180, 229)
car 0.89 (304, 181) (377, 297)
car 0.88 (167, 217) (180, 235)
car 0.88 (222, 204) (242, 235)
car 0.86 (138, 208) (150, 226)
car 0.84 (186, 201) (225, 252)
car 0.80 (278, 189) (319, 286)
Found 7 boxes for image
[[ 164.60601807  230.91372681  185.4468689   246.3349

Found 7 boxes for image
[[ 157.2063446   234.33111572  173.05830383  245.15292358]
 [ 162.5750885   201.1449585   174.22346497  208.92405701]
 [ 152.92677307  258.23446655  186.59564209  283.15658569]
 [ 155.04440308  164.10910034  194.39550781  189.73394775]
 [ 148.50311279   83.22476959  210.84439087   92.40074158]
 [ 154.12489319   97.53392029  200.90664673  104.09986115]
 [ 162.65068054  132.54281616  191.11094666  141.90565491]]
car 0.98 (234, 157) (245, 173)
car 0.98 (201, 163) (209, 174)
car 0.97 (258, 153) (283, 187)
car 0.95 (164, 155) (190, 194)
person 0.74 (83, 149) (92, 211)
person 0.72 (98, 154) (104, 201)
person 0.72 (133, 163) (142, 191)
Found 9 boxes for image
[[ 161.11187744   99.72720337  182.96691895  122.42305756]
 [ 168.45503235   67.86651611  191.8120575    97.00830078]
 [ 153.50344849   62.26474762  200.38748169  110.54155731]
 [ 157.72172546  136.55856323  180.62358093  161.26463318]
 [ 158.84944153  160.30482483  180.59693909  187.97616577]
 [ 148.26554871  196

Found 10 boxes for image
[[ 190.5929718   280.56634521  382.48226929  396.35961914]
 [ 194.09562683   -8.32554054  341.6277771   128.26130676]
 [ 224.01119995  155.66824341  253.99316406  171.72865295]
 [ 227.55192566  214.48983765  249.71862793  226.91387939]
 [ 211.87832642  228.86590576  273.64880371  280.2510376 ]
 [ 222.01954651  112.27218628  270.16760254  155.74993896]
 [ 227.95941162  220.53804016  250.12825012  232.84301758]
 [ 221.72554016  138.50619507  249.51116943  153.70652771]
 [ 221.43807983  107.83785248  243.12683105  122.48980713]
 [ 209.94163513  170.79370117  284.53295898  215.53344727]]
car 1.00 (281, 191) (396, 382)
car 0.97 (0, 194) (128, 342)
car 0.96 (156, 224) (172, 254)
car 0.95 (214, 228) (227, 250)
car 0.93 (229, 212) (280, 274)
car 0.92 (112, 222) (156, 270)
car 0.90 (221, 228) (233, 250)
car 0.85 (139, 222) (154, 250)
car 0.76 (108, 221) (122, 243)
car 0.73 (171, 210) (216, 285)
Found 10 boxes for image
[[ 231.68464661  210.75349426  283.54589844  261.78

Found 9 boxes for image
[[ 177.40539551    7.14632034  366.85635376   92.47740173]
 [ 205.31814575   88.68843842  276.99200439  127.40983582]
 [ 214.84545898  131.15872192  255.15213013  146.19223022]
 [ 197.15879822  290.8770752   241.37963867  302.28604126]
 [ 212.36805725  130.96237183  230.04266357  138.98486328]
 [ 205.52934265   55.57021713  282.33282471   99.10425568]
 [ 213.90283203  102.54431915  253.0763855   122.49971008]
 [ 180.87419128  130.67993164  348.51171875  236.14581299]
 [ 206.01106262  324.20925903  231.7795105   334.22467041]]
car 0.99 (7, 177) (92, 367)
car 0.98 (89, 205) (127, 277)
car 0.94 (131, 215) (146, 255)
person 0.79 (291, 197) (302, 241)
car 0.75 (131, 212) (139, 230)
car 0.72 (56, 206) (99, 282)
car 0.68 (103, 214) (122, 253)
car 0.68 (131, 181) (236, 349)
person 0.67 (324, 206) (334, 232)
Found 9 boxes for image
[[ 211.36422729  330.31356812  227.88395691  339.99707031]
 [ 215.82769775  403.62142944  241.24009705  413.05581665]
 [ 193.48040771   43.50

Found 7 boxes for image
[[ 232.29084778   -2.66301394  396.18469238   32.54391479]
 [ 253.49151611  170.66621399  264.7901001   176.21612549]
 [ 246.79881287  214.69432068  299.33413696  255.82366943]
 [ 247.82933044  200.92251587  271.51959229  214.19075012]
 [ 245.51095581  178.09487915  260.75738525  187.29608154]
 [ 248.51269531  141.37635803  280.66912842  156.37857056]
 [ 243.64826965  195.94821167  262.49356079  207.04240417]]
car 0.96 (0, 232) (33, 396)
car 0.95 (171, 253) (176, 265)
car 0.94 (215, 247) (256, 299)
car 0.93 (201, 248) (214, 272)
car 0.92 (178, 246) (187, 261)
car 0.92 (141, 249) (156, 281)
car 0.91 (196, 244) (207, 262)
Found 8 boxes for image
[[ 208.99139404  174.30099487  277.32754517  229.60394287]
 [ 219.3690033   228.4758606   249.83303833  251.92330933]
 [ 150.11968994  281.97686768  344.77368164  400.08740234]
 [ 221.60163879  202.23982239  249.25094604  220.82769775]
 [ 221.08564758  166.92088318  241.83627319  185.88795471]
 [ 207.2098999   232.34309387

### Object Detection on test image data

In [29]:
# Loading the paths of the test data
# glob returns matches in arbitrary order; sorting keeps the image order (and
# therefore the index-based output file names written by draw) reproducible.
test = sorted(glob('data/test/*.jpg'))

In [30]:
# Read every test image from disk into a list of arrays, in the order of `test`.
test_data = [plt.imread(image_path) for image_path in test]

In [31]:
# Processing the test image data
# No boxes are passed, so only the processed image array is returned.
# NOTE: test_data is rebound from the raw images to the processed array.
test_data = process_data(test_data)

In [32]:
# Building the score, box and class prediction tensors for the given input image
# (reloads the default weights file and returns a new input-shape placeholder)
scores, boxes, classes, model_body, input_image_shape = load_yolo(model_body, class_names, anchors)

In [33]:
# Drawing the bounding boxes
# image_set='all' runs every test image; with save_all=False only images with at
# least one detected box are written to out_path as '<index>.png'.
draw(model_body, scores, boxes, classes,input_image_shape, test_data, image_set='all', out_path='data/test/output/',save_all=False)

Found 9 boxes for image
[[ 205.9680481   263.85797119  321.08343506  392.87295532]
 [ 233.86581421  137.02624512  267.10501099  162.32997131]
 [ 243.09080505   26.72019196  275.00042725   58.79608154]
 [ 231.84410095  160.03601074  258.29504395  171.17120361]
 [ 246.80488586    2.94693804  287.24804688   19.74625587]
 [ 237.28416443   66.43199921  266.17175293   83.55331421]
 [ 186.70582581  166.59449768  308.47869873  244.72096252]
 [ 202.74201965   78.27429962  276.9982605   126.54659271]
 [ 232.33847046  110.18444824  256.07312012  123.31916809]]
car 0.99 (264, 206) (393, 321)
car 0.90 (137, 234) (162, 267)
car 0.86 (27, 243) (59, 275)
car 0.84 (160, 232) (171, 258)
car 0.83 (3, 247) (20, 287)
car 0.75 (66, 237) (84, 266)
bus 0.73 (167, 187) (245, 308)
bus 0.61 (78, 203) (127, 277)
car 0.60 (110, 232) (123, 256)
Found 9 boxes for image
[[ 219.71142578    9.19717407  350.81610107  111.53739929]
 [ 236.18609619  147.30355835  254.57933044  161.63568115]
 [ 224.26185608   95.08708191  

# Real-Time Object Detection

In [34]:
# Path of the stored video file (relative to the notebook's working directory)
videopath = 'data/real_time/bdd-videos-sample.mp4'

In [35]:
# Build the prediction tensors for the video frames (same call as for the test images).
scores, boxes, classes, model_body, input_image_shape = load_yolo(model_body, class_names, anchors)

In [36]:
# Open the video file; frames are read from `vc` one at a time in the next cell.
vc = cv2.VideoCapture(videopath)

In [37]:
# Run detection frame by frame until the video ends or 'q' is pressed.
# NOTE(review): OpenCV delivers and displays frames in BGR channel order; confirm
# this matches the channel order of the images the network was trained on.
try:
    while True:
        check, frame = vc.read()
        if not check:
            # No frame was returned (end of the video or a read failure):
            # stop instead of passing None on to the preprocessing.
            break
        frame = process_data(np.expand_dims(frame, axis=0))
        img_data = draw(model_body, scores, boxes, classes, input_image_shape, frame, image_set='real', save_all=False, real_time=True)
        img_data = np.array(img_data)
        cv2.imshow('Capture:', img_data)
        # Mask to the low byte so the comparison with ord('q') is reliable.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
finally:
    # Always free the capture and close the window, even if a frame fails.
    vc.release()
    cv2.destroyAllWindows()

Found 10 boxes for image
[[ 171.47224426  239.72506714  217.68223572  278.42980957]
 [ 155.60971069  337.54464722  251.21801758  414.41625977]
 [ 177.896698    222.65484619  198.31126404  236.80918884]
 [ 176.60140991  314.93188477  203.09346008  336.19827271]
 [ 170.00369263  293.17480469  203.95294189  315.16094971]
 [ 182.00579834  203.77552795  204.88439941  222.40917969]
 [ 179.21929932  297.22427368  206.85287476  311.63348389]
 [ 177.24362183  204.03887939  193.48596191  219.86094666]
 [ 169.01531982    2.65679026  199.73547363   17.22865868]
 [ 174.94244385  136.46487427  191.9602356   152.26609802]]
car 0.96 (240, 171) (278, 218)
car 0.95 (338, 156) (414, 251)
car 0.94 (223, 178) (237, 198)
car 0.89 (315, 177) (336, 203)
car 0.88 (293, 170) (315, 204)
car 0.88 (204, 182) (222, 205)
car 0.87 (297, 179) (312, 207)
car 0.86 (204, 177) (220, 193)
car 0.83 (3, 169) (17, 200)
car 0.81 (136, 175) (152, 192)
Found 10 boxes for image
[[ 171.58901978  239.730896    217.62106323  278.427

Found 10 boxes for image
[[ 186.16943359  209.4634552   203.29533386  221.47024536]
 [ 188.23905945  230.19866943  204.9624176   239.57902527]
 [ 178.2646637   135.98696899  194.25437927  150.57058716]
 [ 178.51673889  106.97689819  196.39460754  123.32501984]
 [ 161.97399902  349.80752563  258.19973755  410.70889282]
 [ 180.80415344  296.33676147  213.15322876  315.97015381]
 [ 182.1734314   265.16793823  216.31678772  279.08779907]
 [ 180.09411621   65.08857727  194.78730774   81.6505661 ]
 [ 179.6113739   241.842453    222.01451111  276.27676392]
 [ 179.85972595   48.64486694  193.49894714   65.91539001]]
car 0.98 (209, 186) (221, 203)
car 0.97 (230, 188) (240, 205)
car 0.96 (136, 178) (151, 194)
car 0.95 (107, 179) (123, 196)
car 0.94 (350, 162) (411, 258)
car 0.93 (296, 181) (316, 213)
car 0.91 (265, 182) (279, 216)
car 0.90 (65, 180) (82, 195)
car 0.89 (242, 180) (276, 222)
car 0.86 (49, 180) (66, 193)
Found 10 boxes for image
[[ 185.32411194  210.58952332  203.19796753  222.5293

Found 10 boxes for image
[[ 176.00163269  134.16932678  190.84632874  146.52546692]
 [ 175.57943726   98.35259247  196.39082336  116.79312897]
 [ 176.06903076  171.81954956  187.03373718  180.04151917]
 [ 180.7796936   228.69656372  196.48916626  236.20127869]
 [ 185.37408447  234.12719727  204.02500916  244.16418457]
 [ 177.07226562  205.43569946  193.42370605  212.26785278]
 [ 183.19441223  211.36512756  202.8011322   224.58982849]
 [ 181.38522339  302.36373901  208.59132385  318.48153687]
 [ 175.30993652   58.27481842  198.62243652   91.05949402]
 [ 178.75090027  263.88265991  216.03129578  279.07730103]]
car 0.97 (134, 176) (147, 191)
car 0.96 (98, 176) (117, 196)
car 0.94 (172, 176) (180, 187)
car 0.94 (229, 181) (236, 196)
car 0.94 (234, 185) (244, 204)
car 0.94 (205, 177) (212, 193)
car 0.92 (211, 183) (225, 203)
car 0.91 (302, 181) (318, 209)
car 0.91 (58, 175) (91, 199)
car 0.90 (264, 179) (279, 216)
Found 10 boxes for image
[[ 187.10169983  233.53970337  202.45439148  242.419

Found 10 boxes for image
[[ 184.65950012  208.49317932  203.02178955  223.9498291 ]
 [ 190.39227295  230.47576904  204.13078308  239.9382782 ]
 [ 178.39666748  134.64619446  193.6114502   149.82533264]
 [ 187.13224792  270.00979614  212.27906799  283.04537964]
 [ 179.31770325  102.11849976  194.34233093  114.79924774]
 [ 179.90969849   68.82881927  197.68891907   86.51477051]
 [ 185.68423462  319.34564209  211.04283142  341.91461182]
 [ 172.34088135  373.27597046  265.42837524  417.47186279]
 [ 174.74890137   -3.20906258  200.323349     16.83301353]
 [ 187.06922913  305.33700562  205.24952698  316.88528442]]
car 0.98 (208, 185) (224, 203)
car 0.98 (230, 190) (240, 204)
car 0.94 (135, 178) (150, 194)
car 0.93 (270, 187) (283, 212)
car 0.91 (102, 179) (115, 194)
car 0.91 (69, 180) (87, 198)
car 0.90 (319, 186) (342, 211)
car 0.88 (373, 172) (416, 265)
car 0.84 (0, 175) (17, 200)
car 0.84 (305, 187) (317, 205)
Found 10 boxes for image
[[ 184.65249634  208.39804077  202.89692688  223.43504

Found 10 boxes for image
[[ 195.22906494  200.13497925  223.55854797  217.38691711]
 [ 189.32550049  166.9634552   223.48495483  184.63827515]
 [ 191.87516785   16.61228943  251.5403595    65.93117523]
 [ 196.61729431  229.41259766  222.47407532  245.44525146]
 [ 181.1824646   121.40544891  252.07304382  164.2441864 ]
 [ 191.79391479  137.20721436  216.30773926  152.57746887]
 [ 194.28038025  183.47940063  240.71716309  218.47564697]
 [ 205.16036987  165.74331665  256.24490356  186.33178711]
 [ 189.07279968   -0.31923318  269.72177124   52.1125946 ]
 [ 188.15409851   74.35301208  220.08164978   90.57528687]]
car 0.92 (200, 195) (217, 224)
car 0.90 (167, 189) (185, 223)
car 0.88 (17, 192) (66, 252)
car 0.83 (229, 197) (245, 222)
car 0.80 (121, 181) (164, 252)
car 0.70 (137, 192) (153, 216)
car 0.67 (183, 194) (218, 241)
car 0.65 (166, 205) (186, 256)
car 0.64 (0, 189) (52, 270)
car 0.62 (74, 188) (91, 220)
Found 10 boxes for image
[[ 195.25382996  200.8221283   222.89996338  218.3491516

car 0.98 (0, 192) (58, 265)
car 0.91 (113, 175) (158, 263)
car 0.88 (176, 189) (228, 270)
car 0.86 (164, 192) (183, 221)
car 0.85 (200, 196) (217, 223)
car 0.79 (230, 196) (245, 222)
car 0.62 (101, 195) (116, 219)
Found 7 boxes for image
[[ 188.44190979   -2.37663937  268.17651367   57.26064301]
 [ 174.842453    113.06038666  263.30551147  157.26608276]
 [ 187.42370605  174.4894104   271.41790771  229.65692139]
 [ 192.28610229  164.18023682  219.70339966  181.78320312]
 [ 196.08309937  200.79501343  221.57081604  216.59690857]
 [ 195.19442749  230.06124878  221.01629639  244.82476807]
 [ 195.26609802  104.57955933  219.35293579  119.02422333]]
car 0.97 (0, 188) (57, 268)
car 0.91 (113, 175) (157, 263)
car 0.90 (174, 187) (230, 271)
car 0.87 (164, 192) (182, 220)
car 0.81 (201, 196) (217, 222)
car 0.77 (230, 195) (245, 221)
car 0.65 (105, 195) (119, 219)
Found 7 boxes for image
[[ 188.53977966   -2.24518728  266.71795654   53.61289978]
 [ 173.35125732  110.68430328  266.36914062  156.85