In [1]:
from keras.layers import Bidirectional, Concatenate, Permute, Dot, Input, LSTM, Multiply
from keras.layers import RepeatVector, Dense, Activation, Lambda
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.models import load_model, Model
from keras.activations import softmax
import keras.backend as K
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from faker import Faker
import random
from tqdm import tqdm
from babel.dates import format_date
import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [2]:
#m is no. of training examples, each example has a sequence of Tx inputs
Tx = 10
Ty = 10
frame_vals_len = 1805

In [3]:
repeator = RepeatVector(Tx)
concatenator = Concatenate(axis=-1)
densor = Dense(1, activation = "relu")
activator = Activation(softmax,name='attention_weights') # We are using a custom softmax(axis = 1) loaded in this notebook
dotor = Dot(axes = 1)
repeator = RepeatVector(Tx)
concatenator = Concatenate(axis=-1)
densor = Dense(1, activation = "relu")
activator = Activation(softmax, name='attention_weights') # We are using a custom softmax(axis = 1) loaded in this notebook
dotor = Dot(axes = 1)

In [4]:
def one_step_attention(a, s_prev):
    """
    Performs one step of attention: Outputs a context vector computed as a dot product of the attention weights
    "alphas" and the hidden states "a" of the Bi-LSTM.
    
    Arguments:
    a -- hidden state output of the Bi-LSTM, numpy-array of shape (m, Tx, 2*n_a)
    s_prev -- previous hidden state of the (post-attention) LSTM, numpy-array of shape (m, n_s)
    
    Returns:
    context -- context vector, input of the next (post-attetion) LSTM cell
    """
    
    ### START CODE HERE ###
    # Use repeator to repeat s_prev to be of shape (m, Tx, n_s) so that you can concatenate it with all hidden states "a" (≈ 1 line)
    s_prev = repeator(s_prev)
    # Use concatenator to concatenate a and s_prev on the last axis (≈ 1 line)
    concat = concatenator([a, s_prev])
    # Use densor to propagate concat through a small fully-connected neural network to compute the "energies" variable e. (≈1 lines)
    e = densor(concat)
    # Use activator and e to compute the attention weights "alphas" (≈ 1 line)
    alphas = activator(e)
    # Use dotor together with "alphas" and "a" to compute the context vector to be given to the next (post-attention) LSTM-cell (≈ 1 line)
    context = dotor([alphas, a])
    ### END CODE HERE ###
    
    return context

In [5]:
n_a = 64
n_s = 128
post_activation_LSTM_cell = LSTM(n_s, return_state = True)
output_layer = Dense(frame_vals_len, activation=softmax)

In [6]:
def model(Tx, Ty, n_a, n_s, frame_vals_len):
    """
    Arguments:
    Tx -- length of the input sequence
    Ty -- length of the output sequence
    n_a -- hidden state size of the Bi-LSTM
    n_s -- hidden state size of the post-attention LSTM
    human_vocab_size -- size of the python dictionary "human_vocab"
    machine_vocab_size -- size of the python dictionary "machine_vocab"

    Returns:
    model -- Keras model instance
    """
    
    # Define the inputs of your model with a shape (Tx,)
    # Define s0 and c0, initial hidden state for the decoder LSTM of shape (n_s,)
    X = Input(shape=(Tx, frame_vals_len))
    s0 = Input(shape=(n_s,), name='s0')
    c0 = Input(shape=(n_s,), name='c0')
    s = s0
    c = c0
    
    # Initialize empty list of outputs
    outputs = []
    
    ### START CODE HERE ###
    
    # Step 1: Define your pre-attention Bi-LSTM. Remember to use return_sequences=True. (≈ 1 line)
    a = Bidirectional(LSTM(n_a, return_sequences=True))(X)
    
    # Step 2: Iterate for Ty steps
    for t in range(Ty):
    
        # Step 2.A: Perform one step of the attention mechanism to get back the context vector at step t (≈ 1 line)
        context = one_step_attention(a, s)
        
        # Step 2.B: Apply the post-attention LSTM cell to the "context" vector.
        # Don't forget to pass: initial_state = [hidden state, cell state] (≈ 1 line)
        s, _, c = post_activation_LSTM_cell(context, initial_state = [s, c])
        
        # Step 2.C: Apply Dense layer to the hidden state output of the post-attention LSTM (≈ 1 line)
        out = output_layer(s)
        
        # Step 2.D: Append "out" to the "outputs" list (≈ 1 line)
        outputs.append(out)
    
    # Step 3: Create model instance taking three inputs and returning the list of outputs. (≈ 1 line)
    model = Model(inputs = [X, s0, c0], outputs = outputs)
    
    ### END CODE HERE ###
    
    return model

In [7]:
GRID_H = 19
GRID_W = 19
NO_OBJECT_SCALE  = 1.0
OBJECT_SCALE     = 5.0
COORD_SCALE      = 1.0
CLASS_SCALE      = 1.0

BATCH_SIZE       = 16
WARM_UP_BATCHES  = 0
TRUE_BOX_BUFFER  = 50

In [8]:
def custom_loss(y_true, y_pred):
    mask_shape = tf.shape(y_true)[:4]
    
    cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(GRID_W), [GRID_H]), (1, GRID_H, GRID_W, 1, 1)))
    cell_y = tf.transpose(cell_x, (0,2,1,3,4))

    cell_grid = tf.tile(tf.concat([cell_x,cell_y], -1), [BATCH_SIZE, 1, 1, 1, 1])
    
    coord_mask = tf.zeros(mask_shape)
    conf_mask  = tf.zeros(mask_shape)
    class_mask = tf.zeros(mask_shape)
    
    seen = tf.Variable(0.)
    total_recall = tf.Variable(0.)
    
    """
    Adjust prediction
    """
    ### adjust x and y      
    pred_box_xy = tf.sigmoid(y_pred[..., 1:3]) + cell_grid
    
    ### adjust w and h
    pred_box_wh = tf.exp(y_pred[..., 3:5])
    
    ### adjust confidence
    pred_box_conf = tf.sigmoid(y_pred[..., 0])
    
    """
    Adjust ground truth
    """
    ### adjust x and y
    true_box_xy = y_true[..., 1:3] # relative position to the containing cell
    
    ### adjust w and h
    true_box_wh = y_true[..., 3:5] # number of cells accross, horizontally and vertically
    
    ### adjust confidence
    true_wh_half = true_box_wh / 2.
    true_mins    = true_box_xy - true_wh_half
    true_maxes   = true_box_xy + true_wh_half
    
    pred_wh_half = pred_box_wh / 2.
    pred_mins    = pred_box_xy - pred_wh_half
    pred_maxes   = pred_box_xy + pred_wh_half       
    
    intersect_mins  = tf.maximum(pred_mins,  true_mins)
    intersect_maxes = tf.minimum(pred_maxes, true_maxes)
    intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
    
    true_areas = true_box_wh[..., 0] * true_box_wh[..., 1]
    pred_areas = pred_box_wh[..., 0] * pred_box_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores  = tf.truediv(intersect_areas, union_areas)
    
    true_box_conf = iou_scores * y_true[..., 0]
    
    """
    Determine the masks
    """
    ### coordinate mask: simply the position of the ground truth boxes (the predictors)
    coord_mask = tf.expand_dims(y_true[..., 0], axis=-1) * COORD_SCALE
    
    ### confidence mask: penelize predictors + penalnp_arr_gt.shapeize boxes with low IOU
    # penalize the confidence of the boxes, which have IOU with some ground truth box < 0.6
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]
    
    true_wh_half = true_wh / 2.
    true_mins    = true_xy - true_wh_half
    true_maxes   = true_xy + true_wh_half
    
    pred_xy = tf.expand_dims(pred_box_xy, 4)
    pred_wh = tf.expand_dims(pred_box_wh, 4)
    
    pred_wh_half = pred_wh / 2.
    pred_mins    = pred_xy - pred_wh_half
    pred_maxes   = pred_xy + pred_wh_half    
    np_arr_gt.shape
    intersect_mins  = tf.maximum(pred_mins,  true_mins)
    intersect_maxes = tf.minimum(pred_maxes, true_maxes)
    intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
    
    true_areas = true_wh[..., 0] * true_wh[..., 1]
    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores  = tf.truediv(intersect_areas, union_areas)

    best_ious = tf.reduce_max(iou_scores, axis=4)
    conf_mask = conf_mask + tf.to_float(best_ious < 0.6) * (1 - y_true[..., 4]) * NO_OBJECT_SCALE
    
    # penalize the confidence of the boxes, which are reponsible for corresponding ground truth box
    conf_mask = conf_mask + y_true[..., 4] * OBJECT_SCALE
          
    
    """
    Warm-up training
    """
    no_boxes_mask = tf.to_float(coord_mask < COORD_SCALE/2.)
    seen = tf.assign_add(seen, 1.)
    
    true_box_xy, true_box_wh, coord_mask = tf.cond(tf.less(seen, WARM_UP_BATCHES), 
                          lambda: [true_box_xy + (0.5 + cell_grid) * no_boxes_mask, 
                                   true_box_wh + tf.ones_like(true_box_wh) * np.reshape(ANCHORS, [1,1,1,BOX,2]) * no_boxes_mask, 
                                   tf.ones_like(coord_mask)],
                          lambda: [true_box_xy, 
                                   true_box_wh,
                                   coord_mask])
    
    """
    Finalize the loss
    """
    nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0))
    nb_conf_box  = tf.reduce_sum(tf.to_float(conf_mask  > 0.0))
    nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0))
    
    loss_xy    = tf.reduce_sum(tf.square(true_box_xy-pred_box_xy)     * coord_mask) / (nb_coord_box + 1e-6) / 2.
    loss_wh    = tf.reduce_sum(tf.square(true_box_wh-pred_box_wh)     * coord_mask) / (nb_coord_box + 1e-6) / 2.
    loss_conf  = tf.reduce_sum(tf.square(true_box_conf-pred_box_conf) * conf_mask)  / (nb_conf_box  + 1e-6) / 2.
    loss = loss_xy + loss_wh + loss_conf + loss_class
    
    nb_true_box = tf.reduce_sum(y_true[..., 4])
    nb_pred_box = tf.reduce_sum(tf.to_float(true_box_conf > 0.5) * tf.to_float(pred_box_conf > 0.3))

    """
    Debugging code
    """    
    current_recall = nb_pred_box/(nb_true_box + 1e-6)
    total_recall = tf.assign_add(total_recall, current_recall) 

    loss = tf.Print(loss, [tf.zeros((1))], message='Dummy Line \t', summarize=1000)
    loss = tf.Print(loss, [loss_xy], message='Loss XY \t', summarize=1000)
    loss = tf.Print(loss, [loss_wh], message='Loss WH \t', summarize=1000)
    loss = tf.Print(loss, [loss_conf], message='Loss Conf \t', summarize=1000)
    loss = tf.Print(loss, [loss_class], message='Loss Class \t', summarize=1000)
    loss = tf.Print(loss, [loss], message='Total Loss \t', summarize=1000)
    loss = tf.Print(loss, [current_recall], message='Current Recall \t', summarize=1000)
    loss = tf.Print(loss, [total_recall/seen], message='Average Recall \t', summarize=1000)
    
    return loss

In [9]:
def non_max_suppression_fast(boxes, overlapThresh):
    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []
 
    # if the bounding boxes integers, convert them to floats --
    # this is important since we'll be doing a bunch of divisions
    if boxes.dtype.kind == "i":
        boxes = boxes.astype("float")
        
    # initialize the list of picked indexes	
    pick = []
 
    # grab the coordinates of the bounding boxes
    x1 = boxes[:,0]
    y1 = boxes[:,1]
    x2 = boxes[:,2]
    y2 = boxes[:,3]
 
    # compute the area of the bounding boxes and sort the bounding
    # boxes by the bottom-right y-coordinate of the bounding box
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)
 
    # keep looping while some indexes still remain in the indexes
    # list
    while len(idxs) > 0:
        # grab the last index in the indexes list and add the
        # index value to the list of picked indexes
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
 
        # find the largest (x, y) coordinates for the start of
        # the bounding box and the smallest (x, y) coordinates
        # for the end of the bounding box
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])
        
        # compute the width and height of the bounding box
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        
        # compute the ratio of overlap
        overlap = (w * h) / area[idxs[:last]]
        
        # delete all indexes from the index list that have
        idxs = np.delete(idxs, np.concatenate(([last],
            np.where(overlap > overlapThresh)[0])))
 
    # return only the bounding boxes that were picked using the
    # integer data type
    return pick

In [44]:
def del_conf_layer(x):
    return np.delete(x,0,axis = 1)

In [53]:
def my_loss(y_true, y_pred):
    lambda_coord = 5
    lambda_noobj = 0.5
    #non max suppression
    threshold = 0.5
    y_pred_conf = y_pred[...,0]
    logical_mask = tf.greater_equal(y_pred_conf,threshold)
    int_mask = tf.cast(logical_mask, tf.float32)
    y_pred_conf = y_pred_conf*int_mask
    y_pred = tf.cast(y_pred,tf.float32)
    y_pred = tf.stack(y_pred_conf,y_pred[...,1:])
    for i in range(10):
        index_of_boxes = []
        boxes_dims =[]
        for j in range(19):
            for k in range(19):
                if y_pred[i,j,k,0]>0:
                    index_of_boxes.append([i,j,k])
                    xyhw = [(1/19)*(j+y_pred[i,j,k,1]), (1/19)*(k+y_pred[i,j,k,2]),(1/19)*(y_pred[i,j,k,3]),(1/19)*(y_pred[i,j,k,4])]
                    xy2 = [xywh[0]-(xywh[3]/2),xywh[1]-(xywh[2]/2),xywh[0]+(xywh[3]/2),xywh[1]+(xywh[3]/2)]
                    boxes_dims.append(xy2)
        picked_boxes_indices = non_max_suppression_fast(boxes_dims, threshold)
        for index in picked_boxes_indices:
            picked_box = boxes_dims[index]
            new_xyhw = [(picked_box[2] - picked_box[0])/2,(picked_box[3] - picked_box[1])/2,picked_box[3] - picked_box[1], picked_box[2] - picked_box[0]]
            y_pred[i,int(new_xywh[0]/(1/19)),int(new_xywh[1]/(1/19)),1] = new_xyhw[0]/(1/19) - int(new_xyhw[0]/(1/19))
            y_pred[i,int(new_xywh[0]/(1/19)),int(new_xywh[1]/(1/19)),2] = new_xyhw[1]/(1/19) - int(new_xyhw[1]/(1/19))
            y_pred[i,int(new_xywh[0]/(1/19)),int(new_xywh[1]/(1/19)),3] = new_xyhw[2]/(1/19)
            y_pred[i,int(new_xywh[0]/(1/19)),int(new_xywh[1]/(1/19)),4] = new_xyhw[3]/(1/19)
            
    #calculating loss using yolo loss function
    pred_box_xy = tf.sigmoid(y_pred[..., 1:3])
    pred_box_hw = tf.exp(y_pred[..., 3:5])
    pred_box_conf = tf.sigmoid(y_pred[..., 0])
    
    true_box_xy = y_true[...,1:3]
    true_box_hw = y_pred[...,3:5]
    true_box_conf = y_pred[...,0]
    
    loss_xy = lambda_coord*np.sum(true_box_conf*np.square(true_box_xy-pred_box_xy))
    loss_wh = lambda_coord*np.sum(true_box_conf*np.square(np.sqrt(true_box_hw)-np.sqrt(pred_box_hw)))
    loss_conf = np.sum(true_box_conf*np.square(true_box_conf - pred_box_conf)) + lambda_noobj*np.sum(np.abs(true_box_conf-1)*np.square(true_box_conf - pred_box_conf))
    loss = loss_xy + loss_wh + loss_conf
    #debugging
    loss = tf.Print(loss, [tf.zeros((1))], message='Dummy Line \t', summarize=1000)
    loss = tf.Print(loss, [loss_xy], message='Loss XY \t', summarize=1000)
    loss = tf.Print(loss, [loss_wh], message='Loss WH \t', summarize=1000)
    loss = tf.Print(loss, [loss_conf], message='Loss Conf \t', summarize=1000)
    loss = tf.Print(loss, [loss], message='Total Loss \t', summarize=1000)
    return loss

In [43]:
a = np.reshape(np.arange(1000),(10,5,5,4))
mask = np.greater(a[...,0],100*np.ones((a[...,0].shape)).astype(int))
a[...,0] = a[...,0]*mask
a[0,0,0] = [7,8,9,10]
b = np.reshape(np.arange(1000),(10,5,5,4))
np.delete(b,0,axis = 1)

array([[[[ 20,  21,  22,  23],
         [ 24,  25,  26,  27],
         [ 28,  29,  30,  31],
         [ 32,  33,  34,  35],
         [ 36,  37,  38,  39]],

        [[ 40,  41,  42,  43],
         [ 44,  45,  46,  47],
         [ 48,  49,  50,  51],
         [ 52,  53,  54,  55],
         [ 56,  57,  58,  59]],

        [[ 60,  61,  62,  63],
         [ 64,  65,  66,  67],
         [ 68,  69,  70,  71],
         [ 72,  73,  74,  75],
         [ 76,  77,  78,  79]],

        [[ 80,  81,  82,  83],
         [ 84,  85,  86,  87],
         [ 88,  89,  90,  91],
         [ 92,  93,  94,  95],
         [ 96,  97,  98,  99]]],


       [[[120, 121, 122, 123],
         [124, 125, 126, 127],
         [128, 129, 130, 131],
         [132, 133, 134, 135],
         [136, 137, 138, 139]],

        [[140, 141, 142, 143],
         [144, 145, 146, 147],
         [148, 149, 150, 151],
         [152, 153, 154, 155],
         [156, 157, 158, 159]],

        [[160, 161, 162, 163],
         [164, 165, 166, 

In [92]:
a = 2.9
int(a)

2

In [12]:
model = model(Tx, Ty, n_a, n_s, 1805)

Instructions for updating:
Colocations handled automatically by placer.


In [13]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 10, 1805)     0                                            
__________________________________________________________________________________________________
s0 (InputLayer)                 (None, 128)          0                                            
__________________________________________________________________________________________________
bidirectional_1 (Bidirectional) (None, 10, 128)      957440      input_1[0][0]                    
__________________________________________________________________________________________________
repeat_vector_2 (RepeatVector)  (None, 10, 128)      0           s0[0][0]                         
                                                                 lstm_1[0][0]                     
          

In [54]:
out = model.compile(optimizer=Adam(lr=0.005, beta_1=0.9, beta_2=0.999, decay=0.01),
                    metrics=['accuracy'],
                    loss=my_loss)
out

TypeError: Using a `tf.Tensor` as a Python `bool` is not allowed. Use `if t is not None:` instead of `if t:` to test if a tensor is defined, and use TensorFlow ops such as tf.cond to execute subgraphs conditioned on the value of a tensor.