<a href="https://colab.research.google.com/github/subham913/BlazeFace_/blob/master/BlazeFace_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import tensorflow as tf

In [0]:
def SingleBlazeBlock(x, filter_size, kernel_size = 5, strides = 1):
  '''
  x: feature map of shape (b,H,W,C)
  filter_size: number of output channels of the separable convolution
  kernel_size: kernel size of the separable convolution
  strides: stride of the separable convolution

  returns relu(BatchNorm(SeparableConv(x)) + shortcut(x)), where the shortcut is x
  max-pooled (when strides != 1) and zero-padded on the channel axis (when
  filter_size > C) so that both branches of the Add have the same shape.

  raises ValueError if filter_size is smaller than the number of input channels,
  because the shortcut can only be padded, not truncated.
  '''
  _x = tf.keras.layers.SeparableConv2D(filters = filter_size, kernel_size=kernel_size, strides = strides, padding = 'same', use_bias=False)(x)
  _x = tf.keras.layers.BatchNormalization()(_x)

  #### number of zero channels the shortcut needs to match the conv branch
  channel_pad = int(_x.shape[-1]) - int(x.shape[-1])
  if(channel_pad < 0):
    raise ValueError("filter_size (%d) must be >= the number of input channels (%d)" % (int(_x.shape[-1]), int(x.shape[-1])))

  if(strides != 1):
    #### padding='same' keeps the pooled size equal to the 'same'-padded strided conv, also for odd H/W
    x = tf.keras.layers.MaxPooling2D(pool_size = strides, strides = strides, padding = 'same')(x)

  if(channel_pad > 0):
    #### pad exactly channel_pad zero channels, independent of the stride
    #### (the previous zeros_like concat only worked when the channel count doubled)
    x = tf.keras.layers.Lambda(lambda t: tf.pad(t, [[0,0],[0,0],[0,0],[0,channel_pad]]))(x)

  x_out = tf.keras.layers.Add()([_x,x])
  x_out = tf.keras.layers.Activation("relu")(x_out)
  return x_out






In [0]:
def DoubleBlazeBlock(x, filter1_size, filter2_size, kernel_size = 5, strides = 1):
  '''
  x: feature map of shape (b,H,W,C)
  filter1_size: number of channels of the first (bottleneck) separable convolution
  filter2_size: number of output channels of the second separable convolution
  kernel_size: kernel size of both separable convolutions
  strides: stride of the first separable convolution (the second always uses stride 1)

  returns relu(BatchNorm(SepConv(relu(BatchNorm(SepConv(x))))) + shortcut(x)), where the
  shortcut is x max-pooled (when strides != 1) and zero-padded on the channel axis
  (when filter2_size > C) so that both branches of the Add have the same shape.

  raises ValueError if filter2_size is smaller than the number of input channels,
  because the shortcut can only be padded, not truncated.
  '''

  _x1 = tf.keras.layers.SeparableConv2D(filters = filter1_size, kernel_size=kernel_size, strides= strides, padding = 'same', use_bias=False)(x)
  _x1 = tf.keras.layers.BatchNormalization()(_x1)
  _x1 = tf.keras.layers.Activation("relu")(_x1)

  _x2 = tf.keras.layers.SeparableConv2D(filters = filter2_size, kernel_size=kernel_size, strides = 1, padding = 'same', use_bias=False)(_x1)
  _x2 = tf.keras.layers.BatchNormalization()(_x2)

  #### number of zero channels the shortcut needs to match the conv branch
  channel_pad = int(_x2.shape[-1]) - int(x.shape[-1])
  if(channel_pad < 0):
    raise ValueError("filter2_size (%d) must be >= the number of input channels (%d)" % (int(_x2.shape[-1]), int(x.shape[-1])))

  if(strides != 1):
    #### padding='same' keeps the pooled size equal to the 'same'-padded strided conv, also for odd H/W
    x = tf.keras.layers.MaxPooling2D(pool_size = strides, strides = strides, padding = 'same')(x)

  if(channel_pad > 0):
    #### pad exactly channel_pad zero channels, independent of the stride
    #### (the previous zeros_like concat only worked when the channel count doubled)
    x = tf.keras.layers.Lambda(lambda t: tf.pad(t, [[0,0],[0,0],[0,0],[0,channel_pad]]))(x)

  x_out = tf.keras.layers.Add()([_x2,x])
  x_out = tf.keras.layers.Activation("relu")(x_out)
  return x_out


# Test SingleBlazeBlock

In [4]:
# Smoke test: a stride-2 block must halve H/W and produce the requested channel count.
a = tf.convert_to_tensor(np.random.randn(3,64,64,24),dtype = tf.float32)
out = SingleBlazeBlock(a, 48, strides=2)
assert out.shape.as_list() == [3, 32, 32, 48], out.shape
print(out.shape)

(3, 32, 32, 48)


# Test DoubleBlazeBlock

In [5]:
# Smoke test: a stride-1 block with matching channels must preserve the input shape.
a = tf.convert_to_tensor(np.random.randn(3,32,32,48),dtype = tf.float32)
out = DoubleBlazeBlock(a, 24, 48, strides=1)
assert out.shape.as_list() == [3, 32, 32, 48], out.shape
print(out.shape)

(3, 32, 32, 48)


# Network

In [0]:
def network():
  '''
  Builds the detector for inputs of shape (b,128,128,3).

  returns a tf.keras Model with outputs [x_conf, x_bboxes]:
    x_conf:   (b,896,1) sigmoid confidence per anchor
    x_bboxes: (b,896,4) raw box regression per anchor

  896 = 16*16*2 (two anchors per cell of the 16x16 map) + 8*8*6 (six anchors per cell of the 8x8 map)
  '''
  inputs = tf.keras.layers.Input(shape=(128,128,3))
  x = tf.keras.layers.Conv2D(kernel_size = 5, filters = 24, strides = 2, padding = 'same')(inputs) ## bx64x64x24

  x = SingleBlazeBlock(x, 24) ## bx64x64x24
  x = SingleBlazeBlock(x, 24) ## bx64x64x24
  x = SingleBlazeBlock(x, 48, strides = 2) ## bx32x32x48
  x = SingleBlazeBlock(x, 48) ## bx32x32x48
  x = SingleBlazeBlock(x, 48) ## bx32x32x48

  x16 = DoubleBlazeBlock(x, 24, 96, strides = 2) ## bx16x16x96
  _x = DoubleBlazeBlock(x16, 24, 96) ## bx16x16x96
  _x = DoubleBlazeBlock(_x, 24, 96) ## bx16x16x96
  _x = DoubleBlazeBlock(_x, 24, 96, strides = 2) ## bx8x8x96
  _x = DoubleBlazeBlock(_x, 24, 96) ## bx8x8x96
  x8 = DoubleBlazeBlock(_x, 24, 96) ## bx8x8x96

  ####confidence
  x16_conf = tf.keras.layers.Conv2D(kernel_size = 3, filters = 2, strides = 1, padding = 'same', activation = 'sigmoid')(x16) ## bx16x16x2
  x8_conf = tf.keras.layers.Conv2D(kernel_size = 3, filters = 6, strides = 1, padding = 'same', activation = 'sigmoid')(x8) ## bx8x8x6

  x_conf = tf.keras.layers.concatenate([tf.keras.layers.Reshape((512, 1))(x16_conf),tf.keras.layers.Reshape((384, 1))(x8_conf)],axis=1) ## bx896x1

  ###bounding boxes
  x16_bboxes = tf.keras.layers.Conv2D(kernel_size = 3, filters = 8, strides = 1, padding = 'same')(x16) ## bx16x16x8
  x8_bboxes = tf.keras.layers.Conv2D(kernel_size = 3, filters = 24, strides = 1, padding = 'same')(x8) ## bx8x8x24

  #### reshape the regression heads (not the confidence heads) into per-anchor boxes
  x_bboxes = tf.keras.layers.concatenate([tf.keras.layers.Reshape((512, 4))(x16_bboxes),tf.keras.layers.Reshape((384, 4))(x8_bboxes)],axis=1) ## bx896x4

  #### Finally return both score and bounding boxes
  return tf.keras.models.Model(inputs = inputs, outputs = [x_conf, x_bboxes])








In [7]:
model = network()
# summary() prints the table itself and returns None, so it must not be wrapped in print()
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 128, 128, 3) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 64, 64, 24)   1824        input_1[0][0]                    
__________________________________________________________________________________________________
separable_conv2d_3 (SeparableCo (None, 64, 64, 24)   1176        conv2d[0][0]                     
__________________________________________________________________________________________________
batch_normalization_3 (BatchNor (None, 64, 64, 24)   96          separable_conv2d_3[0][0]         
______________________________________________________________________________________________

# Calculate IoU

In [0]:
def get_intersection(bboxA, bboxB):
  '''
  bboxA:[A,4]
  bboxB:[B,4]
  bbox_format: (xmin,ymin,xmax,ymax)

  returns intersection area of every box in A with every box in B, of shape [A,B]
  (0 where the boxes do not overlap)
  '''
  #### broadcast [A,1,4] against [1,B,4] instead of tiling, so A and B need not be statically known
  boxes_a = tf.expand_dims(bboxA, axis=1)
  boxes_b = tf.expand_dims(bboxB, axis=0)
  max_xy = tf.math.minimum(boxes_a[:,:,2:], boxes_b[:,:,2:]) #### bottom-right corner of the overlap
  min_xy = tf.math.maximum(boxes_a[:,:,:2], boxes_b[:,:,:2]) #### top-left corner of the overlap
  extent = max_xy - min_xy
  #### a negative extent means no overlap along that axis; only a lower bound is needed
  intersections = tf.math.maximum(extent, tf.zeros_like(extent))
  return intersections[:,:,0]*intersections[:,:,1]

In [0]:
def get_IoU(bboxA, bboxB):
  '''
  bboxA:[A,4]
  bboxB:[B,4]
  bbox_format: (xmin,ymin,xmax,ymax) 

  returns IoU of shape [A,B]; pairs whose union has zero area get IoU 0 instead of NaN
  Note: If bbox is given in (x,y,w,h) format needs to be converted to (xmin,ymin,xmax,ymax)
  '''
  #### areas are kept as [A,1] and [1,B] and broadcast against the [A,B] intersection
  area_a = tf.expand_dims((bboxA[:,2]-bboxA[:,0])*(bboxA[:,3]-bboxA[:,1]), axis=1)
  area_b = tf.expand_dims((bboxB[:,2]-bboxB[:,0])*(bboxB[:,3]-bboxB[:,1]), axis=0)
  intersection = get_intersection(bboxA, bboxB)
  union = area_a + area_b - intersection
  iou = intersection/union
  #### degenerate (zero-area) boxes give 0/0; report them as non-overlapping
  return tf.where(union > 0, iou, tf.zeros_like(iou))

# Prediction Modules

In [0]:
def preds_to_coordinates(raw_box_preds, priors):
  '''
  raw_box_preds: (b, 896, 4)
  priors: (896,4); columns [:2] are used as the prior centre and [2:] as the prior size

  decodes raw predictions(offsets) from network to coordinates (xmin, ymin, xmax, ymax)
  returns a tensor with the same shape as raw_box_preds
  '''
  raw_box_preds = tf.convert_to_tensor(raw_box_preds)
  priors = tf.cast(priors, raw_box_preds.dtype)
  prior_xy = priors[:,:2]
  prior_wh = priors[:,2:]

  xy_center = (raw_box_preds[:,:,:2] / 128) * prior_wh + prior_xy
  wh = (raw_box_preds[:,:,2:] / 128) * prior_wh
  #### tensors are immutable, so the corners are concatenated instead of assigned into a buffer
  return tf.concat([xy_center - wh/2, xy_center + wh/2], axis=-1) ####(xmin, ymin, xmax, ymax)







In [0]:
def get_detections_from_preds(raw_box_preds, raw_score_preds, priors, clip_val = 100, conf_thresh = 0.6):
  '''
  raw_box_preds: (b, 896, 4)
  priors: (896,4)
  raw_score_preds: (b, 896, 1)
  clip_val: currently unused; kept so existing callers keep working
  conf_thresh: detections with score below this value are discarded

  returns a list of length b; element i is a tensor of shape (num_detections_i, 5)
  holding (xmin, ymin, xmax, ymax, score) for image i

  Note: the batch size must be statically known (the batch is iterated in Python).
  '''

  raw_detection_boxes = preds_to_coordinates(raw_box_preds, priors)
  raw_detection_score = tf.squeeze(raw_score_preds, axis=-1)
  mask = raw_detection_score >= conf_thresh #### Discard boxes with too low confidence

  filtered_output = []

  for i in range(raw_box_preds.shape[0]):
    #### tensors cannot be indexed with a boolean tensor, so use boolean_mask
    boxes = tf.boolean_mask(raw_detection_boxes[i], mask[i])
    scores = tf.boolean_mask(raw_detection_score[i], mask[i])
    filtered_output.append(tf.concat([boxes, tf.expand_dims(scores,axis=-1)], axis=-1))

  return filtered_output






In [0]:
def weighted_nms(filtered_boxes, IoU_thresh = 0.5):
  '''
  filtered_boxes:(num_detections,5), each row (xmin, ymin, xmax, ymax, score)
  For each image with overlapping boxes with IoU above
  a certain threshold it weighs the coordinates by the
  normalized confidence score.

  returns a list of tensors of shape (5,): the blended (xmin, ymin, xmax, ymax)
  followed by the mean score of the boxes that were merged. A box with no
  overlapping neighbour is returned unchanged.

  Note: the loop runs in Python on concrete shapes, so this needs eager execution.
  '''
  detection_out = []

  remaining = tf.convert_to_tensor(filtered_boxes)
  if (remaining.shape[0] == 0):
     return detection_out

  #### process candidates from highest to lowest score
  remaining = tf.gather(remaining, tf.argsort(remaining[:,-1], direction='DESCENDING'))

  while(remaining.shape[0] > 0):
    #### IoU of the best remaining box (kept 2-D as [1,4]) against all remaining boxes -> (N,)
    IoUs = get_IoU(remaining[:1,:4], remaining[:,:4])[0]
    #### the best box is always consumed, even if it is degenerate and its self-IoU
    #### does not exceed the threshold; otherwise the loop would never terminate
    overlaps = tf.concat([tf.constant([True]), IoUs[1:] > IoU_thresh], axis=0)

    cluster = tf.boolean_mask(remaining, overlaps)
    remaining = tf.boolean_mask(remaining, tf.logical_not(overlaps))

    weighted_detection = cluster[0]
    if(cluster.shape[0] > 1):
      score = cluster[:,4]
      normalized_score = score / tf.math.reduce_sum(score)
      #### (k,4) * (k,1): every coordinate of a box is scaled by that box's weight
      weighted_bbox = tf.math.reduce_sum(cluster[:,:4] * tf.expand_dims(normalized_score, axis=-1), axis = 0)
      mean_score = tf.math.reduce_mean(score)
      weighted_detection = tf.concat([weighted_bbox, tf.expand_dims(mean_score, axis=0)], axis=0)

    detection_out.append(weighted_detection)

  return detection_out



  



In [0]:
def predict(input, priors, model = None):
  '''
  input: tensor of shape (b,H,W,C)
  priors: (896,4) of format (x,y,w,h); can be saved once and for all
  model: optional model to run; when omitted a new network() is built

  returns a list of length b; element i is a tensor of shape (num_detections_i,5)

  Note: If no face is detected in the image then a tensor of shape (0,5) is added for that image.
  Note: network() returns a freshly initialised model, so pass a trained `model`
  to get meaningful detections.
  '''

  if model is None:
    model = network()

  #### the model outputs [confidences, boxes]; get_detections_from_preds expects (boxes, scores)
  raw_scores, raw_boxes = model.predict(input)
  raw_detections = get_detections_from_preds(raw_boxes, raw_scores, priors)

  final_detections = []

  for detections in raw_detections:
    faces = weighted_nms(detections)
    if(len(faces)>0):
      faces = tf.stack(faces)
    else:
      faces = tf.zeros([0,5])
    final_detections.append(faces)
  return final_detections


# Training Modules

In [0]:
def get_offsets(matches, priors):
  '''
  matches:(num_priors,4), coordinate format:(xmin,ymin,xmax,ymax)
  priors:(num_priors,4); columns [:2] are used as the prior centre and [2:] as the prior size

  returns offsets of matches to priors of shape (num_priors,4) for regression:
  (centre offset / prior size, log(size / prior size)), with the matched box
  coordinates divided by 128 before comparison with the priors
  '''

  #### centre of the matched box, scaled by 1/128, relative to the prior centre
  ghat_cxcy = (matches[:,:2] + matches[:,2:])/(2 * 128) - priors[:,:2]
  ghat_cxcy /= priors[:,2:]

  #### tf.log is not available in TF 2.x; tf.math.log is the same op
  ghat_wh = tf.math.log((matches[:,2:]-matches[:,:2])/(128*priors[:,2:]))

  return tf.concat([ghat_cxcy, ghat_wh], axis=-1)

In [0]:
def match(gt_box, gt_labels, priors, matched_loc_offset, matched_conf, idx, overlap_thresh = 0.5):
  '''
  gt_box:(num_objects, 4), coordinate format:(xmin,ymin,xmax,ymax)
  gt_labels:(num_objects)
  priors:(num_priors,4) of format (x,y,w,h)
  matched_loc_offset:buffer of shape(b,num_priors,4) to be filled for batch index idx
  matched_conf:buffer of shape(b,num_priors) to be filled for batch index idx

  It matches each default box to a ground_truth box. 
  First assign each ground truth box a prior box with max IoU
  and then assign each prior box a ground truth box with IoU greater than 0.5.

  returns (offsets, labels) of shapes (num_priors,4) and (num_priors,) for this image.
  The two buffers are also filled in place when they are mutable (a tf.Variable or
  an object supporting item assignment such as a numpy array); a plain tf.Tensor is
  immutable and is left untouched, so use the return value in that case.
  '''

  #### get_IoU needs corner boxes. priors[:, :2] / priors[:, 2:] are used as centre / size
  #### by get_offsets, which also scales ground truth by 1/128, so the priors are
  #### converted to corners in the ground-truth scale here.
  #### NOTE(review): assumes the same 128 scale as get_offsets -- confirm against the priors file
  prior_corners = tf.concat([priors[:,:2] - priors[:,2:]/2, priors[:,:2] + priors[:,2:]/2], axis=-1) * 128
  overlaps = get_IoU(gt_box, prior_corners) #### (num_objects, num_priors)

  #### assigns each gt_box a prior_box with max IoU
  best_prior_idx = tf.math.argmax(overlaps, axis = 1)

  #### assigns each prior_box a gt_box with max IoU
  best_truth_overlap = tf.math.reduce_max(overlaps, axis = 0)
  best_truth_idx = tf.math.argmax(overlaps, axis = 0)

  #### ensure best match isn't missed due to thresholding: the best prior of every
  #### object gets an overlap of 3 (above any real IoU and any threshold) and is
  #### pointed back at that object. If two objects share a best prior, which one
  #### wins is decided by the scatter op.
  scatter_idx = tf.expand_dims(best_prior_idx, axis=1)
  forced_overlap = tf.ones_like(best_prior_idx, dtype = best_truth_overlap.dtype) * 3
  best_truth_overlap = tf.tensor_scatter_nd_update(best_truth_overlap, scatter_idx, forced_overlap)
  object_ids = tf.cast(tf.range(tf.shape(best_prior_idx)[0]), best_truth_idx.dtype)
  best_truth_idx = tf.tensor_scatter_nd_update(best_truth_idx, scatter_idx, object_ids)

  matches = tf.gather(gt_box, best_truth_idx)
  labels = tf.gather(gt_labels, best_truth_idx) + 1

  bg_mask =  best_truth_overlap < overlap_thresh #### priors with IoU < threshold are background
  labels = tf.where(bg_mask, tf.zeros_like(labels), labels) ### background class is 0

  offsets = get_offsets(matches, priors)

  #### fill the caller's buffers when they can actually be written to
  for target, value in ((matched_loc_offset, offsets), (matched_conf, labels)):
    if isinstance(target, tf.Variable):
      target[idx].assign(value)
    elif hasattr(target, '__setitem__'):
      target[idx] = value

  return offsets, labels













In [0]:
def weighted_loss(ground_truth, predictions, priors = None):
  '''
  ground_truth:(b,num_obj,5), each row (xmin,ymin,xmax,ymax,label)
  predictions:[(b,num_priors,4),(b,num_priors,1)]
  priors:(num_priors,4) of format (x,y,w,h); loaded from 'priors.npy' when omitted

  returns a scalar: (summed smooth L1 localisation loss over positive priors
  + summed binary cross-entropy confidence loss over all priors) / number of
  positive priors (at least 1, so a batch without positives does not give NaN)

  Note: priors can be saved once and for all of shape (896,4) of format (x,y,w,h)
  Note: the targets are built in a Python loop over the batch, so the batch size
  must be statically known (eager execution).
  '''
  if priors is None:
    priors = np.load('priors.npy')
  priors = tf.convert_to_tensor(priors, dtype = tf.float32)
  pred_offset, pred_conf = predictions
  batch_size = int(ground_truth.shape[0])
  num_priors = int(priors.shape[0])

  #### tensors are immutable, so match() fills writable numpy buffers in place
  matched_loc_offset = np.zeros((batch_size, num_priors, 4), dtype = np.float32)
  matched_conf = np.zeros((batch_size, num_priors), dtype = np.float32)

  for idx in range(batch_size):
    match(ground_truth[idx, :, :-1], ground_truth[idx, :, -1], priors, matched_loc_offset, matched_conf, idx)

  #### targets depend only on ground truth and priors, so they are plain constants
  pos_class = tf.constant(matched_conf > 0) ###(b,num_priors) bool
  matched_loc_offset = tf.cast(tf.constant(matched_loc_offset), pred_offset.dtype)

  num_pos = tf.math.reduce_sum(tf.cast(pos_class, pred_offset.dtype))
  N = tf.math.maximum(num_pos, 1.0)

  #### keep only the positive priors; flatten to (-1,1) so that the SUM reduction
  #### adds up every element instead of averaging over the last axis
  pos_pred_offset = tf.reshape(tf.boolean_mask(pred_offset, pos_class), [-1, 1])
  pos_true_offset = tf.reshape(tf.boolean_mask(matched_loc_offset, pos_class), [-1, 1])

  ##### Smooth L1 loss(localisation loss)
  h = tf.keras.losses.Huber(reduction = tf.keras.losses.Reduction.SUM)
  loc_loss = h(pos_true_offset, pos_pred_offset)

  ####TODO: Hard Negative Mining 

  ##### Confidence Loss (target is 1 for any matched prior, 0 for background)
  bce = tf.keras.losses.BinaryCrossentropy(reduction = tf.keras.losses.Reduction.SUM)
  conf_target = tf.reshape(tf.cast(pos_class, pred_conf.dtype), [-1, 1])
  conf_loss = bce(conf_target, tf.reshape(pred_conf, [-1, 1]))

  averaged_loss = (loc_loss + tf.cast(conf_loss, loc_loss.dtype))/N

  return averaged_loss 




  

  




  