<a href="https://colab.research.google.com/github/justadudewhohacks/ipynbs/blob/master/face_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dependencies

In [1]:
!pip install -U -q PyDrive
!pip install git+https://github.com/justadudewhohacks/image_augment.py
!pip install git+https://github.com/justadudewhohacks/colabsnippets

[?25l[K     |▎                               | 10kB 23.4MB/s eta 0:00:01[K     |▋                               | 20kB 6.7MB/s eta 0:00:01[K     |█                               | 30kB 9.5MB/s eta 0:00:01[K     |█▎                              | 40kB 6.0MB/s eta 0:00:01[K     |█▋                              | 51kB 7.2MB/s eta 0:00:01[K     |██                              | 61kB 8.5MB/s eta 0:00:01[K     |██▎                             | 71kB 9.6MB/s eta 0:00:01[K     |██▋                             | 81kB 10.6MB/s eta 0:00:01[K     |███                             | 92kB 11.8MB/s eta 0:00:01[K     |███▎                            | 102kB 9.8MB/s eta 0:00:01[K     |███▋                            | 112kB 9.8MB/s eta 0:00:01[K     |████                            | 122kB 9.8MB/s eta 0:00:01[K     |████▎                           | 133kB 9.8MB/s eta 0:00:01[K     |████▋                           | 143kB 9.8MB/s eta 0:00:01[K     |█████                   

# Download Data

In [0]:
from colabsnippets.DataDownloader import DataDownloader

# Resources to fetch, keyed by database name. Each list entry maps a resource
# kind to an id (presumably a Google Drive file id, one entry per shard —
# confirm against DataDownloader).
download_sources = {
  "WIDER": [
    { "images": "1JHmXqGPngDCbM56eYPeqsaCgJC4vgL4m", "boxes": "1Hd2i-6dnaWIriFK4Hj0CLZnfGtKcKj9L" }
  ]
}

data_downloader = DataDownloader(data_dir = './data')

# NOTE(review): the meaning of the second argument (['boxes']) is defined by
# DataDownloader.download_data — confirm there.
data_downloader.download_data(download_sources, ['boxes'])

print('done!')

# Common

In [0]:
import cv2
import math
import json
import random
import time
import types
import os
import numpy as np
import tensorflow as tf
from augment import ImageAugmentor, augment
from augment.augment import abs_coords
from colabsnippets.utils import load_json
from colabsnippets import BatchLoader

'''
--------------------------------------------------------------------------------

Data Loader

--------------------------------------------------------------------------------
'''
  
def transform_boxes(boxes):
  """Convert box dicts into validated ``(x, y, width, height)`` tuples.

  Args:
    boxes: iterable of mappings providing the keys 'x', 'y', 'width' and
      'height'.

  Returns:
    A list with one ``(x, y, w, h)`` tuple per input box, in input order.

  Raises:
    Exception: if a box has a non-positive width or height, or if any of its
      four values lies outside [-0.5, 1.5], which is treated as a sign that
      the box is not expressed in relative coordinates.
  """
  converted = []
  for entry in boxes:
    rel_box = (entry['x'], entry['y'], entry['width'], entry['height'])
    width, height = rel_box[2], rel_box[3]
    if width <= 0 or height <= 0:
      raise Exception("box has invalid width or height: {}".format(rel_box))
    if any(value < -0.5 or value > 1.5 for value in rel_box):
      raise Exception("box is probably not a valid relative box: {}".format(rel_box))
    converted.append(rel_box)
  return converted
  
def extract_data_labels(data):
  """Load and validate the ground-truth boxes that belong to a data entry.

  The annotation file has the image's file name with '.jpg' replaced by
  '.json'. It is read from ``./data/<db>/boxes``, or from
  ``./data/<db>/boxes-shard<N>`` when the entry carries a 'shard' key.

  Args:
    data: mapping with the keys 'db' and 'file', plus an optional 'shard'.

  Returns:
    The result of ``transform_boxes`` applied to the loaded annotations.
  """
  db, img_file = data['db'], data['file']
  if 'shard' in data:
    boxes_dir = "boxes-shard{}".format(data['shard'])
  else:
    boxes_dir = 'boxes'
  boxes_path = "./data/{}/{}/{}".format(db, boxes_dir, img_file.replace('.jpg', '.json'))
  return transform_boxes(load_json(boxes_path))
    
def resolve_image_path(data):
  """Build the path of the image file described by a data entry.

  Args:
    data: mapping with the keys 'db' and 'file', plus an optional 'shard'.

  Returns:
    ``./data/<db>/images/<file>``, or ``./data/<db>/images-shard<N>/<file>``
    when the entry carries a 'shard' key.
  """
  db, img_file = data['db'], data['file']
  if 'shard' in data:
    img_dir = "images-shard{}".format(data['shard'])
  else:
    img_dir = 'images'
  return "./data/{}/{}/{}".format(db, img_dir, img_file)

def min_bbox(boxes):
  """Compute the extent covered by a set of ``(x, y, w, h)`` boxes.

  The running minima start at 1.0 and the running maxima at 0, so an empty
  input yields ``[1.0, 1.0, 0, 0]``.

  Args:
    boxes: iterable of ``(x, y, w, h)`` sequences.

  Returns:
    ``[min_x, min_y, max_x, max_y]`` over the top-left and bottom-right
    corners of all boxes, bounded by the starting values above.
  """
  left, top, right, bottom = 1.0, 1.0, 0, 0
  for box_x, box_y, box_w, box_h in boxes:
    corners = ((box_x, box_y), (box_x + box_w, box_y + box_h))
    for corner_x, corner_y in corners:
      # Argument order matters on ties: the minima keep the running value,
      # the maxima take the new corner value.
      left = min(left, corner_x)
      top = min(top, corner_y)
      right = max(corner_x, right)
      bottom = max(corner_y, bottom)

  return [left, top, right, bottom]

class DataLoader(BatchLoader):
  """Batch loader that yields augmented images together with their boxes.

  Image paths are resolved with ``resolve_image_path`` and labels are read
  with ``extract_data_labels``; both functions are handed to ``BatchLoader``.
  """

  def __init__(self, data, image_augmentor = None, start_epoch = None, is_test = False):
    """
    Args:
      data: either a plain function returning the data entries, or the
        entries themselves (these get wrapped in a function returning them).
      image_augmentor: optional object exposing ``augment(image, ...)``; when
        None, the module-level ``augment`` function is used instead.
      start_epoch: forwarded unchanged to ``BatchLoader``.
      is_test: forwarded unchanged to ``BatchLoader``.
    """
    self.image_augmentor = image_augmentor
    if type(data) is types.FunctionType:
      data_provider = data
    else:
      data_provider = lambda: data
    BatchLoader.__init__(
      self,
      data_provider,
      resolve_image_path,
      extract_data_labels,
      start_epoch = start_epoch,
      is_test = is_test
    )

  def load_image_and_labels_batch(self, datas, image_size):
    """Load, crop, pad and resize every entry of ``datas``.

    ``self.extract_data_labels`` and ``self.load_image`` are not defined in
    this class (presumably provided by ``BatchLoader`` — confirm).

    Args:
      datas: iterable of data entries.
      image_size: value passed as ``resize`` to the augmentation call.

    Returns:
      ``(images, boxes)``: two lists of equal length, in the order of
      ``datas``.
    """
    images, labels = [], []
    for entry in datas:
      gt_boxes = self.extract_data_labels(entry)
      raw_image = self.load_image(entry)
      # The random crop is given the extent of all boxes as its region of interest.
      crop_roi = min_bbox(gt_boxes)
      if self.image_augmentor is None:
        apply_augmentation = augment
      else:
        apply_augmentation = self.image_augmentor.augment
      aug_image, aug_boxes = apply_augmentation(
        raw_image, boxes = gt_boxes, random_crop = crop_roi, pad_to_square = True, resize = image_size
      )
      images.append(aug_image)
      labels.append(aug_boxes)

    return images, labels


'''
--------------------------------------------------------------------------------

utility

--------------------------------------------------------------------------------
'''

def gpu_session(callback):
  """Run ``callback`` inside a TensorFlow session under the '/gpu:0' device scope.

  The session is configured with GPU memory growth, soft placement and
  device-placement logging enabled, and is closed when ``callback`` returns.

  Args:
    callback: callable taking the open ``tf.Session``.

  Returns:
    Whatever ``callback`` returns.
  """
  session_config = tf.ConfigProto()
  session_config.allow_soft_placement = True
  session_config.log_device_placement = True
  session_config.gpu_options.allow_growth = True
  with tf.Session(config = session_config) as sess, tf.device('/gpu:0'):
    return callback(sess)

def get_checkpoint(model_name, epoch):
  """Return the checkpoint name ``<model_name>.ckpt-<epoch>`` for an epoch."""
  suffix = '.ckpt-' + str(epoch)
  return model_name + suffix

def draw_box(img, box):
  """Draw ``box`` onto ``img`` in place: a 1px outline plus a filled dot on each corner.

  Args:
    img: image array accepted by OpenCV drawing functions; it is modified.
    box: box passed to ``abs_coords`` together with ``img`` to obtain the
      ``(x, y, w, h)`` values used for drawing (presumably a relative box
      converted to pixel coordinates — confirm in ``augment``).
  """
  left, top, width, height = abs_coords(box, img)
  right, bottom = left + width, top + height

  cv2.rectangle(img, (left, top), (right, bottom), (255, 0, 0), 1)
  for corner in ((left, top), (left, bottom), (right, top), (right, bottom)):
    cv2.circle(img, corner, 2, (0, 0, 255), -1)

# Train

## Neural Network

In [0]:
import math
import tensorflow as tf

from colabsnippets import NeuralNetwork
from colabsnippets.ops import normalize, conv2d, depthwise_separable_conv2d

# Anchor box priors as (width, height) pairs. They are scaled by the cell size
# where they are used (see create_gt_mask_and_coords), i.e. they are expressed
# in units of grid cells.
default_anchors = [
  (1.603231, 2.094468),
  (6.041143, 7.080126),
  (2.882459, 3.518061),
  (4.266906, 5.178857),
  (9.041765, 10.66308)
]
# Per-channel values handed to `normalize` at the start of the forward pass
# (presumably the RGB mean subtracted from the input — confirm in colabsnippets.ops).
default_mean_rgb = [117.001, 114.697, 97.404]

class TinyYolov2Mobilenet(NeuralNetwork):
  """Tiny-YOLOv2 style box detector built from depthwise separable convolutions.

  The output layer has ``len(anchors) * 5`` channels: for every anchor the
  four box values ``(x, y, w, h)`` followed by one score.
  """

  def __init__(self, name = 'tiny_yolov2_mobilenet', anchors = default_anchors, mean_rgb = default_mean_rgb):
    """
    Args:
      name: variable scope name, also passed to the ``NeuralNetwork`` base class.
      anchors: list of ``(width, height)`` anchor priors in grid-cell units.
      mean_rgb: per-channel values passed to ``normalize`` in ``forward``.
    """
    super().__init__(self.initialize_weights, name = name)
    self.anchors = anchors
    self.mean_rgb = mean_rgb

  def initialize_weights(self, weight_processor):
    """Declare every layer's weights (channels in, channels out, layer name)."""
    with tf.variable_scope(self.name):
      weight_processor.process_conv_weights(3, 32, 'conv_in')
      weight_processor.process_depthwise_separable_conv2d_weights(32, 64, 'separable_conv0')
      weight_processor.process_depthwise_separable_conv2d_weights(64, 128, 'separable_conv1')
      weight_processor.process_depthwise_separable_conv2d_weights(128, 256, 'separable_conv2')
      weight_processor.process_depthwise_separable_conv2d_weights(256, 512, 'separable_conv3')
      weight_processor.process_depthwise_separable_conv2d_weights(512, 1024, 'separable_conv4')
      weight_processor.process_conv_weights(1024, len(self.anchors) * 5, 'conv_out', filter_size = 1)

  def depthwise_separable_conv2d(self, x, name):
    """Apply the named depthwise separable convolution with stride 1."""
    return depthwise_separable_conv2d(x, name, [1, 1, 1, 1])

  def leaky_relu(self, x):
    """Leaky ReLU with slope 0.1 for negative inputs."""
    return tf.nn.leaky_relu(x, alpha = 0.1)

  def max_pool(self, x, is_scale_down = True):
    """2x2 max pooling with 'SAME' padding; stride 2 when ``is_scale_down``, else stride 1."""
    stride = [1, 2, 2, 1] if is_scale_down else [1, 1, 1, 1]
    return tf.nn.max_pool(x, [1, 2, 2, 1], stride, 'SAME')

  def forward(self, batch_tensor):
    """Build the forward graph and return the raw output tensor.

    Five of the six pooling steps use stride 2. NOTE(review): for a 416x416
    input this presumably yields a 13x13 grid, assuming the convolutions keep
    the spatial size — confirm in colabsnippets.ops.

    Args:
      batch_tensor: image batch tensor passed to ``normalize``.

    Returns:
      Tensor with ``len(self.anchors) * 5`` channels in its last dimension.
    """
    out = normalize(batch_tensor, self.mean_rgb)

    with tf.variable_scope(self.name, reuse = True):
      out = self.max_pool(self.leaky_relu(conv2d(out, 'conv_in', [1, 1, 1, 1])))
      out = self.max_pool(self.leaky_relu(self.depthwise_separable_conv2d(out, 'separable_conv0')))
      out = self.max_pool(self.leaky_relu(self.depthwise_separable_conv2d(out, 'separable_conv1')))
      out = self.max_pool(self.leaky_relu(self.depthwise_separable_conv2d(out, 'separable_conv2')))
      out = self.max_pool(self.leaky_relu(self.depthwise_separable_conv2d(out, 'separable_conv3')))
      out = self.max_pool(self.leaky_relu(self.depthwise_separable_conv2d(out, 'separable_conv4')), is_scale_down = False)
      out = conv2d(out, 'conv_out', [1, 1, 1, 1])

    return out

  def predict(self, pred, min_score = 0.5):
    """Decode evaluated network output into relative boxes.

    Args:
      pred: evaluated output of ``forward`` (e.g. the numpy array returned by
        ``session.run``) of shape ``[batch, rows, cols, len(anchors) * 5]``.
        The last dimension is read as ``len(anchors)`` groups of
        ``(x, y, w, h, score)``, the same layout the loss function uses when
        it reshapes the prediction.
      min_score: boxes whose sigmoid score is below this value are dropped.

    Returns:
      A flat list (over the whole batch) of ``(x, y, w, h)`` tuples in
      coordinates relative to the image, with ``(x, y)`` being the top-left
      corner.
    """
    def sigmoid(value):
      # Evaluate exp() only on non-positive arguments so large magnitudes
      # cannot overflow.
      if value >= 0:
        return 1.0 / (1.0 + math.exp(-value))
      exp_value = math.exp(value)
      return exp_value / (1.0 + exp_value)

    # `np` is imported in the notebook's "Common" cell.
    grid = np.asarray(pred)
    batch_size, num_rows, num_cols = grid.shape[0:3]
    num_anchors = len(self.anchors)
    grid = grid.reshape((batch_size, num_rows, num_cols, num_anchors, 5))

    out_boxes = []
    for batch_idx in range(batch_size):
      for row in range(num_rows):
        for col in range(num_cols):
          for anchor_idx in range(num_anchors):
            x, y, w, h, score = grid[batch_idx, row, col, anchor_idx]
            if sigmoid(score) < min_score:
              continue
            aw, ah = self.anchors[anchor_idx]
            # The network predicts the box center as an offset inside its
            # cell, and the size as a log-scale factor of the anchor size.
            ct_x = (col + sigmoid(x)) / num_cols
            ct_y = (row + sigmoid(y)) / num_rows
            box_w = (math.exp(w) * aw) / num_cols
            box_h = (math.exp(h) * ah) / num_rows
            # NOTE(review): converted from center to top-left so the result
            # matches the (x, y, w, h) convention of the rest of the notebook;
            # confirm this matches the target encoding used for training.
            out_boxes.append((ct_x - box_w / 2, ct_y - box_h / 2, box_w, box_h))

    return out_boxes

## Loss Function

In [0]:
import math
import numpy as np

def calculate_iou(box0, box1):
  """Intersection over union of two axis-aligned ``(x, y, w, h)`` boxes.

  Args:
    box0: first box as ``(x, y, w, h)`` with ``(x, y)`` the top-left corner.
    box1: second box, same format and units as ``box0``.

  Returns:
    The IoU as a float in [0, 1]; 0.0 when the boxes do not overlap or when
    the union has no area.
  """
  x0, y0, w0, h0 = box0
  x1, y1, w1, h1 = box1

  inter_w = min(x0 + w0, x1 + w1) - max(x0, x1)
  inter_h = min(y0 + h0, y1 + h1) - max(y0, y1)
  # Disjoint boxes produce negative extents. Clamp each axis separately:
  # otherwise two negative extents would multiply to a positive "area".
  inter_area = max(inter_w, 0) * max(inter_h, 0)

  union_area = w0 * h0 + w1 * h1 - inter_area
  if union_area <= 0:
    return 0.0
  return inter_area / float(union_area)

def get_box_grid_position(box, input_size, num_cells, anchors):
  """Find the grid cell and the best matching anchor for a ground-truth box.

  Args:
    box: ``(x, y, w, h)`` in coordinates relative to the image, with
      ``(x, y)`` the top-left corner.
    input_size: network input size in pixels (the input is square).
    num_cells: number of grid cells per side.
    anchors: list of ``(width, height)`` anchor sizes in grid-cell units.

  Returns:
    ``(col, row, anchor_idx)``: the cell containing the box center (clamped
    to the grid) and the index of the anchor whose shape has the highest IoU
    with the box.
  """
  cell_size = input_size / num_cells
  x, y, w, h = box
  ct_x = x + (w / 2)
  ct_y = y + (h / 2)

  # The center is relative, so scaling by num_cells gives the cell index.
  # Clamp it: centers on or beyond the image border would otherwise index
  # outside the grid.
  last_cell = num_cells - 1
  col = min(max(int(math.floor(ct_x * num_cells)), 0), last_cell)
  row = min(max(int(math.floor(ct_y * num_cells)), 0), last_cell)

  # Compare shapes only: both boxes are anchored at the origin, in pixels.
  abs_box = (0, 0, w * input_size, h * input_size)
  highest_iou = 0
  highest_iou_anchor_idx = 0
  for anchor_idx, (aw, ah) in enumerate(anchors):
    anchor_box = (0, 0, aw * cell_size, ah * cell_size)
    iou = calculate_iou(anchor_box, abs_box)
    if highest_iou < iou:
      highest_iou = iou
      highest_iou_anchor_idx = anchor_idx

  return col, row, highest_iou_anchor_idx
                                          
def create_gt_mask_and_coords(batch_gt_boxes, input_size, num_cells, anchors):
  """Encode ground-truth boxes into the grid layout of the network output.

  Args:
    batch_gt_boxes: one list of relative ``(x, y, w, h)`` boxes per image.
    input_size: network input size in pixels (the input is square).
    num_cells: number of grid cells per side.
    anchors: list of ``(width, height)`` anchor sizes in grid-cell units.

  Returns:
    ``(mask, gt_coords)``: numpy arrays of shape
    ``[batch, num_cells, num_cells, len(anchors), 1]`` and
    ``[batch, num_cells, num_cells, len(anchors), 4]``, indexed as
    ``[batch, row, col, anchor]``. ``mask`` is 1 where a box was assigned.
    ``gt_coords`` holds the box center offset inside its cell (in cell
    units) and the log ratio of the box size to the anchor size.
  """
  cell_size = input_size / num_cells
  batch_size = len(batch_gt_boxes)
  mask = np.zeros([batch_size, num_cells, num_cells, len(anchors), 1])
  gt_coords = np.zeros([batch_size, num_cells, num_cells, len(anchors), 4])
  for batch_idx, gt_boxes in enumerate(batch_gt_boxes):
    for gt_box in gt_boxes:
      col, row, anchor_idx = get_box_grid_position(gt_box, input_size, num_cells, anchors)

      x, y, w, h = gt_box
      aw, ah = anchors[anchor_idx]
      # Offset of the box center from the origin of its cell; the cell origin
      # in pixels is index * cell_size.
      gt_x = ((x + w / 2) * input_size - col * cell_size) / cell_size
      gt_y = ((y + h / 2) * input_size - row * cell_size) / cell_size
      gt_w = math.log((w * input_size) / (aw * cell_size))
      gt_h = math.log((h * input_size) / (ah * cell_size))

      # The prediction tensor is [batch, height, width, ...], so rows come first.
      mask[batch_idx, row, col, anchor_idx, :] = 1
      gt_coords[batch_idx, row, col, anchor_idx, :] = [gt_x, gt_y, gt_w, gt_h]

  return mask, gt_coords

def extract_centers_scales_and_scores(pred):
  """Split the raw prediction tensor into box coordinates and scores.

  Despite the name, two tensors are returned: centers and scales stay
  together in the coordinate tensor.

  Args:
    pred: tensor of static shape ``[batch, rows, cols, num_anchors * 5]``;
      the batch dimension may be unknown.

  Returns:
    ``(grid_pred_coords, grid_pred_scores)`` of shape
    ``[batch, rows, cols, num_anchors, 4]`` and
    ``[batch, rows, cols, num_anchors, 1]``.
  """
  batch_size, num_rows, num_cols, num_channels = pred.shape.as_list()
  # Integer division: tf.reshape and tf.slice reject float shape entries.
  num_anchors = num_channels // 5
  # -1 lets tf.reshape infer, and tf.slice keep, an unknown batch dimension.
  batch_dim = -1 if batch_size is None else batch_size
  get_shape = lambda size: [batch_dim, num_rows, num_cols, num_anchors, size]
  grid_preds = tf.reshape(pred, get_shape(5))
  grid_pred_coords = tf.slice(grid_preds, [0, 0, 0, 0, 0], get_shape(4))
  grid_pred_scores = tf.slice(grid_preds, [0, 0, 0, 0, 4], get_shape(1))
  return grid_pred_coords, grid_pred_scores
                                        
def compile_loss_op(pred, gt_coords, mask, coord_scale = 1.0, object_scale = 5.0, no_object_scale = 1.0):
  """Build the detection loss as a weighted sum of three squared-error terms.

  Args:
    pred: raw network output tensor.
    gt_coords: ground-truth coordinates, same shape as the predicted coordinates.
    mask: 1 for grid positions holding a ground-truth box, 0 elsewhere.
    coord_scale: weight of the coordinate loss.
    object_scale: weight of the score loss at positions with a box.
    no_object_scale: weight of the score loss at positions without a box.

  Returns:
    ``(total_loss, object_loss, coord_loss, no_object_loss)`` scalar tensors.
  """
  pred_coords, pred_scores = extract_centers_scales_and_scores(pred)
  objectness = tf.nn.sigmoid(pred_scores)
  # TODO: ious (the score target is the constant 1 until IoUs are computed)
  ious = 1
  no_object_mask = 1 - mask

  object_loss = object_scale * tf.reduce_sum(mask * (ious - objectness)**2)
  coord_loss = coord_scale * tf.reduce_sum(mask * (pred_coords - gt_coords)**2)
  no_object_loss = no_object_scale * tf.reduce_sum(no_object_mask * objectness**2)
  total_loss = object_loss + coord_loss + no_object_loss
  return total_loss, object_loss, coord_loss, no_object_loss
  

## Train

In [0]:
tf.reset_default_graph()

net = TinyYolov2Mobilenet()
model_name = net.name

# training parameters
learning_rate = 0.001
start_epoch = 0
batch_size = 1
image_size = 416
# Set to True to write a checkpoint and an epoch summary file after every epoch.
save_checkpoints = False
#image_augmentor = ImageAugmentor.load('./augmentor_4.json')
image_augmentor = None

train_data = load_json('./trainData.json')[0:20]
# Sanity check: report (but do not drop) entries whose labels fail to load or validate.
for data in train_data:
  try:
    extract_data_labels(data)
  except Exception as e:
    print (data, e)


data_loader = DataLoader(train_data, start_epoch = start_epoch, image_augmentor = image_augmentor)
net.init_trainable_weights()

X = tf.placeholder(tf.float32, [batch_size, image_size, image_size, 3])
pred = net.forward(X)
anchors = net.anchors
num_cells = pred.shape.as_list()[1]
num_anchors = len(anchors)
GT_COORDS = tf.placeholder(tf.float32, [batch_size, num_cells, num_cells, num_anchors, 4])
MASK = tf.placeholder(tf.float32, [batch_size, num_cells, num_cells, num_anchors, 1])
# (total_loss, object_loss, coord_loss, no_object_loss)
loss_op = compile_loss_op(pred, GT_COORDS, MASK)

saver = tf.train.Saver(max_to_keep = None)

# Created after the saver on purpose: the saver then only covers the model
# weights, not the optimizer's slot variables.
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss_op[0])

print(len(train_data))
log_file = open('./log.txt', 'w')

def train(sess):
  """Run the training loop in ``sess`` until it is interrupted.

  Every iteration performs one optimizer step on the next batch and appends a
  line to ``log_file``. At each epoch change the average losses are printed
  and, when ``save_checkpoints`` is set, a checkpoint and an epoch summary
  file are written. ``log_file`` is closed when the loop exits for any reason.

  Args:
    sess: open ``tf.Session`` for the default graph built above.
  """
  total_loss = 0
  total_object_loss = 0
  total_coord_loss = 0
  total_no_object_loss = 0
  iteration_count = 0
  ts_epoch = time.time()

  sess.run(tf.global_variables_initializer())

  if (start_epoch != 0):
    # Checkpoints are saved as '<model_name>.ckpt-<epoch>' (see saver.save below).
    checkpoint = get_checkpoint(model_name, start_epoch - 1)
    saver.restore(sess, checkpoint)
    print('done restoring session')

  try:
    while True:
      # Remember the epoch before fetching the batch, to detect the epoch change below.
      epoch = data_loader.epoch
      current_idx = data_loader.current_idx
      end_idx = data_loader.get_end_idx()

      ts = time.time()

      batch_x, batch_gt_boxes = data_loader.next_batch(batch_size, image_size)
      mask, gt_coords = create_gt_mask_and_coords(batch_gt_boxes, image_size, num_cells, anchors)
      _, losses = sess.run([train_op, loss_op], feed_dict = { X: batch_x, GT_COORDS: gt_coords, MASK: mask })
      loss, object_loss, coord_loss, no_object_loss = losses

      total_loss += loss
      total_object_loss += object_loss
      total_coord_loss += coord_loss
      total_no_object_loss += no_object_loss
      iteration_count += 1

      log_file.write("epoch " + str(epoch) + ", (" + str(current_idx) + " of " + str(end_idx) + "), loss= " + "{:.4f}".format(loss)
            + ", time= " + str((time.time() - ts) * 1000) + "ms \n")

      if epoch != data_loader.epoch:
        avg_loss = total_loss / iteration_count
        avg_object_loss = total_object_loss / iteration_count
        avg_coord_loss = total_coord_loss / iteration_count
        avg_no_object_loss = total_no_object_loss / iteration_count
        print('next epoch: ' + str(data_loader.epoch))
        print('avg_loss= ' + str(avg_loss))
        print('avg_object_loss= ' + str(avg_object_loss))
        print('avg_coord_loss= ' + str(avg_coord_loss))
        print('avg_no_object_loss= ' + str(avg_no_object_loss))
        if save_checkpoints:
          saver.save(sess, model_name + '.ckpt', global_step = epoch)

          epoch_txt_file_path = 'epoch_' + str(epoch) + '.txt'
          with open(epoch_txt_file_path, 'w') as epoch_txt:
            epoch_txt.write('total_loss= ' + str(total_loss) + '\n')
            epoch_txt.write('avg_loss= ' + str(avg_loss) + '\n')
            epoch_txt.write('learning_rate= ' + str(learning_rate) + '\n')
            epoch_txt.write('batch_size= ' + str(batch_size) + '\n')
            epoch_txt.write('epoch_time= ' + str(time.time() - ts_epoch) + 's \n')

        total_loss = 0
        total_object_loss = 0
        total_coord_loss = 0
        total_no_object_loss = 0
        iteration_count = 0
        ts_epoch = time.time()
  finally:
    # The loop only ends through an exception or a manual interrupt; make
    # sure the log reaches disk in that case.
    log_file.close()
    print('done!')

gpu_session(train)

# Debug

## Check Inputs

In [0]:
!rm -rf ./check_inputs && mkdir ./check_inputs

from IPython.display import Image, display

num_inputs = 10
image_size = 400
num_images_per_row = 2
db = 'WIDER'

image_augmentor = ImageAugmentor.load('./augmentor_4.json')
train_data = load_json('./data/trainData.json')

db_data = []
for data in train_data:
  if db is None or data['db'] == db:
    db_data.append(data)
    
data_loader = DataLoader(db_data, start_epoch = 0, image_augmentor = image_augmentor)
batch_x, batch_y = data_loader.next_batch(num_inputs, image_size)

file_idx = 0
idx = 0
while idx < num_inputs:
  imgs = np.stack(batch_x[idx : idx + num_images_per_row], axis = 0)
  all_boxes = batch_y[idx : idx + num_images_per_row]
  for i, boxes in enumerate(all_boxes):
    for box in boxes:
      draw_box(imgs[i], box)
  
  merged_img = np.concatenate(imgs, axis = 1)
  
  file = './check_inputs/' + str(file_idx) + '.jpg'
  cv2.imwrite(file, merged_img)
  display(Image(file))
  
  file_idx += 1
  idx += num_images_per_row

!rm -rf ./check_inputs