<a href="https://colab.research.google.com/github/justadudewhohacks/ipynbs/blob/master/face_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dependencies

In [0]:
!pip install -U -q PyDrive
!pip install git+https://github.com/justadudewhohacks/image_augment.py
!pip install git+https://github.com/justadudewhohacks/colabsnippets

# Download Data

In [0]:
from colabsnippets.DataDownloader import DataDownloader

# Download the WIDER data set into ./data.
# The two ids are presumably Google Drive file ids for the image and box archives,
# and the ['boxes'] argument is forwarded as-is to DataDownloader.download_data
# (its exact meaning is defined by colabsnippets — TODO confirm).
data_downloader = DataDownloader(data_dir = './data')
data_downloader.download_data(
  { "WIDER": [{ "images": "1JHmXqGPngDCbM56eYPeqsaCgJC4vgL4m", "boxes": "1aeAGd5LmL8EBB1yaZxKOp1NbZ1CBJBmm" }] },
  ['boxes']
)

print('done!')

# Common

In [0]:
import cv2
import math
import json
import random
import time
import types
import os
import numpy as np
import tensorflow as tf
from augment import ImageAugmentor, augment
from augment.augment import abs_coords
from colabsnippets.utils import load_json
from colabsnippets import BatchLoader

'''
--------------------------------------------------------------------------------

Data Loader

--------------------------------------------------------------------------------
'''
  
def transform_boxes(boxes):
  """
  Convert box dicts into (x, y, width, height) tuples.

  boxes: iterable of dicts with the keys 'x', 'y', 'width' and 'height'.
  Returns a list with one tuple per input box, in input order.
  Raises Exception when the absolute value of any coordinate exceeds 1.0,
  since such a box is most likely not expressed in relative coordinates.
  """
  def to_checked_tuple(entry):
    coords = tuple(entry[key] for key in ('x', 'y', 'width', 'height'))
    if any(abs(coord) > 1.0 for coord in coords):
      raise Exception("box is probably not a valid relative box: {}".format(coords))
    return coords

  return [to_checked_tuple(entry) for entry in boxes]
  
def extract_data_labels(data):
  """
  Load the boxes that belong to one data entry.

  data: dict with the keys 'db' and 'file' and an optional 'shard'.
  The boxes file is looked up at ./data/<db>/<boxes dir>/<file with .jpg replaced by .json>,
  where the boxes dir is 'boxes-shard<shard>' for sharded entries and 'boxes' otherwise.
  Returns the result of transform_boxes() applied to the loaded JSON.
  """
  db = data['db']
  boxes_file = data['file'].replace('.jpg', '.json')
  if 'shard' in data:
    boxes_dir = "boxes-shard{}".format(data['shard'])
  else:
    boxes_dir = 'boxes'
  boxes_path = "./data/{}/{}/{}".format(db, boxes_dir, boxes_file)
  return transform_boxes(load_json(boxes_path))
    
def resolve_image_path(data):
  """
  Build the path of the image file that belongs to one data entry.

  data: dict with the keys 'db' and 'file' and an optional 'shard'.
  Returns './data/<db>/<image dir>/<file>', where the image dir is
  'images-shard<shard>' for sharded entries and 'images' otherwise.
  """
  db, img_file = data['db'], data['file']
  if 'shard' in data:
    img_dir = "images-shard{}".format(data['shard'])
  else:
    img_dir = 'images'
  return "./data/{}/{}/{}".format(db, img_dir, img_file)

def min_bbox(boxes):
  """
  Compute the smallest region enclosing all given boxes.

  boxes: iterable of (x, y, width, height) tuples.
  Returns [min_x, min_y, max_x, max_y]. The search starts from
  min = 1.0 and max = 0, so an empty input yields [1.0, 1.0, 0, 0]
  and the maxima never drop below 0.
  """
  left, top, right, bottom = 1.0, 1.0, 0, 0
  for bx, by, bw, bh in boxes:
    # Only the upper left and the lower right corner can extend the region.
    for px, py in ((bx, by), (bx + bw, by + bh)):
      if px < left:
        left = px
      if py < top:
        top = py
      if not px < right:
        right = px
      if not py < bottom:
        bottom = py

  return [left, top, right, bottom]

class DataLoader(BatchLoader):
  """
  BatchLoader that yields augmented images together with their face boxes.

  data may be either a list of data entries or a plain function returning
  such a list. When image_augmentor is None the module level augment()
  function is used instead of image_augmentor.augment().
  """

  def __init__(self, data, image_augmentor = None, start_epoch = None, is_test = False):
    self.image_augmentor = image_augmentor
    # BatchLoader is handed a function; wrap plain data in one.
    if type(data) is types.FunctionType:
      data_provider = data
    else:
      data_provider = lambda: data
    BatchLoader.__init__(
      self,
      data_provider,
      resolve_image_path,
      extract_data_labels,
      start_epoch = start_epoch,
      is_test = is_test
    )

  def load_image_and_labels_batch(self, datas, image_size):
    """
    Load, crop, pad and resize the images and boxes of the given data entries.

    Every image is randomly cropped around the region enclosing all of its
    boxes (see min_bbox), padded to a square and resized to image_size.
    Returns (images, boxes) as two lists aligned by index.
    self.extract_data_labels and self.load_image are not defined in this
    class; presumably they are provided by BatchLoader.
    """
    if self.image_augmentor is not None:
      apply_augmentation = self.image_augmentor.augment
    else:
      apply_augmentation = augment

    images, labels = [], []
    for entry in datas:
      entry_boxes = self.extract_data_labels(entry)
      entry_image = self.load_image(entry)
      entry_image, entry_boxes = apply_augmentation(
        entry_image,
        boxes = entry_boxes,
        random_crop = min_bbox(entry_boxes),
        pad_to_square = True,
        resize = image_size
      )
      images.append(entry_image)
      labels.append(entry_boxes)

    return images, labels


'''
--------------------------------------------------------------------------------

utility

--------------------------------------------------------------------------------
'''

def gpu_session(callback):
  """
  Run callback(session) inside a TensorFlow session pinned to '/gpu:0'.

  The session is configured with GPU memory growth, soft device placement
  and device placement logging. Returns whatever callback returns; the
  session is closed when the callback finishes or raises.
  """
  session_config = tf.ConfigProto(allow_soft_placement = True, log_device_placement = True)
  session_config.gpu_options.allow_growth = True
  with tf.Session(config = session_config) as sess, tf.device('/gpu:0'):
    return callback(sess)

def get_checkpoint(model_name, epoch):
  """Return the checkpoint name '<model_name>.ckpt-<epoch>' for the given model name and epoch."""
  return ''.join((model_name, '.ckpt-', str(epoch)))

def draw_box(img, box):
  """
  Draw a box outline and its four corner points onto img (modified in place).

  box is converted with abs_coords(box, img) into (x, y, width, height);
  presumably this maps relative to pixel coordinates — see the augment package.
  The outline uses colour (255, 0, 0) with thickness 1, the corners are
  filled circles of radius 2 in colour (0, 0, 255).
  """
  left, top, box_w, box_h = abs_coords(box, img)
  right, bottom = left + box_w, top + box_h

  cv2.rectangle(img, (left, top), (right, bottom), (255, 0, 0), 1)
  for corner in ((left, top), (left, bottom), (right, top), (right, bottom)):
    cv2.circle(img, corner, 2, (0, 0, 255), -1)

# Train

## Neural Network

In [0]:
import tensorflow as tf

from colabsnippets import NeuralNetwork
from colabsnippets.ops import normalize, conv2d, depthwise_separable_conv2d

# Default anchor boxes of the detector, one pair per anchor. The number of
# anchors determines the channel count of the network's output layer.
# Presumably each pair is (width, height) in units of grid cells — TODO confirm.
default_anchors = [
  (1.603231, 2.094468),
  (6.041143, 7.080126),
  (2.882459, 3.518061),
  (4.266906, 5.178857),
  (9.041765, 10.66308)
]
# Default per channel mean handed to normalize() at the start of the forward pass.
default_mean_rgb = [117.001, 114.697, 97.404]

class TinyYolov2Mobilenet(NeuralNetwork):
  """
  Tiny YOLO v2 style detector built from depthwise separable convolutions.

  The forward pass halves the spatial resolution five times, so every output
  cell covers cell_size = 32 input pixels, and emits
  len(anchors) * box_encoding_size channels per cell.
  """

  def __init__(self, name = 'tiny_yolov2_mobilenet', anchors = default_anchors, mean_rgb = default_mean_rgb):
    super().__init__(self.initialize_weights, name = name)
    self.anchors = anchors
    self.mean_rgb = mean_rgb

    # Number of values predicted per anchor.
    self.box_encoding_size = 5
    # forward() applies five stride 2 max pools.
    num_downscale_ops = 5
    self.cell_size = 2 ** num_downscale_ops

  def initialize_weights(self, weight_processor):
    """Register the weights of every layer with weight_processor, in forward order."""
    separable_layers = (
      (32, 64, 'separable_conv0'),
      (64, 128, 'separable_conv1'),
      (128, 256, 'separable_conv2'),
      (256, 512, 'separable_conv3'),
      (512, 1024, 'separable_conv4'),
    )
    num_out_channels = len(self.anchors) * self.box_encoding_size

    with tf.variable_scope(self.name):
      weight_processor.process_conv_weights(3, 32, 'conv_in')
      for channels_in, channels_out, layer_name in separable_layers:
        weight_processor.process_depthwise_separable_conv2d_weights(channels_in, channels_out, layer_name)
      weight_processor.process_conv_weights(1024, num_out_channels, 'conv_out', filter_size = 1)

  def depthwise_separable_conv2d(self, x, name):
    """Apply the named depthwise separable convolution with stride 1."""
    unit_stride = [1, 1, 1, 1]
    return depthwise_separable_conv2d(x, name, unit_stride)

  def leaky_relu(self, x):
    """Leaky ReLU with a negative slope of 0.1."""
    return tf.nn.leaky_relu(x, alpha = 0.1)

  def max_pool(self, x, is_scale_down = True):
    """2x2 max pooling with 'SAME' padding; stride 2 when is_scale_down, otherwise stride 1."""
    step = 2 if is_scale_down else 1
    return tf.nn.max_pool(x, [1, 2, 2, 1], [1, step, step, 1], 'SAME')

  def forward(self, batch_tensor):
    """
    Build the forward pass for a batch of images.

    Returns the raw output of the final 1x1 convolution ('conv_out').
    """
    out = normalize(batch_tensor, self.mean_rgb)

    with tf.variable_scope(self.name, reuse = True):
      out = self.max_pool(self.leaky_relu(conv2d(out, 'conv_in', [1, 1, 1, 1])))
      for layer_name in ('separable_conv0', 'separable_conv1', 'separable_conv2', 'separable_conv3'):
        out = self.max_pool(self.leaky_relu(self.depthwise_separable_conv2d(out, layer_name)))
      # The last separable block is pooled with stride 1 and keeps the resolution.
      out = self.leaky_relu(self.depthwise_separable_conv2d(out, 'separable_conv4'))
      out = self.max_pool(out, is_scale_down = False)
      out = conv2d(out, 'conv_out', [1, 1, 1, 1])

    return out

## Loss Function

In [63]:
import math
import numpy as np

def calculate_iou(box0, box1):
  """
  Compute the intersection over union of two boxes.

  box0, box1: (x, y, width, height) tuples in the same coordinate system.
  Returns a float in [0, 1] for boxes with non-negative sizes. Boxes that
  do not overlap yield 0.0, as do two boxes that both have zero area.
  """
  x0, y0, w0, h0 = box0
  x1, y1, w1, h1 = box1

  inter_w = min(x0 + w0, x1 + w1) - max(x0, x1)
  inter_h = min(y0 + h0, y1 + h1) - max(y0, y1)
  # Disjoint boxes produce a negative extent on at least one axis. Clamp each
  # axis separately, otherwise two negative extents multiply to a positive area.
  inter_area = max(inter_w, 0) * max(inter_h, 0)

  union_area = w0 * h0 + w1 * h1 - inter_area
  if union_area <= 0:
    # Both boxes are degenerate, there is nothing to overlap.
    return 0.0

  return inter_area / float(union_area)

def _shape_iou(w0, h0, w1, h1):
  """IoU of two boxes that share the same top left corner, i.e. a comparison of sizes only."""
  inter_area = min(w0, w1) * min(h0, h1)
  union_area = w0 * h0 + w1 * h1 - inter_area
  return inter_area / float(union_area) if union_area > 0 else 0.0

def create_ground_truth_mask(cell_size, box_encoding_size, anchors, input_size, batch_size, batch_gt_boxes):
  """
  Mark, for every ground truth box, the grid cell and anchor responsible for it.

  cell_size: edge length in pixels of one output grid cell.
  box_encoding_size: currently unused, kept for interface compatibility.
  anchors: sequence of (width, height) pairs; assumed to be expressed in
    units of grid cells, so they are scaled by cell_size — TODO confirm.
  input_size: edge length in pixels of the square network input.
  batch_size: number of images in the batch.
  batch_gt_boxes: per image a list of relative (x, y, width, height) boxes.

  Returns a numpy array of shape [batch_size, num_cells, num_cells, len(anchors)]
  that is 1 at (image, row, col, anchor) for the cell containing the box centre
  and the anchor whose size matches the box best, and 0 elsewhere.
  Raises ValueError if input_size is smaller than cell_size.
  """
  # Integer division: np.zeros rejects a float shape.
  num_cells = int(input_size // cell_size)
  if num_cells < 1:
    raise ValueError("input_size {} is smaller than cell_size {}".format(input_size, cell_size))
  mask = np.zeros([batch_size, num_cells, num_cells, len(anchors)])

  for batch_idx, gt_boxes in enumerate(batch_gt_boxes):
    for gt_box in gt_boxes:
      x, y, w, h = gt_box
      ct_x = x + (w / 2)
      ct_y = y + (h / 2)
      # The centre is relative, so scale it up to grid coordinates. Clamp so that
      # a centre on (or beyond) the image border still maps to a valid cell.
      col = min(max(int(math.floor(ct_x * num_cells)), 0), num_cells - 1)
      row = min(max(int(math.floor(ct_y * num_cells)), 0), num_cells - 1)

      box_w, box_h = w * input_size, h * input_size

      # Anchors are matched by size only, hence the position free IoU helper.
      highest_iou = 0
      highest_iou_anchor_idx = 0
      for anchor_idx, (anchor_w, anchor_h) in enumerate(anchors):
        iou = _shape_iou(anchor_w * cell_size, anchor_h * cell_size, box_w, box_h)
        if highest_iou < iou:
          highest_iou = iou
          highest_iou_anchor_idx = anchor_idx

      mask[batch_idx, row, col, highest_iou_anchor_idx] = 1

  return mask

0.7957712638154734
1.0
0.5
0.25
0.1111111111111111


## Train

In [43]:
# Clear TensorFlow's default graph before the network is built in this cell.
tf.reset_default_graph()

net = TinyYolov2Mobilenet()
model_name = net.name

# training parameters
# NOTE: learning_rate is only written to the per epoch summary file by train();
# no optimizer is constructed in this cell.
learning_rate = 0.001
# First epoch to run; a non-zero value makes train() restore a checkpoint first.
start_epoch = 0
batch_size = 1
# 416 / 32 (the network's cell size) gives the 13 x 13 output grid.
image_size = 416

image_augmentor = ImageAugmentor.load('./augmentor_4.json')
train_data = load_json('./data/trainData.json')
data_loader = DataLoader(train_data, start_epoch = start_epoch, image_augmentor = image_augmentor)
net.init_trainable_weights()

# Input placeholder with a fixed batch size, and the network output built on top of it.
X = tf.placeholder(tf.float32, [batch_size, image_size, image_size, 3])
out_op = net.forward(X)

# max_to_keep = None: presumably keeps every checkpoint instead of only the most recent ones — see tf.train.Saver.
saver = tf.train.Saver(max_to_keep = None)
  
print(len(train_data))
# Per iteration training log; train() writes one line per batch to it.
log_file = open('./log.txt', 'w')

def train(sess):
  """
  Run the training loop in the given session until it is interrupted.

  sess: TensorFlow session to run the graph in.

  Relies on the notebook globals start_epoch, model_name, saver, data_loader,
  batch_size, image_size, X, out_op, learning_rate and log_file.
  The loop has no exit condition; it is meant to be stopped manually. After
  every finished epoch a checkpoint and an 'epoch_<n>.txt' summary are written.
  The log file is closed when the loop is left, including on interruption
  or error.
  """
  total_loss = 0
  iteration_count = 0
  ts_epoch = time.time()

  sess.run(tf.global_variables_initializer())

  if (start_epoch != 0):
    # Resume from the checkpoint saved at the end of the previous epoch. The name
    # must match what saver.save() produces below: '<model_name>.ckpt-<epoch>'.
    checkpoint = get_checkpoint(model_name, start_epoch - 1)
    saver.restore(sess, checkpoint)
    print('done restoring session')

  try:
    while True:
      # Remember the loader state before fetching, next_batch() may advance the epoch.
      epoch = data_loader.epoch
      current_idx = data_loader.current_idx
      end_idx = data_loader.get_end_idx()

      ts = time.time()

      batch_x, batch_boxes = data_loader.next_batch(batch_size, image_size)
      out, = sess.run([out_op], feed_dict = { X: batch_x })
      print(out.shape)

      # TODO: the loss is not implemented yet, batch_boxes is unused so far.
      loss = 0
      total_loss += loss
      iteration_count += 1

      log_file.write("epoch " + str(epoch) + ", (" + str(current_idx) + " of " + str(end_idx) + "), loss= " + "{:.4f}".format(loss)
            + ", time= " + str((time.time() - ts) * 1000) + "ms \n")

      if epoch != data_loader.epoch:
        # The loader moved on to the next epoch: checkpoint and summarize the finished one.
        avg_loss = total_loss / iteration_count
        print('next epoch: ' + str(data_loader.epoch))
        print('avg_loss= ' + str(avg_loss))
        saver.save(sess, model_name + '.ckpt', global_step = epoch)

        epoch_txt_file_path = 'epoch_' + str(epoch) + '.txt'
        with open(epoch_txt_file_path, 'w') as epoch_txt:
          epoch_txt.write('total_loss= ' + str(total_loss) + '\n')
          epoch_txt.write('avg_loss= ' + str(avg_loss) + '\n')
          epoch_txt.write('learning_rate= ' + str(learning_rate) + '\n')
          epoch_txt.write('batch_size= ' + str(batch_size) + '\n')
          epoch_txt.write('epoch_time= ' + str(time.time() - ts_epoch) + 's \n')

        total_loss = 0
        iteration_count = 0
        ts_epoch = time.time()
  finally:
    # The code after the endless loop was unreachable, so the log file was never
    # closed. Close it on every way out of the loop instead.
    log_file.close()
    
# Start training inside a GPU session. train() loops without an exit condition,
# so this call only ends when the cell is interrupted or an error is raised.
gpu_session(train)

12880
(1, 13, 13, 25)


NameError: ignored

# Debug

## Check Inputs

In [0]:
!rm -rf ./check_inputs && mkdir ./check_inputs

from IPython.display import Image, display

# Settings for the visual check: how many samples to load, their size,
# how many images are placed side by side, and which database to sample from
# (db = None keeps the entries of every database).
num_inputs = 10
image_size = 400
num_images_per_row = 2
db = 'WIDER'

image_augmentor = ImageAugmentor.load('./augmentor_4.json')
train_data = load_json('./data/trainData.json')

db_data = [entry for entry in train_data if db is None or entry['db'] == db]

data_loader = DataLoader(db_data, start_epoch = 0, image_augmentor = image_augmentor)
batch_x, batch_y = data_loader.next_batch(num_inputs, image_size)

# Render the batch row by row: draw the boxes onto each image, join the images
# of one row horizontally, write the result to disk and display it.
for file_idx, idx in enumerate(range(0, num_inputs, num_images_per_row)):
  row_end = idx + num_images_per_row
  imgs = np.stack(batch_x[idx : row_end], axis = 0)
  for i, boxes in enumerate(batch_y[idx : row_end]):
    for box in boxes:
      draw_box(imgs[i], box)

  merged_img = np.concatenate(imgs, axis = 1)

  out_path = './check_inputs/' + str(file_idx) + '.jpg'
  cv2.imwrite(out_path, merged_img)
  display(Image(out_path))

!rm -rf ./check_inputs