In [1]:
!git clone https://github.com/sa-y-an/sfdt.git

Cloning into 'sfdt'...
remote: Enumerating objects: 18, done.[K
remote: Counting objects: 100% (18/18), done.[K
remote: Compressing objects: 100% (17/17), done.[K
remote: Total 18 (delta 1), reused 18 (delta 1), pack-reused 0[K
Unpacking objects: 100% (18/18), done.


In [2]:
!ls

sample_data  sfdt


In [3]:
cd sfdt

/content/sfdt


In [4]:
!ls

dataset				mn_model.py
face_generator.py		README.md
keras_layer_AnchorBoxes.py	ssd_box_encode_decode_utils.py
keras_layer_L2Normalization.py	wider_extract.py
keras_ssd_loss.py


In [5]:
cd dataset/

/content/sfdt/dataset


# Downloading Dataset

Test data

In [6]:
!gdown --id 0B6eKvaijfFUDQUUwd21EckhUbWs

Downloading...
From: https://drive.google.com/uc?id=0B6eKvaijfFUDQUUwd21EckhUbWs
To: /content/sfdt/dataset/WIDER_train.zip
1.47GB [00:16, 88.5MB/s]


In [7]:
!unzip -q WIDER_train.zip

Validation data

In [9]:
!gdown --id 0B6eKvaijfFUDd3dIRmpvSk8tLUk

Downloading...
From: https://drive.google.com/uc?id=0B6eKvaijfFUDd3dIRmpvSk8tLUk
To: /content/sfdt/dataset/WIDER_val.zip
363MB [00:02, 174MB/s]


In [10]:
!unzip -q WIDER_val.zip

In [11]:
cd ..

/content/sfdt


# Main Code Starts Here

In [12]:
import warnings
warnings.filterwarnings("ignore")

from tensorflow.keras.optimizers import Adam, SGD, Nadam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, TensorBoard, LearningRateScheduler
from tensorflow.keras.callbacks import Callback
from tensorflow.keras import backend as K 
from tensorflow.keras.models import load_model
from math import ceil 
import numpy as np 
from termcolor import colored

from mn_model import mn_model
from face_generator import BatchGenerator
from keras_ssd_loss import SSDLoss
from ssd_box_encode_decode_utils import SSDBoxEncoder, decode_y, decode_y2

import scipy.misc as sm
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0' # choose gpu

In [13]:
img_height = 512
img_width = 512
img_channels = 3

n_classes = 2 
class_names = ["background","face"]

scales = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05] # anchorboxes for coco dataset
aspect_ratios = [[0.5, 1.0, 2.0],
                 [1.0/3.0, 0.5, 1.0, 2.0, 3.0],
                 [1.0/3.0, 0.5, 1.0, 2.0, 3.0],
                 [1.0/3.0, 0.5, 1.0, 2.0, 3.0],
                 [0.5, 1.0, 2.0],
                 [0.5, 1.0, 2.0]] # The anchor box aspect ratios used in the original SSD300
two_boxes_for_ar1 = True
limit_boxes = False # Whether or not you want to limit the anchor boxes to lie entirely within the image boundaries
variances = [0.1, 0.1, 0.2, 0.2] # The variances by which the encoded target coordinates are scaled as in the original implementation
coords = 'centroids' # Whether the box coordinates to be used as targets for the model should be in the 'centroids' or 'minmax' format, see documentation
normalize_coords = True

data_path = 'dataset/'
det_model_path = "./models/" 
train_data = data_path + 'wider_train_small.npy'
test_data = data_path + 'wider_val_small.npy'


In [14]:
# downloading the base model (Used Mobile Net V1 for Faster Training)
!gdown --id 1Wor0430SL7-_NRxGHTVBZJ21JlGdJoOq

Downloading...
From: https://drive.google.com/uc?id=1Wor0430SL7-_NRxGHTVBZJ21JlGdJoOq
To: /content/sfdt/mobilenet_1_0_224_tf.h5
17.2MB [00:00, 53.5MB/s]


In [18]:
!ls

dataset				mobilenet_1_0_224_tf.h5
face_generator.py		__pycache__
keras_layer_AnchorBoxes.py	README.md
keras_layer_L2Normalization.py	ssd_box_encode_decode_utils.py
keras_ssd_loss.py		wider_extract.py
mn_model.py


In [19]:
mkdir models

In [20]:
!ls

dataset				mobilenet_1_0_224_tf.h5
face_generator.py		models
keras_layer_AnchorBoxes.py	__pycache__
keras_layer_L2Normalization.py	README.md
keras_ssd_loss.py		ssd_box_encode_decode_utils.py
mn_model.py			wider_extract.py


In [21]:
# build the keras model
# this model is not retrained, we are doing it from scratch 

model, model_layer, img_input, predictor_sizes = mn_model(image_size=(img_height, img_width, img_channels), 
                                                                      n_classes = n_classes,
                                                                      min_scale = None, 
                                                                      max_scale = None, 
                                                                      scales = scales, 
                                                                      aspect_ratios_global = None, 
                                                                      aspect_ratios_per_layer = aspect_ratios, 
                                                                      two_boxes_for_ar1= two_boxes_for_ar1, 
                                                                      limit_boxes=limit_boxes, 
                                                                      variances= variances, 
                                                                      coords=coords, 
                                                                      normalize_coords=normalize_coords)

#model.summary()

print ("Freezing classification layers")
#Freeze layers
for layer_key in model_layer:
  if('detection'  not in layer_key): #prefix detection to freeze layers which does not have detection
    model_layer[layer_key].trainable = False
print (colored("classification layers freezed", 'green'))

# for layer in model.layers:
#   print (colored(layer.name, 'blue'))
#   print (colored(layer.trainable, 'green'))

print ("loading classification weights")
classification_model = 'mobilenet_1_0_224_tf.h5'
model.load_weights(classification_model,  by_name= True)

print (colored( ('classification weights %s loaded' % classification_model), 'green'))

Height, Width, Channels : 512 512 3
The following Variables were used a Lambda layer's call (tf.math.multiply_1), but
are not present in its tracked objects:
  <tf.Variable 'detection_conv4_3_norm/detection_conv4_3_norm_gamma:0' shape=(512,) dtype=float32>
It is possible that this is intended behavior, but it is more likely
an omission. This is a strong indication that this layer should be
formulated as a subclassed Layer rather than a Lambda layer.
Freezing classification layers
[32mclassification layers freezed[0m
loading classification weights
[32mclassification weights mobilenet_1_0_224_tf.h5 loaded[0m


In [33]:
# setting up taining 

batch_size = 64
num_epochs = 100

#Adam
base_lr = 0.002
adam = Adam(lr=base_lr, beta_1=0.9, beta_2=0.999, epsilon=1e-6, decay = 0.0)
ssd_loss = SSDLoss(neg_pos_ratio=2, n_neg_min=0, alpha=1.0, beta = 1.0)
model.compile(optimizer=adam, loss=ssd_loss.compute_loss)


ssd_box_encoder = SSDBoxEncoder(img_height=img_height,
                                img_width=img_width,
                                n_classes=n_classes, 
                                predictor_sizes=predictor_sizes,
                                min_scale=None,
                                max_scale=None,
                                scales=scales,
                                aspect_ratios_global=None,
                                aspect_ratios_per_layer=aspect_ratios,
                                two_boxes_for_ar1=two_boxes_for_ar1,
                                limit_boxes=limit_boxes,
                                variances=variances,
                                pos_iou_threshold=0.5,
                                neg_iou_threshold=0.2,
                                coords=coords,
                                normalize_coords=normalize_coords)

train_dataset = BatchGenerator(images_path=train_data, 
                               include_classes='all', 
                               box_output_format = ['class_id', 'xmin', 'xmax', 'ymin', 'ymax'])

print ("TRAINING DATA")

train_dataset.parse_xml(
                  annotations_path=train_data,
                  image_set_path=data_path,
                  image_set='None',
                  classes = class_names, 
                  exclude_truncated=False,
                  exclude_difficult=False,
                  ret=False, 
                  debug = False)

train_generator = train_dataset.generate(
                 batch_size=batch_size,
                 train=True,
                 ssd_box_encoder=ssd_box_encoder,
                 equalize=True,
                 brightness=(0.5,2,0.5),
                 flip=0.5,
                 translate=((0, 20), (0, 30), 0.5),
                 scale=(0.75, 1.2, 0.5),
                 crop=False,
                 #random_crop = (img_height,img_width,1,3), 
                 random_crop=False,
                 resize=(img_height, img_width),
                 #resize=False,
                 gray=False,
                 limit_boxes=True,
                 include_thresh=0.4,
                 diagnostics=False)

n_train_samples = train_dataset.get_n_samples()

print ("Total number of training samples = {}".format(n_train_samples))


print ("VALIDATION DATA")

val_dataset = BatchGenerator(images_path=test_data, include_classes='all', 
                box_output_format = ['class_id', 'xmin', 'xmax', 'ymin', 'ymax'])


val_dataset.parse_xml(
                  annotations_path=test_data,
                  image_set_path=data_path,
                  image_set='None',
                  classes = class_names, 
                  exclude_truncated=False,
                  exclude_difficult=False,
                  ret=False, 
                  debug = False)


val_generator = val_dataset.generate(
                 batch_size=batch_size,
                 train=True,
                 ssd_box_encoder=ssd_box_encoder,
                 equalize=False,
                 brightness=False,
                 flip=False,
                 translate=False,
                 scale=False,
                 crop=False,
                 #random_crop = (img_height,img_width,1,3), 
                 random_crop=False, 
                 resize=(img_height, img_width), 
                 #resize=False, 
                 gray=False,
                 limit_boxes=True,
                 include_thresh=0.4,
                 diagnostics=False)

n_val_samples = val_dataset.get_n_samples()

print ("Total number of validation samples = {}".format(n_val_samples))

TRAINING DATA
Total number of training samples = 128
VALIDATION DATA
Total number of validation samples = 60


In [34]:
# now start the training 

def scheduler(epoch):
  if epoch%10==0 and epoch!=0:
    lr = K.get_value(model.optimizer.lr)
    K.set_value(model.optimizer.lr, lr*.95)
    print("lr changed to {}".format(lr*.95))
  else: 
    print("lr remains {}".format(K.get_value(model.optimizer.lr)))

  return K.get_value(model.optimizer.lr)

lr_schedule = LearningRateScheduler(scheduler)

plateau = ReduceLROnPlateau(monitor='val_loss', factor = 0.3, patience =4, epsilon=0.001, cooldown=0)
tensorboard = TensorBoard(log_dir='./logs/trial1/', histogram_freq=1, batch_size=16, write_graph=True, write_grads=True, 
                          write_images=True, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None)
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.001, patience=100)
model_checkpoint =  ModelCheckpoint(det_model_path + 'ssd_mobilenet_face_epoch_{epoch:02d}_loss{val_loss:.4f}.h5',
                                                           monitor='val_loss',
                                                           verbose=1,
                                                           save_best_only=True,
                                                           save_weights_only=True,
                                                           mode='auto',
                                                           period=1)


history = model.fit_generator(generator = train_generator,
                              steps_per_epoch = ceil(n_train_samples/batch_size)*2,
                              epochs = num_epochs,
                              callbacks = [model_checkpoint, lr_schedule, early_stopping],                      
                              validation_data = val_generator,
                              validation_steps = ceil(n_val_samples/batch_size))

model.save_weights(det_model_path + 'ssd_mobilenet_weights_epoch_{}.h5'.format(num_epochs))

print ("model and weight files saved at : " + det_model_path)

Epoch 1/100
lr remains 0.0020000000949949026

Epoch 00001: val_loss improved from inf to 0.12927, saving model to ./models/ssd_mobilenet_face_epoch_01_loss0.1293.h5
Epoch 2/100
lr remains 0.0020000000949949026

Epoch 00002: val_loss improved from 0.12927 to 0.12902, saving model to ./models/ssd_mobilenet_face_epoch_02_loss0.1290.h5
Epoch 3/100
lr remains 0.0020000000949949026

Epoch 00003: val_loss improved from 0.12902 to 0.12855, saving model to ./models/ssd_mobilenet_face_epoch_03_loss0.1285.h5
Epoch 4/100
lr remains 0.0020000000949949026

Epoch 00004: val_loss improved from 0.12855 to 0.12786, saving model to ./models/ssd_mobilenet_face_epoch_04_loss0.1279.h5
Epoch 5/100
lr remains 0.0020000000949949026

Epoch 00005: val_loss improved from 0.12786 to 0.12711, saving model to ./models/ssd_mobilenet_face_epoch_05_loss0.1271.h5
Epoch 6/100
lr remains 0.0020000000949949026

Epoch 00006: val_loss improved from 0.12711 to 0.12645, saving model to ./models/ssd_mobilenet_face_epoch_06_loss

In [35]:

model_path = './models/'
model_name = 'ssd_mobilenet_face_epoch_25_loss0.0916.h5'

model.load_weights('./models/ssd_mobilenet_weights_epoch_100.h5',  by_name= True)

print (colored('weights %s loaded' % (model_path + model_name), 'green'))

def save_bb(path, filename, results, prediction=True):
  
  # print filename

  img = image.load_img(filename, target_size=(img_height, img_width))
  img = image.img_to_array(img)

  filename = filename.split("/")[-1]

  if(not prediction):
    filename = filename[:-4] + "_gt" + ".jpg"

  #fig,currentAxis = plt.subplots(1)
  currentAxis = plt.gca()

 # Get detections with confidence higher than 0.6.
  colors = plt.cm.hsv(np.linspace(0, 1, 25)).tolist()
  color_code = min(len(results), 16)
  print (colored("total number of bbs: %d" % len(results), "yellow"))
  for result in results:
    # Parse the outputs.

    if(prediction):
      det_label = result[0]
      det_conf = result[1]
      det_xmin = result[2]
      det_xmax = result[3]
      det_ymin = result[4]
      det_ymax = result[5]
    else :
      det_label = result[0]
      det_xmin = result[1]
      det_xmax = result[2]
      det_ymin = result[3]
      det_ymax = result[4]

    xmin = int(det_xmin)
    ymin = int(det_ymin)
    xmax = int(det_xmax)
    ymax = int(det_ymax)

    if(prediction):
      score = det_conf
    
    plt.imshow(img / 255.)
    
    label = int(int(det_label))
    label_name = class_names[label]
    # print label_name 
    # print label

    if(prediction):
      display_txt = '{:0.2f}'.format(score)
    else:
      display_txt = '{}'.format(label_name)

      
    # print (xmin, ymin, ymin, ymax)
    coords = (xmin, ymin), (xmax-xmin), (ymax-ymin)
    color_code = color_code-1 
    color = colors[color_code]
    currentAxis.add_patch(plt.Rectangle(*coords, fill=False, edgecolor=color, linewidth=2))
    currentAxis.text(xmin, ymin, display_txt, bbox={'facecolor':color, 'alpha':0.2})

  # y
  currentAxis.axes.get_yaxis().set_visible(False)
  # x 
  currentAxis.axes.get_xaxis().set_visible(False)
  plt.savefig(path + filename, bbox_inches='tight')

  print ('saved' , path + filename)

  plt.clf()


[32mweights ./models/ssd_mobilenet_face_epoch_25_loss0.0916.h5 loaded[0m


In [36]:
!rm -r output_test

In [37]:
mkdir output_test

In [38]:
from keras.preprocessing import image
from matplotlib import pyplot as plt

test_size = 10
test_generator = val_dataset.generate(
                 batch_size=test_size,
                 train=False,
                 ssd_box_encoder=ssd_box_encoder,
                 equalize=False,
                 brightness=False,
                 flip=False,
                 translate=False,
                 scale=False,
                 crop=False,
                 #random_crop = (img_height,img_width,1,3), 
                 random_crop=False, 
                 resize=(img_height, img_width), 
                 #resize=False,
                 gray=False,
                 limit_boxes=True,
                 include_thresh=0.4,
                 diagnostics=False)

print (colored("done.", "green"))

print (colored("now predicting...", "yellow"))

_CONF = 0.60 
_IOU = 0.15

for i in range(test_size):
  X, y, filenames = next(test_generator)

  y_pred = model.predict(X)


  y_pred_decoded = decode_y2(y_pred,
                             confidence_thresh=_CONF,
                            iou_threshold=_IOU,
                            top_k='all',
                            input_coords=coords,
                            normalize_coords=normalize_coords,
                            img_height=img_height,
                            img_width=img_width)


  np.set_printoptions(suppress=True)

  save_bb("./output_test/", filenames[i], y_pred_decoded[i])
  save_bb("./output_test/", filenames[i], y[i], prediction=False)

[32mdone.[0m
[33mnow predicting...[0m
[33mtotal number of bbs: 2[0m
saved ./output_test/34_Baseball_Baseball_34_608.jpg
[33mtotal number of bbs: 1[0m
saved ./output_test/34_Baseball_Baseball_34_608_gt.jpg
[33mtotal number of bbs: 0[0m
saved ./output_test/33_Running_Running_33_475.jpg
[33mtotal number of bbs: 1[0m
saved ./output_test/33_Running_Running_33_475_gt.jpg
[33mtotal number of bbs: 0[0m
saved ./output_test/35_Basketball_playingbasketball_35_417.jpg
[33mtotal number of bbs: 16[0m
saved ./output_test/35_Basketball_playingbasketball_35_417_gt.jpg
[33mtotal number of bbs: 1[0m
saved ./output_test/49_Greeting_peoplegreeting_49_589.jpg
[33mtotal number of bbs: 1[0m
saved ./output_test/49_Greeting_peoplegreeting_49_589_gt.jpg
[33mtotal number of bbs: 3[0m
saved ./output_test/2_Demonstration_Demonstration_Or_Protest_2_306.jpg
[33mtotal number of bbs: 10[0m
saved ./output_test/2_Demonstration_Demonstration_Or_Protest_2_306_gt.jpg
[33mtotal number of bbs: 1[0m
s