In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
import os, time, pickle
from tqdm.notebook import tqdm

from tb_model import TB300
from ssd_utils import PriorUtil
from ssd_data import InputGenerator
from ssd_training import SSDLoss

from utils.model import load_weights
from utils.training import MetricUtility

### Data

#### Dataset SynthText

In [None]:
# Presumably GTUtility has to be importable here so that pickle can rebuild
# the stored object - TODO confirm.
from data_synthtext import GTUtility

# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load pickles that were produced locally.
with open('gt_util_synthtext_horizontal10.pkl', mode='rb') as pkl_file:
    gt_util = pickle.load(pkl_file)

# Presumably a 90 % / 10 % train/validation split - confirm against GTUtility.split.
gt_util_train, gt_util_val = gt_util.split(0.9)

#### Dataset ICDAR

In [None]:
# Running this cell rebinds GTUtility, gt_util_train and gt_util_val, replacing
# whatever the SynthText cell bound to the same names.
from data_icdar2015fst import GTUtility

icdar_root = 'data/ICDAR2015_FST/'
gt_util_train = GTUtility(icdar_root)
gt_util_val = GTUtility(icdar_root, test=True)

### Model

In [None]:
# Instantiate the TB300 network with its default constructor arguments.
model = TB300()

# PriorUtil is built from the model; the same instance is later passed to the
# input generators and used to decode the raw predictions.
prior_util = PriorUtil(model)

In [None]:
# First epoch index of the training loop; raise it together with weights_path
# when resuming from one of the checkpoints listed further down.
initial_epoch = 0

# Option 1 (currently disabled): VGG16 ImageNet weights without the top layers.
#!wget -O ./models/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5 https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
weights_path = './models/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
# Pairs of layer names, only consumed by the commented-out load_weights call
# below. Presumably (name in the weights file, name in this model) - TODO confirm
# against utils.model.load_weights.
layer_list = [('block1_conv1', 'conv1_1'),
              ('block1_conv2', 'conv1_2'),
              ('block2_conv1', 'conv2_1'),
              ('block2_conv2', 'conv2_2'),
              ('block3_conv1', 'conv3_1'),
              ('block3_conv2', 'conv3_2'),
              ('block3_conv3', 'conv3_3'),
              ('block4_conv1', 'conv4_1'),
              ('block4_conv2', 'conv4_2'),
              ('block4_conv3', 'conv4_3'),
              ('block5_conv1', 'conv5_1'),
              ('block5_conv2', 'conv5_2'),
              ('block5_conv3', 'conv5_3')]
#load_weights(model, weights_path, layer_list)

# Option 2 (active): weights_path is overwritten here, so the file below is the
# one that actually gets loaded. The commented lines are alternative checkpoints
# that also set the matching initial_epoch.
weights_path = './models/ssd300_voc_weights_fixed.hdf5'
#weights_path = './checkpoints/201710132146_tb300_synthtext_horizontal10/weights.004.h5'; initial_epoch = 5
#weights_path = './checkpoints/201710141431_tb300_synthtext_horizontal10/weights.019.h5'; initial_epoch = 20
load_weights(model, weights_path)

# Names of layers that the training cell marks as non-trainable
# (layer.trainable is set to False when the layer name is in this list).
freeze = ['conv1_1', 'conv1_2',
          'conv2_1', 'conv2_2',
          'conv3_1', 'conv3_2', 'conv3_3',
          #'conv4_1', 'conv4_2', 'conv4_3',
          #'conv5_1', 'conv5_2', 'conv5_3',
         ]

### Training

In [None]:
# TextBoxes paper
# Momentum 0.9, weight decay 5e-4
# learning rate initially set to 1e-3 and decayed to 1e-4 after 40k iterations
# SynthText for 50k iterations, finetune on ICDAR 2013 (ICDAR 2015 FST) for 2k iterations

# Name of the run; becomes part of the checkpoint directory name.
experiment = 'tb300_synthtext_horizontal10'
#experiment = 'tb300_icdar'

epochs = 100
batch_size = 32

#optimizer = tf.optimizers.SGD(learning_rate=1e-3, momentum=0.9, decay=0, nesterov=True)
# The former `decay=0.0` argument was dropped: a decay of zero is a no-op in the
# older optimizers, while newer Keras optimizers reject the `decay` keyword.
optimizer = tf.optimizers.Adam(learning_rate=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

loss = SSDLoss(alpha=1.0, neg_pos_ratio=3.0)
#loss = SSDFocalLoss()

#regularizer = None
regularizer = keras.regularizers.l2(5e-4) # None if disabled

gen_train = InputGenerator(gt_util_train, prior_util, batch_size, model.image_size, augmentation=True,
                           hflip_prob=0.0, vflip_prob=0.0, do_crop=False)
# NOTE(review): the validation generator also runs with augmentation=True, so
# validation metrics are measured on augmented samples - confirm this is intended.
gen_val = InputGenerator(gt_util_val, prior_util, batch_size, model.image_size, augmentation=True,
                         hflip_prob=0.0, vflip_prob=0.0, do_crop=False)


dataset_train, dataset_val = gen_train.get_dataset(), gen_val.get_dataset()
iterator_train, iterator_val = iter(dataset_train), iter(dataset_val)

# One directory per run, prefixed with the start time (minute resolution).
checkdir = './checkpoints/' + time.strftime('%Y%m%d%H%M') + '_' + experiment

# exist_ok avoids the check-then-create race and makes re-running the cell
# within the same minute harmless.
os.makedirs(checkdir, exist_ok=True)

# Snapshot of every cell executed so far, stored next to the checkpoints.
# `In` is the IPython input history, so this only works inside an
# IPython/Jupyter kernel.
source = ''.join(['# In[%i]\n%s\n\n' % (i, cell) for i, cell in enumerate(In)])
with open(os.path.join(checkdir, 'source.py'), 'wb') as f:
    f.write(source.encode())

print(checkdir)

for layer in model.layers:
    # Layers listed in `freeze` are excluded from training.
    layer.trainable = layer.name not in freeze
    if regularizer and layer.__class__.__name__.startswith('Conv'):
        # Binding `layer` as a default argument captures the current layer;
        # a plain closure would see only the last layer of the loop.
        model.add_loss(lambda layer=layer: regularizer(layer.kernel))

metric_util = MetricUtility(loss.metric_names, logdir=checkdir)

@tf.function
def step(x, y_true, training=False):
    """Run the model on one batch and return the loss metrics.

    Args:
        x: batch of model inputs.
        y_true: matching targets, forwarded to ``loss.compute``.
        training: Python bool. When true, the forward pass runs in training
            mode and one optimizer update is applied to
            ``model.trainable_variables``. When false, the forward pass runs
            in inference mode and no weights are changed.

    Returns:
        Whatever ``loss.compute(y_true, y_pred)`` returns; the training branch
        reads its ``'loss'`` entry as the objective.
    """
    if training:
        with tf.GradientTape() as tape:
            y_pred = model(x, training=True)
            metric_values = loss.compute(y_true, y_pred)
            total_loss = metric_values['loss']
            if model.losses:
                # Add the losses registered on the model (e.g. via add_loss)
                # to the objective only; the reported metrics stay untouched.
                total_loss = total_loss + tf.add_n(model.losses)
        gradients = tape.gradient(total_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    else:
        # Evaluation must run with training=False so that layers which behave
        # differently during training (dropout, batch normalization, ...) are
        # in inference mode. The previous code passed training=True here.
        y_pred = model(x, training=False)
        metric_values = loss.compute(y_true, y_pred)
    return metric_values

def _run_batches(batch_iterator, num_batches, description, training):
    """Feed `num_batches` batches from `batch_iterator` through `step`.

    The metrics returned by each `step` call are forwarded to `metric_util`
    under the same `training` flag. `description` labels the progress bar.
    """
    for _batch_idx in tqdm(range(num_batches), description, leave=False):
        x, y_true = next(batch_iterator)
        batch_metrics = step(x, y_true, training=training)
        metric_util.update(batch_metrics, training=training)


for k in tqdm(range(initial_epoch, epochs), 'total', leave=False):
    epoch_number = k + 1  # 1-based number used for the log line and the file name
    print('\nepoch %i/%i' % (epoch_number, epochs))
    metric_util.on_epoch_begin()

    # Training pass: `step` applies one optimizer update per batch.
    _run_batches(iterator_train, gen_train.num_batches, 'training', training=True)

    # The checkpoint is written after the training pass and before validation.
    model.save_weights(checkdir+'/weights.%03i.h5' % (epoch_number,))

    # Validation pass: metrics only, no weight updates.
    _run_batches(iterator_val, gen_val.num_batches, 'validation', training=False)

    metric_util.on_epoch_end(verbose=1)

### Prediction

In [None]:
# Draw a random batch of 16 validation samples, sized to the model input.
_, inputs, images, data = gt_util_val.sample_random_batch(batch_size=16, input_size=model.image_size)

preds = model.predict(inputs, batch_size=1, verbose=1)

# Only the first three samples of the batch are decoded and visualised.
for sample_idx in range(3):
    detections = prior_util.decode(preds[sample_idx], confidence_threshold=0.6, keep_top_k=100)
    sample_gt = data[sample_idx]
    if len(sample_gt) == 0:
        # Samples without ground-truth entries are not plotted.
        continue
    plt.figure(figsize=[10]*2)
    plt.imshow(images[sample_idx])
    prior_util.plot_results(detections, classes=gt_util_val.classes, show_labels=True, gt_data=sample_gt)
    plt.show()