In [2]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import keras
import keras.backend as K
from keras.callbacks import Callback
import tensorflow as tf
import random
import os
import time
import pickle

from tb_model import TB300
from ssd_utils import PriorUtil
from ssd_utils import load_weights
from ssd_data import InputGenerator
from ssd_data import preprocess
from ssd_training import SSDLoss, LearningRateDecay, Logger

%matplotlib inline
plt.rcParams['figure.figsize'] = [10]*2
plt.rcParams['image.interpolation'] = 'nearest'

np.set_printoptions(suppress=True, precision=5)

#config = tf.ConfigProto()
#config.gpu_options.per_process_gpu_memory_fraction = 0.9
#set_session(tf.Session(config=config))

### Data

#### Dataset SynthText

In [3]:
# GTUtility is imported before unpickling; presumably the pickled object is an
# instance of this class -- TODO confirm.
from data_synthtext import GTUtility

# NOTE(review): pickle.load can execute arbitrary code, so only load pickle
# files that come from a trusted source.
with open('gt_util_synthtext_horizontal10.pkl', 'rb') as pkl_file:
    gt_util = pickle.load(pkl_file)

# Train/validation split; split=0.8 is presumably the training fraction -- TODO confirm.
gt_util_train, gt_util_val = gt_util.split(gt_util, split=0.8)

#### Dataset ICDAR

In [2]:
# NOTE: this cell rebinds GTUtility, gt_util_train and gt_util_val, replacing
# the objects created by the SynthText cell above. Run only the dataset cell
# that matches the experiment being trained.
from data_icdar2013 import GTUtility

icdar_root = 'data/ICDAR2013/'
gt_util_train = GTUtility(icdar_root)
gt_util_val = GTUtility(icdar_root, test=True)

### Model

In [3]:
# Build the TB300 network with its default constructor arguments.
model = TB300()

# PriorUtil is constructed from the model. It is handed to the InputGenerator
# in the training cell and used in the evaluation cells to decode predictions
# and plot results.
prior_util = PriorUtil(model)

In [4]:
# Resume configuration.
#   weights_path  -- file loaded into `model` below
#   initial_epoch -- passed to fit_generator() in the training cell
# Both values are selected together from `resume_options`, so a checkpoint path
# can never be combined with the epoch number of a different checkpoint.

# VGG16 weights file in the Keras model cache (only needed for the commented-out
# load below). expanduser() is required because '~' is not expanded when a path
# is opened.
vgg16_weights_path = os.path.expanduser(
    '~/.keras/models/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5')
# Layer name pairs; the second name of each pair matches the layer names used in
# `freeze` below. Presumably (name in the VGG16 file, name in this model) -- TODO confirm.
layer_list = [('block1_conv1', 'conv1_1'),
              ('block1_conv2', 'conv1_2'),
              ('block2_conv1', 'conv2_1'),
              ('block2_conv2', 'conv2_2'),
              ('block3_conv1', 'conv3_1'),
              ('block3_conv2', 'conv3_2'),
              ('block3_conv3', 'conv3_3'),
              ('block4_conv1', 'conv4_1'),
              ('block4_conv2', 'conv4_2'),
              ('block4_conv3', 'conv4_3'),
              ('block5_conv1', 'conv5_1'),
              ('block5_conv2', 'conv5_2'),
              ('block5_conv3', 'conv5_3')]
#load_weights(model, vgg16_weights_path, layer_list)

# Available starting points: name -> (weights file, epoch to resume from).
resume_options = {
    'ssd300_voc': ('ssd300_voc_weights_fixed.hdf5', 0),
    'synthtext_004': ('checkpoints/201710132146_tb300_synthtext_horizontal10/weights.004.h5', 5),
    'synthtext_019': ('checkpoints/201710141431_tb300_synthtext_horizontal10/weights.019.h5', 20),
}
resume_from = 'synthtext_019'
weights_path, initial_epoch = resume_options[resume_from]

load_weights(model, weights_path)

# Names of the layers that the training cell marks as non-trainable.
freeze = ['conv1_1', 'conv1_2',
          'conv2_1', 'conv2_2',
          'conv3_1', 'conv3_2', 'conv3_3',
          #'conv4_1', 'conv4_2', 'conv4_3',
          #'conv5_1', 'conv5_2', 'conv5_3',
         ]

### Training

In [5]:
# Training schedule reported in the TextBoxes paper:
#   - momentum 0.9, weight decay 5e-4
#   - learning rate initially 1e-3, decayed to 1e-4 after 40k iterations
#   - 50k iterations on SynthText, then 2k iterations of fine-tuning on ICDAR 2013

# The experiment name becomes part of the checkpoint directory name.
experiment_name = 'tb300_synthtext_horizontal10'
#experiment_name = 'tb300_icdar'

batch_size = 32
epochs = 100

# Batch generator for training and validation; flips and cropping are disabled.
gen = InputGenerator(
    gt_util_train, gt_util_val, prior_util,
    batch_size, model.image_size,
    hflip_prob=0.0, vflip_prob=0.0, do_crop=False)

# Layers named in `freeze` become non-trainable; every other layer stays trainable.
for layer in model.layers:
    layer.trainable = layer.name not in freeze

# One checkpoint directory per run: ./checkpoints/<YYYYmmddHHMM>_<experiment_name>
timestamp = time.strftime('%Y%m%d%H%M')
checkdir = './checkpoints/' + timestamp + '_' + experiment_name
if not os.path.exists(checkdir):
    os.makedirs(checkdir)

# Store the inputs executed so far in this IPython session (the `In` history
# list) next to the checkpoints, so the exact training code can be looked up later.
with open(checkdir+'/source.py','wb') as source_file:
    source = ''.join('# In[%i]\n%s\n\n' % (i, cell) for i, cell in enumerate(In))
    source_file.write(source.encode())

#optim = keras.optimizers.SGD(lr=1e-3, momentum=0.9, decay=0, nesterov=True)
#optim = keras.optimizers.Adam(lr=1e-3, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
optim = keras.optimizers.Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)

# weight decay
# Assigning `kernel_regularizer` on an already-built layer does not change the
# training objective: Keras registers the penalty only at the moment a layer
# creates its weights. The penalty is therefore attached explicitly with
# add_loss() before compile(); the attribute is still set so that the layer
# configuration reflects the regularizer.
# NOTE(review): written for standalone Keras with a graph-mode backend, as used
# by the rest of this notebook -- confirm before porting to eager tf.keras.
regularizer = keras.regularizers.l2(5e-4) # None if disabled
for layer in model.layers:
    if not layer.__class__.__name__.startswith('Conv'):
        continue
    layer.kernel_regularizer = regularizer
    if regularizer is None or not hasattr(layer, 'kernel'):
        continue
    # Marker attribute: re-running this cell on the same model must not stack
    # the penalty a second time.
    if not getattr(layer, '_weight_decay_added', False):
        layer.add_loss(regularizer(layer.kernel))
        layer._weight_decay_added = True

# SSD loss object; its compute method is the training loss and its metrics are
# reported during training.
loss = SSDLoss(alpha=1.0, neg_pos_ratio=3.0)

model.compile(optimizer=optim,
              loss=loss.compute,
              metrics=loss.metrics)

# Save the weights (weights only, not the full model) after every epoch.
checkpointer = keras.callbacks.ModelCheckpoint(checkdir+'/weights.{epoch:03d}.h5',
                                               verbose=1, save_weights_only=True)
callbacks = [
    checkpointer,
    Logger(checkdir),
    # learning rate decay used with sgd
    #LearningRateDecay(methode='linear', base_lr=1e-3, n_desired=40000, desired=0.1, bias=0.0, minimum=0.1)
]

# Augmentation is applied to the training stream only.
train_batches = gen.generate(train=True, augmentation=True)
val_batches = gen.generate(train=False, augmentation=False)

# initial_epoch comes from the weights cell, so the epoch counter continues
# from the loaded checkpoint.
history = model.fit_generator(
    train_batches,          # generator
    gen.num_train_batches,  # steps_per_epoch
    epochs=epochs,
    verbose=1,
    callbacks=callbacks,
    validation_data=val_batches,
    validation_steps=gen.num_val_batches,
    class_weight=None,
    #max_queue_size=10,
    workers=1,
    #use_multiprocessing=False,
    initial_epoch=initial_epoch)


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 21/100
Epoch 22/100

KeyboardInterrupt: 

### Evaluation

In [None]:
# Imported under an alias, so the name GTUtility is not rebound by this cell.
from data_icdar2013 import GTUtility as IcdarGTUtility
# ICDAR 2013 ground truth used for the visual evaluation below. Constructed
# without test=True -- presumably the training split, TODO confirm.
gt_util_icdar = IcdarGTUtility('data/ICDAR2013/')

In [None]:
# Dataset to visualise. Swap the two lines to look at the validation split instead.
#gt_util = gt_util_val
gt_util = gt_util_icdar

# 32 random sample indices, drawn with replacement; enable the seed for a
# repeatable selection.
#np.random.seed(1337)
idxs = np.random.randint(0, gt_util.num_samples, 32)
#idxs = np.arange(gt_util.num_samples)
img_h, img_w = model.image_size

data = []    # ground truth entry for each selected sample
inputs = []  # preprocessed network inputs
images = []  # resized RGB images for plotting
for i in idxs:
    img_path = os.path.join(gt_util.image_path, gt_util.image_names[i])
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports a missing or unreadable file by returning None
        raise IOError('could not read image: %s' % img_path)
    inputs.append(preprocess(img, model.image_size))
    # The interpolation flag must be passed by keyword: the third positional
    # parameter of cv2.resize is `dst`. dsize is given as (width, height).
    img = cv2.resize(img, (img_w, img_h), interpolation=cv2.INTER_LINEAR)
    img = img[:, :, (2,1,0)] # BGR to RGB
    img = img / 256.  # float image for plt.imshow
    images.append(img)
    data.append(gt_util.data[i])
inputs = np.asarray(inputs)

preds = model.predict(inputs, batch_size=1, verbose=1)

In [None]:
# Decode every prediction and draw it over its image together with the ground
# truth. Samples with an empty ground truth entry are not plotted.
for pred, gt, image in zip(preds, data, images):
    res = prior_util.decode(pred, confidence_threshold=0.6, keep_top_k=100)
    if len(gt) == 0:
        continue
    plt.figure()
    plt.imshow(image)
    prior_util.plot_results(res, classes=gt_util.classes, show_labels=True, gt_data=gt)
    plt.show()