In [None]:
import os
import math
import random
import time
import numpy as np
import tensorflow as tf
import cv2
from tqdm import tqdm
slim = tf.contrib.slim

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

In [None]:
import sys
sys.path.append('../')

In [None]:
from nets import s3d as net
from nets import ssd_common, np_methods
from preprocessing import s3d_preprocessing, ssd_vgg_preprocessing
from notebooks import visualization

In [None]:
# Interactive TensorFlow session. `allow_growth=True` asks TensorFlow to grow
# its GPU memory allocation on demand instead of reserving it all up front.
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(
    gpu_options=gpu_options,
    log_device_placement=False,
)
isess = tf.InteractiveSession(config=config)

## SSD 300 Model

The SSD 300 network takes 300x300 image inputs (the model loaded in this notebook is configured with `net_shape = (512, 512)` instead). To feed an arbitrary image, it is first resized to the network input shape (i.e. `Resize.WARP_RESIZE`). Note that even though this may change the width / height ratio, the SSD model performs well on resized images (and this is the default behaviour in the original Caffe implementation).

SSD anchors correspond to the default bounding boxes encoded in the network. The SSD net output provides offsets to the coordinates and dimensions of these anchors.

In [None]:
##### Input placeholder.
# Network input size; passed both to the pre-processing and to the anchor generation below.
net_shape = (512, 512)
data_format = 'NHWC'
# One uint8 image of unconstrained height/width with 3 channels.
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
# Evaluation pre-processing: resize to SSD net shape.
# No labels or boxes are supplied (both None). `labels_pre` and `bboxes_pre` are not
# referenced again in this notebook; `bbox_img` is used later by `process_image`.
image_pre, labels_pre, bboxes_pre, bbox_img = s3d_preprocessing.preprocess_for_eval(
    img_input, None, None, net_shape, data_format)
# Add a leading dimension so the network receives a batch containing one image.
image_4d = tf.expand_dims(image_pre, 0)

# Define the SSD model.
# `reuse` becomes True when `ssd_net` already exists, i.e. when this cell has been
# executed before in the same kernel -- presumably to avoid variable-name clashes
# when the graph is built a second time (TODO confirm).
reuse = True if 'ssd_net' in locals() else None
ssd_net = net.SSDNet()
# SSD default anchor boxes.
ssd_anchors = ssd_net.anchors(net_shape)
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    # Only the first two of the four network outputs are kept.
    predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)
# Disabled alternative: take the checkpoint name from the last line of the
# training directory's `checkpoint` file.
# model_path = '/workspace/OCR/models/train_models_s3d_3/checkpoint'
# with open(model_path, 'r') as f:
#     ckpts = f.readlines()
# ckpt_filename = ckpts[-1].split(':')[1][2:-2]
ckpt_filename = '/workspace/OCR/models/s3d/model.ckpt-30324'
# Restore SSD model.
# Other checkpoints tried during experiments:
# ckpt_filename = '../My_Model/model.ckpt-52918'
# ckpt_filename = '/workspace/OCR/models/train_models_1class/model.ckpt-207004'
# ckpt_filename = '/workspace/OCR/models/vgg/model.ckpt-229866'
# All variables are initialised first and the checkpoint is restored afterwards,
# so restored values replace the freshly initialised ones.
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)

## Post-processing pipeline

The SSD outputs need to be post-processed to provide proper detections. Namely, we follow these common steps:

* Select boxes above a classification threshold;
* Clip boxes to the image shape;
* Apply non-maximum suppression: among boxes whose overlap score exceeds a threshold, keep only the highest-scoring one;
* If necessary, resize bounding boxes to original image shape.

In [None]:
# Class id (as a string key) -> short class name.
class_info = dict([
    ("1", 'hs'),
    ("2", 'ss'),
    ("3", 'ts'),
    ("4", 'jfx'),
])

In [None]:
def plt_bboxes(img, classes, classes_info, scores, bboxes, figsize=(10,10), linewidth=1.5, save=False, name='demo.jpg'):
    """Draw detection boxes on ``img`` and either display or save the result.

    Rectangles are drawn directly onto ``img``, so the caller's array is
    modified in place.

    Args:
        img: image array indexed as (height, width, channel). The loop in this
            notebook passes an RGB image (converted with ``COLOR_BGR2RGB``).
        classes: 1-D array of class ids; entries with an id below 1 are skipped.
        classes_info: class-id -> name mapping. Currently unused because label
            drawing is disabled.
        scores: per-box scores. Currently unused because label drawing is
            disabled.
        bboxes: array indexed as ``bboxes[i, 0..3]`` holding
            (ymin, xmin, ymax, xmax) as fractions of the image height / width.
        figsize: accepted for compatibility; the figure is always 16x16 inches.
        linewidth: accepted for compatibility; rectangles are always 4 px thick.
        save: when true, write the image to ``name`` instead of showing it.
        name: output path used when ``save`` is true.
    """
    height, width = img.shape[:2]
    # NOTE: the original code called cv2.resize / cv2.cvtColor here and threw
    # the results away, so the image was never resized or converted. Those
    # no-op calls are removed; the drawn output is unchanged.
    colors = {
        1 : (255, 0, 0),
        2 : (0, 255, 0),
        3 : (0, 0, 255)
    }
    # Class ids without a dedicated colour (e.g. class 4 in `class_info`) fall
    # back to this colour instead of raising KeyError.
    fallback_color = (255, 255, 0)
    for i in range(classes.shape[0]):
        cls_id = int(classes[i])
        if cls_id < 1:
            continue
        # Box coordinates are relative; scale them to pixel positions.
        ymin = int(bboxes[i, 0] * height)
        xmin = int(bboxes[i, 1] * width)
        ymax = int(bboxes[i, 2] * height)
        xmax = int(bboxes[i, 3] * width)
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), colors.get(cls_id, fallback_color), 4)
        # Label drawing is disabled; kept for reference:
        #     class_name = str(classes_info[str(cls_id)])
        #     s = '%s/%.3f' % (class_name, scores[i])
        #     p1 = (xmin-5, ymin)
        #     cv2.putText(img, s, p1, cv2.FONT_HERSHEY_DUPLEX, 0.3, colors[cls_id], 1)
    if save:
        # cv2.imwrite expects BGR channel order, while the image handed in by
        # this notebook is RGB; convert so the saved colours are not swapped.
        cv2.imwrite(name, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
    else:
        plt.figure(figsize = (16, 16))
        plt.imshow(img)

In [None]:
def bboxes_area(bboxes):
    """Return the area of one box given as (ymin, xmin, ymax, xmax)."""
    ymin, xmin, ymax, xmax = bboxes[0], bboxes[1], bboxes[2], bboxes[3]
    return (ymax - ymin) * (xmax - xmin)

def is_contain(box1, box2):
    """Return True when box1 lies strictly inside box2.

    Both boxes are indexed 0..3; indices 0 and 1 of box1 must be greater than
    those of box2, and indices 2 and 3 must be smaller. Boxes that share an
    edge are not considered contained.
    """
    starts_inside = box1[0] > box2[0] and box1[1] > box2[1]
    if not starts_inside:
        return False
    ends_inside = box1[2] < box2[2] and box1[3] < box2[3]
    return True if ends_inside else False

In [None]:
def bboxes_sort(classes, scores, bboxes, top_k=400):
    """Sort detections by decreasing score, keep the best ``top_k`` and drop
    implausible ones.

    After sorting, a detection is discarded when any of these holds:
      * its box area is larger than 0.4;
      * it is class 1 and its box area is larger than 0.2;
      * it is not class 1 and its score is below 0.2.

    Args:
        classes: 1-D NumPy array of class ids.
        scores: 1-D NumPy array of scores, same length as ``classes``.
        bboxes: NumPy array of shape (N, 4) with (ymin, xmin, ymax, xmax) rows.
        top_k: maximum number of detections kept after sorting. ``None`` or a
            negative value means "no limit". (Previously ``-1`` was used as a
            slice end and silently dropped the lowest-scored detection.)

    Returns:
        Tuple ``(classes, scores, bboxes)`` of the surviving detections, in
        decreasing score order.
    """
    order = np.argsort(-scores)
    if top_k is not None and top_k >= 0:
        order = order[:top_k]
    classes = classes[order]
    scores = scores[order]
    bboxes = bboxes[order]

    # Vectorised equivalent of the per-box area / score checks.
    boxes_2d = bboxes.reshape(-1, 4)
    areas = (boxes_2d[:, 2] - boxes_2d[:, 0]) * (boxes_2d[:, 3] - boxes_2d[:, 1])
    is_class_one = classes == 1
    drop = (
        (areas > 0.4)
        | (is_class_one & (areas > 0.2))
        | (~is_class_one & (scores < 0.2))
    )
    # Plain `bool` mask: the `np.bool` alias no longer exists in current NumPy.
    keep_bboxes = np.logical_not(drop).astype(bool)
    return classes[keep_bboxes], scores[keep_bboxes], bboxes[keep_bboxes]


def bboxes_IOS(bboxes1, bboxes2):
    """Compute the intersection-over-smaller-area (IoS) overlap of boxes.

    The intersection area is divided by the area of the smaller of the two
    boxes. This is not the Jaccard index (intersection over union): a box
    fully contained in a larger one scores 1.0.

    Args:
        bboxes1: box or array of boxes, last axis (ymin, xmin, ymax, xmax).
        bboxes2: box or array of boxes, broadcastable against ``bboxes1``.

    Returns:
        Array of overlap values. Pairs in which the smaller box has no
        positive area yield 0 instead of NaN/inf.
    """
    # Transposing puts the coordinate axis first, so index 0..3 selects
    # ymin, xmin, ymax, xmax for a single box and for an (N, 4) array alike.
    boxes_a = np.transpose(bboxes1)
    boxes_b = np.transpose(bboxes2)
    # Intersection rectangle, clamped to zero size when the boxes are disjoint.
    int_ymin = np.maximum(boxes_a[0], boxes_b[0])
    int_xmin = np.maximum(boxes_a[1], boxes_b[1])
    int_ymax = np.minimum(boxes_a[2], boxes_b[2])
    int_xmax = np.minimum(boxes_a[3], boxes_b[3])
    int_h = np.maximum(int_ymax - int_ymin, 0.)
    int_w = np.maximum(int_xmax - int_xmin, 0.)
    int_vol = int_h * int_w
    # Area of the smaller box of each pair.
    vol1 = (boxes_a[2] - boxes_a[0]) * (boxes_a[3] - boxes_a[1])
    vol2 = (boxes_b[2] - boxes_b[0]) * (boxes_b[3] - boxes_b[1])
    smaller_vol = np.minimum(vol1, vol2)
    # Guard the division: a degenerate (zero-area) box overlaps nothing.
    has_area = smaller_vol > 0
    safe_vol = np.where(has_area, smaller_vol, 1)
    return np.where(has_area, int_vol / safe_vol, 0.)

def bboxes_nms(classes, scores, bboxes, nms_threshold=0.45):
    """Apply class-aware non-maximum suppression to bounding boxes.

    Boxes are visited in the given order and each surviving box can only
    suppress boxes that come after it, so the inputs are expected to be
    sorted by decreasing score already.

    Suppression rules, using the overlap returned by ``bboxes_IOS``:
      * a class-1 box removes later class-1 boxes whose overlap is at least
        ``nms_threshold``;
      * any other box removes later non-class-1 boxes (of any class) whose
        overlap is at least ``nms_threshold / 2``.

    Args:
        classes: 1-D NumPy array of class ids.
        scores: 1-D NumPy array of scores.
        bboxes: NumPy array of shape (N, 4).
        nms_threshold: overlap threshold for class-1 boxes.

    Returns:
        Tuple ``(classes, scores, bboxes)`` restricted to the kept boxes.
    """
    # Plain `bool`: the `np.bool` alias no longer exists in current NumPy.
    keep_bboxes = np.ones(scores.shape, dtype=bool)
    for i in range(scores.size-1):
        if not keep_bboxes[i]:
            continue
        following = slice(i + 1, None)
        # Overlap of box i with every box after it.
        overlap = bboxes_IOS(bboxes[i], bboxes[following])
        if classes[i] == 1:
            # Keep later boxes that overlap little or belong to another class.
            keep_overlap = np.logical_or(overlap < nms_threshold,
                                         classes[following] != classes[i])
        else:
            # Stricter threshold across all non-class-1 boxes; class-1 boxes
            # are never suppressed by a non-class-1 box.
            keep_overlap = np.logical_or(overlap < (nms_threshold / 2),
                                         classes[following] == 1)
        keep_bboxes[following] = np.logical_and(keep_bboxes[following], keep_overlap)

    idxes = np.where(keep_bboxes)
    return classes[idxes], scores[idxes], bboxes[idxes]

def bboxes_filter(classes, scores, bboxes):
    """Drop non-class-1 boxes that almost coincide with an earlier kept one.

    A box whose class is not 1 is removed when its ``bboxes_IOS`` overlap with
    any earlier, still-kept, non-class-1 box exceeds 0.9. Class-1 boxes are
    never removed.

    Args:
        classes: 1-D NumPy array of class ids.
        scores: 1-D NumPy array of scores.
        bboxes: NumPy array of shape (N, 4).

    Returns:
        Tuple ``(classes, scores, bboxes)`` restricted to the kept boxes.
    """
    # Plain `bool`: the `np.bool` alias no longer exists in current NumPy.
    keep_bboxes = np.ones(scores.shape, dtype=bool)
    for i in range(scores.size):
        if classes[i] == 1:
            continue
        for j in range(i):
            if classes[j] != 1 and keep_bboxes[j]:
                # The original called `IOS_calculation`, which is not defined
                # in this notebook; `bboxes_IOS` is the overlap helper here.
                ios = bboxes_IOS(bboxes[i], bboxes[j])
                if ios > 0.9:
                    keep_bboxes[i] = False
                    # Already suppressed; further comparisons cannot change it.
                    break
    return classes[keep_bboxes], scores[keep_bboxes], bboxes[keep_bboxes]

In [None]:
##### Main image processing routine.
def process_image(img, select_threshold=0.1, nms_threshold=.45, net_shape=(512, 512)):
    """Run the detector on one image and post-process its raw outputs.

    Steps: run the network, select boxes scoring above ``select_threshold``,
    clip them, sort and filter them with ``bboxes_sort``, apply
    ``bboxes_nms`` and finally resize the boxes with ``bboxes_resize``.

    Args:
        img: image array fed to the ``img_input`` placeholder.
        select_threshold: score threshold passed to ``ssd_bboxes_select``.
        nms_threshold: overlap threshold passed to ``bboxes_nms``.
        net_shape: network input shape passed to ``ssd_bboxes_select`` as
            ``img_shape``. Note the anchors come from the module-level
            ``ssd_anchors``, which is built from the module-level ``net_shape``.

    Returns:
        Tuple ``(rclasses, rscores, rbboxes, net_time, nms_time)``. The two
        time values are ``time.time()`` timestamps taken right after the
        network run and right after post-processing, not durations.
    """
    # Run SSD network. The pre-processed image itself is not needed here, so
    # it is no longer fetched.
    rpredictions, rlocalisations, rbbox_img = isess.run(
        [predictions, localisations, bbox_img],
        feed_dict={img_input: img})
    net_time = time.time()
    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape, num_classes=4,
        decode=True)

    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    # Keep every detection: `top_k=None` slices with `[:None]`. The previous
    # `top_k=-1` sliced with `[:-1]` and dropped the lowest-scored box.
    rclasses, rscores, rbboxes = bboxes_sort(rclasses, rscores, rbboxes, top_k=None)
    rclasses, rscores, rbboxes = bboxes_nms(rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Optional extra de-duplication step, currently disabled:
    #     rclasses, rscores, rbboxes = bboxes_filter(rclasses, rscores, rbboxes)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    nms_time = time.time()
    return rclasses, rscores, rbboxes, net_time, nms_time

In [None]:
# Directory holding the images used to test the detector.
path = '../../datasets/TAL_OCR/190326_4122/test/'
# Other image directories used during experiments:
# path = '../../datasets/TAL_OCR/xcs_highgrade/'
# path = '../../datasets/TAL_OCR/190423_5594/imgs/'
# path = './'
# path = '../../datasets/TAL_OCR/badcase_img/'
# path = '../../datasets/TAL_OCR/xc_test_imgs/'

# Alphabetically ordered directory entries, so image indices are stable across runs.
image_names = os.listdir(path)
image_names.sort()

In [None]:
# Run detection on at most the first 10 images. Clamping to the number of
# available files avoids an IndexError when the directory holds fewer.
total_num = min(10, len(image_names))
for i in tqdm(range(total_num)):
    img = cv2.imread(os.path.join(path, image_names[i]))
    if img is None:
        # cv2.imread returns None for files it cannot decode; report the
        # name and move on, as the previous bare `except` did.
        print(image_names[i])
        continue
    # OpenCV loads images as BGR; the detector and matplotlib get RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    rclasses, rscores, rbboxes, net_time, nms_time = process_image(img)
    plt_bboxes(img, rclasses, class_info, rscores, rbboxes,
               save=False,
               name='/workspace/OCR/datasets/TAL_OCR/pa_result/' + str(i) + '.jpg')
    

In [None]:
# Scratch calculation left over from experimentation; nothing else in the notebook uses it.
0.92/0.92