# Split the data into train and test sets

In [None]:
import os

import cv2
import numpy as np

path = '../data/tocs/'
# os.listdir returns entries in arbitrary order, so sort before shuffling;
# otherwise the fixed seed below does not reproduce the same split everywhere.
files = [path+i for i in sorted(os.listdir(path))]
np.random.seed(1)
np.random.shuffle(files)

# The last 20 shuffled files are held out for testing, the rest are used for
# training. With 20 or fewer files everything ends up in the test split.
splits = (
    ('../data/train/', files[:-20]),  # train
    ('../data/test/', files[-20:]),   # test
)
for out_dir, split_files in splits:
    # cv2.imwrite does not create missing directories, so make sure the
    # destination exists before writing into it.
    os.makedirs(out_dir, exist_ok=True)
    for file in split_files:
        img = cv2.imread(file)
        if img is None:
            # cv2.imread returns None for files it cannot decode
            # (e.g. hidden/system files that live next to the images).
            print('Skipping unreadable file: ' + file)
            continue
        cv2.imwrite(out_dir+file.split('/')[-1], img)
print('Done')

# Train the model

In [None]:
!python train.py

In [None]:
!python eval.py --checkpoint_path=./checkpoint/model-5000

# Test

In [None]:
import os
import numpy as np
import tensorflow as tf

import cv2
from matplotlib import pyplot as plt
import json
import codecs


from PIL import Image, ImageDraw, ImageFont 
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

import config as cfg

def visualization(image_path, points, label, vis_color = (255,255,255), font_path=None):
    """
    Visualize groundtruth label to image.

    The polygon is drawn as a closed green outline (thickness 2) and the
    label text is written at the polygon's first vertex.

    Args:
        image_path: path of the image, loaded with cv2.imread.
        points: polygon coordinates; flattened/reshaped to (N, 2) int32 pairs.
        label: text drawn at the first polygon point.
        vis_color: colour passed to PIL for the label text.
        font_path: optional TrueType font file. When omitted, a module-level
            `font_path` is used if one is defined; otherwise PIL's built-in
            default font is used (which may lack glyphs for non-Latin labels).
    Returns:
        The annotated image as a numpy array. No colour conversion is applied
        to the array loaded by cv2.imread.
    """
    points = np.asarray(points, dtype=np.int32)
    points = np.reshape(points, [-1,2])
    image = cv2.imread(image_path)
    cv2.polylines(image, [points], 1, (0,255,0), 2)
    image = Image.fromarray(image)

    if font_path is None:
        # Backward compatibility: the function used to read a global named
        # `font_path`, which raised NameError whenever it was not defined.
        font_path = globals().get('font_path')
    if font_path:
        font = ImageFont.truetype(font_path, 20, encoding='utf-8')
    else:
        font = ImageFont.load_default()

    draw = ImageDraw.Draw(image)
    # PIL expects a plain (x, y) pair rather than a numpy row.
    anchor = tuple(int(v) for v in points[0])
    draw.text(anchor, label, vis_color, font=font)
    return np.array(image)

In [13]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os

import cv2
import time
import argparse
import numpy as np
from matplotlib import pyplot as plt

import config as cfg
from common import polygons_to_mask

from model.tensorpack_model import *

from tensorpack.predict import MultiTowerOfflinePredictor, OfflinePredictor, PredictConfig
from tensorpack.tfutils import SmartInit, get_tf_version_tuple
from tensorpack.tfutils.export import ModelExporter

def cal_sim(str1, str2):
    """
    Similarity of two sequences as 1 - normalized edit distance.

    The Levenshtein distance (insert / delete / substitute, each costing 1)
    is divided by the length of the longer input. Returns 1.0 when both
    inputs are empty.
    """
    rows, cols = len(str1) + 1, len(str2) + 1
    longest = max(rows, cols) - 1

    # dist[r][c] = edit distance between str1[:r] and str2[:c]
    dist = np.zeros((rows, cols))
    dist[:, 0] = np.arange(rows)
    dist[0, :] = np.arange(cols)

    for r, left in enumerate(str1, start=1):
        for c, right in enumerate(str2, start=1):
            diagonal = dist[r - 1][c - 1]
            if left == right:
                dist[r][c] = diagonal
            else:
                dist[r][c] = min(diagonal, dist[r][c - 1], dist[r - 1][c]) + 1

    if longest == 0:
        return 1.0
    return 1.0 - dist[rows - 1][cols - 1] / longest


def preprocess(image, points, size=cfg.image_size):
    """
    Preprocess for test.

    Multiplies the image by the polygon mask, crops to the mask's bounding
    box, resizes so the longer side equals `size` (aspect ratio kept), pads
    the shorter side with zeros on both sides up to `size` x `size`, and
    divides by 255.

    Args:
        image: test image
        points: text polygon
        size: test image size
    Returns:
        The padded crop as a float array divided by 255.
    Raises:
        ValueError: if the polygon mask is empty, i.e. there is nothing to crop.
    """
    height, width = image.shape[:2]
    # NOTE(review): assumes polygons_to_mask yields 1 inside the polygon and
    # 0 outside, so the multiplication blanks the background - confirm.
    mask = polygons_to_mask([np.asarray(points, np.float32)], height, width)
    x, y, w, h = cv2.boundingRect(mask)
    if w == 0 or h == 0:
        # Previously surfaced as an opaque ZeroDivisionError below.
        raise ValueError('text polygon does not cover any pixel of the image')
    mask = np.expand_dims(np.float32(mask), axis=-1)
    image = image * mask
    image = image[y:y+h, x:x+w,:]

    # Scale the longer side to `size`. The shorter side is clamped to at least
    # one pixel: for very elongated crops int() rounds it down to 0, which
    # cv2.resize rejects.
    if h > w:
        new_height, new_width = size, max(1, int(w*size/h))
    else:
        new_height, new_width = max(1, int(h*size/w)), size
    image = cv2.resize(image, (new_width, new_height))

    # Centre the resized crop; any odd leftover pixel goes to the right/bottom.
    if new_height > new_width:
        padding_top, padding_down = 0, 0
        padding_left = (size - new_width)//2
        padding_right = size - padding_left - new_width
    else:
        padding_left, padding_right = 0, 0
        padding_top = (size - new_height)//2
        padding_down = size - padding_top - new_height

    image = cv2.copyMakeBorder(image, padding_top, padding_down, padding_left, padding_right, borderType=cv2.BORDER_CONSTANT, value=[0,0,0])

    image = image/255.
    return image


def label2str(preds, probs, label_dict, eos='EOS'):
    """
    Decode a predicted index sequence into text.

    Indices are mapped through `label_dict` until the first `eos` entry,
    which is not included. Returns the joined string together with `probs`
    cut to one entry more than the number of decoded characters, capped at
    cfg.seq_len + 1 entries.
    """
    chars = []
    for index in preds:
        token = label_dict[index]
        if token == eos:
            break
        chars.append(token)

    keep = min(len(chars) + 1, cfg.seq_len + 1)
    text = ''.join(chars)
    return text, probs[:keep]

def eval(args, filenames, polygons, labels, label_dict=cfg.label_dict):
    """
    Evaluate a checkpoint on labelled samples and print the score and timing.

    Args:
        args: dict with key 'checkpoint_path' (checkpoint to restore).
        filenames: image paths.
        polygons: text polygon for each image, passed to preprocess().
        labels: ground truth for each image.
        label_dict: lookup from predicted index to character.

    Prints one summary line. If no sample was evaluated, a short notice is
    printed instead of dividing by zero.
    """
    Normalized_ED = 0.
    total_num = 0
    total_time = 0

    model = AttentionOCR()
    # Query the tensor names once; index 0 holds the inputs, index 1 the outputs.
    tensor_names = model.get_inferene_tensor_names()
    predcfg = PredictConfig(
        model=model,
        session_init=SmartInit(args['checkpoint_path']),
        input_names=tensor_names[0],
        output_names=tensor_names[1])

    predictor = OfflinePredictor(predcfg)

    for filename, points, label in zip(filenames, polygons, labels):
        image = cv2.imread(filename)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = preprocess(image, points, cfg.image_size)

        # Only the forward pass is timed, not loading/preprocessing.
        # NOTE(review): the trailing arguments (dummy label of ones, False, 1.)
        # presumably are label / is_training / dropout keep prob - confirm
        # against get_inferene_tensor_names().
        before = time.time()
        preds, probs = predictor(np.expand_dims(image, axis=0), np.ones([1,cfg.seq_len+1], np.int32), False, 1.)
        after = time.time()

        total_time += after - before
        preds, probs = label2str(preds[0], probs[0], label_dict)

        # A sample scores 1 when the predicted string is contained in `label`,
        # so the value printed as "1-N.E.D" below is really a hit rate.
        # The edit-distance alternative is: sim = cal_sim(preds, label)
        sim = 1 if str(preds) in label else 0

        total_num += 1
        Normalized_ED += sim

    if total_num == 0:
        # Nothing was evaluated; the averages below would divide by zero.
        print("total_num: 0, nothing to evaluate")
        return

    print("total_num: %d, 1-N.E.D: %.4f, average time: %.4f" % (total_num, Normalized_ED/total_num, total_time/total_num))



# Evaluate the step-5000 checkpoint on the 'dy_test' dataset.
from dataset import DY

args = dict(checkpoint_path='./checkpoint/model-5000')

# The name DY is rebound from the dataset class to the loaded instance.
DY = DY('dy_test')
DY.load_data()
print(len(DY.filenames))

eval(
    args,
    DY.filenames,
    DY.points,
    DY.transcripts,
)

../data/dy/test
51
[32m[0321 01:44:06 @collection.py:146][0m New collections created in tower : tf.GraphKeys.MODEL_VARIABLES of size 452, tf.GraphKeys.REGULARIZATION_LOSSES of size 124
[32m[0321 01:44:06 @collection.py:165][0m These collections were modified but restored in : (tf.GraphKeys.SUMMARIES: 0->45), (tf.GraphKeys.UPDATE_OPS: 0->226)
[32m[0321 01:44:06 @sessinit.py:87][0m [5m[31mWRN[0m The following variables are in the checkpoint, but not found in the graph: global_step, learning_rate
[32m[0321 01:44:10 @sessinit.py:114][0m Restoring checkpoint from ./checkpoint/model-5000 ...
INFO:tensorflow:Restoring parameters from ./checkpoint/model-5000
total_num: 51, 1-N.E.D: 0.9020, average time: 0.1203


In [None]:
!python export.py --pb_path=./pb/dingyu.pb --checkpoint_path=./checkpoint/model-5000

In [None]:
!python test.py --pb_path=./pb/dingyu.pb --img_folder=../data/dy/test

In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os

import cv2
import time
import argparse
import numpy as np
from matplotlib import pyplot as plt

import config as cfg
import tensorflow as tf
from common import polygons_to_mask

class TextRecognition(object):
    """
    AttentionOCR with tensorflow pb model.

    Loads a frozen GraphDef from `pb_file` into a private graph, opens a
    session on it and looks up the input/output tensors by name. Call
    close() when done, or use the object as a context manager, to release
    the session.
    """
    def __init__(self, pb_file, seq_len):
        """
        Args:
            pb_file: path of the frozen .pb graph.
            seq_len: length of the dummy label fed at inference time.
        """
        self.pb_file = pb_file
        self.seq_len = seq_len
        self.sess = None  # set by init_model(); None again after close()
        self.init_model()

    def init_model(self):
        """Import the graph, open the session and cache the tensors used by predict()."""
        self.graph = tf.Graph()
        with self.graph.as_default():
            # tf.gfile.FastGFile is deprecated in favour of tf.gfile.GFile.
            with tf.gfile.GFile(self.pb_file, 'rb') as f:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
                tf.import_graph_def(graph_def, name='')

        self.sess = tf.Session(graph=self.graph)

        self.img_ph = self.sess.graph.get_tensor_by_name('image:0')
        self.label_ph = self.sess.graph.get_tensor_by_name('label:0')
        self.is_training = self.sess.graph.get_tensor_by_name('is_training:0')
        self.dropout = self.sess.graph.get_tensor_by_name('dropout_keep_prob:0')
        self.preds = self.sess.graph.get_tensor_by_name('sequence_preds:0')
        self.probs = self.sess.graph.get_tensor_by_name('sequence_probs:0')

    def predict(self, image, label_dict, EOS='EOS'):
        """
        Run the model on one preprocessed image batch and decode the first result.

        Args:
            image: input fed to the 'image:0' tensor; only the first
                prediction of the batch is decoded.
            label_dict: lookup from predicted index to character.
            EOS: label_dict entry that terminates decoding.
        Returns:
            (results, probabilities): the list of decoded characters up to
            (not including) EOS, and the probabilities for one more entry
            than that, capped at seq_len.
        """
        # The label input is fed a dummy all-ones sequence for a batch of one.
        pred_sentences, pred_probs = self.sess.run(
            [self.preds, self.probs],
            feed_dict={
                self.is_training: False,
                self.dropout: 1.0,
                self.img_ph: image,
                self.label_ph: np.ones((1,self.seq_len), np.int32),
            })

        results = []
        for char in pred_sentences[0]:
            token = label_dict[char]
            if token == EOS:
                break
            results.append(token)
        probabilities = pred_probs[0][:min(len(results)+1,self.seq_len)]

        return results, probabilities

    def close(self):
        """Release the TensorFlow session; safe to call more than once."""
        sess = getattr(self, 'sess', None)
        if sess is not None:
            sess.close()
            self.sess = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

def preprocess(image, points, size=cfg.image_size):
    """
    Preprocess for test.

    Multiplies the image by the polygon mask, crops to the mask's bounding
    box, resizes so the longer side equals `size` (aspect ratio kept), pads
    the shorter side with zeros on both sides up to `size` x `size`, and
    divides by 255.

    Args:
        image: test image
        points: text polygon
        size: test image size
    Returns:
        The padded crop as a float array divided by 255.
    Raises:
        ValueError: if the polygon mask is empty, i.e. there is nothing to crop.
    """
    height, width = image.shape[:2]
    # NOTE(review): assumes polygons_to_mask yields 1 inside the polygon and
    # 0 outside, so the multiplication blanks the background - confirm.
    mask = polygons_to_mask([np.asarray(points, np.float32)], height, width)
    x, y, w, h = cv2.boundingRect(mask)
    if w == 0 or h == 0:
        # Previously surfaced as an opaque ZeroDivisionError below.
        raise ValueError('text polygon does not cover any pixel of the image')
    mask = np.expand_dims(np.float32(mask), axis=-1)
    image = image * mask
    image = image[y:y+h, x:x+w,:]

    # Scale the longer side to `size`. The shorter side is clamped to at least
    # one pixel: for very elongated crops int() rounds it down to 0, which
    # cv2.resize rejects.
    if h > w:
        new_height, new_width = size, max(1, int(w*size/h))
    else:
        new_height, new_width = max(1, int(h*size/w)), size
    image = cv2.resize(image, (new_width, new_height))

    # Centre the resized crop; any odd leftover pixel goes to the right/bottom.
    if new_height > new_width:
        padding_top, padding_down = 0, 0
        padding_left = (size - new_width)//2
        padding_right = size - padding_left - new_width
    else:
        padding_left, padding_right = 0, 0
        padding_top = (size - new_height)//2
        padding_down = size - padding_top - new_height

    image = cv2.copyMakeBorder(image, padding_top, padding_down, padding_left, padding_right, borderType=cv2.BORDER_CONSTANT, value=[0,0,0])

    image = image/255.
    return image

def test(args):
    """
    Run the exported pb model on every image of a folder and display results.

    Each image is treated as one text region covering the full frame. For
    every readable image the decoded characters and their probabilities are
    printed, followed by the inference time, and the preprocessed input is
    shown with matplotlib.

    Args:
        args: dict with keys 'pb_path' (frozen graph file) and 'img_folder'
            (directory holding the images).
    """
    model = TextRecognition(args['pb_path'], cfg.seq_len+1)

    try:
        # Sorted so the images are processed in a stable order.
        for filename in sorted(os.listdir(args['img_folder'])):
            img_path = os.path.join(args['img_folder'], filename)
            image = cv2.imread(img_path)
            if image is None:
                # cv2.imread returns None for files it cannot decode;
                # cvtColor would otherwise fail on it.
                print('Skipping unreadable file: %s' % img_path)
                continue
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            height, width = image.shape[:2]

            # Use the full image rectangle as the text polygon.
            points = [[0,0], [width-1,0], [width-1,height-1], [0,height-1]]

            image = preprocess(image, points, cfg.image_size)
            image = np.expand_dims(image, 0)

            before = time.time()
            preds, probs = model.predict(image, cfg.label_dict)
            after = time.time()

            print(preds, probs)
            print('time: %.4f' % (after - before))

            plt.imshow(image[0,:,:,:])
            plt.show()
    finally:
        # Release the TensorFlow session opened by the model.
        model.sess.close()




# Run the exported graph over the held-out test images.
args = dict(
    pb_path='./pb/dingyu.pb',
    img_folder='../data/dy/test',
)
test(args)